Skip to main content

script/dom/file/
blob.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::ptr;
6use std::rc::Rc;
7
8use dom_struct::dom_struct;
9use encoding_rs::UTF_8;
10use js::context::JSContext;
11use js::jsapi::JSObject;
12use js::realm::CurrentRealm;
13use js::rust::HandleObject;
14use js::typedarray::{ArrayBufferU8, Uint8};
15use net_traits::filemanager_thread::RelativePos;
16use rustc_hash::FxHashMap;
17use script_bindings::codegen::GenericBindings::TextDecoderStreamBinding::TextDecoderStreamMethods;
18use script_bindings::reflector::{Reflector, reflect_dom_object_with_proto_and_cx};
19use servo_base::id::{BlobId, BlobIndex};
20use servo_constellation_traits::{BlobData, BlobImpl};
21use uuid::Uuid;
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::BlobBinding;
25use crate::dom::bindings::codegen::Bindings::BlobBinding::BlobMethods;
26use crate::dom::bindings::codegen::UnionTypes::ArrayBufferOrArrayBufferViewOrBlobOrString;
27use crate::dom::bindings::error::{Error, Fallible};
28use crate::dom::bindings::reflector::DomGlobal;
29use crate::dom::bindings::root::DomRoot;
30use crate::dom::bindings::serializable::Serializable;
31use crate::dom::bindings::str::DOMString;
32use crate::dom::bindings::structuredclone::StructuredData;
33use crate::dom::encoding::textdecoderstream::TextDecoderStream;
34use crate::dom::globalscope::GlobalScope;
35use crate::dom::promise::Promise;
36use crate::dom::stream::readablestream::{ReadableStream, pipe_through};
37use crate::script_runtime::CanGc;
38
/// DOM-side handle for a File API `Blob`.
///
/// The struct itself stores no byte data: it only remembers the [`BlobId`] it was
/// created with, and the accessors in this file hand that id to the owning
/// global (`self.global()`) to look up bytes, type, size, and so on.
///
/// <https://w3c.github.io/FileAPI/#dfn-Blob>
#[dom_struct]
pub(crate) struct Blob {
    /// Reflector linking this Rust object to its JS wrapper.
    reflector_: Reflector,
    /// Id copied from the `BlobImpl` this blob was constructed from; used as
    /// the lookup key for every query made against the global.
    #[no_trace]
    blob_id: BlobId,
}
46
47impl Blob {
48    pub(crate) fn new(
49        cx: &mut JSContext,
50        global: &GlobalScope,
51        blob_impl: BlobImpl,
52    ) -> DomRoot<Blob> {
53        Self::new_with_proto(cx, global, None, blob_impl)
54    }
55
56    fn new_with_proto(
57        cx: &mut JSContext,
58        global: &GlobalScope,
59        proto: Option<HandleObject>,
60        blob_impl: BlobImpl,
61    ) -> DomRoot<Blob> {
62        let dom_blob = reflect_dom_object_with_proto_and_cx(
63            Box::new(Blob::new_inherited(&blob_impl)),
64            global,
65            proto,
66            cx,
67        );
68        global.track_blob(&dom_blob, blob_impl);
69        dom_blob
70    }
71
72    pub(crate) fn new_inherited(blob_impl: &BlobImpl) -> Blob {
73        Blob {
74            reflector_: Reflector::new(),
75            blob_id: blob_impl.blob_id(),
76        }
77    }
78
79    /// Get a slice to inner data, this might incur synchronous read and caching
80    pub(crate) fn get_bytes(&self) -> Result<Vec<u8>, ()> {
81        self.global().get_blob_bytes(&self.blob_id)
82    }
83
84    /// Get a copy of the type_string
85    pub(crate) fn type_string(&self) -> String {
86        self.global().get_blob_type_string(&self.blob_id)
87    }
88
89    /// Get a FileID representing the Blob content,
90    /// used by URL.createObjectURL
91    pub(crate) fn get_blob_url_id(&self) -> Uuid {
92        self.global().get_blob_url_id(&self.blob_id)
93    }
94
95    /// <https://w3c.github.io/FileAPI/#blob-get-stream>
96    pub(crate) fn get_stream(&self, cx: &mut JSContext) -> Fallible<DomRoot<ReadableStream>> {
97        self.global().get_blob_stream(cx, &self.blob_id)
98    }
99}
100
101impl Serializable for Blob {
102    type Index = BlobIndex;
103    type Data = BlobImpl;
104
105    /// <https://w3c.github.io/FileAPI/#ref-for-serialization-steps>
106    fn serialize(&self) -> Result<(BlobId, BlobImpl), ()> {
107        let blob_id = self.blob_id;
108
109        // 1. Get a clone of the blob impl.
110        let blob_impl = self.global().serialize_blob(&blob_id);
111
112        // We clone the data, but the clone gets its own Id.
113        let new_blob_id = blob_impl.blob_id();
114
115        Ok((new_blob_id, blob_impl))
116    }
117
118    /// <https://w3c.github.io/FileAPI/#ref-for-deserialization-steps>
119    fn deserialize(
120        cx: &mut JSContext,
121        owner: &GlobalScope,
122        serialized: BlobImpl,
123    ) -> Result<DomRoot<Self>, ()> {
124        Ok(Blob::new(cx, owner, serialized))
125    }
126
127    fn serialized_storage<'a>(
128        reader: StructuredData<'a, '_>,
129    ) -> &'a mut Option<FxHashMap<BlobId, Self::Data>> {
130        match reader {
131            StructuredData::Reader(r) => &mut r.blob_impls,
132            StructuredData::Writer(w) => &mut w.blobs,
133        }
134    }
135}
136
/// <https://w3c.github.io/FileAPI/#convert-line-endings-to-native>
///
/// Returns a copy of `s` in which every `CR LF` pair, lone `CR`, and lone `LF`
/// is replaced by the platform's native line ending (`CR LF` when compiled for
/// Windows, `LF` otherwise). All other bytes are copied through untouched.
fn convert_line_endings_to_native(s: &[u8]) -> Vec<u8> {
    // Steps 1-2. Pick the native line ending for the compilation target.
    let native_line_ending: &[u8] = if cfg!(target_os = "windows") {
        b"\r\n"
    } else {
        b"\n"
    };

    // Step 3. Start from an empty result (pre-sized to the input length).
    let mut result = Vec::with_capacity(s.len());

    // Step 4. `remaining` plays the role of the spec's position variable:
    // it is always the not-yet-processed tail of `s`.
    let mut remaining = s;

    loop {
        // Steps 5-6 / 7.3-7.4. Collect the run of bytes that are neither CR
        // nor LF and append it verbatim.
        // <https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points>
        let token_len = remaining
            .iter()
            .position(|&byte| byte == b'\r' || byte == b'\n')
            .unwrap_or(remaining.len());
        let (token, at_newline) = remaining.split_at(token_len);
        result.extend_from_slice(token);

        // Step 7. Stop once the input is exhausted; otherwise `at_newline`
        // starts with CR or LF. Consume one logical newline:
        remaining = match at_newline {
            [] => break,
            // Step 7.1 with 7.1.3: CR immediately followed by LF counts once.
            [b'\r', b'\n', after @ ..] => after,
            // Step 7.1 (lone CR) or Step 7.2 (lone LF).
            [_, after @ ..] => after,
        };
        result.extend_from_slice(native_line_ending);
    }

    // Step 8. Return result.
    result
}
201
202/// <https://w3c.github.io/FileAPI/#process-blob-parts>
203#[expect(unsafe_code)]
204pub(crate) fn process_blob_parts(
205    mut blobparts: Vec<ArrayBufferOrArrayBufferViewOrBlobOrString>,
206    endings: BlobBinding::EndingType,
207) -> Result<Vec<u8>, ()> {
208    // Step 1. Let bytes be an empty sequence of bytes.
209    let mut bytes = vec![];
210    // Step 2. For each blobpart in blobparts:
211    for blobpart in &mut blobparts {
212        match blobpart {
213            // Step 2.1. If blobpart is a USVString, run the following substeps:
214            ArrayBufferOrArrayBufferViewOrBlobOrString::String(s) => {
215                // Step 2.1.1. Let s be blobpart.
216                // Step 2.1.2. If the endings member of options is "native",
217                // set s to the result of converting line endings to native of blobpart.
218                if endings == BlobBinding::EndingType::Native {
219                    let converted = convert_line_endings_to_native(&s.as_bytes());
220                    // Step 2.1.3. Append the result of UTF-8 encoding s to bytes.
221                    bytes.extend(converted);
222                } else {
223                    // Step 2.1.3: Append the result of UTF-8 encoding s to bytes.
224                    bytes.extend_from_slice(&s.as_bytes());
225                }
226            },
227            // Step 2.2. If element is a BufferSource,
228            // get a copy of the bytes held by the buffer source,
229            // and append those bytes to bytes.
230            ArrayBufferOrArrayBufferViewOrBlobOrString::ArrayBuffer(a) => unsafe {
231                let array_bytes = a.as_slice();
232                bytes.extend(array_bytes);
233            },
234            ArrayBufferOrArrayBufferViewOrBlobOrString::ArrayBufferView(a) => unsafe {
235                let view_bytes = a.as_slice();
236                bytes.extend(view_bytes);
237            },
238            // Step 2.3. If element is a Blob, append the bytes it represents to bytes.
239            ArrayBufferOrArrayBufferViewOrBlobOrString::Blob(b) => {
240                let blob_bytes = b.get_bytes().unwrap_or(vec![]);
241                bytes.extend(blob_bytes);
242            },
243        }
244    }
245
246    // Step 3. Return bytes.
247    Ok(bytes)
248}
249
250impl BlobMethods<crate::DomTypeHolder> for Blob {
251    // https://w3c.github.io/FileAPI/#constructorBlob
252    #[expect(non_snake_case)]
253    fn Constructor(
254        cx: &mut JSContext,
255        global: &GlobalScope,
256        proto: Option<HandleObject>,
257        blobParts: Option<Vec<ArrayBufferOrArrayBufferViewOrBlobOrString>>,
258        blobPropertyBag: &BlobBinding::BlobPropertyBag,
259    ) -> Fallible<DomRoot<Blob>> {
260        let bytes: Vec<u8> = match blobParts {
261            None => Vec::new(),
262            Some(blobparts) => match process_blob_parts(blobparts, blobPropertyBag.endings) {
263                Ok(bytes) => bytes,
264                Err(_) => return Err(Error::InvalidCharacter(None)),
265            },
266        };
267
268        let type_string = normalize_type_string(&blobPropertyBag.type_.str());
269        let blob_impl = BlobImpl::new_from_bytes(bytes, type_string);
270
271        Ok(Blob::new_with_proto(cx, global, proto, blob_impl))
272    }
273
274    /// <https://w3c.github.io/FileAPI/#dfn-size>
275    fn Size(&self) -> u64 {
276        self.global().get_blob_size(&self.blob_id)
277    }
278
279    /// <https://w3c.github.io/FileAPI/#dfn-type>
280    fn Type(&self) -> DOMString {
281        DOMString::from(self.type_string())
282    }
283
284    // <https://w3c.github.io/FileAPI/#blob-get-stream>
285    fn Stream(&self, cx: &mut JSContext) -> Fallible<DomRoot<ReadableStream>> {
286        self.get_stream(cx)
287    }
288
289    /// <https://w3c.github.io/FileAPI/#text-stream-method-algo>
290    fn TextStream(&self, cx: &mut JSContext) -> Fallible<DomRoot<ReadableStream>> {
291        // Step 1: Let stream be the result of calling get stream on this.
292        let stream = self.get_stream(cx)?;
293        // Step 2: Let decoder be a new TextDecoderStream in this's relevant realm.
294        // Step 3: Set up decoder with UTF-8.
295        let decoder = TextDecoderStream::new_with_proto(
296            cx,
297            &self.global(),
298            None,
299            UTF_8,
300            false, // fatal
301            false, // ignoreBOM
302        )?;
303        // Step 4: Return the result of calling stream, piped through decoder.
304        Ok(pipe_through(
305            &stream,
306            cx,
307            &self.global(),
308            &decoder.Writable(),
309            decoder.Readable(),
310        ))
311    }
312
313    /// <https://w3c.github.io/FileAPI/#slice-method-algo>
314    fn Slice(
315        &self,
316        cx: &mut JSContext,
317        start: Option<i64>,
318        end: Option<i64>,
319        content_type: Option<DOMString>,
320    ) -> DomRoot<Blob> {
321        let global = self.global();
322        let type_string = normalize_type_string(&content_type.unwrap_or_default().str());
323
324        // If our parent is already a sliced blob then we reference the data from the grandparent instead,
325        // to keep the blob ancestry chain short.
326        let (parent, range) = match *global.get_blob_data(&self.blob_id) {
327            BlobData::Sliced(grandparent, parent_range) => {
328                let range = RelativePos {
329                    start: parent_range.start + start.unwrap_or_default(),
330                    end: end.map(|end| end + parent_range.start).or(parent_range.end),
331                };
332                (grandparent, range)
333            },
334            _ => (self.blob_id, RelativePos::from_opts(start, end)),
335        };
336
337        let blob_impl = BlobImpl::new_sliced(range, parent, type_string);
338        Blob::new(cx, &global, blob_impl)
339    }
340
341    /// <https://w3c.github.io/FileAPI/#text-method-algo>
342    fn Text(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
343        let global = self.global();
344        let p = Promise::new_in_realm(cx);
345        let id = self.get_blob_url_id();
346        global.read_file_async(
347            id,
348            p.clone(),
349            Box::new(|cx, promise, bytes| match bytes {
350                Ok(b) => {
351                    let (text, _) = UTF_8.decode_with_bom_removal(&b);
352                    let text = DOMString::from(text);
353                    promise.resolve_native(cx, &text);
354                },
355                Err(e) => {
356                    promise.reject_error(cx, e);
357                },
358            }),
359        );
360        p
361    }
362
363    /// <https://w3c.github.io/FileAPI/#arraybuffer-method-algo>
364    fn ArrayBuffer(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
365        let promise = Promise::new_in_realm(cx);
366
367        // 1. Let stream be the result of calling get stream on this.
368        let stream = self.get_stream(cx);
369
370        // 2. Let reader be the result of getting a reader from stream.
371        //    If that threw an exception, return a new promise rejected with that exception.
372        let reader = match stream.and_then(|s| s.acquire_default_reader(cx)) {
373            Ok(reader) => reader,
374            Err(error) => {
375                promise.reject_error(cx, error);
376                return promise;
377            },
378        };
379
380        // 3. Let promise be the result of reading all bytes from stream with reader.
381        let success_promise = promise.clone();
382        let failure_promise = promise.clone();
383        reader.read_all_bytes(
384            cx,
385            Rc::new(move |cx, bytes| {
386                rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
387                // 4. Return the result of transforming promise by a fulfillment handler that returns a new
388                //    [ArrayBuffer]
389                let array_buffer = create_buffer_source::<ArrayBufferU8>(
390                    cx.into(),
391                    bytes,
392                    js_object.handle_mut(),
393                    CanGc::from_cx(cx),
394                )
395                .expect("Converting input to ArrayBufferU8 should never fail");
396                success_promise.resolve_native(cx, &array_buffer);
397            }),
398            Rc::new(move |cx, value| {
399                failure_promise.reject(cx, value);
400            }),
401        );
402
403        promise
404    }
405
406    /// <https://w3c.github.io/FileAPI/#dom-blob-bytes>
407    fn Bytes(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
408        let p = Promise::new_in_realm(cx);
409
410        // 1. Let stream be the result of calling get stream on this.
411        let stream = self.get_stream(cx);
412
413        // 2. Let reader be the result of getting a reader from stream.
414        //    If that threw an exception, return a new promise rejected with that exception.
415        let reader = match stream.and_then(|s| s.acquire_default_reader(cx)) {
416            Ok(r) => r,
417            Err(e) => {
418                p.reject_error(cx, e);
419                return p;
420            },
421        };
422
423        // 3. Let promise be the result of reading all bytes from stream with reader.
424        let p_success = p.clone();
425        let p_failure = p.clone();
426        reader.read_all_bytes(
427            cx,
428            Rc::new(move |cx, bytes| {
429                rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
430                let arr = create_buffer_source::<Uint8>(
431                    cx.into(),
432                    bytes,
433                    js_object.handle_mut(),
434                    CanGc::from_cx(cx),
435                )
436                .expect("Converting input to uint8 array should never fail");
437                p_success.resolve_native(cx, &arr);
438            }),
439            Rc::new(move |cx, v| {
440                p_failure.reject(cx, v);
441            }),
442        );
443        p
444    }
445}
446
447/// Get the normalized, MIME-parsable type string
448/// <https://w3c.github.io/FileAPI/#dfn-type>
449/// XXX: We will relax the restriction here,
450/// since the spec has some problem over this part.
451/// see <https://github.com/w3c/FileAPI/issues/43>
452pub(crate) fn normalize_type_string(s: &str) -> String {
453    if is_ascii_printable(s) {
454        s.to_ascii_lowercase()
455        // match s_lower.parse() as Result<Mime, ()> {
456        // Ok(_) => s_lower,
457        // Err(_) => "".to_string()
458    } else {
459        "".to_string()
460    }
461}
462
/// Returns `true` when every character of `string` lies in the range
/// U+0020 (space) to U+007E (`~`), inclusive. The empty string qualifies.
///
/// Step 5.1 in Sec 5.1 of File API spec
/// <https://w3c.github.io/FileAPI/#constructorBlob>
fn is_ascii_printable(string: &str) -> bool {
    // Checking bytes is equivalent to checking chars here: every non-ASCII
    // character is encoded in UTF-8 using only bytes >= 0x80, which fall
    // outside the accepted range.
    string.bytes().all(|byte| matches!(byte, 0x20..=0x7E))
}