Skip to main content

script/dom/
blob.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::ptr;
6use std::rc::Rc;
7
8use dom_struct::dom_struct;
9use encoding_rs::UTF_8;
10use js::jsapi::JSObject;
11use js::realm::CurrentRealm;
12use js::rust::HandleObject;
13use js::typedarray::{ArrayBufferU8, Uint8};
14use net_traits::filemanager_thread::RelativePos;
15use rustc_hash::FxHashMap;
16use script_bindings::reflector::{
17    Reflector, reflect_dom_object_with_proto, reflect_dom_object_with_proto_and_cx,
18};
19use servo_base::id::{BlobId, BlobIndex};
20use servo_constellation_traits::{BlobData, BlobImpl};
21use uuid::Uuid;
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::BlobBinding;
25use crate::dom::bindings::codegen::Bindings::BlobBinding::BlobMethods;
26use crate::dom::bindings::codegen::UnionTypes::ArrayBufferOrArrayBufferViewOrBlobOrString;
27use crate::dom::bindings::error::{Error, Fallible};
28use crate::dom::bindings::reflector::DomGlobal;
29use crate::dom::bindings::root::DomRoot;
30use crate::dom::bindings::serializable::Serializable;
31use crate::dom::bindings::str::DOMString;
32use crate::dom::bindings::structuredclone::StructuredData;
33use crate::dom::globalscope::GlobalScope;
34use crate::dom::promise::Promise;
35use crate::dom::stream::readablestream::ReadableStream;
36use crate::script_runtime::CanGc;
37
/// <https://w3c.github.io/FileAPI/#dfn-Blob>
///
/// DOM object for a blob. The struct itself only stores an identifier; the
/// methods in this file obtain the size, type, bytes and stream by passing
/// `blob_id` to the object's global scope.
#[dom_struct]
pub(crate) struct Blob {
    /// DOM reflector for this object, created with `Reflector::new()`.
    reflector_: Reflector,
    /// Identifier taken from the `BlobImpl` this object was created from;
    /// it is the key used for every lookup on the global scope.
    #[no_trace]
    blob_id: BlobId,
}
45
46impl Blob {
47    pub(crate) fn new(global: &GlobalScope, blob_impl: BlobImpl, can_gc: CanGc) -> DomRoot<Blob> {
48        Self::new_with_proto(global, None, blob_impl, can_gc)
49    }
50
51    fn new_with_proto(
52        global: &GlobalScope,
53        proto: Option<HandleObject>,
54        blob_impl: BlobImpl,
55        can_gc: CanGc,
56    ) -> DomRoot<Blob> {
57        let dom_blob = reflect_dom_object_with_proto(
58            Box::new(Blob::new_inherited(&blob_impl)),
59            global,
60            proto,
61            can_gc,
62        );
63        global.track_blob(&dom_blob, blob_impl);
64        dom_blob
65    }
66
67    fn new_with_proto_and_cx(
68        global: &GlobalScope,
69        proto: Option<HandleObject>,
70        blob_impl: BlobImpl,
71        cx: &mut js::context::JSContext,
72    ) -> DomRoot<Blob> {
73        let dom_blob = reflect_dom_object_with_proto_and_cx(
74            Box::new(Blob::new_inherited(&blob_impl)),
75            global,
76            proto,
77            cx,
78        );
79        global.track_blob(&dom_blob, blob_impl);
80        dom_blob
81    }
82
83    pub(crate) fn new_inherited(blob_impl: &BlobImpl) -> Blob {
84        Blob {
85            reflector_: Reflector::new(),
86            blob_id: blob_impl.blob_id(),
87        }
88    }
89
90    /// Get a slice to inner data, this might incur synchronous read and caching
91    pub(crate) fn get_bytes(&self) -> Result<Vec<u8>, ()> {
92        self.global().get_blob_bytes(&self.blob_id)
93    }
94
95    /// Get a copy of the type_string
96    pub(crate) fn type_string(&self) -> String {
97        self.global().get_blob_type_string(&self.blob_id)
98    }
99
100    /// Get a FileID representing the Blob content,
101    /// used by URL.createObjectURL
102    pub(crate) fn get_blob_url_id(&self) -> Uuid {
103        self.global().get_blob_url_id(&self.blob_id)
104    }
105
106    /// <https://w3c.github.io/FileAPI/#blob-get-stream>
107    pub(crate) fn get_stream(
108        &self,
109        cx: &mut js::context::JSContext,
110    ) -> Fallible<DomRoot<ReadableStream>> {
111        self.global().get_blob_stream(cx, &self.blob_id)
112    }
113}
114
115impl Serializable for Blob {
116    type Index = BlobIndex;
117    type Data = BlobImpl;
118
119    /// <https://w3c.github.io/FileAPI/#ref-for-serialization-steps>
120    fn serialize(&self) -> Result<(BlobId, BlobImpl), ()> {
121        let blob_id = self.blob_id;
122
123        // 1. Get a clone of the blob impl.
124        let blob_impl = self.global().serialize_blob(&blob_id);
125
126        // We clone the data, but the clone gets its own Id.
127        let new_blob_id = blob_impl.blob_id();
128
129        Ok((new_blob_id, blob_impl))
130    }
131
132    /// <https://w3c.github.io/FileAPI/#ref-for-deserialization-steps>
133    fn deserialize(
134        owner: &GlobalScope,
135        serialized: BlobImpl,
136        can_gc: CanGc,
137    ) -> Result<DomRoot<Self>, ()> {
138        let deserialized_blob = Blob::new(owner, serialized, can_gc);
139        Ok(deserialized_blob)
140    }
141
142    fn serialized_storage<'a>(
143        reader: StructuredData<'a, '_>,
144    ) -> &'a mut Option<FxHashMap<BlobId, Self::Data>> {
145        match reader {
146            StructuredData::Reader(r) => &mut r.blob_impls,
147            StructuredData::Writer(w) => &mut w.blobs,
148        }
149    }
150}
151
/// <https://w3c.github.io/FileAPI/#convert-line-endings-to-native>
///
/// Returns a copy of `s` in which every CR LF pair, lone CR and lone LF is
/// replaced by the platform's line ending (CR LF when compiled for Windows,
/// LF otherwise). All other bytes are copied through unchanged.
fn convert_line_endings_to_native(s: &[u8]) -> Vec<u8> {
    // Steps 1-2. Pick the native line ending for the target platform.
    let native_line_ending: &[u8] = if cfg!(target_os = "windows") {
        b"\r\n"
    } else {
        b"\n"
    };

    // Step 3. Start from an empty result.
    let mut converted = Vec::with_capacity(s.len());

    // Step 4. `rest` plays the role of the position variable: it is always
    // the part of `s` that has not been consumed yet.
    let mut rest = s;

    loop {
        // Steps 5-6 and 7.3-7.4. Copy the run of bytes up to the next CR or LF.
        // <https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points>
        let token_len = rest
            .iter()
            .position(|&byte| byte == b'\r' || byte == b'\n')
            .unwrap_or(rest.len());
        let (token, tail) = rest.split_at(token_len);
        converted.extend_from_slice(token);

        // Step 7. `tail` is either empty (end of input) or starts with CR or LF.
        rest = match tail {
            [] => break,
            // Step 7.1. A CR swallows a directly following LF.
            // Step 7.2. Otherwise a single CR or LF is consumed.
            [b'\r', b'\n', after @ ..] | [_, after @ ..] => after,
        };
        converted.extend_from_slice(native_line_ending);
    }

    // Step 8. Return result.
    converted
}
216
217/// <https://w3c.github.io/FileAPI/#process-blob-parts>
218#[expect(unsafe_code)]
219pub(crate) fn process_blob_parts(
220    mut blobparts: Vec<ArrayBufferOrArrayBufferViewOrBlobOrString>,
221    endings: BlobBinding::EndingType,
222) -> Result<Vec<u8>, ()> {
223    // Step 1. Let bytes be an empty sequence of bytes.
224    let mut bytes = vec![];
225    // Step 2. For each blobpart in blobparts:
226    for blobpart in &mut blobparts {
227        match blobpart {
228            // Step 2.1. If blobpart is a USVString, run the following substeps:
229            ArrayBufferOrArrayBufferViewOrBlobOrString::String(s) => {
230                // Step 2.1.1. Let s be blobpart.
231                // Step 2.1.2. If the endings member of options is "native",
232                // set s to the result of converting line endings to native of blobpart.
233                if endings == BlobBinding::EndingType::Native {
234                    let converted = convert_line_endings_to_native(&s.as_bytes());
235                    // Step 2.1.3. Append the result of UTF-8 encoding s to bytes.
236                    bytes.extend(converted);
237                } else {
238                    // Step 2.1.3: Append the result of UTF-8 encoding s to bytes.
239                    bytes.extend_from_slice(&s.as_bytes());
240                }
241            },
242            // Step 2.2. If element is a BufferSource,
243            // get a copy of the bytes held by the buffer source,
244            // and append those bytes to bytes.
245            ArrayBufferOrArrayBufferViewOrBlobOrString::ArrayBuffer(a) => unsafe {
246                let array_bytes = a.as_slice();
247                bytes.extend(array_bytes);
248            },
249            ArrayBufferOrArrayBufferViewOrBlobOrString::ArrayBufferView(a) => unsafe {
250                let view_bytes = a.as_slice();
251                bytes.extend(view_bytes);
252            },
253            // Step 2.3. If element is a Blob, append the bytes it represents to bytes.
254            ArrayBufferOrArrayBufferViewOrBlobOrString::Blob(b) => {
255                let blob_bytes = b.get_bytes().unwrap_or(vec![]);
256                bytes.extend(blob_bytes);
257            },
258        }
259    }
260
261    // Step 3. Return bytes.
262    Ok(bytes)
263}
264
265impl BlobMethods<crate::DomTypeHolder> for Blob {
266    // https://w3c.github.io/FileAPI/#constructorBlob
267    #[expect(non_snake_case)]
268    fn Constructor(
269        cx: &mut js::context::JSContext,
270        global: &GlobalScope,
271        proto: Option<HandleObject>,
272        blobParts: Option<Vec<ArrayBufferOrArrayBufferViewOrBlobOrString>>,
273        blobPropertyBag: &BlobBinding::BlobPropertyBag,
274    ) -> Fallible<DomRoot<Blob>> {
275        let bytes: Vec<u8> = match blobParts {
276            None => Vec::new(),
277            Some(blobparts) => match process_blob_parts(blobparts, blobPropertyBag.endings) {
278                Ok(bytes) => bytes,
279                Err(_) => return Err(Error::InvalidCharacter(None)),
280            },
281        };
282
283        let type_string = normalize_type_string(&blobPropertyBag.type_.str());
284        let blob_impl = BlobImpl::new_from_bytes(bytes, type_string);
285
286        Ok(Blob::new_with_proto_and_cx(global, proto, blob_impl, cx))
287    }
288
289    /// <https://w3c.github.io/FileAPI/#dfn-size>
290    fn Size(&self) -> u64 {
291        self.global().get_blob_size(&self.blob_id)
292    }
293
294    /// <https://w3c.github.io/FileAPI/#dfn-type>
295    fn Type(&self) -> DOMString {
296        DOMString::from(self.type_string())
297    }
298
299    // <https://w3c.github.io/FileAPI/#blob-get-stream>
300    fn Stream(&self, cx: &mut js::context::JSContext) -> Fallible<DomRoot<ReadableStream>> {
301        self.get_stream(cx)
302    }
303
304    /// <https://w3c.github.io/FileAPI/#slice-method-algo>
305    fn Slice(
306        &self,
307        cx: &mut js::context::JSContext,
308        start: Option<i64>,
309        end: Option<i64>,
310        content_type: Option<DOMString>,
311    ) -> DomRoot<Blob> {
312        let global = self.global();
313        let type_string = normalize_type_string(&content_type.unwrap_or_default().str());
314
315        // If our parent is already a sliced blob then we reference the data from the grandparent instead,
316        // to keep the blob ancestry chain short.
317        let (parent, range) = match *global.get_blob_data(&self.blob_id) {
318            BlobData::Sliced(grandparent, parent_range) => {
319                let range = RelativePos {
320                    start: parent_range.start + start.unwrap_or_default(),
321                    end: end.map(|end| end + parent_range.start).or(parent_range.end),
322                };
323                (grandparent, range)
324            },
325            _ => (self.blob_id, RelativePos::from_opts(start, end)),
326        };
327
328        let blob_impl = BlobImpl::new_sliced(range, parent, type_string);
329        Blob::new(&global, blob_impl, CanGc::from_cx(cx))
330    }
331
332    /// <https://w3c.github.io/FileAPI/#text-method-algo>
333    fn Text(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
334        let global = self.global();
335        let p = Promise::new_in_realm(cx);
336        let id = self.get_blob_url_id();
337        global.read_file_async(
338            id,
339            p.clone(),
340            Box::new(|cx, promise, bytes| match bytes {
341                Ok(b) => {
342                    let (text, _) = UTF_8.decode_with_bom_removal(&b);
343                    let text = DOMString::from(text);
344                    promise.resolve_native(&text, CanGc::from_cx(cx));
345                },
346                Err(e) => {
347                    promise.reject_error(e, CanGc::from_cx(cx));
348                },
349            }),
350        );
351        p
352    }
353
354    /// <https://w3c.github.io/FileAPI/#arraybuffer-method-algo>
355    fn ArrayBuffer(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
356        let promise = Promise::new_in_realm(cx);
357
358        // 1. Let stream be the result of calling get stream on this.
359        let stream = self.get_stream(cx);
360
361        // 2. Let reader be the result of getting a reader from stream.
362        //    If that threw an exception, return a new promise rejected with that exception.
363        let reader = match stream.and_then(|s| s.acquire_default_reader(CanGc::from_cx(cx))) {
364            Ok(reader) => reader,
365            Err(error) => {
366                promise.reject_error(error, CanGc::from_cx(cx));
367                return promise;
368            },
369        };
370
371        // 3. Let promise be the result of reading all bytes from stream with reader.
372        let success_promise = promise.clone();
373        let failure_promise = promise.clone();
374        reader.read_all_bytes(
375            cx,
376            Rc::new(move |cx, bytes| {
377                rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
378                // 4. Return the result of transforming promise by a fulfillment handler that returns a new
379                //    [ArrayBuffer]
380                let array_buffer = create_buffer_source::<ArrayBufferU8>(
381                    cx.into(),
382                    bytes,
383                    js_object.handle_mut(),
384                    CanGc::from_cx(cx),
385                )
386                .expect("Converting input to ArrayBufferU8 should never fail");
387                success_promise.resolve_native(&array_buffer, CanGc::from_cx(cx));
388            }),
389            Rc::new(move |cx, value| {
390                failure_promise.reject(cx.into(), value, CanGc::from_cx(cx));
391            }),
392        );
393
394        promise
395    }
396
397    /// <https://w3c.github.io/FileAPI/#dom-blob-bytes>
398    fn Bytes(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
399        let p = Promise::new_in_realm(cx);
400
401        // 1. Let stream be the result of calling get stream on this.
402        let stream = self.get_stream(cx);
403
404        // 2. Let reader be the result of getting a reader from stream.
405        //    If that threw an exception, return a new promise rejected with that exception.
406        let reader = match stream.and_then(|s| s.acquire_default_reader(CanGc::from_cx(cx))) {
407            Ok(r) => r,
408            Err(e) => {
409                p.reject_error(e, CanGc::from_cx(cx));
410                return p;
411            },
412        };
413
414        // 3. Let promise be the result of reading all bytes from stream with reader.
415        let p_success = p.clone();
416        let p_failure = p.clone();
417        reader.read_all_bytes(
418            cx,
419            Rc::new(move |cx, bytes| {
420                rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
421                let arr = create_buffer_source::<Uint8>(
422                    cx.into(),
423                    bytes,
424                    js_object.handle_mut(),
425                    CanGc::from_cx(cx),
426                )
427                .expect("Converting input to uint8 array should never fail");
428                p_success.resolve_native(&arr, CanGc::from_cx(cx));
429            }),
430            Rc::new(move |cx, v| {
431                p_failure.reject(cx.into(), v, CanGc::from_cx(cx));
432            }),
433        );
434        p
435    }
436}
437
438/// Get the normalized, MIME-parsable type string
439/// <https://w3c.github.io/FileAPI/#dfn-type>
440/// XXX: We will relax the restriction here,
441/// since the spec has some problem over this part.
442/// see <https://github.com/w3c/FileAPI/issues/43>
443pub(crate) fn normalize_type_string(s: &str) -> String {
444    if is_ascii_printable(s) {
445        s.to_ascii_lowercase()
446        // match s_lower.parse() as Result<Mime, ()> {
447        // Ok(_) => s_lower,
448        // Err(_) => "".to_string()
449    } else {
450        "".to_string()
451    }
452}
453
/// Returns `true` when every character of `string` lies in the range
/// U+0020 (space) to U+007E (`~`). The empty string passes.
///
/// Step 5.1 in Sec 5.1 of File API spec
/// <https://w3c.github.io/FileAPI/#constructorBlob>
fn is_ascii_printable(string: &str) -> bool {
    // Checking bytes is equivalent to checking chars: every byte of a
    // non-ASCII character's UTF-8 encoding is 0x80 or above, outside the range.
    string.bytes().all(|byte| matches!(byte, b' '..=b'~'))
}