Skip to main content

script/dom/
blob.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::ptr;
6use std::rc::Rc;
7
8use dom_struct::dom_struct;
9use encoding_rs::UTF_8;
10use js::context::JSContext;
11use js::jsapi::JSObject;
12use js::realm::CurrentRealm;
13use js::rust::HandleObject;
14use js::typedarray::{ArrayBufferU8, Uint8};
15use net_traits::filemanager_thread::RelativePos;
16use rustc_hash::FxHashMap;
17use script_bindings::reflector::{Reflector, reflect_dom_object_with_proto_and_cx};
18use servo_base::id::{BlobId, BlobIndex};
19use servo_constellation_traits::{BlobData, BlobImpl};
20use uuid::Uuid;
21
22use crate::dom::bindings::buffer_source::create_buffer_source;
23use crate::dom::bindings::codegen::Bindings::BlobBinding;
24use crate::dom::bindings::codegen::Bindings::BlobBinding::BlobMethods;
25use crate::dom::bindings::codegen::UnionTypes::ArrayBufferOrArrayBufferViewOrBlobOrString;
26use crate::dom::bindings::error::{Error, Fallible};
27use crate::dom::bindings::reflector::DomGlobal;
28use crate::dom::bindings::root::DomRoot;
29use crate::dom::bindings::serializable::Serializable;
30use crate::dom::bindings::str::DOMString;
31use crate::dom::bindings::structuredclone::StructuredData;
32use crate::dom::globalscope::GlobalScope;
33use crate::dom::promise::Promise;
34use crate::dom::stream::readablestream::ReadableStream;
35use crate::script_runtime::CanGc;
36
37/// <https://w3c.github.io/FileAPI/#dfn-Blob>
38#[dom_struct]
39pub(crate) struct Blob {
40    reflector_: Reflector,
41    #[no_trace]
42    blob_id: BlobId,
43}
44
45impl Blob {
46    pub(crate) fn new(
47        cx: &mut JSContext,
48        global: &GlobalScope,
49        blob_impl: BlobImpl,
50    ) -> DomRoot<Blob> {
51        Self::new_with_proto(cx, global, None, blob_impl)
52    }
53
54    fn new_with_proto(
55        cx: &mut JSContext,
56        global: &GlobalScope,
57        proto: Option<HandleObject>,
58        blob_impl: BlobImpl,
59    ) -> DomRoot<Blob> {
60        let dom_blob = reflect_dom_object_with_proto_and_cx(
61            Box::new(Blob::new_inherited(&blob_impl)),
62            global,
63            proto,
64            cx,
65        );
66        global.track_blob(&dom_blob, blob_impl);
67        dom_blob
68    }
69
70    pub(crate) fn new_inherited(blob_impl: &BlobImpl) -> Blob {
71        Blob {
72            reflector_: Reflector::new(),
73            blob_id: blob_impl.blob_id(),
74        }
75    }
76
77    /// Get a slice to inner data, this might incur synchronous read and caching
78    pub(crate) fn get_bytes(&self) -> Result<Vec<u8>, ()> {
79        self.global().get_blob_bytes(&self.blob_id)
80    }
81
82    /// Get a copy of the type_string
83    pub(crate) fn type_string(&self) -> String {
84        self.global().get_blob_type_string(&self.blob_id)
85    }
86
87    /// Get a FileID representing the Blob content,
88    /// used by URL.createObjectURL
89    pub(crate) fn get_blob_url_id(&self) -> Uuid {
90        self.global().get_blob_url_id(&self.blob_id)
91    }
92
93    /// <https://w3c.github.io/FileAPI/#blob-get-stream>
94    pub(crate) fn get_stream(&self, cx: &mut JSContext) -> Fallible<DomRoot<ReadableStream>> {
95        self.global().get_blob_stream(cx, &self.blob_id)
96    }
97}
98
99impl Serializable for Blob {
100    type Index = BlobIndex;
101    type Data = BlobImpl;
102
103    /// <https://w3c.github.io/FileAPI/#ref-for-serialization-steps>
104    fn serialize(&self) -> Result<(BlobId, BlobImpl), ()> {
105        let blob_id = self.blob_id;
106
107        // 1. Get a clone of the blob impl.
108        let blob_impl = self.global().serialize_blob(&blob_id);
109
110        // We clone the data, but the clone gets its own Id.
111        let new_blob_id = blob_impl.blob_id();
112
113        Ok((new_blob_id, blob_impl))
114    }
115
116    /// <https://w3c.github.io/FileAPI/#ref-for-deserialization-steps>
117    fn deserialize(
118        cx: &mut JSContext,
119        owner: &GlobalScope,
120        serialized: BlobImpl,
121    ) -> Result<DomRoot<Self>, ()> {
122        Ok(Blob::new(cx, owner, serialized))
123    }
124
125    fn serialized_storage<'a>(
126        reader: StructuredData<'a, '_>,
127    ) -> &'a mut Option<FxHashMap<BlobId, Self::Data>> {
128        match reader {
129            StructuredData::Reader(r) => &mut r.blob_impls,
130            StructuredData::Writer(w) => &mut w.blobs,
131        }
132    }
133}
134
/// Rewrites every line break in `s` (a lone CR, a lone LF, or a CR LF pair)
/// to the platform's native line ending and returns the resulting bytes.
///
/// <https://w3c.github.io/FileAPI/#convert-line-endings-to-native>
fn convert_line_endings_to_native(s: &[u8]) -> Vec<u8> {
    const CR: u8 = b'\r';
    const LF: u8 = b'\n';

    // Steps 1-2. Native line ending is LF, unless the platform convention is CR LF.
    let native_line_ending: &[u8] = if cfg!(target_os = "windows") {
        b"\r\n"
    } else {
        b"\n"
    };

    // Step 3. Set result to the empty string.
    let mut result = Vec::with_capacity(s.len());

    // Step 4. `rest` plays the role of the position variable: it is the part
    // of `s` that has not been consumed yet.
    let mut rest = s;

    loop {
        // Steps 5-6 and 7.3-7.4. Collect the run of bytes that are neither CR nor LF
        // (<https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points>)
        // and append it to result.
        let token_len = rest
            .iter()
            .position(|&byte| byte == CR || byte == LF)
            .unwrap_or(rest.len());
        let (token, tail) = rest.split_at(token_len);
        result.extend_from_slice(token);

        // Step 7. `tail` is either empty (we are past the end of `s`) or starts
        // with the line break that terminated the token.
        rest = match tail {
            [] => break,
            // Step 7.1. CR immediately followed by LF counts as a single line break.
            [CR, LF, after @ ..] => after,
            // Steps 7.1 / 7.2. A lone CR or a lone LF.
            [_, after @ ..] => after,
        };
        result.extend_from_slice(native_line_ending);
    }

    // Step 8. Return result.
    result
}
199
200/// <https://w3c.github.io/FileAPI/#process-blob-parts>
201#[expect(unsafe_code)]
202pub(crate) fn process_blob_parts(
203    mut blobparts: Vec<ArrayBufferOrArrayBufferViewOrBlobOrString>,
204    endings: BlobBinding::EndingType,
205) -> Result<Vec<u8>, ()> {
206    // Step 1. Let bytes be an empty sequence of bytes.
207    let mut bytes = vec![];
208    // Step 2. For each blobpart in blobparts:
209    for blobpart in &mut blobparts {
210        match blobpart {
211            // Step 2.1. If blobpart is a USVString, run the following substeps:
212            ArrayBufferOrArrayBufferViewOrBlobOrString::String(s) => {
213                // Step 2.1.1. Let s be blobpart.
214                // Step 2.1.2. If the endings member of options is "native",
215                // set s to the result of converting line endings to native of blobpart.
216                if endings == BlobBinding::EndingType::Native {
217                    let converted = convert_line_endings_to_native(&s.as_bytes());
218                    // Step 2.1.3. Append the result of UTF-8 encoding s to bytes.
219                    bytes.extend(converted);
220                } else {
221                    // Step 2.1.3: Append the result of UTF-8 encoding s to bytes.
222                    bytes.extend_from_slice(&s.as_bytes());
223                }
224            },
225            // Step 2.2. If element is a BufferSource,
226            // get a copy of the bytes held by the buffer source,
227            // and append those bytes to bytes.
228            ArrayBufferOrArrayBufferViewOrBlobOrString::ArrayBuffer(a) => unsafe {
229                let array_bytes = a.as_slice();
230                bytes.extend(array_bytes);
231            },
232            ArrayBufferOrArrayBufferViewOrBlobOrString::ArrayBufferView(a) => unsafe {
233                let view_bytes = a.as_slice();
234                bytes.extend(view_bytes);
235            },
236            // Step 2.3. If element is a Blob, append the bytes it represents to bytes.
237            ArrayBufferOrArrayBufferViewOrBlobOrString::Blob(b) => {
238                let blob_bytes = b.get_bytes().unwrap_or(vec![]);
239                bytes.extend(blob_bytes);
240            },
241        }
242    }
243
244    // Step 3. Return bytes.
245    Ok(bytes)
246}
247
248impl BlobMethods<crate::DomTypeHolder> for Blob {
249    // https://w3c.github.io/FileAPI/#constructorBlob
250    #[expect(non_snake_case)]
251    fn Constructor(
252        cx: &mut JSContext,
253        global: &GlobalScope,
254        proto: Option<HandleObject>,
255        blobParts: Option<Vec<ArrayBufferOrArrayBufferViewOrBlobOrString>>,
256        blobPropertyBag: &BlobBinding::BlobPropertyBag,
257    ) -> Fallible<DomRoot<Blob>> {
258        let bytes: Vec<u8> = match blobParts {
259            None => Vec::new(),
260            Some(blobparts) => match process_blob_parts(blobparts, blobPropertyBag.endings) {
261                Ok(bytes) => bytes,
262                Err(_) => return Err(Error::InvalidCharacter(None)),
263            },
264        };
265
266        let type_string = normalize_type_string(&blobPropertyBag.type_.str());
267        let blob_impl = BlobImpl::new_from_bytes(bytes, type_string);
268
269        Ok(Blob::new_with_proto(cx, global, proto, blob_impl))
270    }
271
272    /// <https://w3c.github.io/FileAPI/#dfn-size>
273    fn Size(&self) -> u64 {
274        self.global().get_blob_size(&self.blob_id)
275    }
276
277    /// <https://w3c.github.io/FileAPI/#dfn-type>
278    fn Type(&self) -> DOMString {
279        DOMString::from(self.type_string())
280    }
281
282    // <https://w3c.github.io/FileAPI/#blob-get-stream>
283    fn Stream(&self, cx: &mut JSContext) -> Fallible<DomRoot<ReadableStream>> {
284        self.get_stream(cx)
285    }
286
287    /// <https://w3c.github.io/FileAPI/#slice-method-algo>
288    fn Slice(
289        &self,
290        cx: &mut JSContext,
291        start: Option<i64>,
292        end: Option<i64>,
293        content_type: Option<DOMString>,
294    ) -> DomRoot<Blob> {
295        let global = self.global();
296        let type_string = normalize_type_string(&content_type.unwrap_or_default().str());
297
298        // If our parent is already a sliced blob then we reference the data from the grandparent instead,
299        // to keep the blob ancestry chain short.
300        let (parent, range) = match *global.get_blob_data(&self.blob_id) {
301            BlobData::Sliced(grandparent, parent_range) => {
302                let range = RelativePos {
303                    start: parent_range.start + start.unwrap_or_default(),
304                    end: end.map(|end| end + parent_range.start).or(parent_range.end),
305                };
306                (grandparent, range)
307            },
308            _ => (self.blob_id, RelativePos::from_opts(start, end)),
309        };
310
311        let blob_impl = BlobImpl::new_sliced(range, parent, type_string);
312        Blob::new(cx, &global, blob_impl)
313    }
314
315    /// <https://w3c.github.io/FileAPI/#text-method-algo>
316    fn Text(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
317        let global = self.global();
318        let p = Promise::new_in_realm(cx);
319        let id = self.get_blob_url_id();
320        global.read_file_async(
321            id,
322            p.clone(),
323            Box::new(|cx, promise, bytes| match bytes {
324                Ok(b) => {
325                    let (text, _) = UTF_8.decode_with_bom_removal(&b);
326                    let text = DOMString::from(text);
327                    promise.resolve_native_with_cx(cx, &text);
328                },
329                Err(e) => {
330                    promise.reject_error_with_cx(cx, e);
331                },
332            }),
333        );
334        p
335    }
336
337    /// <https://w3c.github.io/FileAPI/#arraybuffer-method-algo>
338    fn ArrayBuffer(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
339        let promise = Promise::new_in_realm(cx);
340
341        // 1. Let stream be the result of calling get stream on this.
342        let stream = self.get_stream(cx);
343
344        // 2. Let reader be the result of getting a reader from stream.
345        //    If that threw an exception, return a new promise rejected with that exception.
346        let reader = match stream.and_then(|s| s.acquire_default_reader(CanGc::from_cx(cx))) {
347            Ok(reader) => reader,
348            Err(error) => {
349                promise.reject_error_with_cx(cx, error);
350                return promise;
351            },
352        };
353
354        // 3. Let promise be the result of reading all bytes from stream with reader.
355        let success_promise = promise.clone();
356        let failure_promise = promise.clone();
357        reader.read_all_bytes(
358            cx,
359            Rc::new(move |cx, bytes| {
360                rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
361                // 4. Return the result of transforming promise by a fulfillment handler that returns a new
362                //    [ArrayBuffer]
363                let array_buffer = create_buffer_source::<ArrayBufferU8>(
364                    cx.into(),
365                    bytes,
366                    js_object.handle_mut(),
367                    CanGc::from_cx(cx),
368                )
369                .expect("Converting input to ArrayBufferU8 should never fail");
370                success_promise.resolve_native_with_cx(cx, &array_buffer);
371            }),
372            Rc::new(move |cx, value| {
373                failure_promise.reject_with_cx(cx, value);
374            }),
375        );
376
377        promise
378    }
379
380    /// <https://w3c.github.io/FileAPI/#dom-blob-bytes>
381    fn Bytes(&self, cx: &mut CurrentRealm) -> Rc<Promise> {
382        let p = Promise::new_in_realm(cx);
383
384        // 1. Let stream be the result of calling get stream on this.
385        let stream = self.get_stream(cx);
386
387        // 2. Let reader be the result of getting a reader from stream.
388        //    If that threw an exception, return a new promise rejected with that exception.
389        let reader = match stream.and_then(|s| s.acquire_default_reader(CanGc::from_cx(cx))) {
390            Ok(r) => r,
391            Err(e) => {
392                p.reject_error_with_cx(cx, e);
393                return p;
394            },
395        };
396
397        // 3. Let promise be the result of reading all bytes from stream with reader.
398        let p_success = p.clone();
399        let p_failure = p.clone();
400        reader.read_all_bytes(
401            cx,
402            Rc::new(move |cx, bytes| {
403                rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
404                let arr = create_buffer_source::<Uint8>(
405                    cx.into(),
406                    bytes,
407                    js_object.handle_mut(),
408                    CanGc::from_cx(cx),
409                )
410                .expect("Converting input to uint8 array should never fail");
411                p_success.resolve_native_with_cx(cx, &arr);
412            }),
413            Rc::new(move |cx, v| {
414                p_failure.reject_with_cx(cx, v);
415            }),
416        );
417        p
418    }
419}
420
421/// Get the normalized, MIME-parsable type string
422/// <https://w3c.github.io/FileAPI/#dfn-type>
423/// XXX: We will relax the restriction here,
424/// since the spec has some problem over this part.
425/// see <https://github.com/w3c/FileAPI/issues/43>
426pub(crate) fn normalize_type_string(s: &str) -> String {
427    if is_ascii_printable(s) {
428        s.to_ascii_lowercase()
429        // match s_lower.parse() as Result<Mime, ()> {
430        // Ok(_) => s_lower,
431        // Err(_) => "".to_string()
432    } else {
433        "".to_string()
434    }
435}
436
/// Returns `true` when every character of `string` lies in the range
/// U+0020 to U+007E inclusive; the empty string qualifies.
///
/// Step 5.1 in Sec 5.1 of the File API spec:
/// <https://w3c.github.io/FileAPI/#constructorBlob>
fn is_ascii_printable(string: &str) -> bool {
    // Every non-ASCII character is encoded with bytes >= 0x80, so checking the
    // UTF-8 bytes is equivalent to checking the characters.
    string.bytes().all(|byte| matches!(byte, 0x20..=0x7E))
}