script/dom/
textencoderstream.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::Cell;
6use std::num::{NonZero, NonZeroU16};
7use std::ptr::{self, NonNull};
8
9use dom_struct::dom_struct;
10use js::conversions::latin1_to_string;
11use js::jsapi::{
12    JS_DeprecatedStringHasLatin1Chars, JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending,
13    JSObject, JSType, ToPrimitive,
14};
15use js::jsval::UndefinedValue;
16use js::rust::{
17    HandleObject as SafeHandleObject, HandleValue as SafeHandleValue,
18    MutableHandleValue as SafeMutableHandleValue, ToString,
19};
20use js::typedarray::Uint8Array;
21use script_bindings::conversions::SafeToJSValConvertible;
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::TextEncoderStreamBinding::TextEncoderStreamMethods;
25use crate::dom::bindings::error::{Error, Fallible, throw_dom_exception};
26use crate::dom::bindings::reflector::{Reflector, reflect_dom_object_with_proto};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::bindings::str::DOMString;
29use crate::dom::transformstreamdefaultcontroller::TransformerType;
30use crate::dom::types::{GlobalScope, TransformStream, TransformStreamDefaultController};
31use crate::script_runtime::{CanGc, JSContext as SafeJSContext};
32use crate::{DomTypeHolder, DomTypes};
33
/// String converted from an input JS Value
///
/// The two variants mirror the two ways the text of a JS string is handed to
/// the encoder in this file.
enum ConvertedInput<'a> {
    /// Text that is already held as an owned Rust `String`. A Rust `String`
    /// is valid UTF-8 and therefore can never contain a surrogate.
    String(String),
    /// Borrowed UTF-16 code units. These may be ill-formed, i.e. they may
    /// contain unpaired surrogates.
    CodeUnits(&'a [u16]),
}
39
40/// Converts a JS value to primitive type so that it can be used with
41/// `ToString`.
42///
43/// Set `rval` to `chunk` if `chunk` is a primitive JS value. Otherwise, convert
44/// `chunk` into a primitive JS value and then set `rval` to the converted
45/// primitive. This follows the `ToString` procedure with the exception that it
46/// does not convert the value to string.
47///
48/// See below for the `ToString` procedure in spec:
49/// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tostring>
50#[expect(unsafe_code)]
51fn jsval_to_primitive(
52    cx: SafeJSContext,
53    global: &GlobalScope,
54    chunk: SafeHandleValue,
55    mut rval: SafeMutableHandleValue,
56    can_gc: CanGc,
57) -> Fallible<()> {
58    // Step 1. If argument is a String, return argument.
59    // Step 2. If argument is a Symbol, throw a TypeError exception.
60    // Step 3. If argument is undefined, return "undefined".
61    // Step 4. If argument is null, return "null".
62    // Step 5. If argument is true, return "true".
63    // Step 6. If argument is false, return "false".
64    // Step 7. If argument is a Number, return Number::toString(argument, 10).
65    // Step 8. If argument is a BigInt, return BigInt::toString(argument, 10).
66    if chunk.is_primitive() {
67        rval.set(chunk.get());
68
69        return Ok(());
70    }
71
72    // Step 9. Assert: argument is an Object.
73    assert!(chunk.is_object());
74
75    // Step 10. Let primValue be ? ToPrimitive(argument, string).
76    rooted!(in(*cx) let obj = chunk.to_object());
77    let is_success =
78        unsafe { ToPrimitive(*cx, obj.handle().into(), JSType::JSTYPE_STRING, rval.into()) };
79    log::debug!("ToPrimitive is_success={:?}", is_success);
80    if !is_success {
81        unsafe {
82            if !JS_IsExceptionPending(*cx) {
83                throw_dom_exception(
84                    cx,
85                    global,
86                    Error::Type("Cannot convert JSObject to primitive".to_owned()),
87                    can_gc,
88                );
89            }
90        }
91        return Err(Error::JSFailed);
92    }
93
94    Ok(())
95}
96
97/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
98#[derive(Default, JSTraceable, MallocSizeOf)]
99pub(crate) struct Encoder {
100    /// <https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate>
101    leading_surrogate: Cell<Option<NonZeroU16>>,
102}
103
104impl Encoder {
105    fn encode(&self, maybe_ill_formed: ConvertedInput<'_>) -> String {
106        match maybe_ill_formed {
107            ConvertedInput::String(s) => {
108                // Rust String is already UTF-8 encoded and cannot contain
109                // surrogate
110                if !s.is_empty() && self.leading_surrogate.take().is_some() {
111                    let mut output = String::with_capacity(1 + s.len());
112                    output.push('\u{FFFD}');
113                    output.push_str(&s);
114                    return output;
115                }
116
117                s
118            },
119            ConvertedInput::CodeUnits(code_units) => self.encode_from_code_units(code_units),
120        }
121    }
122
123    /// Encode an input slice of code unit into unicode scalar values
124    fn encode_from_code_units(&self, input: &[u16]) -> String {
125        // <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
126        //
127        // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
128        let mut output = String::with_capacity(input.len());
129        // Step 4. While true:
130        // Step 4.1 Let item be the result of reading from input.
131        for result in char::decode_utf16(input.iter().cloned()) {
132            // Step 4.3 Let result be the result of executing the convert code unit
133            //      to scalar value algorithm with encoder, item and input.
134
135            // <https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value>
136            match result {
137                Ok(c) => {
138                    // Step 1. If encoder’s leading surrogate is non-null:
139                    // Step 1.1 Let leadingSurrogate be encoder’s leading surrogate.
140                    // Step 1.2 Set encoder’s leading surrogate to null.
141                    if self.leading_surrogate.take().is_some() {
142                        // Step 1.5 Return U+FFFD (�).
143                        output.push('\u{FFFD}');
144                    }
145
146                    // Step 1.4 Restore item to input.
147                    // Note: pushing item to output is equivalent to restoring item to input
148                    //      and rerun the convert-code-unit-to-scalar-value algo
149                    output.push(c);
150                },
151                Err(error) => {
152                    let unpaired_surrogate = error.unpaired_surrogate();
153                    match code_point_type(unpaired_surrogate) {
154                        CodePointType::LeadingSurrogate => {
155                            // Step 1.1 If encoder’s leading surrogate is non-null:
156                            // Step 1.2 Set encoder’s leading surrogate to null.
157                            if self.leading_surrogate.take().is_some() {
158                                output.push('\u{FFFD}');
159                            }
160
161                            // Step 1.4 Restore item to input.
162                            // Note: Replacing encoder's leading_surrogate is equivalent
163                            //      to restore item back to input and rerun the convert-
164                            //      code-unit-to-scalar-value algo.
165                            // Step 2. If item is a leading surrogate, then set encoder’s
166                            //      leading surrogate to item and return continue.
167                            self.leading_surrogate
168                                .replace(NonZero::new(unpaired_surrogate));
169                        },
170                        CodePointType::TrailingSurrogate => match self.leading_surrogate.take() {
171                            // Step 1.1 If encoder’s leading surrogate is non-null:
172                            // Step 1.2 Set encoder’s leading surrogate to null.
173                            Some(leading_surrogate) => {
174                                // Step 1.3 If item is a trailing surrogate, then return a scalar
175                                //      value from surrogates given leadingSurrogate and item.
176                                let c = char::decode_utf16([
177                                    leading_surrogate.get(),
178                                    unpaired_surrogate,
179                                ])
180                                .next()
181                                .expect("A pair of surrogate is supplied")
182                                .expect("Decoding a pair of surrogate cannot fail");
183                                output.push(c);
184                            },
185                            // Step 3. If item is a trailing surrogate, then return U+FFFD (�).
186                            None => output.push('\u{FFFD}'),
187                        },
188                        CodePointType::ScalarValue => unreachable!("Scalar Value won't fail"),
189                    }
190                },
191            }
192        }
193
194        output
195    }
196}
197
/// Classification of a single UTF-16 code unit.
enum CodePointType {
    /// Any code unit outside the surrogate range `0xD800..=0xDFFF`.
    ScalarValue,
    /// A code unit in `0xD800..=0xDBFF`.
    LeadingSurrogate,
    /// A code unit in `0xDC00..=0xDFFF`.
    TrailingSurrogate,
}

/// Classifies `value` as a leading surrogate, a trailing surrogate or a
/// scalar value.
fn code_point_type(value: u16) -> CodePointType {
    // Both surrogate ranges are 1024 code units wide and aligned to 1024, so
    // the upper six bits alone tell them apart.
    match value & 0xFC00 {
        0xD800 => CodePointType::LeadingSurrogate,
        0xDC00 => CodePointType::TrailingSurrogate,
        _ => CodePointType::ScalarValue,
    }
}
211
212/// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
213#[expect(unsafe_code)]
214pub(crate) fn encode_and_enqueue_a_chunk(
215    cx: SafeJSContext,
216    global: &GlobalScope,
217    chunk: SafeHandleValue,
218    encoder: &Encoder,
219    controller: &TransformStreamDefaultController,
220    can_gc: CanGc,
221) -> Fallible<()> {
222    // Step 1. Let input be the result of converting chunk to a DOMString.
223    // Step 2. Convert input to an I/O queue of code units.
224    rooted!(in(*cx) let mut rval = UndefinedValue());
225    jsval_to_primitive(cx, global, chunk, rval.handle_mut(), can_gc)?;
226
227    assert!(!rval.is_object());
228    rooted!(in(*cx) let jsstr = unsafe { ToString(*cx, rval.handle()) });
229    if jsstr.is_null() {
230        unsafe {
231            if !JS_IsExceptionPending(*cx) {
232                throw_dom_exception(
233                    cx,
234                    global,
235                    Error::Type("Cannot convert JS primitive to string".to_owned()),
236                    can_gc,
237                );
238            }
239        }
240
241        return Err(Error::JSFailed);
242    }
243
244    let input = unsafe {
245        if JS_DeprecatedStringHasLatin1Chars(*jsstr) {
246            let s = NonNull::new(*jsstr).expect("jsstr cannot be null");
247            ConvertedInput::String(latin1_to_string(*cx, s))
248        } else {
249            let mut len = 0;
250            let data = JS_GetTwoByteStringCharsAndLength(*cx, std::ptr::null(), *jsstr, &mut len);
251            let maybe_ill_formed_code_units = std::slice::from_raw_parts(data, len);
252            ConvertedInput::CodeUnits(maybe_ill_formed_code_units)
253        }
254    };
255
256    // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
257    // Step 4. While true:
258    // Step 4.1 Let item be the result of reading from input.
259    // Step 4.3 Let result be the result of executing the convert code unit
260    //      to scalar value algorithm with encoder, item and input.
261    // Step 4.4 If result is not continue, then process an item with result,
262    //      encoder’s encoder, input, output, and "fatal".
263    let output = encoder.encode(input);
264
265    // Step 4.2 If item is end-of-queue:
266    // Step 4.2.1 Convert output into a byte sequence.
267    let output = output.as_bytes();
268    // Step 4.2.2 If output is not empty:
269    if output.is_empty() {
270        // Step 4.2.3
271        return Ok(());
272    }
273
274    // Step 4.2.2.1 Let chunk be the result of creating a Uint8Array object
275    //      given output and encoder’s relevant realm.
276    rooted!(in(*cx) let mut js_object = ptr::null_mut::<JSObject>());
277    let chunk: Uint8Array = create_buffer_source(cx, output, js_object.handle_mut(), can_gc)
278        .map_err(|_| Error::Type("Cannot convert byte sequence to Uint8Array".to_owned()))?;
279    rooted!(in(*cx) let mut rval = UndefinedValue());
280    chunk.safe_to_jsval(cx, rval.handle_mut(), can_gc);
281    // Step 4.2.2.2 Enqueue chunk into encoder’s transform.
282    controller.enqueue(cx, global, rval.handle(), can_gc)?;
283    Ok(())
284}
285
286/// <https://encoding.spec.whatwg.org/#encode-and-flush>
287pub(crate) fn encode_and_flush(
288    cx: SafeJSContext,
289    global: &GlobalScope,
290    encoder: &Encoder,
291    controller: &TransformStreamDefaultController,
292    can_gc: CanGc,
293) -> Fallible<()> {
294    // Step 1. If encoder’s leading surrogate is non-null:
295    if encoder.leading_surrogate.get().is_some() {
296        // Step 1.1 Let chunk be the result of creating a Uint8Array object
297        //      given « 0xEF, 0xBF, 0xBD » and encoder’s relevant realm.
298        rooted!(in(*cx) let mut js_object = ptr::null_mut::<JSObject>());
299        let chunk: Uint8Array =
300            create_buffer_source(cx, &[0xEF_u8, 0xBF, 0xBD], js_object.handle_mut(), can_gc)
301                .map_err(|_| {
302                    Error::Type("Cannot convert byte sequence to Uint8Array".to_owned())
303                })?;
304        rooted!(in(*cx) let mut rval = UndefinedValue());
305        chunk.safe_to_jsval(cx, rval.handle_mut(), can_gc);
306        // Step 1.2 Enqueue chunk into encoder’s transform.
307        return controller.enqueue(cx, global, rval.handle(), can_gc);
308    }
309
310    Ok(())
311}
312
313/// <https://encoding.spec.whatwg.org/#textencoderstream>
314#[dom_struct]
315pub(crate) struct TextEncoderStream {
316    reflector_: Reflector,
317
318    /// <https://streams.spec.whatwg.org/#generictransformstream>
319    transform: Dom<TransformStream>,
320}
321
322impl TextEncoderStream {
323    fn new_inherited(transform: &TransformStream) -> TextEncoderStream {
324        Self {
325            reflector_: Reflector::new(),
326            transform: Dom::from_ref(transform),
327        }
328    }
329
330    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
331    fn new_with_proto(
332        cx: SafeJSContext,
333        global: &GlobalScope,
334        proto: Option<SafeHandleObject>,
335        can_gc: CanGc,
336    ) -> Fallible<DomRoot<TextEncoderStream>> {
337        // Step 1. Set this’s encoder to an instance of the UTF-8 encoder.
338        let encoder = Encoder::default();
339
340        // Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument
341        //      and runs the encode and enqueue a chunk algorithm with this and chunk.
342        // Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush
343        //      algorithm with this.
344        let transformer_type = TransformerType::Encoder(encoder);
345
346        // Step 4. Let transformStream be a new TransformStream.
347        let transform = TransformStream::new_with_proto(global, None, can_gc);
348        // Step 5. Set up transformStream with transformAlgorithm set to transformAlgorithm
349        //      and flushAlgorithm set to flushAlgorithm.
350        transform.set_up(cx, global, transformer_type, can_gc)?;
351
352        // Step 6. Set this’s transform to transformStream.
353        Ok(reflect_dom_object_with_proto(
354            Box::new(TextEncoderStream::new_inherited(&transform)),
355            global,
356            proto,
357            can_gc,
358        ))
359    }
360}
361
362#[allow(non_snake_case)]
363impl TextEncoderStreamMethods<DomTypeHolder> for TextEncoderStream {
364    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
365    fn Constructor(
366        global: &<DomTypeHolder as DomTypes>::GlobalScope,
367        proto: Option<SafeHandleObject>,
368        can_gc: CanGc,
369    ) -> Fallible<DomRoot<<DomTypeHolder as DomTypes>::TextEncoderStream>> {
370        TextEncoderStream::new_with_proto(GlobalScope::get_cx(), global, proto, can_gc)
371    }
372
373    /// <https://encoding.spec.whatwg.org/#dom-textencoder-encoding>
374    fn Encoding(&self) -> DOMString {
375        // Returns "utf-8".
376        DOMString::from("utf-8")
377    }
378
379    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-readable>
380    fn Readable(&self) -> DomRoot<<DomTypeHolder as DomTypes>::ReadableStream> {
381        self.transform.get_readable()
382    }
383
384    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-writable>
385    fn Writable(&self) -> DomRoot<<DomTypeHolder as DomTypes>::WritableStream> {
386        self.transform.get_writable()
387    }
388}