script/dom/encoding/
textencoderstream.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::Cell;
6use std::num::{NonZero, NonZeroU16};
7use std::ptr::{self, NonNull};
8
9use dom_struct::dom_struct;
10use js::conversions::latin1_to_string;
11use js::jsapi::{
12    JS_DeprecatedStringHasLatin1Chars, JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending,
13    JSObject, JSType, ToPrimitive,
14};
15use js::jsval::UndefinedValue;
16use js::rust::{
17    HandleObject as SafeHandleObject, HandleValue as SafeHandleValue,
18    MutableHandleValue as SafeMutableHandleValue, ToString,
19};
20use js::typedarray::Uint8;
21use script_bindings::conversions::SafeToJSValConvertible;
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::TextEncoderStreamBinding::TextEncoderStreamMethods;
25use crate::dom::bindings::error::{Error, Fallible, throw_dom_exception};
26use crate::dom::bindings::reflector::{Reflector, reflect_dom_object_with_proto_and_cx};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::bindings::str::DOMString;
29use crate::dom::stream::readablestream::ReadableStream;
30use crate::dom::stream::transformstreamdefaultcontroller::TransformerType;
31use crate::dom::stream::writablestream::WritableStream;
32use crate::dom::types::{GlobalScope, TransformStream, TransformStreamDefaultController};
33use crate::script_runtime::{CanGc, JSContext as SafeJSContext};
34
/// Text obtained from a JS value, in whichever representation was cheapest
/// to produce from the underlying JS string.
enum ConvertedInput<'a> {
    /// An owned Rust string. Being valid UTF-8, it can never contain a
    /// surrogate code point.
    String(String),
    /// Borrowed UTF-16 code units, which may be ill-formed (i.e. may contain
    /// unpaired surrogates).
    CodeUnits(&'a [u16]),
}
40
41/// Converts a JS value to primitive type so that it can be used with
42/// `ToString`.
43///
44/// Set `rval` to `chunk` if `chunk` is a primitive JS value. Otherwise, convert
45/// `chunk` into a primitive JS value and then set `rval` to the converted
46/// primitive. This follows the `ToString` procedure with the exception that it
47/// does not convert the value to string.
48///
49/// See below for the `ToString` procedure in spec:
50/// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tostring>
51#[expect(unsafe_code)]
52fn jsval_to_primitive(
53    cx: SafeJSContext,
54    global: &GlobalScope,
55    chunk: SafeHandleValue,
56    mut rval: SafeMutableHandleValue,
57    can_gc: CanGc,
58) -> Fallible<()> {
59    // Step 1. If argument is a String, return argument.
60    // Step 2. If argument is a Symbol, throw a TypeError exception.
61    // Step 3. If argument is undefined, return "undefined".
62    // Step 4. If argument is null, return "null".
63    // Step 5. If argument is true, return "true".
64    // Step 6. If argument is false, return "false".
65    // Step 7. If argument is a Number, return Number::toString(argument, 10).
66    // Step 8. If argument is a BigInt, return BigInt::toString(argument, 10).
67    if chunk.is_primitive() {
68        rval.set(chunk.get());
69
70        return Ok(());
71    }
72
73    // Step 9. Assert: argument is an Object.
74    assert!(chunk.is_object());
75
76    // Step 10. Let primValue be ? ToPrimitive(argument, string).
77    rooted!(in(*cx) let obj = chunk.to_object());
78    let is_success =
79        unsafe { ToPrimitive(*cx, obj.handle().into(), JSType::JSTYPE_STRING, rval.into()) };
80    log::debug!("ToPrimitive is_success={:?}", is_success);
81    if !is_success {
82        unsafe {
83            if !JS_IsExceptionPending(*cx) {
84                throw_dom_exception(
85                    cx,
86                    global,
87                    Error::Type(c"Cannot convert JSObject to primitive".to_owned()),
88                    can_gc,
89                );
90            }
91        }
92        return Err(Error::JSFailed);
93    }
94
95    Ok(())
96}
97
98/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
99#[derive(Default, JSTraceable, MallocSizeOf)]
100pub(crate) struct Encoder {
101    /// <https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate>
102    leading_surrogate: Cell<Option<NonZeroU16>>,
103}
104
105impl Encoder {
106    fn encode(&self, maybe_ill_formed: ConvertedInput<'_>) -> String {
107        match maybe_ill_formed {
108            ConvertedInput::String(s) => {
109                // Rust String is already UTF-8 encoded and cannot contain
110                // surrogate
111                if !s.is_empty() && self.leading_surrogate.take().is_some() {
112                    let mut output = String::with_capacity(1 + s.len());
113                    output.push('\u{FFFD}');
114                    output.push_str(&s);
115                    return output;
116                }
117
118                s
119            },
120            ConvertedInput::CodeUnits(code_units) => self.encode_from_code_units(code_units),
121        }
122    }
123
124    /// Encode an input slice of code unit into unicode scalar values
125    fn encode_from_code_units(&self, input: &[u16]) -> String {
126        // <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
127        //
128        // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
129        let mut output = String::with_capacity(input.len());
130        // Step 4. While true:
131        // Step 4.1 Let item be the result of reading from input.
132        for result in char::decode_utf16(input.iter().cloned()) {
133            // Step 4.3 Let result be the result of executing the convert code unit
134            //      to scalar value algorithm with encoder, item and input.
135
136            // <https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value>
137            match result {
138                Ok(c) => {
139                    // Step 1. If encoder’s leading surrogate is non-null:
140                    // Step 1.1 Let leadingSurrogate be encoder’s leading surrogate.
141                    // Step 1.2 Set encoder’s leading surrogate to null.
142                    if self.leading_surrogate.take().is_some() {
143                        // Step 1.5 Return U+FFFD (�).
144                        output.push('\u{FFFD}');
145                    }
146
147                    // Step 1.4 Restore item to input.
148                    // Note: pushing item to output is equivalent to restoring item to input
149                    //      and rerun the convert-code-unit-to-scalar-value algo
150                    output.push(c);
151                },
152                Err(error) => {
153                    let unpaired_surrogate = error.unpaired_surrogate();
154                    match code_point_type(unpaired_surrogate) {
155                        CodePointType::LeadingSurrogate => {
156                            // Step 1.1 If encoder’s leading surrogate is non-null:
157                            // Step 1.2 Set encoder’s leading surrogate to null.
158                            if self.leading_surrogate.take().is_some() {
159                                output.push('\u{FFFD}');
160                            }
161
162                            // Step 1.4 Restore item to input.
163                            // Note: Replacing encoder's leading_surrogate is equivalent
164                            //      to restore item back to input and rerun the convert-
165                            //      code-unit-to-scalar-value algo.
166                            // Step 2. If item is a leading surrogate, then set encoder’s
167                            //      leading surrogate to item and return continue.
168                            self.leading_surrogate
169                                .replace(NonZero::new(unpaired_surrogate));
170                        },
171                        CodePointType::TrailingSurrogate => match self.leading_surrogate.take() {
172                            // Step 1.1 If encoder’s leading surrogate is non-null:
173                            // Step 1.2 Set encoder’s leading surrogate to null.
174                            Some(leading_surrogate) => {
175                                // Step 1.3 If item is a trailing surrogate, then return a scalar
176                                //      value from surrogates given leadingSurrogate and item.
177                                let c = char::decode_utf16([
178                                    leading_surrogate.get(),
179                                    unpaired_surrogate,
180                                ])
181                                .next()
182                                .expect("A pair of surrogate is supplied")
183                                .expect("Decoding a pair of surrogate cannot fail");
184                                output.push(c);
185                            },
186                            // Step 3. If item is a trailing surrogate, then return U+FFFD (�).
187                            None => output.push('\u{FFFD}'),
188                        },
189                        CodePointType::ScalarValue => unreachable!("Scalar Value won't fail"),
190                    }
191                },
192            }
193        }
194
195        output
196    }
197}
198
/// Classification of a single UTF-16 code unit.
enum CodePointType {
    /// Not a surrogate: the code unit is a scalar value on its own.
    ScalarValue,
    /// First half of a surrogate pair.
    LeadingSurrogate,
    /// Second half of a surrogate pair.
    TrailingSurrogate,
}
204
205fn code_point_type(value: u16) -> CodePointType {
206    match value {
207        0xD800..=0xDBFF => CodePointType::LeadingSurrogate,
208        0xDC00..=0xDFFF => CodePointType::TrailingSurrogate,
209        _ => CodePointType::ScalarValue,
210    }
211}
212
213/// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
214#[expect(unsafe_code)]
215pub(crate) fn encode_and_enqueue_a_chunk(
216    cx: &mut js::context::JSContext,
217    global: &GlobalScope,
218    chunk: SafeHandleValue,
219    encoder: &Encoder,
220    controller: &TransformStreamDefaultController,
221) -> Fallible<()> {
222    // Step 1. Let input be the result of converting chunk to a DOMString.
223    // Step 2. Convert input to an I/O queue of code units.
224    rooted!(&in(cx) let mut rval = UndefinedValue());
225    jsval_to_primitive(
226        cx.into(),
227        global,
228        chunk,
229        rval.handle_mut(),
230        CanGc::from_cx(cx),
231    )?;
232
233    assert!(!rval.is_object());
234    rooted!(&in(cx) let jsstr = unsafe { ToString(cx.raw_cx(), rval.handle()) });
235    if jsstr.is_null() {
236        unsafe {
237            if !JS_IsExceptionPending(cx.raw_cx()) {
238                throw_dom_exception(
239                    cx.into(),
240                    global,
241                    Error::Type(c"Cannot convert JS primitive to string".to_owned()),
242                    CanGc::from_cx(cx),
243                );
244            }
245        }
246
247        return Err(Error::JSFailed);
248    }
249
250    let input = unsafe {
251        if JS_DeprecatedStringHasLatin1Chars(*jsstr) {
252            let s = NonNull::new(*jsstr).expect("jsstr cannot be null");
253            ConvertedInput::String(latin1_to_string(cx.raw_cx(), s))
254        } else {
255            let mut len = 0;
256            let data =
257                JS_GetTwoByteStringCharsAndLength(cx.raw_cx(), std::ptr::null(), *jsstr, &mut len);
258            let maybe_ill_formed_code_units = std::slice::from_raw_parts(data, len);
259            ConvertedInput::CodeUnits(maybe_ill_formed_code_units)
260        }
261    };
262
263    // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
264    // Step 4. While true:
265    // Step 4.1 Let item be the result of reading from input.
266    // Step 4.3 Let result be the result of executing the convert code unit
267    //      to scalar value algorithm with encoder, item and input.
268    // Step 4.4 If result is not continue, then process an item with result,
269    //      encoder’s encoder, input, output, and "fatal".
270    let output = encoder.encode(input);
271
272    // Step 4.2 If item is end-of-queue:
273    // Step 4.2.1 Convert output into a byte sequence.
274    let output = output.as_bytes();
275    // Step 4.2.2 If output is not empty:
276    if output.is_empty() {
277        // Step 4.2.3
278        return Ok(());
279    }
280
281    // Step 4.2.2.1 Let chunk be the result of creating a Uint8Array object
282    //      given output and encoder’s relevant realm.
283    rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
284    let chunk = create_buffer_source::<Uint8>(
285        cx.into(),
286        output,
287        js_object.handle_mut(),
288        CanGc::from_cx(cx),
289    )
290    .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
291    rooted!(&in(cx) let mut rval = UndefinedValue());
292    chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
293    // Step 4.2.2.2 Enqueue chunk into encoder’s transform.
294    controller.enqueue(cx, global, rval.handle())?;
295    Ok(())
296}
297
298/// <https://encoding.spec.whatwg.org/#encode-and-flush>
299pub(crate) fn encode_and_flush(
300    cx: &mut js::context::JSContext,
301    global: &GlobalScope,
302    encoder: &Encoder,
303    controller: &TransformStreamDefaultController,
304) -> Fallible<()> {
305    // Step 1. If encoder’s leading surrogate is non-null:
306    if encoder.leading_surrogate.get().is_some() {
307        // Step 1.1 Let chunk be the result of creating a Uint8Array object
308        //      given « 0xEF, 0xBF, 0xBD » and encoder’s relevant realm.
309        rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
310        let chunk = create_buffer_source::<Uint8>(
311            cx.into(),
312            &[0xEF_u8, 0xBF, 0xBD],
313            js_object.handle_mut(),
314            CanGc::from_cx(cx),
315        )
316        .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
317        rooted!(&in(cx) let mut rval = UndefinedValue());
318        chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
319        // Step 1.2 Enqueue chunk into encoder’s transform.
320        return controller.enqueue(cx, global, rval.handle());
321    }
322
323    Ok(())
324}
325
326/// <https://encoding.spec.whatwg.org/#textencoderstream>
327#[dom_struct]
328pub(crate) struct TextEncoderStream {
329    reflector_: Reflector,
330
331    /// <https://streams.spec.whatwg.org/#generictransformstream>
332    transform: Dom<TransformStream>,
333}
334
335impl TextEncoderStream {
336    fn new_inherited(transform: &TransformStream) -> TextEncoderStream {
337        Self {
338            reflector_: Reflector::new(),
339            transform: Dom::from_ref(transform),
340        }
341    }
342
343    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
344    fn new_with_proto(
345        cx: &mut js::context::JSContext,
346        global: &GlobalScope,
347        proto: Option<SafeHandleObject>,
348    ) -> Fallible<DomRoot<TextEncoderStream>> {
349        // Step 1. Set this’s encoder to an instance of the UTF-8 encoder.
350        let encoder = Encoder::default();
351
352        // Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument
353        //      and runs the encode and enqueue a chunk algorithm with this and chunk.
354        // Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush
355        //      algorithm with this.
356        let transformer_type = TransformerType::Encoder(encoder);
357
358        // Step 4. Let transformStream be a new TransformStream.
359        let transform = TransformStream::new_with_proto(global, None, CanGc::from_cx(cx));
360        // Step 5. Set up transformStream with transformAlgorithm set to transformAlgorithm
361        //      and flushAlgorithm set to flushAlgorithm.
362        transform.set_up(cx, global, transformer_type)?;
363
364        // Step 6. Set this’s transform to transformStream.
365        Ok(reflect_dom_object_with_proto_and_cx(
366            Box::new(TextEncoderStream::new_inherited(&transform)),
367            global,
368            proto,
369            cx,
370        ))
371    }
372}
373
374impl TextEncoderStreamMethods<crate::DomTypeHolder> for TextEncoderStream {
375    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
376    fn Constructor(
377        cx: &mut js::context::JSContext,
378        global: &GlobalScope,
379        proto: Option<SafeHandleObject>,
380    ) -> Fallible<DomRoot<TextEncoderStream>> {
381        TextEncoderStream::new_with_proto(cx, global, proto)
382    }
383
384    /// <https://encoding.spec.whatwg.org/#dom-textencoder-encoding>
385    fn Encoding(&self) -> DOMString {
386        // Returns "utf-8".
387        DOMString::from("utf-8")
388    }
389
390    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-readable>
391    fn Readable(&self) -> DomRoot<ReadableStream> {
392        self.transform.get_readable()
393    }
394
395    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-writable>
396    fn Writable(&self) -> DomRoot<WritableStream> {
397        self.transform.get_writable()
398    }
399}