Skip to main content

script/dom/encoding/
textencoderstream.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::Cell;
6use std::num::{NonZero, NonZeroU16};
7use std::ptr::{self, NonNull};
8
9use dom_struct::dom_struct;
10use js::context::JSContext;
11use js::conversions::latin1_to_string;
12use js::jsapi::{JS_DeprecatedStringHasLatin1Chars, JSObject, JSType};
13use js::jsval::UndefinedValue;
14use js::rust::wrappers2::{JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending, ToPrimitive};
15use js::rust::{
16    HandleObject as SafeHandleObject, HandleValue as SafeHandleValue,
17    MutableHandleValue as SafeMutableHandleValue, ToString,
18};
19use js::typedarray::Uint8;
20use script_bindings::conversions::SafeToJSValConvertible;
21use script_bindings::reflector::{Reflector, reflect_dom_object_with_proto_and_cx};
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::TextEncoderStreamBinding::TextEncoderStreamMethods;
25use crate::dom::bindings::error::{Error, Fallible, throw_dom_exception};
26use crate::dom::bindings::root::{Dom, DomRoot};
27use crate::dom::bindings::str::DOMString;
28use crate::dom::stream::readablestream::ReadableStream;
29use crate::dom::stream::transformstreamdefaultcontroller::TransformerType;
30use crate::dom::stream::writablestream::WritableStream;
31use crate::dom::types::{GlobalScope, TransformStream, TransformStreamDefaultController};
32use crate::script_runtime::CanGc;
33
/// String converted from an input JS Value
///
/// The two variants mirror the two ways the chunk's JS string is read in
/// `encode_and_enqueue_a_chunk`: either copied into an owned Rust string or
/// borrowed as raw UTF-16 code units.
enum ConvertedInput<'a> {
    /// Owned Rust string, built when the JS string reports Latin-1 storage.
    /// A Rust `String` is valid UTF-8 and therefore never holds a surrogate.
    String(String),
    /// Borrowed UTF-16 code units of a two-byte JS string. The data may be
    /// ill-formed, i.e. contain unpaired surrogates.
    CodeUnits(&'a [u16]),
}
39
40/// Converts a JS value to primitive type so that it can be used with
41/// `ToString`.
42///
43/// Set `rval` to `chunk` if `chunk` is a primitive JS value. Otherwise, convert
44/// `chunk` into a primitive JS value and then set `rval` to the converted
45/// primitive. This follows the `ToString` procedure with the exception that it
46/// does not convert the value to string.
47///
48/// See below for the `ToString` procedure in spec:
49/// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tostring>
50#[expect(unsafe_code)]
51fn jsval_to_primitive(
52    cx: &mut JSContext,
53    global: &GlobalScope,
54    chunk: SafeHandleValue,
55    mut rval: SafeMutableHandleValue,
56) -> Fallible<()> {
57    // Step 1. If argument is a String, return argument.
58    // Step 2. If argument is a Symbol, throw a TypeError exception.
59    // Step 3. If argument is undefined, return "undefined".
60    // Step 4. If argument is null, return "null".
61    // Step 5. If argument is true, return "true".
62    // Step 6. If argument is false, return "false".
63    // Step 7. If argument is a Number, return Number::toString(argument, 10).
64    // Step 8. If argument is a BigInt, return BigInt::toString(argument, 10).
65    if chunk.is_primitive() {
66        rval.set(chunk.get());
67
68        return Ok(());
69    }
70
71    // Step 9. Assert: argument is an Object.
72    assert!(chunk.is_object());
73
74    // Step 10. Let primValue be ? ToPrimitive(argument, string).
75    rooted!(&in(cx) let obj = chunk.to_object());
76    let is_success = unsafe { ToPrimitive(cx, obj.handle(), JSType::JSTYPE_STRING, rval) };
77    log::debug!("ToPrimitive is_success={:?}", is_success);
78    if !is_success {
79        unsafe {
80            if !JS_IsExceptionPending(cx) {
81                throw_dom_exception(
82                    cx.into(),
83                    global,
84                    Error::Type(c"Cannot convert JSObject to primitive".to_owned()),
85                    CanGc::from_cx(cx),
86                );
87            }
88        }
89        return Err(Error::JSFailed);
90    }
91
92    Ok(())
93}
94
95/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
96#[derive(Default, JSTraceable, MallocSizeOf)]
97pub(crate) struct Encoder {
98    /// <https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate>
99    leading_surrogate: Cell<Option<NonZeroU16>>,
100}
101
102impl Encoder {
103    fn encode(&self, maybe_ill_formed: ConvertedInput<'_>) -> String {
104        match maybe_ill_formed {
105            ConvertedInput::String(s) => {
106                // Rust String is already UTF-8 encoded and cannot contain
107                // surrogate
108                if !s.is_empty() && self.leading_surrogate.take().is_some() {
109                    let mut output = String::with_capacity(1 + s.len());
110                    output.push('\u{FFFD}');
111                    output.push_str(&s);
112                    return output;
113                }
114
115                s
116            },
117            ConvertedInput::CodeUnits(code_units) => self.encode_from_code_units(code_units),
118        }
119    }
120
121    /// Encode an input slice of code unit into unicode scalar values
122    fn encode_from_code_units(&self, input: &[u16]) -> String {
123        // <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
124        //
125        // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
126        let mut output = String::with_capacity(input.len());
127        // Step 4. While true:
128        // Step 4.1 Let item be the result of reading from input.
129        for result in char::decode_utf16(input.iter().cloned()) {
130            // Step 4.3 Let result be the result of executing the convert code unit
131            //      to scalar value algorithm with encoder, item and input.
132
133            // <https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value>
134            match result {
135                Ok(c) => {
136                    // Step 1. If encoder’s leading surrogate is non-null:
137                    // Step 1.1 Let leadingSurrogate be encoder’s leading surrogate.
138                    // Step 1.2 Set encoder’s leading surrogate to null.
139                    if self.leading_surrogate.take().is_some() {
140                        // Step 1.5 Return U+FFFD (�).
141                        output.push('\u{FFFD}');
142                    }
143
144                    // Step 1.4 Restore item to input.
145                    // Note: pushing item to output is equivalent to restoring item to input
146                    //      and rerun the convert-code-unit-to-scalar-value algo
147                    output.push(c);
148                },
149                Err(error) => {
150                    let unpaired_surrogate = error.unpaired_surrogate();
151                    match code_point_type(unpaired_surrogate) {
152                        CodePointType::LeadingSurrogate => {
153                            // Step 1.1 If encoder’s leading surrogate is non-null:
154                            // Step 1.2 Set encoder’s leading surrogate to null.
155                            if self.leading_surrogate.take().is_some() {
156                                output.push('\u{FFFD}');
157                            }
158
159                            // Step 1.4 Restore item to input.
160                            // Note: Replacing encoder's leading_surrogate is equivalent
161                            //      to restore item back to input and rerun the convert-
162                            //      code-unit-to-scalar-value algo.
163                            // Step 2. If item is a leading surrogate, then set encoder’s
164                            //      leading surrogate to item and return continue.
165                            self.leading_surrogate
166                                .replace(NonZero::new(unpaired_surrogate));
167                        },
168                        CodePointType::TrailingSurrogate => match self.leading_surrogate.take() {
169                            // Step 1.1 If encoder’s leading surrogate is non-null:
170                            // Step 1.2 Set encoder’s leading surrogate to null.
171                            Some(leading_surrogate) => {
172                                // Step 1.3 If item is a trailing surrogate, then return a scalar
173                                //      value from surrogates given leadingSurrogate and item.
174                                let c = char::decode_utf16([
175                                    leading_surrogate.get(),
176                                    unpaired_surrogate,
177                                ])
178                                .next()
179                                .expect("A pair of surrogate is supplied")
180                                .expect("Decoding a pair of surrogate cannot fail");
181                                output.push(c);
182                            },
183                            // Step 3. If item is a trailing surrogate, then return U+FFFD (�).
184                            None => output.push('\u{FFFD}'),
185                        },
186                        CodePointType::ScalarValue => unreachable!("Scalar Value won't fail"),
187                    }
188                },
189            }
190        }
191
192        output
193    }
194}
195
/// Classification of a single UTF-16 code unit.
enum CodePointType {
    /// A code unit outside both surrogate ranges.
    ScalarValue,
    /// A code unit in `0xD800..=0xDBFF`.
    LeadingSurrogate,
    /// A code unit in `0xDC00..=0xDFFF`.
    TrailingSurrogate,
}

/// Tells whether `value` is a leading surrogate, a trailing surrogate, or an
/// ordinary (non-surrogate) code unit.
fn code_point_type(value: u16) -> CodePointType {
    if (0xD800..=0xDBFF).contains(&value) {
        CodePointType::LeadingSurrogate
    } else if (0xDC00..=0xDFFF).contains(&value) {
        CodePointType::TrailingSurrogate
    } else {
        CodePointType::ScalarValue
    }
}
209
210/// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
211#[expect(unsafe_code)]
212pub(crate) fn encode_and_enqueue_a_chunk(
213    cx: &mut JSContext,
214    global: &GlobalScope,
215    chunk: SafeHandleValue,
216    encoder: &Encoder,
217    controller: &TransformStreamDefaultController,
218) -> Fallible<()> {
219    // Step 1. Let input be the result of converting chunk to a DOMString.
220    // Step 2. Convert input to an I/O queue of code units.
221    rooted!(&in(cx) let mut rval = UndefinedValue());
222    jsval_to_primitive(cx, global, chunk, rval.handle_mut())?;
223
224    assert!(!rval.is_object());
225    rooted!(&in(cx) let jsstr = unsafe { ToString(cx, rval.handle()) });
226    if jsstr.is_null() {
227        unsafe {
228            if !JS_IsExceptionPending(cx) {
229                throw_dom_exception(
230                    cx.into(),
231                    global,
232                    Error::Type(c"Cannot convert JS primitive to string".to_owned()),
233                    CanGc::from_cx(cx),
234                );
235            }
236        }
237
238        return Err(Error::JSFailed);
239    }
240
241    let input = unsafe {
242        if JS_DeprecatedStringHasLatin1Chars(*jsstr) {
243            let s = NonNull::new(*jsstr).expect("jsstr cannot be null");
244            ConvertedInput::String(latin1_to_string(cx, s))
245        } else {
246            let mut len = 0;
247            let data = JS_GetTwoByteStringCharsAndLength(cx, *jsstr, &mut len);
248            let maybe_ill_formed_code_units = std::slice::from_raw_parts(data, len);
249            ConvertedInput::CodeUnits(maybe_ill_formed_code_units)
250        }
251    };
252
253    // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
254    // Step 4. While true:
255    // Step 4.1 Let item be the result of reading from input.
256    // Step 4.3 Let result be the result of executing the convert code unit
257    //      to scalar value algorithm with encoder, item and input.
258    // Step 4.4 If result is not continue, then process an item with result,
259    //      encoder’s encoder, input, output, and "fatal".
260    let output = encoder.encode(input);
261
262    // Step 4.2 If item is end-of-queue:
263    // Step 4.2.1 Convert output into a byte sequence.
264    let output = output.as_bytes();
265    // Step 4.2.2 If output is not empty:
266    if output.is_empty() {
267        // Step 4.2.3
268        return Ok(());
269    }
270
271    // Step 4.2.2.1 Let chunk be the result of creating a Uint8Array object
272    //      given output and encoder’s relevant realm.
273    rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
274    let chunk = create_buffer_source::<Uint8>(
275        cx.into(),
276        output,
277        js_object.handle_mut(),
278        CanGc::from_cx(cx),
279    )
280    .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
281    rooted!(&in(cx) let mut rval = UndefinedValue());
282    chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
283    // Step 4.2.2.2 Enqueue chunk into encoder’s transform.
284    controller.enqueue(cx, global, rval.handle())?;
285    Ok(())
286}
287
288/// <https://encoding.spec.whatwg.org/#encode-and-flush>
289pub(crate) fn encode_and_flush(
290    cx: &mut JSContext,
291    global: &GlobalScope,
292    encoder: &Encoder,
293    controller: &TransformStreamDefaultController,
294) -> Fallible<()> {
295    // Step 1. If encoder’s leading surrogate is non-null:
296    if encoder.leading_surrogate.get().is_some() {
297        // Step 1.1 Let chunk be the result of creating a Uint8Array object
298        //      given « 0xEF, 0xBF, 0xBD » and encoder’s relevant realm.
299        rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
300        let chunk = create_buffer_source::<Uint8>(
301            cx.into(),
302            &[0xEF_u8, 0xBF, 0xBD],
303            js_object.handle_mut(),
304            CanGc::from_cx(cx),
305        )
306        .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
307        rooted!(&in(cx) let mut rval = UndefinedValue());
308        chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
309        // Step 1.2 Enqueue chunk into encoder’s transform.
310        return controller.enqueue(cx, global, rval.handle());
311    }
312
313    Ok(())
314}
315
316/// <https://encoding.spec.whatwg.org/#textencoderstream>
317#[dom_struct]
318pub(crate) struct TextEncoderStream {
319    reflector_: Reflector,
320
321    /// <https://streams.spec.whatwg.org/#generictransformstream>
322    transform: Dom<TransformStream>,
323}
324
325impl TextEncoderStream {
326    fn new_inherited(transform: &TransformStream) -> TextEncoderStream {
327        Self {
328            reflector_: Reflector::new(),
329            transform: Dom::from_ref(transform),
330        }
331    }
332
333    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
334    fn new_with_proto(
335        cx: &mut JSContext,
336        global: &GlobalScope,
337        proto: Option<SafeHandleObject>,
338    ) -> Fallible<DomRoot<TextEncoderStream>> {
339        // Step 1. Set this’s encoder to an instance of the UTF-8 encoder.
340        let encoder = Encoder::default();
341
342        // Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument
343        //      and runs the encode and enqueue a chunk algorithm with this and chunk.
344        // Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush
345        //      algorithm with this.
346        let transformer_type = TransformerType::Encoder(encoder);
347
348        // Step 4. Let transformStream be a new TransformStream.
349        let transform = TransformStream::new_with_proto(global, None, CanGc::from_cx(cx));
350        // Step 5. Set up transformStream with transformAlgorithm set to transformAlgorithm
351        //      and flushAlgorithm set to flushAlgorithm.
352        transform.set_up(cx, global, transformer_type)?;
353
354        // Step 6. Set this’s transform to transformStream.
355        Ok(reflect_dom_object_with_proto_and_cx(
356            Box::new(TextEncoderStream::new_inherited(&transform)),
357            global,
358            proto,
359            cx,
360        ))
361    }
362}
363
364impl TextEncoderStreamMethods<crate::DomTypeHolder> for TextEncoderStream {
365    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
366    fn Constructor(
367        cx: &mut JSContext,
368        global: &GlobalScope,
369        proto: Option<SafeHandleObject>,
370    ) -> Fallible<DomRoot<TextEncoderStream>> {
371        TextEncoderStream::new_with_proto(cx, global, proto)
372    }
373
374    /// <https://encoding.spec.whatwg.org/#dom-textencoder-encoding>
375    fn Encoding(&self) -> DOMString {
376        // Returns "utf-8".
377        DOMString::from("utf-8")
378    }
379
380    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-readable>
381    fn Readable(&self) -> DomRoot<ReadableStream> {
382        self.transform.get_readable()
383    }
384
385    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-writable>
386    fn Writable(&self) -> DomRoot<WritableStream> {
387        self.transform.get_writable()
388    }
389}