Skip to main content

script/dom/encoding/
textencoderstream.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::Cell;
6use std::num::{NonZero, NonZeroU16};
7use std::ptr::{self, NonNull};
8
9use dom_struct::dom_struct;
10use js::context::JSContext;
11use js::conversions::latin1_to_string;
12use js::jsapi::{JS_DeprecatedStringHasLatin1Chars, JSObject, JSType};
13use js::jsval::UndefinedValue;
14use js::rust::wrappers2::{JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending, ToPrimitive};
15use js::rust::{
16    HandleObject as SafeHandleObject, HandleValue as SafeHandleValue,
17    MutableHandleValue as SafeMutableHandleValue, ToString,
18};
19use js::typedarray::Uint8;
20use script_bindings::conversions::SafeToJSValConvertible;
21use script_bindings::reflector::{Reflector, reflect_dom_object_with_proto_and_cx};
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::TextEncoderStreamBinding::TextEncoderStreamMethods;
25use crate::dom::bindings::error::{Error, Fallible, throw_dom_exception};
26use crate::dom::bindings::root::{Dom, DomRoot};
27use crate::dom::bindings::str::DOMString;
28use crate::dom::stream::readablestream::ReadableStream;
29use crate::dom::stream::transformstreamdefaultcontroller::TransformerType;
30use crate::dom::stream::writablestream::WritableStream;
31use crate::dom::types::{GlobalScope, TransformStream, TransformStreamDefaultController};
32
/// String converted from an input JS Value.
///
/// Carries the text in whichever representation it was read from the JS
/// string, so that [`Encoder::encode`] can pick the matching encoding path.
enum ConvertedInput<'a> {
    /// Owned Rust string. Being a `String`, it is already well-formed UTF-8
    /// and cannot contain surrogates.
    String(String),
    /// Borrowed UTF-16 code units. These may be ill-formed, i.e. they may
    /// contain unpaired surrogates.
    CodeUnits(&'a [u16]),
}
38
39/// Converts a JS value to primitive type so that it can be used with
40/// `ToString`.
41///
42/// Set `rval` to `chunk` if `chunk` is a primitive JS value. Otherwise, convert
43/// `chunk` into a primitive JS value and then set `rval` to the converted
44/// primitive. This follows the `ToString` procedure with the exception that it
45/// does not convert the value to string.
46///
47/// See below for the `ToString` procedure in spec:
48/// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tostring>
49#[expect(unsafe_code)]
50fn jsval_to_primitive(
51    cx: &mut JSContext,
52    global: &GlobalScope,
53    chunk: SafeHandleValue,
54    mut rval: SafeMutableHandleValue,
55) -> Fallible<()> {
56    // Step 1. If argument is a String, return argument.
57    // Step 2. If argument is a Symbol, throw a TypeError exception.
58    // Step 3. If argument is undefined, return "undefined".
59    // Step 4. If argument is null, return "null".
60    // Step 5. If argument is true, return "true".
61    // Step 6. If argument is false, return "false".
62    // Step 7. If argument is a Number, return Number::toString(argument, 10).
63    // Step 8. If argument is a BigInt, return BigInt::toString(argument, 10).
64    if chunk.is_primitive() {
65        rval.set(chunk.get());
66
67        return Ok(());
68    }
69
70    // Step 9. Assert: argument is an Object.
71    assert!(chunk.is_object());
72
73    // Step 10. Let primValue be ? ToPrimitive(argument, string).
74    rooted!(&in(cx) let obj = chunk.to_object());
75    let is_success = unsafe { ToPrimitive(cx, obj.handle(), JSType::JSTYPE_STRING, rval) };
76    log::debug!("ToPrimitive is_success={:?}", is_success);
77    if !is_success {
78        unsafe {
79            if !JS_IsExceptionPending(cx) {
80                throw_dom_exception(
81                    cx,
82                    global,
83                    Error::Type(c"Cannot convert JSObject to primitive".to_owned()),
84                );
85            }
86        }
87        return Err(Error::JSFailed);
88    }
89
90    Ok(())
91}
92
/// Encoder state that a `TextEncoderStream` carries from one chunk to the
/// next.
///
/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
#[derive(Default, JSTraceable, MallocSizeOf)]
pub(crate) struct Encoder {
    /// Leading surrogate read from earlier input that has not been paired
    /// with a trailing surrogate yet; `None` when nothing is pending.
    ///
    /// <https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate>
    leading_surrogate: Cell<Option<NonZeroU16>>,
}
99
100impl Encoder {
101    fn encode(&self, maybe_ill_formed: ConvertedInput<'_>) -> String {
102        match maybe_ill_formed {
103            ConvertedInput::String(s) => {
104                // Rust String is already UTF-8 encoded and cannot contain
105                // surrogate
106                if !s.is_empty() && self.leading_surrogate.take().is_some() {
107                    let mut output = String::with_capacity(1 + s.len());
108                    output.push('\u{FFFD}');
109                    output.push_str(&s);
110                    return output;
111                }
112
113                s
114            },
115            ConvertedInput::CodeUnits(code_units) => self.encode_from_code_units(code_units),
116        }
117    }
118
119    /// Encode an input slice of code unit into unicode scalar values
120    fn encode_from_code_units(&self, input: &[u16]) -> String {
121        // <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
122        //
123        // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
124        let mut output = String::with_capacity(input.len());
125        // Step 4. While true:
126        // Step 4.1 Let item be the result of reading from input.
127        for result in char::decode_utf16(input.iter().cloned()) {
128            // Step 4.3 Let result be the result of executing the convert code unit
129            //      to scalar value algorithm with encoder, item and input.
130
131            // <https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value>
132            match result {
133                Ok(c) => {
134                    // Step 1. If encoder’s leading surrogate is non-null:
135                    // Step 1.1 Let leadingSurrogate be encoder’s leading surrogate.
136                    // Step 1.2 Set encoder’s leading surrogate to null.
137                    if self.leading_surrogate.take().is_some() {
138                        // Step 1.5 Return U+FFFD (�).
139                        output.push('\u{FFFD}');
140                    }
141
142                    // Step 1.4 Restore item to input.
143                    // Note: pushing item to output is equivalent to restoring item to input
144                    //      and rerun the convert-code-unit-to-scalar-value algo
145                    output.push(c);
146                },
147                Err(error) => {
148                    let unpaired_surrogate = error.unpaired_surrogate();
149                    match code_point_type(unpaired_surrogate) {
150                        CodePointType::LeadingSurrogate => {
151                            // Step 1.1 If encoder’s leading surrogate is non-null:
152                            // Step 1.2 Set encoder’s leading surrogate to null.
153                            if self.leading_surrogate.take().is_some() {
154                                output.push('\u{FFFD}');
155                            }
156
157                            // Step 1.4 Restore item to input.
158                            // Note: Replacing encoder's leading_surrogate is equivalent
159                            //      to restore item back to input and rerun the convert-
160                            //      code-unit-to-scalar-value algo.
161                            // Step 2. If item is a leading surrogate, then set encoder’s
162                            //      leading surrogate to item and return continue.
163                            self.leading_surrogate
164                                .replace(NonZero::new(unpaired_surrogate));
165                        },
166                        CodePointType::TrailingSurrogate => match self.leading_surrogate.take() {
167                            // Step 1.1 If encoder’s leading surrogate is non-null:
168                            // Step 1.2 Set encoder’s leading surrogate to null.
169                            Some(leading_surrogate) => {
170                                // Step 1.3 If item is a trailing surrogate, then return a scalar
171                                //      value from surrogates given leadingSurrogate and item.
172                                let c = char::decode_utf16([
173                                    leading_surrogate.get(),
174                                    unpaired_surrogate,
175                                ])
176                                .next()
177                                .expect("A pair of surrogate is supplied")
178                                .expect("Decoding a pair of surrogate cannot fail");
179                                output.push(c);
180                            },
181                            // Step 3. If item is a trailing surrogate, then return U+FFFD (�).
182                            None => output.push('\u{FFFD}'),
183                        },
184                        CodePointType::ScalarValue => unreachable!("Scalar Value won't fail"),
185                    }
186                },
187            }
188        }
189
190        output
191    }
192}
193
/// Classification of a single UTF-16 code unit, as produced by
/// [`code_point_type`].
enum CodePointType {
    /// Any code unit outside the surrogate range `0xD800..=0xDFFF`.
    ScalarValue,
    /// A code unit in `0xD800..=0xDBFF`.
    LeadingSurrogate,
    /// A code unit in `0xDC00..=0xDFFF`.
    TrailingSurrogate,
}
199
200fn code_point_type(value: u16) -> CodePointType {
201    match value {
202        0xD800..=0xDBFF => CodePointType::LeadingSurrogate,
203        0xDC00..=0xDFFF => CodePointType::TrailingSurrogate,
204        _ => CodePointType::ScalarValue,
205    }
206}
207
208/// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
209#[expect(unsafe_code)]
210pub(crate) fn encode_and_enqueue_a_chunk(
211    cx: &mut JSContext,
212    global: &GlobalScope,
213    chunk: SafeHandleValue,
214    encoder: &Encoder,
215    controller: &TransformStreamDefaultController,
216) -> Fallible<()> {
217    // Step 1. Let input be the result of converting chunk to a DOMString.
218    // Step 2. Convert input to an I/O queue of code units.
219    rooted!(&in(cx) let mut rval = UndefinedValue());
220    jsval_to_primitive(cx, global, chunk, rval.handle_mut())?;
221
222    assert!(!rval.is_object());
223    rooted!(&in(cx) let jsstr = unsafe { ToString(cx, rval.handle()) });
224    if jsstr.is_null() {
225        unsafe {
226            if !JS_IsExceptionPending(cx) {
227                throw_dom_exception(
228                    cx,
229                    global,
230                    Error::Type(c"Cannot convert JS primitive to string".to_owned()),
231                );
232            }
233        }
234
235        return Err(Error::JSFailed);
236    }
237
238    let input = unsafe {
239        if JS_DeprecatedStringHasLatin1Chars(*jsstr) {
240            let s = NonNull::new(*jsstr).expect("jsstr cannot be null");
241            ConvertedInput::String(latin1_to_string(cx, s))
242        } else {
243            let mut len = 0;
244            let data = JS_GetTwoByteStringCharsAndLength(cx, *jsstr, &mut len);
245            let maybe_ill_formed_code_units = std::slice::from_raw_parts(data, len);
246            ConvertedInput::CodeUnits(maybe_ill_formed_code_units)
247        }
248    };
249
250    // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
251    // Step 4. While true:
252    // Step 4.1 Let item be the result of reading from input.
253    // Step 4.3 Let result be the result of executing the convert code unit
254    //      to scalar value algorithm with encoder, item and input.
255    // Step 4.4 If result is not continue, then process an item with result,
256    //      encoder’s encoder, input, output, and "fatal".
257    let output = encoder.encode(input);
258
259    // Step 4.2 If item is end-of-queue:
260    // Step 4.2.1 Convert output into a byte sequence.
261    let output = output.as_bytes();
262    // Step 4.2.2 If output is not empty:
263    if output.is_empty() {
264        // Step 4.2.3
265        return Ok(());
266    }
267
268    // Step 4.2.2.1 Let chunk be the result of creating a Uint8Array object
269    //      given output and encoder’s relevant realm.
270    rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
271    let chunk = create_buffer_source::<Uint8>(cx, output, js_object.handle_mut())
272        .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
273    rooted!(&in(cx) let mut rval = UndefinedValue());
274    chunk.safe_to_jsval(cx, rval.handle_mut());
275    // Step 4.2.2.2 Enqueue chunk into encoder’s transform.
276    controller.enqueue(cx, global, rval.handle())?;
277    Ok(())
278}
279
280/// <https://encoding.spec.whatwg.org/#encode-and-flush>
281pub(crate) fn encode_and_flush(
282    cx: &mut JSContext,
283    global: &GlobalScope,
284    encoder: &Encoder,
285    controller: &TransformStreamDefaultController,
286) -> Fallible<()> {
287    // Step 1. If encoder’s leading surrogate is non-null:
288    if encoder.leading_surrogate.get().is_some() {
289        // Step 1.1 Let chunk be the result of creating a Uint8Array object
290        //      given « 0xEF, 0xBF, 0xBD » and encoder’s relevant realm.
291        rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
292        let chunk =
293            create_buffer_source::<Uint8>(cx, &[0xEF_u8, 0xBF, 0xBD], js_object.handle_mut())
294                .map_err(|_| {
295                    Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned())
296                })?;
297        rooted!(&in(cx) let mut rval = UndefinedValue());
298        chunk.safe_to_jsval(cx, rval.handle_mut());
299        // Step 1.2 Enqueue chunk into encoder’s transform.
300        return controller.enqueue(cx, global, rval.handle());
301    }
302
303    Ok(())
304}
305
/// DOM object for the `TextEncoderStream` interface.
///
/// <https://encoding.spec.whatwg.org/#textencoderstream>
#[dom_struct]
pub(crate) struct TextEncoderStream {
    /// Reflector for this DOM object; created empty in `new_inherited`.
    reflector_: Reflector,

    /// Transform stream whose readable and writable sides are returned by
    /// the `Readable` and `Writable` getters.
    ///
    /// <https://streams.spec.whatwg.org/#generictransformstream>
    transform: Dom<TransformStream>,
}
314
315impl TextEncoderStream {
316    fn new_inherited(transform: &TransformStream) -> TextEncoderStream {
317        Self {
318            reflector_: Reflector::new(),
319            transform: Dom::from_ref(transform),
320        }
321    }
322
323    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
324    fn new_with_proto(
325        cx: &mut JSContext,
326        global: &GlobalScope,
327        proto: Option<SafeHandleObject>,
328    ) -> Fallible<DomRoot<TextEncoderStream>> {
329        // Step 1. Set this’s encoder to an instance of the UTF-8 encoder.
330        let encoder = Encoder::default();
331
332        // Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument
333        //      and runs the encode and enqueue a chunk algorithm with this and chunk.
334        // Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush
335        //      algorithm with this.
336        let transformer_type = TransformerType::Encoder(encoder);
337
338        // Step 4. Let transformStream be a new TransformStream.
339        let transform = TransformStream::new_with_proto(cx, global, None);
340        // Step 5. Set up transformStream with transformAlgorithm set to transformAlgorithm
341        //      and flushAlgorithm set to flushAlgorithm.
342        transform.set_up(cx, global, transformer_type)?;
343
344        // Step 6. Set this’s transform to transformStream.
345        Ok(reflect_dom_object_with_proto_and_cx(
346            Box::new(TextEncoderStream::new_inherited(&transform)),
347            global,
348            proto,
349            cx,
350        ))
351    }
352}
353
354impl TextEncoderStreamMethods<crate::DomTypeHolder> for TextEncoderStream {
355    /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
356    fn Constructor(
357        cx: &mut JSContext,
358        global: &GlobalScope,
359        proto: Option<SafeHandleObject>,
360    ) -> Fallible<DomRoot<TextEncoderStream>> {
361        TextEncoderStream::new_with_proto(cx, global, proto)
362    }
363
364    /// <https://encoding.spec.whatwg.org/#dom-textencoder-encoding>
365    fn Encoding(&self) -> DOMString {
366        // Returns "utf-8".
367        DOMString::from("utf-8")
368    }
369
370    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-readable>
371    fn Readable(&self) -> DomRoot<ReadableStream> {
372        self.transform.get_readable()
373    }
374
375    /// <https://streams.spec.whatwg.org/#dom-generictransformstream-writable>
376    fn Writable(&self) -> DomRoot<WritableStream> {
377        self.transform.get_writable()
378    }
379}