script/dom/encoding/textencoderstream.rs
1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::Cell;
6use std::num::{NonZero, NonZeroU16};
7use std::ptr::{self, NonNull};
8
9use dom_struct::dom_struct;
10use js::context::JSContext;
11use js::conversions::latin1_to_string;
12use js::jsapi::{JS_DeprecatedStringHasLatin1Chars, JSObject, JSType};
13use js::jsval::UndefinedValue;
14use js::rust::wrappers2::{JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending, ToPrimitive};
15use js::rust::{
16 HandleObject as SafeHandleObject, HandleValue as SafeHandleValue,
17 MutableHandleValue as SafeMutableHandleValue, ToString,
18};
19use js::typedarray::Uint8;
20use script_bindings::conversions::SafeToJSValConvertible;
21use script_bindings::reflector::{Reflector, reflect_dom_object_with_proto_and_cx};
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::TextEncoderStreamBinding::TextEncoderStreamMethods;
25use crate::dom::bindings::error::{Error, Fallible, throw_dom_exception};
26use crate::dom::bindings::root::{Dom, DomRoot};
27use crate::dom::bindings::str::DOMString;
28use crate::dom::stream::readablestream::ReadableStream;
29use crate::dom::stream::transformstreamdefaultcontroller::TransformerType;
30use crate::dom::stream::writablestream::WritableStream;
31use crate::dom::types::{GlobalScope, TransformStream, TransformStreamDefaultController};
32use crate::script_runtime::CanGc;
33
/// Text obtained from an input JS value, kept in whichever representation
/// the JS string already had.
enum ConvertedInput<'a> {
    /// An owned Rust string. It is always well-formed UTF-8 and therefore
    /// cannot contain surrogates.
    String(String),
    /// Borrowed UTF-16 code units. They may be ill-formed, i.e. contain
    /// unpaired surrogates.
    CodeUnits(&'a [u16]),
}
39
40/// Converts a JS value to primitive type so that it can be used with
41/// `ToString`.
42///
43/// Set `rval` to `chunk` if `chunk` is a primitive JS value. Otherwise, convert
44/// `chunk` into a primitive JS value and then set `rval` to the converted
45/// primitive. This follows the `ToString` procedure with the exception that it
46/// does not convert the value to string.
47///
48/// See below for the `ToString` procedure in spec:
49/// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tostring>
50#[expect(unsafe_code)]
51fn jsval_to_primitive(
52 cx: &mut JSContext,
53 global: &GlobalScope,
54 chunk: SafeHandleValue,
55 mut rval: SafeMutableHandleValue,
56) -> Fallible<()> {
57 // Step 1. If argument is a String, return argument.
58 // Step 2. If argument is a Symbol, throw a TypeError exception.
59 // Step 3. If argument is undefined, return "undefined".
60 // Step 4. If argument is null, return "null".
61 // Step 5. If argument is true, return "true".
62 // Step 6. If argument is false, return "false".
63 // Step 7. If argument is a Number, return Number::toString(argument, 10).
64 // Step 8. If argument is a BigInt, return BigInt::toString(argument, 10).
65 if chunk.is_primitive() {
66 rval.set(chunk.get());
67
68 return Ok(());
69 }
70
71 // Step 9. Assert: argument is an Object.
72 assert!(chunk.is_object());
73
74 // Step 10. Let primValue be ? ToPrimitive(argument, string).
75 rooted!(&in(cx) let obj = chunk.to_object());
76 let is_success = unsafe { ToPrimitive(cx, obj.handle(), JSType::JSTYPE_STRING, rval) };
77 log::debug!("ToPrimitive is_success={:?}", is_success);
78 if !is_success {
79 unsafe {
80 if !JS_IsExceptionPending(cx) {
81 throw_dom_exception(
82 cx.into(),
83 global,
84 Error::Type(c"Cannot convert JSObject to primitive".to_owned()),
85 CanGc::from_cx(cx),
86 );
87 }
88 }
89 return Err(Error::JSFailed);
90 }
91
92 Ok(())
93}
94
/// Encoder state that has to survive between the chunks written to a
/// `TextEncoderStream`.
///
/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
#[derive(Default, JSTraceable, MallocSizeOf)]
pub(crate) struct Encoder {
    /// Leading surrogate seen in earlier input that has not been paired with
    /// a trailing surrogate yet, or `None` when nothing is pending.
    ///
    /// <https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate>
    leading_surrogate: Cell<Option<NonZeroU16>>,
}
101
102impl Encoder {
103 fn encode(&self, maybe_ill_formed: ConvertedInput<'_>) -> String {
104 match maybe_ill_formed {
105 ConvertedInput::String(s) => {
106 // Rust String is already UTF-8 encoded and cannot contain
107 // surrogate
108 if !s.is_empty() && self.leading_surrogate.take().is_some() {
109 let mut output = String::with_capacity(1 + s.len());
110 output.push('\u{FFFD}');
111 output.push_str(&s);
112 return output;
113 }
114
115 s
116 },
117 ConvertedInput::CodeUnits(code_units) => self.encode_from_code_units(code_units),
118 }
119 }
120
121 /// Encode an input slice of code unit into unicode scalar values
122 fn encode_from_code_units(&self, input: &[u16]) -> String {
123 // <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
124 //
125 // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
126 let mut output = String::with_capacity(input.len());
127 // Step 4. While true:
128 // Step 4.1 Let item be the result of reading from input.
129 for result in char::decode_utf16(input.iter().cloned()) {
130 // Step 4.3 Let result be the result of executing the convert code unit
131 // to scalar value algorithm with encoder, item and input.
132
133 // <https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value>
134 match result {
135 Ok(c) => {
136 // Step 1. If encoder’s leading surrogate is non-null:
137 // Step 1.1 Let leadingSurrogate be encoder’s leading surrogate.
138 // Step 1.2 Set encoder’s leading surrogate to null.
139 if self.leading_surrogate.take().is_some() {
140 // Step 1.5 Return U+FFFD (�).
141 output.push('\u{FFFD}');
142 }
143
144 // Step 1.4 Restore item to input.
145 // Note: pushing item to output is equivalent to restoring item to input
146 // and rerun the convert-code-unit-to-scalar-value algo
147 output.push(c);
148 },
149 Err(error) => {
150 let unpaired_surrogate = error.unpaired_surrogate();
151 match code_point_type(unpaired_surrogate) {
152 CodePointType::LeadingSurrogate => {
153 // Step 1.1 If encoder’s leading surrogate is non-null:
154 // Step 1.2 Set encoder’s leading surrogate to null.
155 if self.leading_surrogate.take().is_some() {
156 output.push('\u{FFFD}');
157 }
158
159 // Step 1.4 Restore item to input.
160 // Note: Replacing encoder's leading_surrogate is equivalent
161 // to restore item back to input and rerun the convert-
162 // code-unit-to-scalar-value algo.
163 // Step 2. If item is a leading surrogate, then set encoder’s
164 // leading surrogate to item and return continue.
165 self.leading_surrogate
166 .replace(NonZero::new(unpaired_surrogate));
167 },
168 CodePointType::TrailingSurrogate => match self.leading_surrogate.take() {
169 // Step 1.1 If encoder’s leading surrogate is non-null:
170 // Step 1.2 Set encoder’s leading surrogate to null.
171 Some(leading_surrogate) => {
172 // Step 1.3 If item is a trailing surrogate, then return a scalar
173 // value from surrogates given leadingSurrogate and item.
174 let c = char::decode_utf16([
175 leading_surrogate.get(),
176 unpaired_surrogate,
177 ])
178 .next()
179 .expect("A pair of surrogate is supplied")
180 .expect("Decoding a pair of surrogate cannot fail");
181 output.push(c);
182 },
183 // Step 3. If item is a trailing surrogate, then return U+FFFD (�).
184 None => output.push('\u{FFFD}'),
185 },
186 CodePointType::ScalarValue => unreachable!("Scalar Value won't fail"),
187 }
188 },
189 }
190 }
191
192 output
193 }
194}
195
/// Classification of a single UTF-16 code unit.
enum CodePointType {
    /// Not a surrogate: the code unit is a Unicode scalar value on its own.
    ScalarValue,
    /// A code unit in `0xD800..=0xDBFF`, the first half of a surrogate pair.
    LeadingSurrogate,
    /// A code unit in `0xDC00..=0xDFFF`, the second half of a surrogate pair.
    TrailingSurrogate,
}

/// Tells whether `value` is a leading surrogate, a trailing surrogate or a
/// plain scalar value.
fn code_point_type(value: u16) -> CodePointType {
    // Both surrogate ranges are 1024 code units wide and aligned to a
    // multiple of 1024, so the top six bits alone identify them.
    const SURROGATE_KIND_MASK: u16 = 0xFC00;
    const LEADING_PREFIX: u16 = 0xD800;
    const TRAILING_PREFIX: u16 = 0xDC00;

    let prefix = value & SURROGATE_KIND_MASK;
    if prefix == LEADING_PREFIX {
        CodePointType::LeadingSurrogate
    } else if prefix == TRAILING_PREFIX {
        CodePointType::TrailingSurrogate
    } else {
        CodePointType::ScalarValue
    }
}
209
210/// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
211#[expect(unsafe_code)]
212pub(crate) fn encode_and_enqueue_a_chunk(
213 cx: &mut JSContext,
214 global: &GlobalScope,
215 chunk: SafeHandleValue,
216 encoder: &Encoder,
217 controller: &TransformStreamDefaultController,
218) -> Fallible<()> {
219 // Step 1. Let input be the result of converting chunk to a DOMString.
220 // Step 2. Convert input to an I/O queue of code units.
221 rooted!(&in(cx) let mut rval = UndefinedValue());
222 jsval_to_primitive(cx, global, chunk, rval.handle_mut())?;
223
224 assert!(!rval.is_object());
225 rooted!(&in(cx) let jsstr = unsafe { ToString(cx, rval.handle()) });
226 if jsstr.is_null() {
227 unsafe {
228 if !JS_IsExceptionPending(cx) {
229 throw_dom_exception(
230 cx.into(),
231 global,
232 Error::Type(c"Cannot convert JS primitive to string".to_owned()),
233 CanGc::from_cx(cx),
234 );
235 }
236 }
237
238 return Err(Error::JSFailed);
239 }
240
241 let input = unsafe {
242 if JS_DeprecatedStringHasLatin1Chars(*jsstr) {
243 let s = NonNull::new(*jsstr).expect("jsstr cannot be null");
244 ConvertedInput::String(latin1_to_string(cx, s))
245 } else {
246 let mut len = 0;
247 let data = JS_GetTwoByteStringCharsAndLength(cx, *jsstr, &mut len);
248 let maybe_ill_formed_code_units = std::slice::from_raw_parts(data, len);
249 ConvertedInput::CodeUnits(maybe_ill_formed_code_units)
250 }
251 };
252
253 // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
254 // Step 4. While true:
255 // Step 4.1 Let item be the result of reading from input.
256 // Step 4.3 Let result be the result of executing the convert code unit
257 // to scalar value algorithm with encoder, item and input.
258 // Step 4.4 If result is not continue, then process an item with result,
259 // encoder’s encoder, input, output, and "fatal".
260 let output = encoder.encode(input);
261
262 // Step 4.2 If item is end-of-queue:
263 // Step 4.2.1 Convert output into a byte sequence.
264 let output = output.as_bytes();
265 // Step 4.2.2 If output is not empty:
266 if output.is_empty() {
267 // Step 4.2.3
268 return Ok(());
269 }
270
271 // Step 4.2.2.1 Let chunk be the result of creating a Uint8Array object
272 // given output and encoder’s relevant realm.
273 rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
274 let chunk = create_buffer_source::<Uint8>(
275 cx.into(),
276 output,
277 js_object.handle_mut(),
278 CanGc::from_cx(cx),
279 )
280 .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
281 rooted!(&in(cx) let mut rval = UndefinedValue());
282 chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
283 // Step 4.2.2.2 Enqueue chunk into encoder’s transform.
284 controller.enqueue(cx, global, rval.handle())?;
285 Ok(())
286}
287
288/// <https://encoding.spec.whatwg.org/#encode-and-flush>
289pub(crate) fn encode_and_flush(
290 cx: &mut JSContext,
291 global: &GlobalScope,
292 encoder: &Encoder,
293 controller: &TransformStreamDefaultController,
294) -> Fallible<()> {
295 // Step 1. If encoder’s leading surrogate is non-null:
296 if encoder.leading_surrogate.get().is_some() {
297 // Step 1.1 Let chunk be the result of creating a Uint8Array object
298 // given « 0xEF, 0xBF, 0xBD » and encoder’s relevant realm.
299 rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
300 let chunk = create_buffer_source::<Uint8>(
301 cx.into(),
302 &[0xEF_u8, 0xBF, 0xBD],
303 js_object.handle_mut(),
304 CanGc::from_cx(cx),
305 )
306 .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
307 rooted!(&in(cx) let mut rval = UndefinedValue());
308 chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
309 // Step 1.2 Enqueue chunk into encoder’s transform.
310 return controller.enqueue(cx, global, rval.handle());
311 }
312
313 Ok(())
314}
315
316/// <https://encoding.spec.whatwg.org/#textencoderstream>
317#[dom_struct]
318pub(crate) struct TextEncoderStream {
319 reflector_: Reflector,
320
321 /// <https://streams.spec.whatwg.org/#generictransformstream>
322 transform: Dom<TransformStream>,
323}
324
325impl TextEncoderStream {
326 fn new_inherited(transform: &TransformStream) -> TextEncoderStream {
327 Self {
328 reflector_: Reflector::new(),
329 transform: Dom::from_ref(transform),
330 }
331 }
332
333 /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
334 fn new_with_proto(
335 cx: &mut JSContext,
336 global: &GlobalScope,
337 proto: Option<SafeHandleObject>,
338 ) -> Fallible<DomRoot<TextEncoderStream>> {
339 // Step 1. Set this’s encoder to an instance of the UTF-8 encoder.
340 let encoder = Encoder::default();
341
342 // Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument
343 // and runs the encode and enqueue a chunk algorithm with this and chunk.
344 // Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush
345 // algorithm with this.
346 let transformer_type = TransformerType::Encoder(encoder);
347
348 // Step 4. Let transformStream be a new TransformStream.
349 let transform = TransformStream::new_with_proto(global, None, CanGc::from_cx(cx));
350 // Step 5. Set up transformStream with transformAlgorithm set to transformAlgorithm
351 // and flushAlgorithm set to flushAlgorithm.
352 transform.set_up(cx, global, transformer_type)?;
353
354 // Step 6. Set this’s transform to transformStream.
355 Ok(reflect_dom_object_with_proto_and_cx(
356 Box::new(TextEncoderStream::new_inherited(&transform)),
357 global,
358 proto,
359 cx,
360 ))
361 }
362}
363
364impl TextEncoderStreamMethods<crate::DomTypeHolder> for TextEncoderStream {
365 /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
366 fn Constructor(
367 cx: &mut JSContext,
368 global: &GlobalScope,
369 proto: Option<SafeHandleObject>,
370 ) -> Fallible<DomRoot<TextEncoderStream>> {
371 TextEncoderStream::new_with_proto(cx, global, proto)
372 }
373
374 /// <https://encoding.spec.whatwg.org/#dom-textencoder-encoding>
375 fn Encoding(&self) -> DOMString {
376 // Returns "utf-8".
377 DOMString::from("utf-8")
378 }
379
380 /// <https://streams.spec.whatwg.org/#dom-generictransformstream-readable>
381 fn Readable(&self) -> DomRoot<ReadableStream> {
382 self.transform.get_readable()
383 }
384
385 /// <https://streams.spec.whatwg.org/#dom-generictransformstream-writable>
386 fn Writable(&self) -> DomRoot<WritableStream> {
387 self.transform.get_writable()
388 }
389}