script/dom/encoding/textencoderstream.rs
1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::Cell;
6use std::num::{NonZero, NonZeroU16};
7use std::ptr::{self, NonNull};
8
9use dom_struct::dom_struct;
10use js::conversions::latin1_to_string;
11use js::jsapi::{
12 JS_DeprecatedStringHasLatin1Chars, JS_GetTwoByteStringCharsAndLength, JS_IsExceptionPending,
13 JSObject, JSType, ToPrimitive,
14};
15use js::jsval::UndefinedValue;
16use js::rust::{
17 HandleObject as SafeHandleObject, HandleValue as SafeHandleValue,
18 MutableHandleValue as SafeMutableHandleValue, ToString,
19};
20use js::typedarray::Uint8;
21use script_bindings::conversions::SafeToJSValConvertible;
22
23use crate::dom::bindings::buffer_source::create_buffer_source;
24use crate::dom::bindings::codegen::Bindings::TextEncoderStreamBinding::TextEncoderStreamMethods;
25use crate::dom::bindings::error::{Error, Fallible, throw_dom_exception};
26use crate::dom::bindings::reflector::{Reflector, reflect_dom_object_with_proto};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::bindings::str::DOMString;
29use crate::dom::stream::transformstreamdefaultcontroller::TransformerType;
30use crate::dom::types::{GlobalScope, TransformStream, TransformStreamDefaultController};
31use crate::script_runtime::{CanGc, JSContext as SafeJSContext};
32use crate::{DomTypeHolder, DomTypes};
33
/// String converted from an input JS Value.
///
/// The lifetime `'a` is the lifetime of the code units borrowed by
/// [`ConvertedInput::CodeUnits`].
enum ConvertedInput<'a> {
    /// An owned Rust `String`. Being valid UTF-8, it cannot contain surrogate
    /// code points.
    String(String),
    /// Borrowed UTF-16 code units. These may be ill-formed, i.e. they may
    /// contain unpaired surrogates.
    CodeUnits(&'a [u16]),
}
39
40/// Converts a JS value to primitive type so that it can be used with
41/// `ToString`.
42///
43/// Set `rval` to `chunk` if `chunk` is a primitive JS value. Otherwise, convert
44/// `chunk` into a primitive JS value and then set `rval` to the converted
45/// primitive. This follows the `ToString` procedure with the exception that it
46/// does not convert the value to string.
47///
48/// See below for the `ToString` procedure in spec:
49/// <https://tc39.es/ecma262/multipage/abstract-operations.html#sec-tostring>
50#[expect(unsafe_code)]
51fn jsval_to_primitive(
52 cx: SafeJSContext,
53 global: &GlobalScope,
54 chunk: SafeHandleValue,
55 mut rval: SafeMutableHandleValue,
56 can_gc: CanGc,
57) -> Fallible<()> {
58 // Step 1. If argument is a String, return argument.
59 // Step 2. If argument is a Symbol, throw a TypeError exception.
60 // Step 3. If argument is undefined, return "undefined".
61 // Step 4. If argument is null, return "null".
62 // Step 5. If argument is true, return "true".
63 // Step 6. If argument is false, return "false".
64 // Step 7. If argument is a Number, return Number::toString(argument, 10).
65 // Step 8. If argument is a BigInt, return BigInt::toString(argument, 10).
66 if chunk.is_primitive() {
67 rval.set(chunk.get());
68
69 return Ok(());
70 }
71
72 // Step 9. Assert: argument is an Object.
73 assert!(chunk.is_object());
74
75 // Step 10. Let primValue be ? ToPrimitive(argument, string).
76 rooted!(in(*cx) let obj = chunk.to_object());
77 let is_success =
78 unsafe { ToPrimitive(*cx, obj.handle().into(), JSType::JSTYPE_STRING, rval.into()) };
79 log::debug!("ToPrimitive is_success={:?}", is_success);
80 if !is_success {
81 unsafe {
82 if !JS_IsExceptionPending(*cx) {
83 throw_dom_exception(
84 cx,
85 global,
86 Error::Type(c"Cannot convert JSObject to primitive".to_owned()),
87 can_gc,
88 );
89 }
90 }
91 return Err(Error::JSFailed);
92 }
93
94 Ok(())
95}
96
/// <https://encoding.spec.whatwg.org/#textencoderstream-encoder>
///
/// Holds the state that has to survive between two chunks handed to the
/// encoder.
#[derive(Default, JSTraceable, MallocSizeOf)]
pub(crate) struct Encoder {
    /// <https://encoding.spec.whatwg.org/#textencoderstream-pending-high-surrogate>
    ///
    /// A leading surrogate that has been read but not yet paired with a
    /// trailing surrogate, or `None` when there is none pending. Stored in a
    /// `Cell` because the encoding methods only take `&self`.
    leading_surrogate: Cell<Option<NonZeroU16>>,
}
103
104impl Encoder {
105 fn encode(&self, maybe_ill_formed: ConvertedInput<'_>) -> String {
106 match maybe_ill_formed {
107 ConvertedInput::String(s) => {
108 // Rust String is already UTF-8 encoded and cannot contain
109 // surrogate
110 if !s.is_empty() && self.leading_surrogate.take().is_some() {
111 let mut output = String::with_capacity(1 + s.len());
112 output.push('\u{FFFD}');
113 output.push_str(&s);
114 return output;
115 }
116
117 s
118 },
119 ConvertedInput::CodeUnits(code_units) => self.encode_from_code_units(code_units),
120 }
121 }
122
123 /// Encode an input slice of code unit into unicode scalar values
124 fn encode_from_code_units(&self, input: &[u16]) -> String {
125 // <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
126 //
127 // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
128 let mut output = String::with_capacity(input.len());
129 // Step 4. While true:
130 // Step 4.1 Let item be the result of reading from input.
131 for result in char::decode_utf16(input.iter().cloned()) {
132 // Step 4.3 Let result be the result of executing the convert code unit
133 // to scalar value algorithm with encoder, item and input.
134
135 // <https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value>
136 match result {
137 Ok(c) => {
138 // Step 1. If encoder’s leading surrogate is non-null:
139 // Step 1.1 Let leadingSurrogate be encoder’s leading surrogate.
140 // Step 1.2 Set encoder’s leading surrogate to null.
141 if self.leading_surrogate.take().is_some() {
142 // Step 1.5 Return U+FFFD (�).
143 output.push('\u{FFFD}');
144 }
145
146 // Step 1.4 Restore item to input.
147 // Note: pushing item to output is equivalent to restoring item to input
148 // and rerun the convert-code-unit-to-scalar-value algo
149 output.push(c);
150 },
151 Err(error) => {
152 let unpaired_surrogate = error.unpaired_surrogate();
153 match code_point_type(unpaired_surrogate) {
154 CodePointType::LeadingSurrogate => {
155 // Step 1.1 If encoder’s leading surrogate is non-null:
156 // Step 1.2 Set encoder’s leading surrogate to null.
157 if self.leading_surrogate.take().is_some() {
158 output.push('\u{FFFD}');
159 }
160
161 // Step 1.4 Restore item to input.
162 // Note: Replacing encoder's leading_surrogate is equivalent
163 // to restore item back to input and rerun the convert-
164 // code-unit-to-scalar-value algo.
165 // Step 2. If item is a leading surrogate, then set encoder’s
166 // leading surrogate to item and return continue.
167 self.leading_surrogate
168 .replace(NonZero::new(unpaired_surrogate));
169 },
170 CodePointType::TrailingSurrogate => match self.leading_surrogate.take() {
171 // Step 1.1 If encoder’s leading surrogate is non-null:
172 // Step 1.2 Set encoder’s leading surrogate to null.
173 Some(leading_surrogate) => {
174 // Step 1.3 If item is a trailing surrogate, then return a scalar
175 // value from surrogates given leadingSurrogate and item.
176 let c = char::decode_utf16([
177 leading_surrogate.get(),
178 unpaired_surrogate,
179 ])
180 .next()
181 .expect("A pair of surrogate is supplied")
182 .expect("Decoding a pair of surrogate cannot fail");
183 output.push(c);
184 },
185 // Step 3. If item is a trailing surrogate, then return U+FFFD (�).
186 None => output.push('\u{FFFD}'),
187 },
188 CodePointType::ScalarValue => unreachable!("Scalar Value won't fail"),
189 }
190 },
191 }
192 }
193
194 output
195 }
196}
197
/// Classification of a single UTF-16 code unit.
enum CodePointType {
    /// A code unit outside of the surrogate range `0xD800..=0xDFFF`.
    ScalarValue,
    /// A code unit in `0xD800..=0xDBFF`.
    LeadingSurrogate,
    /// A code unit in `0xDC00..=0xDFFF`.
    TrailingSurrogate,
}

/// Tells whether `value` is a leading surrogate, a trailing surrogate, or
/// neither of the two.
fn code_point_type(value: u16) -> CodePointType {
    // The six most significant bits are 0b110110 for every leading surrogate
    // and 0b110111 for every trailing surrogate, so masking the low ten bits
    // away leaves exactly one value to compare against for each kind.
    const SURROGATE_KIND_MASK: u16 = 0xFC00;

    match value & SURROGATE_KIND_MASK {
        0xD800 => CodePointType::LeadingSurrogate,
        0xDC00 => CodePointType::TrailingSurrogate,
        _ => CodePointType::ScalarValue,
    }
}
211
212/// <https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk>
213#[expect(unsafe_code)]
214pub(crate) fn encode_and_enqueue_a_chunk(
215 cx: &mut js::context::JSContext,
216 global: &GlobalScope,
217 chunk: SafeHandleValue,
218 encoder: &Encoder,
219 controller: &TransformStreamDefaultController,
220) -> Fallible<()> {
221 // Step 1. Let input be the result of converting chunk to a DOMString.
222 // Step 2. Convert input to an I/O queue of code units.
223 rooted!(&in(cx) let mut rval = UndefinedValue());
224 jsval_to_primitive(
225 cx.into(),
226 global,
227 chunk,
228 rval.handle_mut(),
229 CanGc::from_cx(cx),
230 )?;
231
232 assert!(!rval.is_object());
233 rooted!(&in(cx) let jsstr = unsafe { ToString(cx.raw_cx(), rval.handle()) });
234 if jsstr.is_null() {
235 unsafe {
236 if !JS_IsExceptionPending(cx.raw_cx()) {
237 throw_dom_exception(
238 cx.into(),
239 global,
240 Error::Type(c"Cannot convert JS primitive to string".to_owned()),
241 CanGc::from_cx(cx),
242 );
243 }
244 }
245
246 return Err(Error::JSFailed);
247 }
248
249 let input = unsafe {
250 if JS_DeprecatedStringHasLatin1Chars(*jsstr) {
251 let s = NonNull::new(*jsstr).expect("jsstr cannot be null");
252 ConvertedInput::String(latin1_to_string(cx.raw_cx(), s))
253 } else {
254 let mut len = 0;
255 let data =
256 JS_GetTwoByteStringCharsAndLength(cx.raw_cx(), std::ptr::null(), *jsstr, &mut len);
257 let maybe_ill_formed_code_units = std::slice::from_raw_parts(data, len);
258 ConvertedInput::CodeUnits(maybe_ill_formed_code_units)
259 }
260 };
261
262 // Step 3. Let output be the I/O queue of bytes « end-of-queue ».
263 // Step 4. While true:
264 // Step 4.1 Let item be the result of reading from input.
265 // Step 4.3 Let result be the result of executing the convert code unit
266 // to scalar value algorithm with encoder, item and input.
267 // Step 4.4 If result is not continue, then process an item with result,
268 // encoder’s encoder, input, output, and "fatal".
269 let output = encoder.encode(input);
270
271 // Step 4.2 If item is end-of-queue:
272 // Step 4.2.1 Convert output into a byte sequence.
273 let output = output.as_bytes();
274 // Step 4.2.2 If output is not empty:
275 if output.is_empty() {
276 // Step 4.2.3
277 return Ok(());
278 }
279
280 // Step 4.2.2.1 Let chunk be the result of creating a Uint8Array object
281 // given output and encoder’s relevant realm.
282 rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
283 let chunk = create_buffer_source::<Uint8>(
284 cx.into(),
285 output,
286 js_object.handle_mut(),
287 CanGc::from_cx(cx),
288 )
289 .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
290 rooted!(&in(cx) let mut rval = UndefinedValue());
291 chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
292 // Step 4.2.2.2 Enqueue chunk into encoder’s transform.
293 controller.enqueue(cx, global, rval.handle())?;
294 Ok(())
295}
296
297/// <https://encoding.spec.whatwg.org/#encode-and-flush>
298pub(crate) fn encode_and_flush(
299 cx: &mut js::context::JSContext,
300 global: &GlobalScope,
301 encoder: &Encoder,
302 controller: &TransformStreamDefaultController,
303) -> Fallible<()> {
304 // Step 1. If encoder’s leading surrogate is non-null:
305 if encoder.leading_surrogate.get().is_some() {
306 // Step 1.1 Let chunk be the result of creating a Uint8Array object
307 // given « 0xEF, 0xBF, 0xBD » and encoder’s relevant realm.
308 rooted!(&in(cx) let mut js_object = ptr::null_mut::<JSObject>());
309 let chunk = create_buffer_source::<Uint8>(
310 cx.into(),
311 &[0xEF_u8, 0xBF, 0xBD],
312 js_object.handle_mut(),
313 CanGc::from_cx(cx),
314 )
315 .map_err(|_| Error::Type(c"Cannot convert byte sequence to Uint8Array".to_owned()))?;
316 rooted!(&in(cx) let mut rval = UndefinedValue());
317 chunk.safe_to_jsval(cx.into(), rval.handle_mut(), CanGc::from_cx(cx));
318 // Step 1.2 Enqueue chunk into encoder’s transform.
319 return controller.enqueue(cx, global, rval.handle());
320 }
321
322 Ok(())
323}
324
/// <https://encoding.spec.whatwg.org/#textencoderstream>
///
/// DOM object backing the `TextEncoderStream` interface. Its readable and
/// writable sides are those of the wrapped transform stream.
#[dom_struct]
pub(crate) struct TextEncoderStream {
    /// Reflector tying this Rust object to its JS wrapper.
    reflector_: Reflector,

    /// <https://streams.spec.whatwg.org/#generictransformstream>
    ///
    /// The transform stream that was set up with the encoder.
    transform: Dom<TransformStream>,
}
333
334impl TextEncoderStream {
335 fn new_inherited(transform: &TransformStream) -> TextEncoderStream {
336 Self {
337 reflector_: Reflector::new(),
338 transform: Dom::from_ref(transform),
339 }
340 }
341
342 /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
343 fn new_with_proto(
344 cx: SafeJSContext,
345 global: &GlobalScope,
346 proto: Option<SafeHandleObject>,
347 can_gc: CanGc,
348 ) -> Fallible<DomRoot<TextEncoderStream>> {
349 // Step 1. Set this’s encoder to an instance of the UTF-8 encoder.
350 let encoder = Encoder::default();
351
352 // Step 2. Let transformAlgorithm be an algorithm which takes a chunk argument
353 // and runs the encode and enqueue a chunk algorithm with this and chunk.
354 // Step 3. Let flushAlgorithm be an algorithm which runs the encode and flush
355 // algorithm with this.
356 let transformer_type = TransformerType::Encoder(encoder);
357
358 // Step 4. Let transformStream be a new TransformStream.
359 let transform = TransformStream::new_with_proto(global, None, can_gc);
360 // Step 5. Set up transformStream with transformAlgorithm set to transformAlgorithm
361 // and flushAlgorithm set to flushAlgorithm.
362 transform.set_up(cx, global, transformer_type, can_gc)?;
363
364 // Step 6. Set this’s transform to transformStream.
365 Ok(reflect_dom_object_with_proto(
366 Box::new(TextEncoderStream::new_inherited(&transform)),
367 global,
368 proto,
369 can_gc,
370 ))
371 }
372}
373
374impl TextEncoderStreamMethods<DomTypeHolder> for TextEncoderStream {
375 /// <https://encoding.spec.whatwg.org/#dom-textencoderstream>
376 fn Constructor(
377 global: &<DomTypeHolder as DomTypes>::GlobalScope,
378 proto: Option<SafeHandleObject>,
379 can_gc: CanGc,
380 ) -> Fallible<DomRoot<<DomTypeHolder as DomTypes>::TextEncoderStream>> {
381 TextEncoderStream::new_with_proto(GlobalScope::get_cx(), global, proto, can_gc)
382 }
383
384 /// <https://encoding.spec.whatwg.org/#dom-textencoder-encoding>
385 fn Encoding(&self) -> DOMString {
386 // Returns "utf-8".
387 DOMString::from("utf-8")
388 }
389
390 /// <https://streams.spec.whatwg.org/#dom-generictransformstream-readable>
391 fn Readable(&self) -> DomRoot<<DomTypeHolder as DomTypes>::ReadableStream> {
392 self.transform.get_readable()
393 }
394
395 /// <https://streams.spec.whatwg.org/#dom-generictransformstream-writable>
396 fn Writable(&self) -> DomRoot<<DomTypeHolder as DomTypes>::WritableStream> {
397 self.transform.get_writable()
398 }
399}