script/dom/textdecodercommon.rs
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::RefCell;
6
7use encoding_rs::{Decoder, DecoderResult, Encoding};
8
9use crate::dom::bindings::codegen::UnionTypes::ArrayBufferViewOrArrayBuffer;
10use crate::dom::bindings::error::{Error, Fallible};
11
12/// The shared part of `TextDecoder` and `TextDecoderStream`
13///
14/// Note that other than the three attributes defined in the `TextDecoderCommon`
15/// interface in the WebIDL, this also performs decoding.
16///
17/// <https://encoding.spec.whatwg.org/#textdecodercommon>
18#[allow(non_snake_case)]
19#[derive(JSTraceable, MallocSizeOf)]
20pub(crate) struct TextDecoderCommon {
21 /// <https://encoding.spec.whatwg.org/#dom-textdecoder-encoding>
22 #[no_trace]
23 encoding: &'static Encoding,
24
25 /// <https://encoding.spec.whatwg.org/#dom-textdecoder-fatal>
26 fatal: bool,
27
28 /// <https://encoding.spec.whatwg.org/#dom-textdecoder-ignorebom>
29 ignoreBOM: bool,
30
31 /// The native decoder that is used to perform decoding
32 ///
33 /// <https://encoding.spec.whatwg.org/#textdecodercommon-decoder>
34 #[ignore_malloc_size_of = "defined in encoding_rs"]
35 #[no_trace]
36 decoder: RefCell<Decoder>,
37
38 /// <https://encoding.spec.whatwg.org/#textdecodercommon-i-o-queue>
39 io_queue: RefCell<Vec<u8>>,
40}
41
42#[allow(non_snake_case)]
43impl TextDecoderCommon {
44 pub(crate) fn new_inherited(
45 encoding: &'static Encoding,
46 fatal: bool,
47 ignoreBOM: bool,
48 ) -> TextDecoderCommon {
49 let decoder = if ignoreBOM {
50 encoding.new_decoder_without_bom_handling()
51 } else {
52 encoding.new_decoder_with_bom_removal()
53 };
54
55 TextDecoderCommon {
56 encoding,
57 fatal,
58 ignoreBOM,
59 decoder: RefCell::new(decoder),
60 io_queue: RefCell::new(Vec::new()),
61 }
62 }
63
64 /// <https://encoding.spec.whatwg.org/#textdecoder-encoding>
65 pub(crate) fn encoding(&self) -> &'static Encoding {
66 self.encoding
67 }
68
69 /// <https://encoding.spec.whatwg.org/#textdecodercommon-decoder>
70 pub(crate) fn decoder(&self) -> &RefCell<Decoder> {
71 &self.decoder
72 }
73
74 /// <https://encoding.spec.whatwg.org/#textdecodercommon-i-o-queue>
75 pub(crate) fn io_queue(&self) -> &RefCell<Vec<u8>> {
76 &self.io_queue
77 }
78
79 /// <https://encoding.spec.whatwg.org/#textdecoder-error-mode>
80 pub(crate) fn fatal(&self) -> bool {
81 self.fatal
82 }
83
84 /// <https://encoding.spec.whatwg.org/#textdecoder-ignore-bom-flag>
85 pub(crate) fn ignore_bom(&self) -> bool {
86 self.ignoreBOM
87 }
88
89 /// Shared by `TextDecoder` and `TextDecoderStream`
90 ///
91 /// <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
92 /// <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
93 #[allow(unsafe_code)]
94 pub(crate) fn decode(
95 &self,
96 input: Option<&ArrayBufferViewOrArrayBuffer>,
97 last: bool,
98 ) -> Fallible<String> {
99 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
100 // Step 3. If input is given, then push a copy of input to this’s I/O queue.
101 //
102 // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
103 // Step 2. Push a copy of bufferSource to decoder’s I/O queue.
104 //
105 // NOTE: try to avoid this copy unless there are bytes left
106 let mut io_queue = self.io_queue.borrow_mut();
107 let input = match &input {
108 Some(ArrayBufferViewOrArrayBuffer::ArrayBufferView(a)) => unsafe {
109 if io_queue.is_empty() {
110 a.as_slice()
111 } else {
112 io_queue.extend_from_slice(a.as_slice());
113 &io_queue[..]
114 }
115 },
116 Some(ArrayBufferViewOrArrayBuffer::ArrayBuffer(a)) => unsafe {
117 if io_queue.is_empty() {
118 a.as_slice()
119 } else {
120 io_queue.extend_from_slice(a.as_slice());
121 &io_queue[..]
122 }
123 },
124 None => &io_queue[..],
125 };
126
127 let mut decoder = self.decoder.borrow_mut();
128 let (output, read) = if self.fatal {
129 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
130 // Step 4. Let output be the I/O queue of scalar values « end-of-queue ».
131 //
132 // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
133 // Step 3. Let output be the I/O queue of scalar values « end-of-queue ».
134 let mut output = String::with_capacity(
135 decoder
136 .max_utf8_buffer_length_without_replacement(input.len())
137 .ok_or_else(|| {
138 Error::Type("Expected UTF8 buffer length would overflow".to_owned())
139 })?,
140 );
141
142 // Note: The two algorithms below are implemented in
143 // `encoding_rs::Decoder::decode_to_string_without_replacement`
144 //
145 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
146 // Step 5. While true:
147 // Step 5.1 Let item be the result of reading from this’s I/O queue.
148 // Step 5.2 If item is end-of-queue and this’s do not flush is true,
149 // then return the result of running serialize I/O queue with this and output.
150 // Step 5.3 Otherwise:
151 // Step 5.3.1 Let result be the result of processing an item with item, this’s decoder,
152 // this’s I/O queue, output, and this’s error mode.
153 //
154 // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
155 // Step 4. While true:
156 // Step 4.1 Let item be the result of reading from decoder’s I/O queue.
157 // Step 4.2 If item is end-of-queue:
158 // Step 4.2.1 Let outputChunk be the result of running serialize I/O queue with decoder and output.
159 // Step 4.2.2 If outputChunk is not the empty string, then enqueue outputChunk in decoder’s transform.
160 // Step 4.2.3 Return.
161 // Step 4.3 Let result be the result of processing an item with item, decoder’s decoder,
162 // decoder’s I/O queue, output, and decoder’s error mode.
163 // Step 4.4 If result is error, then throw a TypeError.
164 let (result, read) =
165 decoder.decode_to_string_without_replacement(input, &mut output, last);
166 match result {
167 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
168 // Step 5.3.2 If result is finished, then return the result of running serialize I/O
169 // queue with this and output.
170 DecoderResult::InputEmpty => (output, read),
171 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
172 // Step 5.3.3 Otherwise, if result is error, throw a TypeError.
173 DecoderResult::Malformed(_, _) => {
174 return Err(Error::Type("Decoding failed".to_owned()));
175 },
176 DecoderResult::OutputFull => {
177 unreachable!("output is allocated with sufficient capacity")
178 },
179 }
180 } else {
181 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
182 // Step 4. Let output be the I/O queue of scalar values « end-of-queue ».
183 let mut output =
184 String::with_capacity(decoder.max_utf8_buffer_length(input.len()).ok_or_else(
185 || Error::Type("Expected UTF8 buffer length would overflow".to_owned()),
186 )?);
187
188 // Note: The two algorithms below are implemented in
189 // `encoding_rs::Decoder::decode_to_string`
190 //
191 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
192 // Step 5. While true:
193 // Step 5.1 Let item be the result of reading from this’s I/O queue.
194 // Step 5.2 If item is end-of-queue and this’s do not flush is true,
195 // then return the result of running serialize I/O queue with this and output.
196 // Step 5.3 Otherwise:
197 // Step 5.3.1 Let result be the result of processing an item with item, this’s decoder,
198 // this’s I/O queue, output, and this’s error mode.
199 //
200 // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
201 // Step 4. While true:
202 // Step 4.1 Let item be the result of reading from decoder’s I/O queue.
203 // Step 4.2 If item is end-of-queue:
204 // Step 4.2.1 Let outputChunk be the result of running serialize I/O queue with decoder and output.
205 // Step 4.2.2 If outputChunk is not the empty string, then enqueue outputChunk in decoder’s transform.
206 // Step 4.2.3 Return.
207 // Step 4.3 Let result be the result of processing an item with item, decoder’s decoder,
208 // decoder’s I/O queue, output, and decoder’s error mode.
209 // Step 4.4 If result is error, then throw a TypeError.
210 let (result, read, _replaced) = decoder.decode_to_string(input, &mut output, last);
211 match result {
212 // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
213 // Step 5.3.2 If result is finished, then return the result of running serialize I/O
214 // queue with this and output.
215 encoding_rs::CoderResult::InputEmpty => (output, read),
216 encoding_rs::CoderResult::OutputFull => {
217 unreachable!("output is allocated with sufficient capacity")
218 },
219 }
220 };
221
222 let (_consumed, remaining) = input.split_at(read);
223 *io_queue = remaining.to_vec();
224
225 Ok(output)
226 }
227}