script/dom/
textdecodercommon.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::cell::RefCell;
6
7use encoding_rs::{Decoder, DecoderResult, Encoding};
8
9use crate::dom::bindings::codegen::UnionTypes::ArrayBufferViewOrArrayBuffer;
10use crate::dom::bindings::error::{Error, Fallible};
11
12/// The shared part of `TextDecoder` and `TextDecoderStream`
13///
14/// Note that other than the three attributes defined in the `TextDecoderCommon`
15/// interface in the WebIDL, this also performs decoding.
16///
17/// <https://encoding.spec.whatwg.org/#textdecodercommon>
18#[allow(non_snake_case)]
19#[derive(JSTraceable, MallocSizeOf)]
20pub(crate) struct TextDecoderCommon {
21    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-encoding>
22    #[no_trace]
23    encoding: &'static Encoding,
24
25    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-fatal>
26    fatal: bool,
27
28    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-ignorebom>
29    ignoreBOM: bool,
30
31    /// The native decoder that is used to perform decoding
32    ///
33    /// <https://encoding.spec.whatwg.org/#textdecodercommon-decoder>
34    #[ignore_malloc_size_of = "defined in encoding_rs"]
35    #[no_trace]
36    decoder: RefCell<Decoder>,
37
38    /// <https://encoding.spec.whatwg.org/#textdecodercommon-i-o-queue>
39    io_queue: RefCell<Vec<u8>>,
40}
41
42#[allow(non_snake_case)]
43impl TextDecoderCommon {
44    pub(crate) fn new_inherited(
45        encoding: &'static Encoding,
46        fatal: bool,
47        ignoreBOM: bool,
48    ) -> TextDecoderCommon {
49        let decoder = if ignoreBOM {
50            encoding.new_decoder_without_bom_handling()
51        } else {
52            encoding.new_decoder_with_bom_removal()
53        };
54
55        TextDecoderCommon {
56            encoding,
57            fatal,
58            ignoreBOM,
59            decoder: RefCell::new(decoder),
60            io_queue: RefCell::new(Vec::new()),
61        }
62    }
63
64    /// <https://encoding.spec.whatwg.org/#textdecoder-encoding>
65    pub(crate) fn encoding(&self) -> &'static Encoding {
66        self.encoding
67    }
68
69    /// <https://encoding.spec.whatwg.org/#textdecodercommon-decoder>
70    pub(crate) fn decoder(&self) -> &RefCell<Decoder> {
71        &self.decoder
72    }
73
74    /// <https://encoding.spec.whatwg.org/#textdecodercommon-i-o-queue>
75    pub(crate) fn io_queue(&self) -> &RefCell<Vec<u8>> {
76        &self.io_queue
77    }
78
79    /// <https://encoding.spec.whatwg.org/#textdecoder-error-mode>
80    pub(crate) fn fatal(&self) -> bool {
81        self.fatal
82    }
83
84    /// <https://encoding.spec.whatwg.org/#textdecoder-ignore-bom-flag>
85    pub(crate) fn ignore_bom(&self) -> bool {
86        self.ignoreBOM
87    }
88
89    /// Shared by `TextDecoder` and `TextDecoderStream`
90    ///
91    /// <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
92    /// <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
93    #[allow(unsafe_code)]
94    pub(crate) fn decode(
95        &self,
96        input: Option<&ArrayBufferViewOrArrayBuffer>,
97        last: bool,
98    ) -> Fallible<String> {
99        // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
100        // Step 3. If input is given, then push a copy of input to this’s I/O queue.
101        //
102        // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
103        // Step 2. Push a copy of bufferSource to decoder’s I/O queue.
104        //
105        // NOTE: try to avoid this copy unless there are bytes left
106        let mut io_queue = self.io_queue.borrow_mut();
107        let input = match &input {
108            Some(ArrayBufferViewOrArrayBuffer::ArrayBufferView(a)) => unsafe {
109                if io_queue.is_empty() {
110                    a.as_slice()
111                } else {
112                    io_queue.extend_from_slice(a.as_slice());
113                    &io_queue[..]
114                }
115            },
116            Some(ArrayBufferViewOrArrayBuffer::ArrayBuffer(a)) => unsafe {
117                if io_queue.is_empty() {
118                    a.as_slice()
119                } else {
120                    io_queue.extend_from_slice(a.as_slice());
121                    &io_queue[..]
122                }
123            },
124            None => &io_queue[..],
125        };
126
127        let mut decoder = self.decoder.borrow_mut();
128        let (output, read) = if self.fatal {
129            // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
130            // Step 4. Let output be the I/O queue of scalar values « end-of-queue ».
131            //
132            // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
133            // Step 3. Let output be the I/O queue of scalar values « end-of-queue ».
134            let mut output = String::with_capacity(
135                decoder
136                    .max_utf8_buffer_length_without_replacement(input.len())
137                    .ok_or_else(|| {
138                        Error::Type("Expected UTF8 buffer length would overflow".to_owned())
139                    })?,
140            );
141
142            // Note: The two algorithms below are implemented in
143            // `encoding_rs::Decoder::decode_to_string_without_replacement`
144            //
145            // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
146            // Step 5. While true:
147            // Step 5.1 Let item be the result of reading from this’s I/O queue.
148            // Step 5.2 If item is end-of-queue and this’s do not flush is true,
149            //      then return the result of running serialize I/O queue with this and output.
150            // Step 5.3 Otherwise:
151            // Step 5.3.1 Let result be the result of processing an item with item, this’s decoder,
152            //      this’s I/O queue, output, and this’s error mode.
153            //
154            // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
155            // Step 4. While true:
156            // Step 4.1 Let item be the result of reading from decoder’s I/O queue.
157            // Step 4.2 If item is end-of-queue:
158            // Step 4.2.1 Let outputChunk be the result of running serialize I/O queue with decoder and output.
159            // Step 4.2.2 If outputChunk is not the empty string, then enqueue outputChunk in decoder’s transform.
160            // Step 4.2.3 Return.
161            // Step 4.3 Let result be the result of processing an item with item, decoder’s decoder,
162            //      decoder’s I/O queue, output, and decoder’s error mode.
163            // Step 4.4 If result is error, then throw a TypeError.
164            let (result, read) =
165                decoder.decode_to_string_without_replacement(input, &mut output, last);
166            match result {
167                // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
168                // Step 5.3.2 If result is finished, then return the result of running serialize I/O
169                //      queue with this and output.
170                DecoderResult::InputEmpty => (output, read),
171                // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
172                // Step 5.3.3 Otherwise, if result is error, throw a TypeError.
173                DecoderResult::Malformed(_, _) => {
174                    return Err(Error::Type("Decoding failed".to_owned()));
175                },
176                DecoderResult::OutputFull => {
177                    unreachable!("output is allocated with sufficient capacity")
178                },
179            }
180        } else {
181            // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
182            // Step 4. Let output be the I/O queue of scalar values « end-of-queue ».
183            let mut output =
184                String::with_capacity(decoder.max_utf8_buffer_length(input.len()).ok_or_else(
185                    || Error::Type("Expected UTF8 buffer length would overflow".to_owned()),
186                )?);
187
188            // Note: The two algorithms below are implemented in
189            // `encoding_rs::Decoder::decode_to_string`
190            //
191            // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
192            // Step 5. While true:
193            // Step 5.1 Let item be the result of reading from this’s I/O queue.
194            // Step 5.2 If item is end-of-queue and this’s do not flush is true,
195            //      then return the result of running serialize I/O queue with this and output.
196            // Step 5.3 Otherwise:
197            // Step 5.3.1 Let result be the result of processing an item with item, this’s decoder,
198            //      this’s I/O queue, output, and this’s error mode.
199            //
200            // <https://encoding.spec.whatwg.org/#decode-and-enqueue-a-chunk>
201            // Step 4. While true:
202            // Step 4.1 Let item be the result of reading from decoder’s I/O queue.
203            // Step 4.2 If item is end-of-queue:
204            // Step 4.2.1 Let outputChunk be the result of running serialize I/O queue with decoder and output.
205            // Step 4.2.2 If outputChunk is not the empty string, then enqueue outputChunk in decoder’s transform.
206            // Step 4.2.3 Return.
207            // Step 4.3 Let result be the result of processing an item with item, decoder’s decoder,
208            //      decoder’s I/O queue, output, and decoder’s error mode.
209            // Step 4.4 If result is error, then throw a TypeError.
210            let (result, read, _replaced) = decoder.decode_to_string(input, &mut output, last);
211            match result {
212                // <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
213                // Step 5.3.2 If result is finished, then return the result of running serialize I/O
214                //      queue with this and output.
215                encoding_rs::CoderResult::InputEmpty => (output, read),
216                encoding_rs::CoderResult::OutputFull => {
217                    unreachable!("output is allocated with sufficient capacity")
218                },
219            }
220        };
221
222        let (_consumed, remaining) = input.split_at(read);
223        *io_queue = remaining.to_vec();
224
225        Ok(output)
226    }
227}