Skip to main content

svgtypes/
stream.rs

1// Copyright 2018 the SVG Types Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::Error;
5use alloc::borrow::ToOwned;
6use alloc::vec;
7
/// Byte classification helpers for the XML/CSS subset handled by the parser.
pub(crate) trait ByteExt {
    /// Returns `true` if the byte is a numeric sign.
    ///
    /// `[+-]`
    fn is_sign(&self) -> bool;

    /// Returns `true` if the byte is a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Returns `true` if the byte is a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Returns `true` if the byte is a whitespace.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Returns `true` if the byte may appear in an ASCII ident.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Deliberately narrower than `u8::is_ascii_whitespace`,
        // which would also accept a form feed.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
58
/// Character classification helpers used when parsing CSS identifiers.
trait CharExt {
    /// Checks if a char can start a name.
    ///
    /// `[_a-zA-Z]`, any non-ASCII char or an escape.
    fn is_name_start(&self) -> bool;

    /// Checks if a char can continue a name.
    ///
    /// `[_a-zA-Z0-9-]`, any non-ASCII char or an escape.
    fn is_name_char(&self) -> bool;

    /// Checks if a char is a non-ASCII code point, i.e. `U+0080` or above.
    fn is_non_ascii(&self) -> bool;

    /// Checks if a char is an escape. Escapes are not supported yet,
    /// so this is always `false`.
    fn is_escape(&self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_name_start(&self) -> bool {
        matches!(*self, '_' | 'a'..='z' | 'A'..='Z') || self.is_non_ascii() || self.is_escape()
    }

    #[inline]
    fn is_name_char(&self) -> bool {
        matches!(*self, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-')
            || self.is_non_ascii()
            || self.is_escape()
    }

    #[inline]
    fn is_non_ascii(&self) -> bool {
        // CSS Syntax defines a non-ASCII code point as anything at or above
        // U+0080. A decimal `237` threshold would wrongly reject common
        // letters such as U+00E9 (233).
        !self.is_ascii()
    }

    #[inline]
    fn is_escape(&self) -> bool {
        // TODO: escape sequences are not recognized yet.
        false
    }
}
94
/// A streaming text parsing interface.
///
/// Pairs the text being parsed with a byte offset into it.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Stream<'a> {
    // The whole input text.
    text: &'a str,
    // Current byte offset into `text`.
    pos: usize,
}

impl<'a> From<&'a str> for Stream<'a> {
    /// Creates a stream positioned at the beginning of `text`.
    #[inline]
    fn from(text: &'a str) -> Self {
        Self { pos: 0, text }
    }
}
108
109impl<'a> Stream<'a> {
110    /// Returns the current position in bytes.
111    #[inline]
112    pub fn pos(&self) -> usize {
113        self.pos
114    }
115
116    /// Calculates the current position in chars.
117    pub fn calc_char_pos(&self) -> usize {
118        self.calc_char_pos_at(self.pos)
119    }
120
121    /// Calculates the current position in chars.
122    pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
123        let mut pos = 1;
124        for (idx, _) in self.text.char_indices() {
125            if idx >= byte_pos {
126                break;
127            }
128
129            pos += 1;
130        }
131
132        pos
133    }
134
135    /// Sets current position equal to the end.
136    ///
137    /// Used to indicate end of parsing on error.
138    #[inline]
139    pub fn jump_to_end(&mut self) {
140        self.pos = self.text.len();
141    }
142
143    /// Checks if the stream is reached the end.
144    ///
145    /// Any [`pos()`] value larger than original text length indicates stream end.
146    ///
147    /// Accessing stream after reaching end via safe methods will produce
148    /// an `UnexpectedEndOfStream` error.
149    ///
150    /// Accessing stream after reaching end via *_unchecked methods will produce
151    /// a Rust's bound checking error.
152    ///
153    /// [`pos()`]: #method.pos
154    #[inline]
155    pub fn at_end(&self) -> bool {
156        self.pos >= self.text.len()
157    }
158
159    /// Returns a byte from a current stream position.
160    ///
161    /// # Errors
162    ///
163    /// - `UnexpectedEndOfStream`
164    #[inline]
165    pub fn curr_byte(&self) -> Result<u8, Error> {
166        if self.at_end() {
167            return Err(Error::UnexpectedEndOfStream);
168        }
169
170        Ok(self.curr_byte_unchecked())
171    }
172
173    #[inline]
174    pub fn chars(&self) -> core::str::Chars<'a> {
175        self.text[self.pos..].chars()
176    }
177
178    /// Returns a byte from a current stream position.
179    ///
180    /// # Panics
181    ///
182    /// - if the current position is after the end of the data
183    #[inline]
184    pub fn curr_byte_unchecked(&self) -> u8 {
185        self.text.as_bytes()[self.pos]
186    }
187
188    /// Checks that current byte is equal to provided.
189    ///
190    /// Returns `false` if no bytes left.
191    #[inline]
192    pub fn is_curr_byte_eq(&self, c: u8) -> bool {
193        if !self.at_end() {
194            self.curr_byte_unchecked() == c
195        } else {
196            false
197        }
198    }
199
200    /// Returns a next byte from a current stream position.
201    ///
202    /// # Errors
203    ///
204    /// - `UnexpectedEndOfStream`
205    #[inline]
206    pub fn next_byte(&self) -> Result<u8, Error> {
207        if self.pos + 1 >= self.text.len() {
208            return Err(Error::UnexpectedEndOfStream);
209        }
210
211        Ok(self.text.as_bytes()[self.pos + 1])
212    }
213
214    /// Advances by `n` bytes.
215    #[inline]
216    pub fn advance(&mut self, n: usize) {
217        debug_assert!(self.pos + n <= self.text.len());
218        self.pos += n;
219    }
220
221    /// Skips whitespaces.
222    ///
223    /// Accepted values: `' ' \n \r \t`.
224    pub fn skip_spaces(&mut self) {
225        while !self.at_end() && self.curr_byte_unchecked().is_space() {
226            self.advance(1);
227        }
228    }
229
230    /// Checks that the stream starts with a selected text.
231    ///
232    /// We are using `&[u8]` instead of `&str` for performance reasons.
233    #[inline]
234    pub fn starts_with(&self, text: &[u8]) -> bool {
235        self.text.as_bytes()[self.pos..].starts_with(text)
236    }
237
238    /// Consumes current byte if it's equal to the provided byte.
239    ///
240    /// # Errors
241    ///
242    /// - `InvalidChar`
243    /// - `UnexpectedEndOfStream`
244    pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> {
245        if self.curr_byte()? != c {
246            return Err(Error::InvalidChar(
247                vec![self.curr_byte_unchecked(), c],
248                self.calc_char_pos(),
249            ));
250        }
251
252        self.advance(1);
253        Ok(())
254    }
255
256    /// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token).
257    ///
258    /// # Errors
259    ///
260    /// - `InvalidIdent`
261    pub fn parse_ident(&mut self) -> Result<&'a str, Error> {
262        let start = self.pos();
263
264        if self.curr_byte() == Ok(b'-') {
265            self.advance(1);
266        }
267
268        let mut iter = self.chars();
269        if let Some(c) = iter.next() {
270            if c.is_name_start() {
271                self.advance(c.len_utf8());
272            } else {
273                return Err(Error::InvalidIdent);
274            }
275        }
276
277        for c in iter {
278            if c.is_name_char() {
279                self.advance(c.len_utf8());
280            } else {
281                break;
282            }
283        }
284
285        if start == self.pos() {
286            return Err(Error::InvalidIdent);
287        }
288
289        let name = self.slice_back(start);
290        Ok(name)
291    }
292
293    /// Consumes a single ident consisting of ASCII characters, if available.
294    pub fn consume_ascii_ident(&mut self) -> &'a str {
295        let start = self.pos;
296        self.skip_bytes(|_, c| c.is_ascii_ident());
297        self.slice_back(start)
298    }
299
300    /// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token)
301    ///
302    /// # Errors
303    ///
304    /// - `UnexpectedEndOfStream`
305    /// - `InvalidValue`
306    pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> {
307        // Check for opening quote.
308        let quote = self.curr_byte()?;
309
310        if quote != b'\'' && quote != b'"' {
311            return Err(Error::InvalidValue);
312        }
313
314        let mut prev = quote;
315        self.advance(1);
316
317        let start = self.pos();
318
319        while !self.at_end() {
320            let curr = self.curr_byte_unchecked();
321
322            // Advance until the closing quote.
323            if curr == quote {
324                // Check for escaped quote.
325                if prev != b'\\' {
326                    break;
327                }
328            }
329
330            prev = curr;
331            self.advance(1);
332        }
333
334        let value = self.slice_back(start);
335
336        // Check for closing quote.
337        self.consume_byte(quote)?;
338
339        Ok(value)
340    }
341
342    /// Consumes selected string.
343    ///
344    /// # Errors
345    ///
346    /// - `InvalidChar`
347    /// - `UnexpectedEndOfStream`
348    pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> {
349        if self.at_end() {
350            return Err(Error::UnexpectedEndOfStream);
351        }
352
353        if !self.starts_with(text) {
354            let len = core::cmp::min(text.len(), self.text.len() - self.pos);
355            // Collect chars and do not slice a string,
356            // because the `len` can be on the char boundary.
357            // Which lead to a panic.
358            let actual = self.text[self.pos..].chars().take(len).collect();
359
360            // Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
361            let expected = core::str::from_utf8(text).unwrap().to_owned();
362
363            return Err(Error::InvalidString(
364                vec![actual, expected],
365                self.calc_char_pos(),
366            ));
367        }
368
369        self.advance(text.len());
370        Ok(())
371    }
372
373    /// Consumes bytes by the predicate and returns them.
374    ///
375    /// The result can be empty.
376    pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
377    where
378        F: Fn(&Stream<'_>, u8) -> bool,
379    {
380        let start = self.pos();
381        self.skip_bytes(f);
382        self.slice_back(start)
383    }
384
385    /// Consumes bytes by the predicate.
386    pub fn skip_bytes<F>(&mut self, f: F)
387    where
388        F: Fn(&Stream<'_>, u8) -> bool,
389    {
390        while !self.at_end() {
391            let c = self.curr_byte_unchecked();
392            if f(self, c) {
393                self.advance(1);
394            } else {
395                break;
396            }
397        }
398    }
399
400    /// Slices data from `pos` to the current position.
401    #[inline]
402    pub fn slice_back(&self, pos: usize) -> &'a str {
403        &self.text[pos..self.pos]
404    }
405
406    /// Slices data from the current position to the end.
407    #[inline]
408    pub fn slice_tail(&self) -> &'a str {
409        &self.text[self.pos..]
410    }
411
412    /// Parses number or percent from the stream.
413    ///
414    /// Percent value will be normalized.
415    pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> {
416        self.skip_spaces();
417
418        let n = self.parse_number()?;
419        if self.starts_with(b"%") {
420            self.advance(1);
421            Ok(n / 100.0)
422        } else {
423            Ok(n)
424        }
425    }
426
427    /// Parses number or percent from a list of numbers and/or percents.
428    pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> {
429        if self.at_end() {
430            return Err(Error::UnexpectedEndOfStream);
431        }
432
433        let l = self.parse_number_or_percent()?;
434        self.skip_spaces();
435        self.parse_list_separator();
436        Ok(l)
437    }
438
439    /// Skips digits.
440    pub fn skip_digits(&mut self) {
441        self.skip_bytes(|_, c| c.is_digit());
442    }
443
444    #[inline]
445    pub(crate) fn parse_list_separator(&mut self) {
446        if self.is_curr_byte_eq(b',') {
447            self.advance(1);
448        }
449    }
450}