ron/
parse.rs

1#![allow(clippy::identity_op)]
2
3use alloc::{
4    format,
5    string::{String, ToString},
6    vec::Vec,
7};
8use core::{
9    char::from_u32 as char_from_u32,
10    str::{self, from_utf8, FromStr, Utf8Error},
11};
12
13use unicode_ident::{is_xid_continue, is_xid_start};
14
15use crate::{
16    error::{Error, Position, Result, Span, SpannedError, SpannedResult},
17    extensions::Extensions,
18    value::Number,
19};
20
/// Returns `true` if `c` may appear in the digit run of an integer literal:
/// an ASCII hexadecimal digit (either case) or the `_` separator.
const fn is_int_char(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F' | '_')
}
24
/// Returns `true` if `c` may appear in the body of a float literal: an ASCII
/// decimal digit, an exponent marker (`e`/`E`), `.`, a sign, or `_`.
const fn is_float_char(c: char) -> bool {
    matches!(c, '0'..='9' | 'e' | 'E' | '.' | '+' | '-' | '_')
}
28
29pub fn is_ident_first_char(c: char) -> bool {
30    c == '_' || is_xid_start(c)
31}
32
33pub fn is_ident_raw_char(c: char) -> bool {
34    matches!(c, '.' | '+' | '-') | is_xid_continue(c)
35}
36
/// Returns `true` if `c` is one of the characters this parser treats as
/// whitespace.
///
/// The accepted set is: U+0009..=U+000D (tab, line feed, vertical tab, form
/// feed, carriage return), the space, U+0085, U+200E, U+200F, U+2028 and
/// U+2029.
pub const fn is_whitespace_char(c: char) -> bool {
    matches!(
        c,
        '\t'..='\r' | ' ' | '\u{85}' | '\u{200E}'..='\u{200F}' | '\u{2028}'..='\u{2029}'
    )
}
52
/// Unsigned integer alias selected by the `integer128` feature: `u128` when
/// the feature is enabled.
#[cfg(feature = "integer128")]
pub(crate) type LargeUInt = u128;
/// Unsigned integer alias selected by the `integer128` feature: `u64` when
/// the feature is disabled.
#[cfg(not(feature = "integer128"))]
pub(crate) type LargeUInt = u64;
/// Signed integer alias selected by the `integer128` feature: `i128` when
/// the feature is enabled.
#[cfg(feature = "integer128")]
pub(crate) type LargeSInt = i128;
/// Signed integer alias selected by the `integer128` feature: `i64` when
/// the feature is disabled.
#[cfg(not(feature = "integer128"))]
pub(crate) type LargeSInt = i64;
61
/// Parser state over a borrowed source string.
///
/// Positions are tracked as byte offsets into `src` (see [`ParserCursor`]).
pub struct Parser<'a> {
    /// Bits set according to the [`Extensions`] enum.
    pub exts: Extensions,
    /// The complete input; the cursors index into this string.
    src: &'a str,
    /// Current read position.
    cursor: ParserCursor,
    /// Copy of `cursor` taken by `advance_bytes` just before it moves the
    /// cursor forward; `span_error` uses it as the start of the error span.
    prev_cursor: ParserCursor,
}
69
/// A saved parser position.
///
/// Comparison (`PartialEq` / `PartialOrd`) only considers the `cursor` field.
#[derive(Copy, Clone)] // GRCOV_EXCL_LINE
pub struct ParserCursor {
    /// Byte offset into the parser's source string.
    cursor: usize,
    /// Byte offset used by `Parser::pre_ws_src` to slice the source.
    // NOTE(review): presumably the position before the most recently skipped
    // whitespace; it is maintained outside the code visible here — confirm.
    pre_ws_cursor: usize,
    // NOTE(review): presumably the byte length of the most recently skipped
    // whitespace run; not read or written in the code visible here — confirm.
    last_ws_len: usize,
}
76
/// Outcome of trying to parse one document attribute (see
/// `Parser::attribute`).
enum ParsedAttribute {
    /// The input at the cursor does not start with `#`; no attribute present.
    None,
    /// An `enable(...)` attribute; carries the extensions it listed.
    Extensions(Extensions),
    /// A `type = "..."` or `schema = "..."` attribute; it is parsed but its
    /// value is discarded.
    Ignored,
}
82
// Sentinel value (`usize::MAX`).
// NOTE(review): presumably stored in a whitespace cursor field to flag an
// unclosed line comment; its use is outside the code visible here — confirm.
const WS_CURSOR_UNCLOSED_LINE: usize = usize::MAX;
84
85impl PartialEq for ParserCursor {
86    fn eq(&self, other: &Self) -> bool {
87        self.cursor == other.cursor
88    }
89}
90
91impl PartialOrd for ParserCursor {
92    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
93        self.cursor.partial_cmp(&other.cursor)
94    }
95}
96
97/// constructor and parsing utilities
98impl<'a> Parser<'a> {
99    pub fn new(src: &'a str) -> SpannedResult<Self> {
100        let mut parser = Parser {
101            exts: Extensions::empty(),
102            src,
103            cursor: ParserCursor {
104                cursor: 0,
105                pre_ws_cursor: 0,
106                last_ws_len: 0,
107            },
108            prev_cursor: ParserCursor {
109                cursor: 0,
110                pre_ws_cursor: 0,
111                last_ws_len: 0,
112            },
113        };
114
115        parser.skip_ws().map_err(|e| parser.span_error(e))?;
116
117        // Loop over all document attributes
118        loop {
119            match parser.attribute().map_err(|e| parser.span_error(e))? {
120                ParsedAttribute::None => break,
121                ParsedAttribute::Extensions(extensions) => {
122                    parser.exts |= extensions;
123                }
124                ParsedAttribute::Ignored => {}
125            }
126
127            parser.skip_ws().map_err(|e| parser.span_error(e))?;
128        }
129
130        Ok(parser)
131    }
132
    /// Moves the parser to a previously saved `cursor`.
    ///
    /// Only `self.cursor` is overwritten; `prev_cursor` is left untouched.
    fn set_cursor(&mut self, cursor: ParserCursor) {
        self.cursor = cursor;
    }
136
137    pub fn span_error(&self, code: Error) -> SpannedError {
138        SpannedError {
139            code,
140            span: Span {
141                start: Position::from_src_end(&self.src[..self.prev_cursor.cursor]),
142                end: Position::from_src_end(&self.src[..self.cursor.cursor]),
143            },
144        }
145    }
146
147    pub fn is_number_start(&self, c: char) -> bool {
148        matches!(c, '0'..='9' | '+' | '-' | '.' | 'b') && (c != 'b' || self.src().starts_with("b'"))
149    }
150
151    pub fn advance_bytes(&mut self, bytes: usize) {
152        self.prev_cursor = self.cursor;
153        self.cursor.cursor += bytes;
154    }
155
156    pub fn next_char(&mut self) -> Result<char> {
157        let c = self.peek_char_or_eof()?;
158        self.cursor.cursor += c.len_utf8();
159        Ok(c)
160    }
161
162    pub fn skip_next_char(&mut self) {
163        core::mem::drop(self.next_char());
164    }
165
166    pub fn peek_char(&self) -> Option<char> {
167        self.src().chars().next()
168    }
169
170    pub fn peek_char_or_eof(&self) -> Result<char> {
171        self.peek_char().ok_or(Error::Eof)
172    }
173
174    pub fn check_char(&self, c: char) -> bool {
175        self.src().starts_with(c)
176    }
177
178    pub fn check_str(&self, s: &str) -> bool {
179        self.src().starts_with(s)
180    }
181
182    pub fn src(&self) -> &'a str {
183        &self.src[self.cursor.cursor..]
184    }
185
186    pub fn pre_ws_src(&self) -> &'a str {
187        &self.src[self.cursor.pre_ws_cursor..]
188    }
189
190    pub fn consume_str(&mut self, s: &str) -> bool {
191        if self.check_str(s) {
192            self.advance_bytes(s.len());
193
194            true
195        } else {
196            false
197        }
198    }
199
200    pub fn consume_char(&mut self, c: char) -> bool {
201        if self.check_char(c) {
202            self.advance_bytes(c.len_utf8());
203
204            true
205        } else {
206            false
207        }
208    }
209
210    fn consume_all(&mut self, all: &[&str]) -> Result<bool> {
211        all.iter()
212            .map(|elem| {
213                if self.consume_str(elem) {
214                    self.skip_ws()?;
215
216                    Ok(true)
217                } else {
218                    Ok(false)
219                }
220            })
221            .try_fold(true, |acc, x| x.map(|x| x && acc))
222    }
223
224    pub fn expect_char(&mut self, expected: char, error: Error) -> Result<()> {
225        if self.consume_char(expected) {
226            Ok(())
227        } else {
228            Err(error)
229        }
230    }
231
232    #[must_use]
233    pub fn next_chars_while_len(&self, condition: fn(char) -> bool) -> usize {
234        self.next_chars_while_from_len(0, condition)
235    }
236
237    #[must_use]
238    pub fn next_chars_while_from_len(&self, from: usize, condition: fn(char) -> bool) -> usize {
239        self.src()[from..]
240            .find(|c| !condition(c))
241            .unwrap_or(self.src().len() - from)
242    }
243}
244
245/// actual parsing of ron tokens
246impl<'a> Parser<'a> {
247    fn parse_integer_digits<T: Num>(
248        &mut self,
249        s: &str,
250        base: u8,
251        f: fn(&mut T, u8) -> bool,
252    ) -> Result<T> {
253        let mut num_acc = T::from_u8(0);
254
255        for (i, c) in s.char_indices() {
256            if c == '_' {
257                continue;
258            }
259
260            if num_acc.checked_mul_ext(base) {
261                self.advance_bytes(s.len());
262                return Err(Error::IntegerOutOfBounds);
263            }
264
265            let digit = Self::decode_hex(c)?;
266
267            if digit >= base {
268                self.advance_bytes(i);
269                return Err(Error::InvalidIntegerDigit { digit: c, base });
270            }
271
272            if f(&mut num_acc, digit) {
273                self.advance_bytes(s.len());
274                return Err(Error::IntegerOutOfBounds);
275            }
276        }
277
278        self.advance_bytes(s.len());
279
280        Ok(num_acc)
281    }
282
283    fn parse_integer<T: Num>(&mut self, sign: i8, base: u8) -> Result<T> {
284        let num_bytes = self.next_chars_while_len(is_int_char);
285
286        if num_bytes == 0 {
287            return Err(Error::ExpectedInteger);
288        }
289
290        if self.check_char('_') {
291            return Err(Error::UnderscoreAtBeginning);
292        }
293
294        let s = &self.src()[..num_bytes];
295
296        if sign > 0 {
297            self.parse_integer_digits(s, base, T::checked_add_ext)
298        } else {
299            self.parse_integer_digits(s, base, T::checked_sub_ext)
300        }
301    }
302
    /// Parses an integer literal at the cursor and converts it to `T`.
    ///
    /// Accepted forms, in the order they are checked:
    /// - an optional leading `+` or `-`;
    /// - a byte literal `b'…'` (plain ASCII character or escape), returned
    ///   immediately as a `u8`;
    /// - an optional base prefix `0b`, `0o` or `0x` (default base 10);
    /// - digits, optionally followed by a type suffix (`i8`…`i64`,
    ///   `u8`…`u64`, plus `i128`/`u128` with the `integer128` feature).
    ///
    /// With a recognised suffix the digits are parsed as that exact type and
    /// handed to `T::try_from_parsed_integer` together with the literal's
    /// source text. Without one, parsing is delegated to `T::parse`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Eof`] on empty input, [`Error::ExpectedByteLiteral`]
    /// or [`Error::InvalidEscape`] for malformed byte literals, and otherwise
    /// whatever the digit parsing or the conversion into `T` reports.
    #[allow(clippy::too_many_lines)]
    pub fn integer<T: Integer>(&mut self) -> Result<T> {
        // Remaining input at entry; used later to recover the literal's text.
        let src_backup = self.src();

        let is_negative = match self.peek_char_or_eof()? {
            '+' => {
                self.skip_next_char();
                false
            }
            '-' => {
                self.skip_next_char();
                true
            }
            'b' if self.consume_str("b'") => {
                // Parse a byte literal
                let byte = match self.next_char()? {
                    '\\' => match self.parse_escape(EscapeEncoding::Binary, true)? {
                        // we know that this byte is an ASCII character
                        EscapeCharacter::Ascii(b) => b,
                        EscapeCharacter::Utf8(_) => {
                            return Err(Error::InvalidEscape(
                                "Unexpected Unicode escape in byte literal",
                            ))
                        }
                    },
                    b if b.is_ascii() => b as u8,
                    _ => return Err(Error::ExpectedByteLiteral),
                };

                if !self.consume_char('\'') {
                    return Err(Error::ExpectedByteLiteral);
                }

                // Everything consumed since entry, i.e. the whole `b'…'` text.
                let bytes_ron = &src_backup[..src_backup.len() - self.src().len()];

                return T::try_from_parsed_integer(ParsedInteger::U8(byte), bytes_ron);
            }
            _ => false,
        };
        let sign = if is_negative { -1 } else { 1 };

        // The first matching prefix is consumed; no prefix means decimal.
        let base = match () {
            () if self.consume_str("0b") => 2,
            () if self.consume_str("0o") => 8,
            () if self.consume_str("0x") => 16,
            () => 10,
        };

        let num_bytes = self.next_chars_while_len(is_int_char);

        // Only look for a type suffix if the digits are followed by `i`/`u`.
        if self.src()[num_bytes..].starts_with(['i', 'u']) {
            // Remember where the digits start, then step over them to inspect
            // the suffix. Each branch rewinds to `int_cursor` before parsing
            // the digits as the suffix's type.
            let int_cursor = self.cursor;
            self.advance_bytes(num_bytes);

            // The loop body runs at most once; `break` is used to fall out
            // when no known suffix is present.
            #[allow(clippy::never_loop)]
            loop {
                // `suffix_bytes` is the input remaining after the suffix.
                let (res, suffix_bytes) = if self.consume_ident("i8") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<i8>(sign, base).map(ParsedInteger::I8),
                        suffix_bytes,
                    )
                } else if self.consume_ident("i16") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<i16>(sign, base)
                            .map(ParsedInteger::I16),
                        suffix_bytes,
                    )
                } else if self.consume_ident("i32") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<i32>(sign, base)
                            .map(ParsedInteger::I32),
                        suffix_bytes,
                    )
                } else if self.consume_ident("i64") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<i64>(sign, base)
                            .map(ParsedInteger::I64),
                        suffix_bytes,
                    )
                } else if self.consume_ident("u8") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<u8>(sign, base).map(ParsedInteger::U8),
                        suffix_bytes,
                    )
                } else if self.consume_ident("u16") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<u16>(sign, base)
                            .map(ParsedInteger::U16),
                        suffix_bytes,
                    )
                } else if self.consume_ident("u32") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<u32>(sign, base)
                            .map(ParsedInteger::U32),
                        suffix_bytes,
                    )
                } else if self.consume_ident("u64") {
                    let suffix_bytes = self.src();
                    self.set_cursor(int_cursor);
                    (
                        self.parse_integer::<u64>(sign, base)
                            .map(ParsedInteger::U64),
                        suffix_bytes,
                    )
                } else {
                    #[cfg(feature = "integer128")]
                    if self.consume_ident("i128") {
                        let suffix_bytes = self.src();
                        self.set_cursor(int_cursor);
                        (
                            self.parse_integer::<i128>(sign, base)
                                .map(ParsedInteger::I128),
                            suffix_bytes,
                        )
                    } else if self.consume_ident("u128") {
                        let suffix_bytes = self.src();
                        self.set_cursor(int_cursor);
                        (
                            self.parse_integer::<u128>(sign, base)
                                .map(ParsedInteger::U128),
                            suffix_bytes,
                        )
                    } else {
                        break;
                    }
                    #[cfg(not(feature = "integer128"))]
                    {
                        break;
                    }
                };

                // For these two errors the cursor is left where the digit
                // parser stopped; in every other case the suffix is skipped.
                if !matches!(
                    &res,
                    Err(Error::UnderscoreAtBeginning | Error::InvalidIntegerDigit { .. })
                ) {
                    // Advance past the number suffix
                    self.skip_identifier();
                }

                // The literal's text from the entry position through the suffix.
                let integer_ron = &src_backup[..src_backup.len() - suffix_bytes.len()];

                return res.and_then(|parsed| T::try_from_parsed_integer(parsed, integer_ron));
            }

            // No known suffix: rewind to the start of the digits.
            self.set_cursor(int_cursor);
        }

        T::parse(self, sign, base)
    }
466
467    pub fn any_number(&mut self) -> Result<Number> {
468        if self.consume_ident("inf") || self.consume_ident("inff32") {
469            return Ok(Number::F32(crate::value::F32(core::f32::INFINITY)));
470        } else if self.consume_ident("inff64") {
471            return Ok(Number::F64(crate::value::F64(core::f64::INFINITY)));
472        } else if self.consume_ident("NaN") || self.consume_ident("NaNf32") {
473            return Ok(Number::F32(crate::value::F32(core::f32::NAN)));
474        } else if self.consume_ident("NaNf64") {
475            return Ok(Number::F64(crate::value::F64(core::f64::NAN)));
476        }
477
478        if self.next_bytes_is_float() {
479            return match self.float::<ParsedFloat>()? {
480                ParsedFloat::F32(v) => Ok(Number::F32(v.into())),
481                ParsedFloat::F64(v) => Ok(Number::F64(v.into())),
482            };
483        }
484
485        let backup_cursor = self.cursor;
486
487        let (integer_err, integer_cursor) = match self.integer::<ParsedInteger>() {
488            Ok(integer) => {
489                return match integer {
490                    ParsedInteger::I8(v) => Ok(Number::I8(v)),
491                    ParsedInteger::I16(v) => Ok(Number::I16(v)),
492                    ParsedInteger::I32(v) => Ok(Number::I32(v)),
493                    ParsedInteger::I64(v) => Ok(Number::I64(v)),
494                    #[cfg(feature = "integer128")]
495                    ParsedInteger::I128(v) => Ok(Number::I128(v)),
496                    ParsedInteger::U8(v) => Ok(Number::U8(v)),
497                    ParsedInteger::U16(v) => Ok(Number::U16(v)),
498                    ParsedInteger::U32(v) => Ok(Number::U32(v)),
499                    ParsedInteger::U64(v) => Ok(Number::U64(v)),
500                    #[cfg(feature = "integer128")]
501                    ParsedInteger::U128(v) => Ok(Number::U128(v)),
502                }
503            }
504            Err(err) => (err, self.cursor),
505        };
506
507        self.set_cursor(backup_cursor);
508
509        // Fall-back to parse an out-of-range integer as a float
510        match self.float::<ParsedFloat>() {
511            Ok(ParsedFloat::F32(v)) if self.cursor >= integer_cursor => Ok(Number::F32(v.into())),
512            Ok(ParsedFloat::F64(v)) if self.cursor >= integer_cursor => Ok(Number::F64(v.into())),
513            _ => {
514                // Return the more precise integer error
515                self.set_cursor(integer_cursor);
516                Err(integer_err)
517            }
518        }
519    }
520
521    pub fn bool(&mut self) -> Result<bool> {
522        if self.consume_ident("true") {
523            Ok(true)
524        } else if self.consume_ident("false") {
525            Ok(false)
526        } else {
527            Err(Error::ExpectedBoolean)
528        }
529    }
530
531    pub fn char(&mut self) -> Result<char> {
532        self.expect_char('\'', Error::ExpectedChar)?;
533
534        let c = self.next_char()?;
535
536        let c = if c == '\\' {
537            match self.parse_escape(EscapeEncoding::Utf8, true)? {
538                // we know that this byte is an ASCII character
539                EscapeCharacter::Ascii(b) => char::from(b),
540                EscapeCharacter::Utf8(c) => c,
541            }
542        } else {
543            c
544        };
545
546        self.expect_char('\'', Error::ExpectedChar)?;
547
548        Ok(c)
549    }
550
551    pub fn comma(&mut self) -> Result<bool> {
552        self.skip_ws()?;
553
554        if self.consume_char(',') {
555            self.skip_ws()?;
556
557            Ok(true)
558        } else {
559            Ok(false)
560        }
561    }
562
563    /// Only returns true if the char after `ident` cannot belong
564    /// to an identifier.
565    pub fn check_ident(&mut self, ident: &str) -> bool {
566        self.check_str(ident) && !self.check_ident_other_char(ident.len())
567    }
568
569    fn check_ident_other_char(&self, index: usize) -> bool {
570        self.src()[index..]
571            .chars()
572            .next()
573            .map_or(false, is_xid_continue)
574    }
575
    /// Check which type of struct we are currently parsing. The parsing state
    ///  is only changed in case of an error, to provide a better position.
    ///
    /// [`NewtypeMode::NoParensMeanUnit`] detects (tuple) structs by a leading
    ///  opening bracket and reports a unit struct otherwise.
    /// [`NewtypeMode::InsideNewtype`] skips an initial check for unit structs,
    ///  and means that any leading opening bracket is not considered to open
    ///  a (tuple) struct but to be part of the structs inner contents.
    ///
    /// [`TupleMode::ImpreciseTupleOrNewtype`] only performs a cheap, O(1),
    ///  single-identifier lookahead check to distinguish tuple structs from
    ///  non-tuple structs.
    /// [`TupleMode::DifferentiateNewtype`] performs an expensive, O(N), look-
    ///  ahead over the entire next value tree, which can span the entirety of
    ///  the remaining document in the worst case.
    ///
    /// # Errors
    ///
    /// Propagates errors from skipping whitespace, unterminated (byte) strings
    /// met during the lookahead, and [`Error::Eof`] if the input ends before
    /// the lookahead finishes. On error the cursor is NOT restored.
    pub fn check_struct_type(
        &mut self,
        newtype: NewtypeMode,
        tuple: TupleMode,
    ) -> Result<StructType> {
        // Performs the lookahead while freely moving the cursor; the outer
        // function restores the cursor when this succeeds.
        fn check_struct_type_inner(
            parser: &mut Parser,
            newtype: NewtypeMode,
            tuple: TupleMode,
        ) -> Result<StructType> {
            if matches!(newtype, NewtypeMode::NoParensMeanUnit) && !parser.consume_char('(') {
                return Ok(StructType::Unit);
            }

            parser.skip_ws()?;

            // Check for `Ident()`, which could be
            // - a zero-field struct or tuple (variant)
            // - an unwrapped newtype around a unit
            if matches!(newtype, NewtypeMode::NoParensMeanUnit) && parser.check_char(')') {
                return Ok(StructType::EmptyTuple);
            }

            if parser.skip_identifier().is_some() {
                parser.skip_ws()?;

                match parser.peek_char() {
                    // Definitely a struct with named fields
                    Some(':') => return Ok(StructType::Named),
                    // Definitely a tuple-like struct with fields
                    Some(',') => {
                        parser.skip_next_char();
                        parser.skip_ws()?;
                        if parser.check_char(')') {
                            // A one-element tuple could be a newtype
                            return Ok(StructType::NewtypeTuple);
                        }
                        // Definitely a tuple struct with more than one field
                        return Ok(StructType::NonNewtypeTuple);
                    }
                    // Either a newtype or a tuple struct
                    Some(')') => return Ok(StructType::NewtypeTuple),
                    // Something else, let's investigate further
                    Some(_) | None => (),
                };
            }

            if matches!(tuple, TupleMode::ImpreciseTupleOrNewtype) {
                return Ok(StructType::AnyTuple);
            }

            // Nesting depth of brackets; starts at 1 for the enclosing one.
            let mut braces = 1_usize;
            let mut more_than_one = false;

            // Skip ahead to see if the value is followed by another value
            while braces > 0 {
                // Skip spurious braces in comments, strings, and characters
                parser.skip_ws()?;
                // Try a char literal; rewind if there is none here.
                let cursor_backup = parser.cursor;
                if parser.char().is_err() {
                    parser.set_cursor(cursor_backup);
                }
                // Try a string literal; rewind on any recoverable failure.
                let cursor_backup = parser.cursor;
                match parser.string() {
                    Ok(_) => (),
                    // prevent quadratic complexity backtracking for unterminated string
                    Err(err @ (Error::ExpectedStringEnd | Error::Eof)) => return Err(err),
                    Err(_) => parser.set_cursor(cursor_backup),
                }
                let cursor_backup = parser.cursor;
                // we have already checked for strings, which subsume base64 byte strings
                match parser.byte_string_no_base64() {
                    Ok(_) => (),
                    // prevent quadratic complexity backtracking for unterminated byte string
                    Err(err @ (Error::ExpectedStringEnd | Error::Eof)) => return Err(err),
                    Err(_) => parser.set_cursor(cursor_backup),
                }

                let c = parser.next_char()?;
                if matches!(c, '(' | '[' | '{') {
                    braces += 1;
                } else if matches!(c, ')' | ']' | '}') {
                    braces -= 1;
                } else if c == ',' && braces == 1 {
                    // A top-level comma: a second value follows unless this
                    // is a trailing comma directly before the closing `)`.
                    parser.skip_ws()?;
                    more_than_one = !parser.check_char(')');
                    break;
                }
            }

            if more_than_one {
                Ok(StructType::NonNewtypeTuple)
            } else {
                Ok(StructType::NewtypeTuple)
            }
        }

        // Create a temporary working copy
        let backup_cursor = self.cursor;

        let result = check_struct_type_inner(self, newtype, tuple);

        if result.is_ok() {
            // Revert the parser to before the struct type check
            self.set_cursor(backup_cursor);
        }

        result
    }
700
701    /// Only returns true if the char after `ident` cannot belong
702    /// to an identifier.
703    pub fn consume_ident(&mut self, ident: &str) -> bool {
704        if self.check_ident(ident) {
705            self.advance_bytes(ident.len());
706
707            true
708        } else {
709            false
710        }
711    }
712
713    pub fn consume_struct_name(&mut self, ident: &'static str) -> Result<bool> {
714        if self.check_ident("") {
715            if self.exts.contains(Extensions::EXPLICIT_STRUCT_NAMES) {
716                return Err(Error::ExpectedStructName(ident.to_string()));
717            }
718
719            return Ok(false);
720        }
721
722        let found_ident = match self.identifier() {
723            Ok(maybe_ident) => maybe_ident,
724            Err(Error::SuggestRawIdentifier(found_ident)) if found_ident == ident => {
725                return Err(Error::SuggestRawIdentifier(found_ident))
726            }
727            Err(_) => return Err(Error::ExpectedNamedStructLike(ident)),
728        };
729
730        if ident.is_empty() {
731            return Err(Error::ExpectedNamedStructLike(ident));
732        }
733
734        if found_ident != ident {
735            return Err(Error::ExpectedDifferentStructName {
736                expected: ident,
737                found: String::from(found_ident),
738            });
739        }
740
741        Ok(true)
742    }
743
744    /// Parse a document attribute at the current cursor position.
745    fn attribute(&mut self) -> Result<ParsedAttribute> {
746        if !self.check_char('#') {
747            return Ok(ParsedAttribute::None);
748        }
749
750        if !self.consume_all(&["#", "!", "["])? {
751            return Err(Error::ExpectedAttribute);
752        }
753
754        self.skip_ws()?;
755        if self.consume_ident("enable") {
756            self.skip_ws()?;
757            if !self.consume_str("(") {
758                return Err(Error::ExpectedAttribute);
759            }
760
761            self.skip_ws()?;
762            let extensions = self.extension_list()?;
763            self.skip_ws()?;
764
765            if self.consume_all(&[")", "]"])? {
766                Ok(ParsedAttribute::Extensions(extensions))
767            } else {
768                Err(Error::ExpectedAttributeEnd)
769            }
770        } else if self.consume_ident("type") || self.consume_ident("schema") {
771            self.skip_ws()?;
772            if !self.consume_str("=") {
773                return Err(Error::ExpectedAttribute);
774            }
775
776            self.skip_ws()?;
777            self.string()?;
778            self.skip_ws()?;
779
780            if self.consume_str("]") {
781                Ok(ParsedAttribute::Ignored)
782            } else {
783                Err(Error::ExpectedAttributeEnd)
784            }
785        } else {
786            Err(Error::ExpectedAttribute)
787        }
788    }
789
790    /// Returns the extensions bit mask.
791    fn extension_list(&mut self) -> Result<Extensions> {
792        let mut extensions = Extensions::empty();
793
794        loop {
795            let ident = self.identifier()?;
796            let extension = Extensions::from_ident(ident)
797                .ok_or_else(|| Error::NoSuchExtension(ident.into()))?;
798
799            extensions |= extension;
800
801            let comma = self.comma()?;
802
803            // If we have no comma but another item, return an error
804            if !comma && self.check_ident_other_char(0) {
805                return Err(Error::ExpectedComma);
806            }
807
808            // If there's no comma, assume the list ended.
809            // If there is, it might be a trailing one, thus we only
810            // continue the loop if we get an ident char.
811            if !comma || !self.check_ident_other_char(0) {
812                break;
813            }
814        }
815
816        Ok(extensions)
817    }
818
    /// Parses a float literal at the cursor and converts it to `T`.
    ///
    /// Handled in order:
    /// 1. The special literals `inf` and `NaN`, each optionally preceded by
    ///    `+` or `-` and optionally followed by an `f32` / `f64` suffix.
    /// 2. A run of float characters (cut short at the first `..`), with `_`
    ///    separators removed, optionally followed by an `f32` / `f64` suffix.
    ///
    /// Suffixed literals go through `T::try_from_parsed_float` together with
    /// their source text; unsuffixed ones go through `T::parse`.
    ///
    /// # Errors
    ///
    /// - [`Error::ExpectedFloat`] if there is no float character at the
    ///   cursor, or a suffixed literal's digits do not parse.
    /// - [`Error::UnderscoreAtBeginning`] if the literal starts with `_`.
    /// - [`Error::FloatUnderscore`] if a `_` appears where it is not allowed;
    ///   the cursor is advanced up to that underscore.
    /// - Any error from `T::parse` or `T::try_from_parsed_float`.
    pub fn float<T: Float>(&mut self) -> Result<T> {
        const F32_SUFFIX: &str = "f32";
        const F64_SUFFIX: &str = "f64";

        for (literal, value_f32, value_f64) in &[
            ("inf", f32::INFINITY, f64::INFINITY),
            ("+inf", f32::INFINITY, f64::INFINITY),
            ("-inf", f32::NEG_INFINITY, f64::NEG_INFINITY),
            ("NaN", f32::NAN, f64::NAN),
            ("+NaN", f32::NAN, f64::NAN),
            ("-NaN", -f32::NAN, -f64::NAN),
        ] {
            // Unsuffixed special literal.
            if self.consume_ident(literal) {
                return T::parse(literal);
            }

            // Special literal with an explicit `f32` / `f64` suffix; the
            // suffix must not be followed by another identifier character.
            if let Some(suffix) = self.src().strip_prefix(literal) {
                if let Some(post_suffix) = suffix.strip_prefix(F32_SUFFIX) {
                    if !post_suffix.chars().next().map_or(false, is_xid_continue) {
                        let float_ron = &self.src()[..literal.len() + F32_SUFFIX.len()];
                        self.advance_bytes(literal.len() + F32_SUFFIX.len());
                        return T::try_from_parsed_float(ParsedFloat::F32(*value_f32), float_ron);
                    }
                }

                if let Some(post_suffix) = suffix.strip_prefix(F64_SUFFIX) {
                    if !post_suffix.chars().next().map_or(false, is_xid_continue) {
                        let float_ron = &self.src()[..literal.len() + F64_SUFFIX.len()];
                        self.advance_bytes(literal.len() + F64_SUFFIX.len());
                        return T::try_from_parsed_float(ParsedFloat::F64(*value_f64), float_ron);
                    }
                }
            }
        }

        let raw_bytes = self.next_chars_while_len(is_float_char);
        let src = &self.src()[..raw_bytes];
        // Stop the literal before a `..`, if the float characters contain one.
        let num_bytes = src.find("..").unwrap_or(raw_bytes);

        if num_bytes == 0 {
            return Err(Error::ExpectedFloat);
        }

        if self.check_char('_') {
            return Err(Error::UnderscoreAtBeginning);
        }

        // The literal's text with underscores stripped.
        let mut f = String::with_capacity(num_bytes);
        // An underscore is only accepted once a digit or exponent marker has
        // been seen since the start or since the last `.`.
        let mut allow_underscore = false;

        for (i, c) in self.src()[..num_bytes].char_indices() {
            match c {
                '_' if allow_underscore => continue,
                '_' => {
                    self.advance_bytes(i);
                    return Err(Error::FloatUnderscore);
                }
                '0'..='9' | 'e' | 'E' => allow_underscore = true,
                '.' => allow_underscore = false,
                _ => (),
            }

            // we know that the byte is an ASCII character here
            f.push(c);
        }

        if self.src()[num_bytes..].starts_with('f') {
            // Step over the digits to inspect the suffix; rewound below if
            // the suffix is not `f32` / `f64` or the digits fail to parse.
            let backup_cursor = self.cursor;
            self.advance_bytes(num_bytes);

            // The loop body runs at most once; `break` falls out when no
            // known suffix is present.
            #[allow(clippy::never_loop)]
            loop {
                let res = if self.consume_ident(F32_SUFFIX) {
                    f32::from_str(&f).map(ParsedFloat::F32)
                } else if self.consume_ident(F64_SUFFIX) {
                    f64::from_str(&f).map(ParsedFloat::F64)
                } else {
                    break;
                };

                let parsed = if let Ok(parsed) = res {
                    parsed
                } else {
                    self.set_cursor(backup_cursor);
                    return Err(Error::ExpectedFloat);
                };

                // Source text of the literal including its suffix.
                let float_ron = &self.src[backup_cursor.cursor..self.cursor.cursor];

                return T::try_from_parsed_float(parsed, float_ron);
            }

            self.set_cursor(backup_cursor);
        }

        let value = T::parse(&f)?;

        // Only consume the literal once it has parsed successfully.
        self.advance_bytes(num_bytes);

        Ok(value)
    }
920
921    pub fn skip_identifier(&mut self) -> Option<&'a str> {
922        #[allow(clippy::nonminimal_bool)]
923        if self.check_str("b\"") // byte string
924            || self.check_str("b'") // byte literal
925            || self.check_str("br#") // raw byte string
926            || self.check_str("br\"") // raw byte string
927            || self.check_str("r\"") // raw string
928            || self.check_str("r#\"") // raw string
929            || self.check_str("r##") // raw string
930            || false
931        {
932            return None;
933        }
934
935        if self.check_str("r#") {
936            // maybe a raw identifier
937            let len = self.next_chars_while_from_len(2, is_ident_raw_char);
938            if len > 0 {
939                let ident = &self.src()[2..2 + len];
940                self.advance_bytes(2 + len);
941                return Some(ident);
942            }
943            return None;
944        }
945
946        if let Some(c) = self.peek_char() {
947            // maybe a normal identifier
948            if is_ident_first_char(c) {
949                let len =
950                    c.len_utf8() + self.next_chars_while_from_len(c.len_utf8(), is_xid_continue);
951                let ident = &self.src()[..len];
952                self.advance_bytes(len);
953                return Some(ident);
954            }
955        }
956
957        None
958    }
959
960    pub fn identifier(&mut self) -> Result<&'a str> {
961        let first = self.peek_char_or_eof()?;
962        if !is_ident_first_char(first) {
963            if is_ident_raw_char(first) {
964                let ident_bytes = self.next_chars_while_len(is_ident_raw_char);
965                return Err(Error::SuggestRawIdentifier(
966                    self.src()[..ident_bytes].into(),
967                ));
968            }
969
970            return Err(Error::ExpectedIdentifier);
971        }
972
973        // If the next 2-3 bytes signify the start of a (raw) (byte) string
974        //  literal, return an error.
975        #[allow(clippy::nonminimal_bool)]
976        if self.check_str("b\"") // byte string
977            || self.check_str("b'") // byte literal
978            || self.check_str("br#") // raw byte string
979            || self.check_str("br\"") // raw byte string
980            || self.check_str("r\"") // raw string
981            || self.check_str("r#\"") // raw string
982            || self.check_str("r##") // raw string
983            || false
984        {
985            return Err(Error::ExpectedIdentifier);
986        }
987
988        let length = if self.check_str("r#") {
989            let cursor_backup = self.cursor;
990
991            self.advance_bytes(2);
992
993            // Note: it's important to check this before advancing forward, so that
994            // the value-type deserializer can fall back to parsing it differently.
995            if !matches!(self.peek_char(), Some(c) if is_ident_raw_char(c)) {
996                self.set_cursor(cursor_backup);
997                return Err(Error::ExpectedIdentifier);
998            }
999
1000            self.next_chars_while_len(is_ident_raw_char)
1001        } else if first == 'r' {
1002            let std_ident_length = self.next_chars_while_len(is_xid_continue);
1003            let raw_ident_length = self.next_chars_while_len(is_ident_raw_char);
1004
1005            if raw_ident_length > std_ident_length {
1006                return Err(Error::SuggestRawIdentifier(
1007                    self.src()[..raw_ident_length].into(),
1008                ));
1009            }
1010
1011            std_ident_length
1012        } else {
1013            let std_ident_length = first.len_utf8()
1014                + self.next_chars_while_from_len(first.len_utf8(), is_xid_continue);
1015            let raw_ident_length = self.next_chars_while_len(is_ident_raw_char);
1016
1017            if raw_ident_length > std_ident_length {
1018                return Err(Error::SuggestRawIdentifier(
1019                    self.src()[..raw_ident_length].into(),
1020                ));
1021            }
1022
1023            std_ident_length
1024        };
1025
1026        let ident = &self.src()[..length];
1027        self.advance_bytes(length);
1028
1029        Ok(ident)
1030    }
1031
1032    pub fn next_bytes_is_float(&mut self) -> bool {
1033        if let Some(c) = self.peek_char() {
1034            let skip = match c {
1035                '+' | '-' => 1,
1036                _ => 0,
1037            };
1038            let raw_float_len = self.next_chars_while_from_len(skip, is_float_char);
1039            // Trim at ".." to avoid treating range operators as float chars
1040            let valid_float_len = self.src()[skip..]
1041                .find("..")
1042                .map(|i| i.min(raw_float_len))
1043                .map_or(raw_float_len, |i| i.min(raw_float_len));
1044            let valid_int_len = self.next_chars_while_from_len(skip, is_int_char);
1045            valid_float_len > valid_int_len
1046        } else {
1047            false
1048        }
1049    }
1050
1051    pub fn skip_ws(&mut self) -> Result<()> {
1052        if (self.cursor.last_ws_len != WS_CURSOR_UNCLOSED_LINE)
1053            && ((self.cursor.pre_ws_cursor + self.cursor.last_ws_len) < self.cursor.cursor)
1054        {
1055            // the last whitespace is disjoint from this one, we need to track a new one
1056            self.cursor.pre_ws_cursor = self.cursor.cursor;
1057        }
1058
1059        if self.src().is_empty() {
1060            return Ok(());
1061        }
1062
1063        loop {
1064            self.advance_bytes(self.next_chars_while_len(is_whitespace_char));
1065
1066            match self.skip_comment()? {
1067                None => break,
1068                Some(Comment::UnclosedLine) => {
1069                    self.cursor.last_ws_len = WS_CURSOR_UNCLOSED_LINE;
1070                    return Ok(());
1071                }
1072                Some(Comment::ClosedLine | Comment::Block) => continue,
1073            }
1074        }
1075
1076        self.cursor.last_ws_len = self.cursor.cursor - self.cursor.pre_ws_cursor;
1077
1078        Ok(())
1079    }
1080
1081    pub fn has_unclosed_line_comment(&self) -> bool {
1082        self.src().is_empty() && self.cursor.last_ws_len == WS_CURSOR_UNCLOSED_LINE
1083    }
1084
1085    pub fn byte_string(&mut self) -> Result<ParsedByteStr<'a>> {
1086        fn expected_byte_string_found_base64(
1087            base64_str: &ParsedStr,
1088            byte_str: &ParsedByteStr,
1089        ) -> Error {
1090            let byte_str = match &byte_str {
1091                ParsedByteStr::Allocated(b) => b.as_slice(),
1092                ParsedByteStr::Slice(b) => b,
1093            }
1094            .iter()
1095            .flat_map(|c| core::ascii::escape_default(*c))
1096            .map(char::from)
1097            .collect::<String>();
1098            let base64_str = match &base64_str {
1099                ParsedStr::Allocated(s) => s.as_str(),
1100                ParsedStr::Slice(s) => s,
1101            };
1102
1103            Error::InvalidValueForType {
1104                expected: format!("the Rusty byte string b\"{}\"", byte_str),
1105                found: format!("the ambiguous base64 string {:?}", base64_str),
1106            }
1107        }
1108
1109        // FIXME @juntyr: remove in v0.13, since only byte_string_no_base64 will
1110        //                be used
1111        if self.consume_char('"') {
1112            let base64_str = self.escaped_string()?;
1113            let base64_result = ParsedByteStr::try_from_base64(&base64_str);
1114
1115            match base64_result {
1116                Some(byte_str) => Err(expected_byte_string_found_base64(&base64_str, &byte_str)),
1117                None => Err(Error::ExpectedByteString),
1118            }
1119        } else if self.consume_char('r') {
1120            let base64_str = self.raw_string()?;
1121            let base64_result = ParsedByteStr::try_from_base64(&base64_str);
1122
1123            match base64_result {
1124                Some(byte_str) => Err(expected_byte_string_found_base64(&base64_str, &byte_str)),
1125                None => Err(Error::ExpectedByteString),
1126            }
1127        } else {
1128            self.byte_string_no_base64()
1129        }
1130    }
1131
1132    pub fn byte_string_no_base64(&mut self) -> Result<ParsedByteStr<'a>> {
1133        if self.consume_str("b\"") {
1134            self.escaped_byte_string()
1135        } else if self.consume_str("br") {
1136            self.raw_byte_string()
1137        } else {
1138            Err(Error::ExpectedByteString)
1139        }
1140    }
1141
1142    fn escaped_byte_string(&mut self) -> Result<ParsedByteStr<'a>> {
1143        match self.escaped_byte_buf(EscapeEncoding::Binary) {
1144            Ok((bytes, advance)) => {
1145                self.advance_bytes(advance);
1146                Ok(bytes)
1147            }
1148            Err(err) => Err(err),
1149        }
1150    }
1151
1152    fn raw_byte_string(&mut self) -> Result<ParsedByteStr<'a>> {
1153        match self.raw_byte_buf() {
1154            Ok((bytes, advance)) => {
1155                self.advance_bytes(advance);
1156                Ok(bytes)
1157            }
1158            Err(Error::ExpectedString) => Err(Error::ExpectedByteString),
1159            Err(err) => Err(err),
1160        }
1161    }
1162
1163    pub fn string(&mut self) -> Result<ParsedStr<'a>> {
1164        if self.consume_char('"') {
1165            self.escaped_string()
1166        } else if self.consume_char('r') {
1167            self.raw_string()
1168        } else {
1169            Err(Error::ExpectedString)
1170        }
1171    }
1172
1173    fn escaped_string(&mut self) -> Result<ParsedStr<'a>> {
1174        match self.escaped_byte_buf(EscapeEncoding::Utf8) {
1175            Ok((bytes, advance)) => {
1176                let string = ParsedStr::try_from_bytes(bytes).map_err(Error::from)?;
1177                self.advance_bytes(advance);
1178                Ok(string)
1179            }
1180            Err(err) => Err(err),
1181        }
1182    }
1183
1184    fn raw_string(&mut self) -> Result<ParsedStr<'a>> {
1185        match self.raw_byte_buf() {
1186            Ok((bytes, advance)) => {
1187                let string = ParsedStr::try_from_bytes(bytes).map_err(Error::from)?;
1188                self.advance_bytes(advance);
1189                Ok(string)
1190            }
1191            Err(err) => Err(err),
1192        }
1193    }
1194
1195    fn escaped_byte_buf(&mut self, encoding: EscapeEncoding) -> Result<(ParsedByteStr<'a>, usize)> {
1196        // Checking for '"' and '\\' separately is faster than searching for both at the same time
1197        let str_end = self.src().find('"').ok_or(Error::ExpectedStringEnd)?;
1198        let escape = self.src()[..str_end].find('\\');
1199
1200        if let Some(escape) = escape {
1201            // Now check if escaping is used inside the string
1202            let mut i = escape;
1203            let mut s = self.src().as_bytes()[..i].to_vec();
1204
1205            loop {
1206                self.advance_bytes(i + 1);
1207
1208                match self.parse_escape(encoding, false)? {
1209                    EscapeCharacter::Ascii(c) => s.push(c),
1210                    EscapeCharacter::Utf8(c) => match c.len_utf8() {
1211                        1 => s.push(c as u8),
1212                        len => {
1213                            let start = s.len();
1214                            s.extend(core::iter::repeat(0).take(len));
1215                            c.encode_utf8(&mut s[start..]);
1216                        }
1217                    },
1218                }
1219
1220                // Checking for '"' and '\\' separately is faster than searching for both at the same time
1221                let new_str_end = self.src().find('"').ok_or(Error::ExpectedStringEnd)?;
1222                let new_escape = self.src()[..new_str_end].find('\\');
1223
1224                if let Some(new_escape) = new_escape {
1225                    s.extend_from_slice(&self.src().as_bytes()[..new_escape]);
1226                    i = new_escape;
1227                } else {
1228                    s.extend_from_slice(&self.src().as_bytes()[..new_str_end]);
1229                    // Advance to the end of the string + 1 for the `"`.
1230                    break Ok((ParsedByteStr::Allocated(s), new_str_end + 1));
1231                }
1232            }
1233        } else {
1234            let s = &self.src().as_bytes()[..str_end];
1235
1236            // Advance by the number of bytes of the string + 1 for the `"`.
1237            Ok((ParsedByteStr::Slice(s), str_end + 1))
1238        }
1239    }
1240
1241    fn raw_byte_buf(&mut self) -> Result<(ParsedByteStr<'a>, usize)> {
1242        let num_hashes = self.next_chars_while_len(|c| c == '#');
1243        let hashes = &self.src()[..num_hashes];
1244        self.advance_bytes(num_hashes);
1245
1246        self.expect_char('"', Error::ExpectedString)?;
1247
1248        let ending = ["\"", hashes].concat();
1249        let i = self.src().find(&ending).ok_or(Error::ExpectedStringEnd)?;
1250
1251        let s = &self.src().as_bytes()[..i];
1252
1253        // Advance by the number of bytes of the byte string
1254        // + `num_hashes` + 1 for the `"`.
1255        Ok((ParsedByteStr::Slice(s), i + num_hashes + 1))
1256    }
1257
1258    fn decode_ascii_escape(&mut self) -> Result<u8> {
1259        let mut n = 0;
1260        for _ in 0..2 {
1261            n <<= 4;
1262            let byte = self.next_char()?;
1263            let decoded = Self::decode_hex(byte)?;
1264            n |= decoded;
1265        }
1266
1267        Ok(n)
1268    }
1269
1270    #[inline]
1271    fn decode_hex(c: char) -> Result<u8> {
1272        if !c.is_ascii() {
1273            return Err(Error::InvalidEscape("Non-hex digit found"));
1274        }
1275
1276        // c is an ASCII character that can be losslessly cast to u8
1277        match c as u8 {
1278            c @ b'0'..=b'9' => Ok(c - b'0'),
1279            c @ b'a'..=b'f' => Ok(10 + c - b'a'),
1280            c @ b'A'..=b'F' => Ok(10 + c - b'A'),
1281            _ => Err(Error::InvalidEscape("Non-hex digit found")),
1282        }
1283    }
1284
    /// Decodes one escape sequence; the cursor must sit directly after the
    /// backslash.
    ///
    /// Supported escapes are `\'`, `\"`, `\\`, `\n`, `\r`, `\t`, `\0`,
    /// `\xHH` and `\u{H..}` (1-6 hex digits).
    ///
    /// * `encoding` - with [`EscapeEncoding::Binary`] a `\xHH` escape yields
    ///   its raw byte. Otherwise the byte must be valid UTF-8 on its own, or
    ///   be the first of up to four consecutive `\xHH` escapes that together
    ///   encode one character.
    /// * `is_char` - when `true`, a `\xHH` escape that is not valid UTF-8 on
    ///   its own is rejected instead of being combined with further escapes.
    ///
    /// # Errors
    ///
    /// [`Error::InvalidEscape`] for unknown or malformed escapes; errors from
    /// `next_char` / `peek_char_or_eof` are propagated when the input ends.
    fn parse_escape(&mut self, encoding: EscapeEncoding, is_char: bool) -> Result<EscapeCharacter> {
        let c = match self.next_char()? {
            // Single-character escapes map directly onto one ASCII byte.
            '\'' => EscapeCharacter::Ascii(b'\''),
            '"' => EscapeCharacter::Ascii(b'"'),
            '\\' => EscapeCharacter::Ascii(b'\\'),
            'n' => EscapeCharacter::Ascii(b'\n'),
            'r' => EscapeCharacter::Ascii(b'\r'),
            't' => EscapeCharacter::Ascii(b'\t'),
            '0' => EscapeCharacter::Ascii(b'\0'),
            'x' => {
                // Fast exit for ascii escape in byte string
                let b: u8 = self.decode_ascii_escape()?;
                if let EscapeEncoding::Binary = encoding {
                    return Ok(EscapeCharacter::Ascii(b));
                }

                // Fast exit for ascii character in UTF-8 string
                let mut bytes = [b, 0, 0, 0];
                if let Ok(Some(c)) = from_utf8(&bytes[..=0]).map(|s| s.chars().next()) {
                    return Ok(EscapeCharacter::Utf8(c));
                }

                if is_char {
                    // Character literals are not allowed to use multiple byte
                    //  escapes to build a unicode character
                    return Err(Error::InvalidEscape(
                        "Not a valid byte-escaped Unicode character",
                    ));
                }

                // UTF-8 character needs up to four bytes and we have already
                //  consumed one, so at most three to go
                for i in 1..4 {
                    // Each continuation byte must be spelled as its own `\x` escape.
                    if !self.consume_str(r"\x") {
                        return Err(Error::InvalidEscape(
                            "Not a valid byte-escaped Unicode character",
                        ));
                    }

                    bytes[i] = self.decode_ascii_escape()?;

                    // Check if we now have a valid UTF-8 character
                    if let Ok(Some(c)) = from_utf8(&bytes[..=i]).map(|s| s.chars().next()) {
                        return Ok(EscapeCharacter::Utf8(c));
                    }
                }

                // Four bytes were collected without forming a valid character.
                return Err(Error::InvalidEscape(
                    "Not a valid byte-escaped Unicode character",
                ));
            }
            'u' => {
                self.expect_char('{', Error::InvalidEscape("Missing { in Unicode escape"))?;

                // Accumulated code point value and the number of hex digits read.
                let mut bytes: u32 = 0;
                let mut num_digits = 0;

                // Read at most six hex digits, stopping early at the closing brace.
                while num_digits < 6 {
                    let byte = self.peek_char_or_eof()?;

                    if byte == '}' {
                        break;
                    }

                    self.skip_next_char();
                    num_digits += 1;

                    let byte = Self::decode_hex(byte)?;
                    bytes <<= 4;
                    bytes |= u32::from(byte);
                }

                if num_digits == 0 {
                    return Err(Error::InvalidEscape(
                        "Expected 1-6 digits, got 0 digits in Unicode escape",
                    ));
                }

                self.expect_char(
                    '}',
                    Error::InvalidEscape("No } at the end of Unicode escape"),
                )?;
                // `char_from_u32` returns `None` for values that are not valid `char`s.
                let c = char_from_u32(bytes).ok_or(Error::InvalidEscape(
                    "Not a valid Unicode-escaped character",
                ))?;

                EscapeCharacter::Utf8(c)
            }
            _ => return Err(Error::InvalidEscape("Unknown escape character")),
        };

        Ok(c)
    }
1378
1379    fn skip_comment(&mut self) -> Result<Option<Comment>> {
1380        if self.consume_char('/') {
1381            match self.next_char()? {
1382                '/' => {
1383                    let bytes = self.next_chars_while_len(|c| c != '\n');
1384
1385                    self.advance_bytes(bytes);
1386
1387                    if self.src().is_empty() {
1388                        Ok(Some(Comment::UnclosedLine))
1389                    } else {
1390                        Ok(Some(Comment::ClosedLine))
1391                    }
1392                }
1393                '*' => {
1394                    let mut level = 1;
1395
1396                    while level > 0 {
1397                        let bytes = self.next_chars_while_len(|c| !matches!(c, '/' | '*'));
1398
1399                        if self.src().is_empty() {
1400                            return Err(Error::UnclosedBlockComment);
1401                        }
1402
1403                        self.advance_bytes(bytes);
1404
1405                        // check whether / or * and take action
1406                        if self.consume_str("/*") {
1407                            level += 1;
1408                        } else if self.consume_str("*/") {
1409                            level -= 1;
1410                        } else {
1411                            self.next_char().map_err(|_| Error::UnclosedBlockComment)?;
1412                        }
1413                    }
1414
1415                    Ok(Some(Comment::Block))
1416                }
1417                c => Err(Error::UnexpectedChar(c)),
1418            }
1419        } else {
1420            Ok(None)
1421        }
1422    }
1423}
1424
/// Kind of comment that `Parser::skip_comment` skipped.
enum Comment {
    /// A `//` line comment with more input remaining after it.
    ClosedLine,
    /// A `//` line comment that ran to the end of the input.
    UnclosedLine,
    /// A (possibly nested) `/* ... */` block comment.
    Block,
}
1430
/// Overflow-reporting arithmetic with `u8` operands.
///
/// Implemented for the primitive integer types by the `impl_num!` macro.
pub trait Num {
    /// Converts a `u8` into `Self`.
    fn from_u8(x: u8) -> Self;

    /// Multiplies `self` by `x` in place.
    ///
    /// Returns `true` on overflow
    fn checked_mul_ext(&mut self, x: u8) -> bool;

    /// Adds `x` to `self` in place.
    ///
    /// Returns `true` on overflow
    fn checked_add_ext(&mut self, x: u8) -> bool;

    /// Subtracts `x` from `self` in place.
    ///
    /// Returns `true` on overflow
    fn checked_sub_ext(&mut self, x: u8) -> bool;
}
1443
/// Implements [`Num`] for one integer type, or for a whitespace-separated
/// list of integer types.
///
/// Each `checked_*_ext` method stores the result in `self` and returns
/// `false` on success; on overflow `self` is left unchanged and `true` is
/// returned.
macro_rules! impl_num {
    ($ty:ty) => {
        impl Num for $ty {
            fn from_u8(x: u8) -> Self {
                x as $ty
            }

            fn checked_mul_ext(&mut self, x: u8) -> bool {
                if let Some(product) = self.checked_mul(Self::from_u8(x)) {
                    *self = product;
                    false
                } else {
                    true
                }
            }

            fn checked_add_ext(&mut self, x: u8) -> bool {
                if let Some(sum) = self.checked_add(Self::from_u8(x)) {
                    *self = sum;
                    false
                } else {
                    true
                }
            }

            fn checked_sub_ext(&mut self, x: u8) -> bool {
                if let Some(difference) = self.checked_sub(Self::from_u8(x)) {
                    *self = difference;
                    false
                } else {
                    true
                }
            }
        }
    };
    // List form: expand the single-type arm once per type.
    ($($tys:ty)*) => {
        $( impl_num!($tys); )*
    };
}
1486
// Implement `Num` for every fixed-width integer type up to 64 bits.
impl_num! { i8 i16 i32 i64 u8 u16 u32 u64 }

// The 128-bit types are only covered when the `integer128` feature is enabled.
#[cfg(feature = "integer128")]
impl_num! { i128 u128 }
1491
/// Types that can be produced from an integer literal.
pub trait Integer: Sized {
    /// Parses an integer of this type from `parser`, given the already
    /// determined `sign` and numeric `base`.
    fn parse(parser: &mut Parser, sign: i8, base: u8) -> Result<Self>;

    /// Converts an already parsed integer into this type; `ron` is the source
    /// text of the literal, which implementations in this file use for the
    /// error message.
    fn try_from_parsed_integer(parsed: ParsedInteger, ron: &str) -> Result<Self>;
}
1497
/// Implements [`Integer`] for a primitive integer type, given the matching
/// [`ParsedInteger`] variant as `Variant(type)`, or for a
/// whitespace-separated list of such pairs.
///
/// `parse` delegates to `Parser::parse_integer`. `try_from_parsed_integer`
/// succeeds only when `parsed` holds exactly the matching variant; any other
/// variant yields [`Error::InvalidValueForType`] whose `expected` text reads
/// e.g. "an 8-bit signed integer" or "a 16-bit unsigned integer".
macro_rules! impl_integer {
    ($wrap:ident($ty:ty)) => {
        impl Integer for $ty {
            fn parse(parser: &mut Parser, sign: i8, base: u8) -> Result<Self> {
                parser.parse_integer(sign, base)
            }

            fn try_from_parsed_integer(parsed: ParsedInteger, ron: &str) -> Result<Self> {
                match parsed {
                    ParsedInteger::$wrap(v) => Ok(v),
                    _ => Err(Error::InvalidValueForType {
                        expected: format!(
                            "a{} {}-bit {}signed integer",
                            // Only "8" ("eight") starts with a vowel sound, so
                            // only the 8-bit types take the article "an".
                            if <$ty>::BITS == 8 { "n" } else { "" },
                            <$ty>::BITS,
                            if <$ty>::MIN == 0 { "un" } else { "" },
                        ),
                        found: String::from(ron),
                    }),
                }
            }
        }
    };
    // List form: expand the single-pair arm once per `Variant(type)` pair.
    ($($wraps:ident($tys:ty))*) => {
        $( impl_integer!($wraps($tys)); )*
    };
}
1525
// Implement `Integer` for every fixed-width integer type up to 64 bits,
// pairing each type with its `ParsedInteger` variant.
impl_integer! {
    I8(i8) I16(i16) I32(i32) I64(i64)
    U8(u8) U16(u16) U32(u32) U64(u64)
}

// The 128-bit types are only covered when the `integer128` feature is enabled.
#[cfg(feature = "integer128")]
impl_integer! { I128(i128) U128(u128) }
1533
/// An integer value tagged with the primitive type it is stored as.
///
/// The 128-bit variants only exist when the `integer128` feature is enabled.
pub enum ParsedInteger {
    I8(i8),
    I16(i16),
    I32(i32),
    I64(i64),
    #[cfg(feature = "integer128")]
    I128(i128),
    U8(u8),
    U16(u16),
    U32(u32),
    U64(u64),
    #[cfg(feature = "integer128")]
    U128(u128),
}
1548
1549impl Integer for ParsedInteger {
1550    fn parse(parser: &mut Parser, sign: i8, base: u8) -> Result<Self> {
1551        if sign < 0 {
1552            let signed = parser.parse_integer::<LargeSInt>(-1, base)?;
1553
1554            return if let Ok(x) = i8::try_from(signed) {
1555                Ok(ParsedInteger::I8(x))
1556            } else if let Ok(x) = i16::try_from(signed) {
1557                Ok(ParsedInteger::I16(x))
1558            } else if let Ok(x) = i32::try_from(signed) {
1559                Ok(ParsedInteger::I32(x))
1560            } else {
1561                #[cfg(not(feature = "integer128"))]
1562                {
1563                    Ok(ParsedInteger::I64(signed))
1564                }
1565                #[cfg(feature = "integer128")]
1566                if let Ok(x) = i64::try_from(signed) {
1567                    Ok(ParsedInteger::I64(x))
1568                } else {
1569                    Ok(ParsedInteger::I128(signed))
1570                }
1571            };
1572        }
1573
1574        let unsigned = parser.parse_integer::<LargeUInt>(1, base)?;
1575
1576        if let Ok(x) = u8::try_from(unsigned) {
1577            Ok(ParsedInteger::U8(x))
1578        } else if let Ok(x) = u16::try_from(unsigned) {
1579            Ok(ParsedInteger::U16(x))
1580        } else if let Ok(x) = u32::try_from(unsigned) {
1581            Ok(ParsedInteger::U32(x))
1582        } else {
1583            #[cfg(not(feature = "integer128"))]
1584            {
1585                Ok(ParsedInteger::U64(unsigned))
1586            }
1587            #[cfg(feature = "integer128")]
1588            if let Ok(x) = u64::try_from(unsigned) {
1589                Ok(ParsedInteger::U64(x))
1590            } else {
1591                Ok(ParsedInteger::U128(unsigned))
1592            }
1593        }
1594    }
1595
1596    fn try_from_parsed_integer(parsed: ParsedInteger, _ron: &str) -> Result<Self> {
1597        Ok(parsed)
1598    }
1599}
1600
/// Types that can be produced from a float literal.
pub trait Float: Sized {
    /// Parses a value of this type from the text of a float literal.
    fn parse(float: &str) -> Result<Self>;

    /// Converts an already parsed float into this type; `ron` is the source
    /// text of the literal, which implementations in this file use for the
    /// error message.
    fn try_from_parsed_float(parsed: ParsedFloat, ron: &str) -> Result<Self>;
}
1606
/// Implements [`Float`] for a primitive float type, given the matching
/// [`ParsedFloat`] variant and the bit width as `Variant(type: bits)`, or for
/// a whitespace-separated list of such entries.
///
/// `parse` maps every parse failure to [`Error::ExpectedFloat`].
/// `try_from_parsed_float` succeeds only when `parsed` holds exactly the
/// matching variant; otherwise it yields [`Error::InvalidValueForType`].
macro_rules! impl_float {
    ($wrap:ident($ty:ty: $bits:expr)) => {
        impl Float for $ty {
            fn parse(float: &str) -> Result<Self> {
                float.parse::<$ty>().map_err(|_| Error::ExpectedFloat)
            }

            fn try_from_parsed_float(parsed: ParsedFloat, ron: &str) -> Result<Self> {
                if let ParsedFloat::$wrap(v) = parsed {
                    return Ok(v);
                }

                Err(Error::InvalidValueForType {
                    expected: format!("a {}-bit floating point number", $bits),
                    found: String::from(ron),
                })
            }
        }
    };
    // List form: expand the single-entry arm once per entry.
    ($($wraps:ident($tys:ty: $bits:expr))*) => {
        $( impl_float!($wraps($tys: $bits)); )*
    };
}
1631
// Implement `Float` for both primitive float types, pairing each with its
// `ParsedFloat` variant and bit width.
impl_float! { F32(f32: 32) F64(f64: 64) }
1633
/// A float value tagged with the primitive type it is stored as.
pub enum ParsedFloat {
    F32(f32),
    F64(f64),
}
1638
1639impl Float for ParsedFloat {
1640    fn parse(float: &str) -> Result<Self> {
1641        let value = f64::from_str(float).map_err(|_| Error::ExpectedFloat)?;
1642
1643        #[allow(clippy::cast_possible_truncation)]
1644        if value.total_cmp(&f64::from(value as f32)).is_eq() {
1645            Ok(ParsedFloat::F32(value as f32))
1646        } else {
1647            Ok(ParsedFloat::F64(value))
1648        }
1649    }
1650
1651    fn try_from_parsed_float(parsed: ParsedFloat, _ron: &str) -> Result<Self> {
1652        Ok(parsed)
1653    }
1654}
1655
/// Classification of struct-like syntax.
///
/// NOTE(review): only the declaration is visible here; the exact meaning of
/// each variant is defined by the code that produces and consumes it, so
/// confirm against those call sites before relying on the names alone.
pub enum StructType {
    AnyTuple,
    EmptyTuple,
    NewtypeTuple,
    NonNewtypeTuple,
    Named,
    Unit,
}
1664
/// Mode flag that, judging by its name, controls how newtypes are handled.
///
/// NOTE(review): only the declaration is visible here; confirm the exact
/// effect of each variant against the code that matches on it.
#[derive(Copy, Clone)] // GRCOV_EXCL_LINE
pub enum NewtypeMode {
    NoParensMeanUnit,
    InsideNewtype,
}
1670
/// Mode flag that, judging by its name, controls whether tuples and newtypes
/// are told apart.
///
/// NOTE(review): only the declaration is visible here; confirm the exact
/// effect of each variant against the code that matches on it.
#[derive(Copy, Clone)] // GRCOV_EXCL_LINE
pub enum TupleMode {
    ImpreciseTupleOrNewtype,
    DifferentiateNewtype,
}
1676
/// A parsed string that is either borrowed from the source text or owned.
pub enum ParsedStr<'a> {
    /// Owned text.
    Allocated(String),
    /// Text borrowed for the lifetime `'a`.
    Slice(&'a str),
}

/// A parsed byte string that is either borrowed from the source or owned.
pub enum ParsedByteStr<'a> {
    /// Owned bytes.
    Allocated(Vec<u8>),
    /// Bytes borrowed for the lifetime `'a`.
    Slice(&'a [u8]),
}

impl<'a> ParsedStr<'a> {
    /// Reinterprets parsed bytes as a string, keeping the borrowed / owned
    /// distinction.
    ///
    /// # Errors
    ///
    /// Returns the [`Utf8Error`] when the bytes are not valid UTF-8.
    pub fn try_from_bytes(bytes: ParsedByteStr<'a>) -> Result<Self, Utf8Error> {
        Ok(match bytes {
            ParsedByteStr::Allocated(buffer) => match String::from_utf8(buffer) {
                Ok(text) => ParsedStr::Allocated(text),
                Err(err) => return Err(err.utf8_error()),
            },
            ParsedByteStr::Slice(slice) => ParsedStr::Slice(from_utf8(slice)?),
        })
    }
}

impl<'a> ParsedByteStr<'a> {
    /// Tries to decode `str` as padded, standard-alphabet base64.
    ///
    /// Returns the decoded bytes as [`ParsedByteStr::Allocated`], or `None`
    /// when the text is not valid base64: its length is not a multiple of
    /// four, it has more than two padding characters or a `=` before the
    /// trailing padding, or it contains a character outside the alphabet.
    pub fn try_from_base64(str: &ParsedStr<'a>) -> Option<Self> {
        // Adapted from MIT licensed Jenin Sutradhar's base 64 decoder
        // https://github.com/JeninSutradhar/base64-Rust-Encoder-Decoder/blob/ee1fb08cbb78024ec8cf5e786815acb239169f02/src/lib.rs#L84-L128
        fn try_decode_base64(input: &str) -> Option<Vec<u8>> {
            const CHARSET: &[u8; 64] =
                b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
            const PADDING: u8 = b'=';

            let unpadded = input.trim_end_matches(char::from(PADDING)).as_bytes();
            let padding_len = input.len() - unpadded.len();

            // Fast rejects: missing padding, excessive padding, extraneous
            // bytes after padding, and non-ASCII input.
            if (input.len() % 4) != 0
                || padding_len > 2
                || unpadded.contains(&PADDING)
                || !input.is_ascii()
            {
                return None;
            }

            // Bits are collected at the top of a 16-bit window, six per
            // symbol; a byte is emitted as soon as eight are available.
            let mut pending_bits = 0_u8;
            let mut window = 0_u16;
            let mut decoded = Vec::new();

            for &symbol in unpadded {
                let sextet = CHARSET.iter().position(|&x| x == symbol)?;

                #[allow(clippy::cast_possible_truncation)]
                {
                    window |= (sextet as u16) << (10 - pending_bits);
                }
                pending_bits += 6;

                if pending_bits >= 8 {
                    #[allow(clippy::cast_possible_truncation)]
                    decoded.push((window >> 8) as u8);
                    window <<= 8;
                    pending_bits -= 8;
                }
            }

            // Every padding character stands for two leftover bits.
            if usize::from(pending_bits) == padding_len * 2 {
                Some(decoded)
            } else {
                None
            }
        }

        let text: &str = match str {
            ParsedStr::Allocated(owned) => owned.as_str(),
            ParsedStr::Slice(slice) => *slice,
        };

        try_decode_base64(text).map(ParsedByteStr::Allocated)
    }
}
1770
/// Selects between two escape-handling modes: `Binary` and `Utf8`.
///
/// NOTE(review): the code that consumes this enum is outside this chunk;
/// presumably `Binary` is used for byte strings and `Utf8` for text strings —
/// confirm against the escape-parsing routines.
1771#[derive(Copy, Clone)] // GRCOV_EXCL_LINE
1772enum EscapeEncoding {
    /// Binary mode (presumably byte-oriented escapes — TODO confirm).
1773    Binary,
    /// UTF-8 mode (presumably text-oriented escapes — TODO confirm).
1774    Utf8,
1775}
1776
/// A single escape result, carried either as one raw byte or as one `char`.
///
/// NOTE(review): the producer of these values is outside this chunk;
/// presumably this is what decoding one escape sequence yields — confirm.
1777enum EscapeCharacter {
    /// Holds a single `u8`.
1778    Ascii(u8),
    /// Holds a single Unicode scalar value (`char`).
1779    Utf8(char),
1780}
1781
// Unit tests for the parser: escape decoding, whitespace tracking, cursor
// comparison, float detection, and the error reported for base64-looking strings.
1782#[cfg(test)]
1783mod tests {
1784    use super::*;
1785
    /// The source text `10` must decode, as an ASCII escape, to the byte `0x10`.
1786    #[test]
1787    fn decode_x10() {
1788        let mut bytes = Parser::new("10").unwrap();
1789        assert_eq!(bytes.decode_ascii_escape(), Ok(b'\x10'));
1790    }
1791
    /// Exercises `src()` / `pre_ws_src()` around whitespace and comments.
    ///
    /// The assertions pin down that `src()` is the remaining input after the
    /// skipped whitespace/comments, while `pre_ws_src()` is the remaining input
    /// starting at the beginning of the most recently skipped whitespace run,
    /// and that calling `skip_ws()` repeatedly does not change either.
1792    #[test]
1793    fn track_prior_ws() {
1794        const SOURCE: &str = "   /*hey*/ 42       /*bye*/ 24  ";
1795        let mut bytes = Parser::new(SOURCE).unwrap();
1796
        // Right after construction the leading whitespace and block comment
        // are already skipped, but `pre_ws_src()` still sees the full input.
1797        assert_eq!(bytes.src(), "42       /*bye*/ 24  ");
1798        assert_eq!(bytes.pre_ws_src(), SOURCE);
1799
1800        bytes.skip_ws().unwrap();
1801
        // A further `skip_ws()` with nothing to skip leaves both views unchanged.
1802        assert_eq!(bytes.src(), "42       /*bye*/ 24  ");
1803        assert_eq!(bytes.pre_ws_src(), SOURCE);
1804
1805        assert_eq!(bytes.integer::<u8>().unwrap(), 42);
1806
        // Consuming the integer advances `src()` only; `pre_ws_src()` is not
        // updated until whitespace is skipped again.
1807        assert_eq!(bytes.src(), "       /*bye*/ 24  ");
1808        assert_eq!(bytes.pre_ws_src(), SOURCE);
1809
1810        bytes.skip_ws().unwrap();
1811        bytes.skip_ws().unwrap();
1812
        // Skipping twice: `pre_ws_src()` still points at the start of the
        // whitespace run that was actually skipped, not at the current cursor.
1813        assert_eq!(bytes.src(), "24  ");
1814        assert_eq!(bytes.pre_ws_src(), "       /*bye*/ 24  ");
1815
        // Case: no whitespace at all — both views coincide before and after.
1816        let mut bytes = Parser::new("42").unwrap();
1817        bytes.skip_ws().unwrap();
1818        bytes.skip_ws().unwrap();
1819        assert_eq!(bytes.src(), "42");
1820        assert_eq!(bytes.pre_ws_src(), "42");
1821        assert_eq!(bytes.integer::<u8>().unwrap(), 42);
1822        bytes.skip_ws().unwrap();
1823        bytes.skip_ws().unwrap();
1824        assert_eq!(bytes.src(), "");
1825        assert_eq!(bytes.pre_ws_src(), "");
1826
        // Case: plain leading and trailing spaces — the trailing run is what
        // `pre_ws_src()` reports once the input is exhausted.
1827        let mut bytes = Parser::new("  42  ").unwrap();
1828        bytes.skip_ws().unwrap();
1829        bytes.skip_ws().unwrap();
1830        assert_eq!(bytes.src(), "42  ");
1831        assert_eq!(bytes.pre_ws_src(), "  42  ");
1832        assert_eq!(bytes.integer::<u8>().unwrap(), 42);
1833        bytes.skip_ws().unwrap();
1834        bytes.skip_ws().unwrap();
1835        assert_eq!(bytes.src(), "");
1836        assert_eq!(bytes.pre_ws_src(), "  ");
1837
        // Case: trailing line comment without a terminating newline — it is
        // skipped like whitespace and included in `pre_ws_src()`.
1838        let mut bytes = Parser::new("  42  //").unwrap();
1839        bytes.skip_ws().unwrap();
1840        bytes.skip_ws().unwrap();
1841        assert_eq!(bytes.src(), "42  //");
1842        assert_eq!(bytes.pre_ws_src(), "  42  //");
1843        assert_eq!(bytes.integer::<u8>().unwrap(), 42);
1844        bytes.skip_ws().unwrap();
1845        bytes.skip_ws().unwrap();
1846        assert_eq!(bytes.src(), "");
1847        assert_eq!(bytes.pre_ws_src(), "  //");
1848    }
1849
    /// `ParserCursor` equality and ordering must follow the `cursor` field:
    /// each pair below differs in `pre_ws_cursor` / `last_ws_len` in a way that
    /// would flip the result if those fields took part in the comparison.
1850    #[test]
1851    fn parser_cursor_eq_cmp() {
        // Same `cursor`, different whitespace bookkeeping: still equal.
1852        assert!(
1853            ParserCursor {
1854                cursor: 42,
1855                pre_ws_cursor: 42,
1856                last_ws_len: 42
1857            } == ParserCursor {
1858                cursor: 42,
1859                pre_ws_cursor: 24,
1860                last_ws_len: 24
1861            }
1862        );
        // Different `cursor`, identical whitespace bookkeeping: not equal.
1863        assert!(
1864            ParserCursor {
1865                cursor: 42,
1866                pre_ws_cursor: 42,
1867                last_ws_len: 42
1868            } != ParserCursor {
1869                cursor: 24,
1870                pre_ws_cursor: 42,
1871                last_ws_len: 42
1872            }
1873        );
1874
        // Smaller `cursor` orders first even though the other fields are larger.
1875        assert!(
1876            ParserCursor {
1877                cursor: 42,
1878                pre_ws_cursor: 42,
1879                last_ws_len: 42
1880            } < ParserCursor {
1881                cursor: 43,
1882                pre_ws_cursor: 24,
1883                last_ws_len: 24
1884            }
1885        );
        // Larger `cursor` orders last.
1886        assert!(
1887            ParserCursor {
1888                cursor: 42,
1889                pre_ws_cursor: 42,
1890                last_ws_len: 42
1891            } > ParserCursor {
1892                cursor: 41,
1893                pre_ws_cursor: 24,
1894                last_ws_len: 24
1895            }
1896        );
1897    }
1898
    /// An empty source must not be reported as the start of a float.
1899    #[test]
1900    fn empty_src_is_not_a_float() {
1901        assert!(!Parser::new("").unwrap().next_bytes_is_float());
1902    }
1903
    /// Deserializing `bytes::Bytes` from a plain (non-`b"…"`) string.
    ///
    /// When the string decodes as base64, the error is `InvalidValueForType`
    /// and names the equivalent Rusty byte string; when it does not decode,
    /// the error is the plain `ExpectedByteString`. Spans cover the string
    /// contents, so they start one column later for the raw `r"…"` form.
1904    #[test]
1905    fn base64_deprecation_error() {
        // Regular string holding valid base64 for "Hello ron!".
1906        let err = crate::from_str::<bytes::Bytes>("\"SGVsbG8gcm9uIQ==\"").unwrap_err();
1907
1908        assert_eq!(
1909            err,
1910            SpannedError {
1911                code: Error::InvalidValueForType {
1912                    expected: String::from("the Rusty byte string b\"Hello ron!\""),
1913                    found: String::from("the ambiguous base64 string \"SGVsbG8gcm9uIQ==\"")
1914                },
1915                span: Span {
1916                    start: Position { line: 1, col: 2 },
1917                    end: Position { line: 1, col: 19 },
1918                }
1919            }
1920        );
1921
        // Same payload as a raw string: only the rendered message is checked.
1922        let err = crate::from_str::<bytes::Bytes>("r\"SGVsbG8gcm9uIQ==\"").unwrap_err();
1923
1924        assert_eq!(format!("{}", err.code), "Expected the Rusty byte string b\"Hello ron!\" but found the ambiguous base64 string \"SGVsbG8gcm9uIQ==\" instead");
1925
        // "invalid=" is eight characters with one padding byte, so it decodes
        // as base64; the suggestion escapes the non-printable bytes.
1926        assert_eq!(
1927            crate::from_str::<bytes::Bytes>("\"invalid=\"").unwrap_err(),
1928            SpannedError {
1929                code: Error::InvalidValueForType {
1930                    expected: String::from("the Rusty byte string b\"\\x8a{\\xda\\x96\\'\""),
1931                    found: String::from("the ambiguous base64 string \"invalid=\"")
1932                },
1933                span: Span {
1934                    start: Position { line: 1, col: 2 },
1935                    end: Position { line: 1, col: 11 },
1936                }
1937            }
1938        );
1939
        // Raw-string variant of the previous case: same error, span shifted by
        // the extra `r` prefix.
1940        assert_eq!(
1941            crate::from_str::<bytes::Bytes>("r\"invalid=\"").unwrap_err(),
1942            SpannedError {
1943                code: Error::InvalidValueForType {
1944                    expected: String::from("the Rusty byte string b\"\\x8a{\\xda\\x96\\'\""),
1945                    found: String::from("the ambiguous base64 string \"invalid=\"")
1946                },
1947                span: Span {
1948                    start: Position { line: 1, col: 3 },
1949                    end: Position { line: 1, col: 12 },
1950                }
1951            }
1952        );
1953
        // "invalid" is seven characters (not a multiple of four), so it is not
        // base64 and the generic byte-string error is reported instead.
1954        assert_eq!(
1955            crate::from_str::<bytes::Bytes>("r\"invalid\"").unwrap_err(),
1956            SpannedError {
1957                code: Error::ExpectedByteString,
1958                span: Span {
1959                    start: Position { line: 1, col: 3 },
1960                    end: Position { line: 1, col: 11 },
1961                }
1962            }
1963        );
1964    }
1965}
ron/parse.rs

ron/
parse.rs