Skip to main content

toml_parser/decoder/
string.rs

1use core::ops::RangeInclusive;
2
3use winnow::stream::ContainsToken as _;
4use winnow::stream::Offset as _;
5use winnow::stream::Stream as _;
6
7use crate::ErrorSink;
8use crate::Expected;
9use crate::ParseError;
10use crate::Raw;
11use crate::Span;
12use crate::decoder::StringBuilder;
13use crate::lexer::APOSTROPHE;
14use crate::lexer::ML_BASIC_STRING_DELIM;
15use crate::lexer::ML_LITERAL_STRING_DELIM;
16use crate::lexer::QUOTATION_MARK;
17use crate::lexer::WSCHAR;
18
/// Error description reported by the decoders in this module when a push into the
/// [`StringBuilder`] returns `false`
const ALLOCATION_ERROR: &str = "could not allocate for string";
20
21/// Parse literal string
22///
23/// ```abnf
24/// ;; Literal String
25///
26/// literal-string = apostrophe *literal-char apostrophe
27///
28/// apostrophe = %x27 ; ' apostrophe
29///
30/// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
31/// ```
32pub(crate) fn decode_literal_string<'i>(
33    raw: Raw<'i>,
34    output: &mut dyn StringBuilder<'i>,
35    error: &mut dyn ErrorSink,
36) {
37    const INVALID_STRING: &str = "invalid literal string";
38
39    output.clear();
40
41    let s = raw.as_str();
42    let s = if let Some(stripped) = s.strip_prefix(APOSTROPHE as char) {
43        stripped
44    } else {
45        error.report_error(
46            ParseError::new(INVALID_STRING)
47                .with_context(Span::new_unchecked(0, raw.len()))
48                .with_expected(&[Expected::Literal("'")])
49                .with_unexpected(Span::new_unchecked(0, 0)),
50        );
51        s
52    };
53    let s = if let Some(stripped) = s.strip_suffix(APOSTROPHE as char) {
54        stripped
55    } else {
56        error.report_error(
57            ParseError::new(INVALID_STRING)
58                .with_context(Span::new_unchecked(0, raw.len()))
59                .with_expected(&[Expected::Literal("'")])
60                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
61        );
62        s
63    };
64
65    for (i, b) in s.as_bytes().iter().enumerate() {
66        if !LITERAL_CHAR.contains_token(b) {
67            let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes());
68            error.report_error(
69                ParseError::new(INVALID_STRING)
70                    .with_context(Span::new_unchecked(0, raw.len()))
71                    .with_expected(&[Expected::Description("non-single-quote visible characters")])
72                    .with_unexpected(Span::new_unchecked(offset, offset)),
73            );
74        }
75    }
76
77    if !output.push_str(s) {
78        error.report_error(
79            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
80        );
81    }
82}
83
/// Byte-level token set for the content of literal strings
///
/// Tab, the printable ASCII range without the apostrophe (`0x27`), and any byte of a
/// non-ASCII character (see [`NON_ASCII`]).
///
/// ```abnf
/// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
/// ```
const LITERAL_CHAR: (
    u8,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
93
/// Byte-level stand-in for the `non-ascii` rule: every byte with the high bit set
///
/// ```abnf
/// non-ascii = %x80-D7FF / %xE000-10FFFF
/// ```
/// - ASCII is 0xxxxxxx
/// - First byte for UTF-8 is 11xxxxxx
/// - Subsequent UTF-8 bytes are 10xxxxxx
///
/// The range therefore covers both the leading and the continuation bytes of multi-byte
/// characters.  It is only applied to the bytes of a `&str` in this module, so no separate
/// UTF-8 validation happens here.
const NON_ASCII: RangeInclusive<u8> = 0x80..=0xff;
101
102/// Parse multi-line literal string
103///
104/// ```abnf
105/// ;; Multiline Literal String
106///
107/// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
108///                     ml-literal-string-delim
109/// ml-literal-string-delim = 3apostrophe
110/// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
111///
112/// mll-content = literal-char / newline
113/// mll-quotes = 1*2apostrophe
114/// ```
115pub(crate) fn decode_ml_literal_string<'i>(
116    raw: Raw<'i>,
117    output: &mut dyn StringBuilder<'i>,
118    error: &mut dyn ErrorSink,
119) {
120    const INVALID_STRING: &str = "invalid multi-line literal string";
121    output.clear();
122
123    let s = raw.as_str();
124    let s = if let Some(stripped) = s.strip_prefix(ML_LITERAL_STRING_DELIM) {
125        stripped
126    } else {
127        error.report_error(
128            ParseError::new(INVALID_STRING)
129                .with_context(Span::new_unchecked(0, raw.len()))
130                .with_expected(&[Expected::Literal("'")])
131                .with_unexpected(Span::new_unchecked(0, 0)),
132        );
133        s
134    };
135    let s = strip_start_newline(s);
136    let s = if let Some(stripped) = s.strip_suffix(ML_LITERAL_STRING_DELIM) {
137        stripped
138    } else {
139        error.report_error(
140            ParseError::new(INVALID_STRING)
141                .with_context(Span::new_unchecked(0, raw.len()))
142                .with_expected(&[Expected::Literal("'")])
143                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
144        );
145        s.trim_end_matches('\'')
146    };
147
148    for (i, b) in s.as_bytes().iter().enumerate() {
149        if *b == b'\'' || *b == b'\n' {
150        } else if *b == b'\r' {
151            if s.as_bytes().get(i + 1) != Some(&b'\n') {
152                let offset = (&s.as_bytes()[i + 1..]).offset_from(&raw.as_bytes());
153                error.report_error(
154                    ParseError::new("carriage return must be followed by newline")
155                        .with_context(Span::new_unchecked(0, raw.len()))
156                        .with_expected(&[Expected::Literal("\n")])
157                        .with_unexpected(Span::new_unchecked(offset, offset)),
158                );
159            }
160        } else if !LITERAL_CHAR.contains_token(b) {
161            let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes());
162            error.report_error(
163                ParseError::new(INVALID_STRING)
164                    .with_context(Span::new_unchecked(0, raw.len()))
165                    .with_expected(&[Expected::Description("non-single-quote characters")])
166                    .with_unexpected(Span::new_unchecked(offset, offset)),
167            );
168        }
169    }
170
171    if !output.push_str(s) {
172        error.report_error(
173            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
174        );
175    }
176}
177
178/// Parse basic string
179///
180/// ```abnf
181/// ;; Basic String
182///
183/// basic-string = quotation-mark *basic-char quotation-mark
184///
185/// basic-char = basic-unescaped / escaped
186///
187/// escaped = escape escape-seq-char
188/// ```
189pub(crate) fn decode_basic_string<'i>(
190    raw: Raw<'i>,
191    output: &mut dyn StringBuilder<'i>,
192    error: &mut dyn ErrorSink,
193) {
194    const INVALID_STRING: &str = "invalid basic string";
195    output.clear();
196
197    let s = raw.as_str();
198    let s = if let Some(stripped) = s.strip_prefix(QUOTATION_MARK as char) {
199        stripped
200    } else {
201        error.report_error(
202            ParseError::new(INVALID_STRING)
203                .with_context(Span::new_unchecked(0, raw.len()))
204                .with_expected(&[Expected::Literal("\"")])
205                .with_unexpected(Span::new_unchecked(0, 0)),
206        );
207        s
208    };
209    let mut s = if let Some(stripped) = s.strip_suffix(QUOTATION_MARK as char) {
210        stripped
211    } else {
212        error.report_error(
213            ParseError::new(INVALID_STRING)
214                .with_context(Span::new_unchecked(0, raw.len()))
215                .with_expected(&[Expected::Literal("\"")])
216                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
217        );
218        s
219    };
220
221    let segment = basic_unescaped(&mut s);
222    if !output.push_str(segment) {
223        error.report_error(
224            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
225        );
226    }
227    while !s.is_empty() {
228        if s.starts_with("\\") {
229            let _ = s.next_token();
230
231            let c = escape_seq_char(&mut s, raw, error);
232            if !output.push_char(c) {
233                error.report_error(
234                    ParseError::new(ALLOCATION_ERROR)
235                        .with_unexpected(Span::new_unchecked(0, raw.len())),
236                );
237            }
238        } else {
239            let invalid = basic_invalid(&mut s);
240            let start = invalid.offset_from(&raw.as_str());
241            let end = start + invalid.len();
242            error.report_error(
243                ParseError::new(INVALID_STRING)
244                    .with_context(Span::new_unchecked(0, raw.len()))
245                    .with_expected(&[
246                        Expected::Description("non-double-quote visible characters"),
247                        Expected::Literal("\\"),
248                    ])
249                    .with_unexpected(Span::new_unchecked(start, end)),
250            );
251            let _ = output.push_str(invalid);
252        }
253
254        let segment = basic_unescaped(&mut s);
255        if !output.push_str(segment) {
256            let start = segment.offset_from(&raw.as_str());
257            let end = start + segment.len();
258            error.report_error(
259                ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)),
260            );
261        }
262    }
263}
264
265/// ```abnf
266/// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
267/// ```
268fn basic_unescaped<'i>(stream: &mut &'i str) -> &'i str {
269    let offset = stream
270        .as_bytes()
271        .offset_for(|b| !BASIC_UNESCAPED.contains_token(b))
272        .unwrap_or(stream.len());
273    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
274    unsafe {
275        stream.next_slice_unchecked(offset)
276    }
277    #[cfg(not(feature = "unsafe"))]
278    stream.next_slice(offset)
279}
280
281fn basic_invalid<'i>(stream: &mut &'i str) -> &'i str {
282    let offset = stream
283        .as_bytes()
284        .offset_for(|b| (BASIC_UNESCAPED, ESCAPE).contains_token(b))
285        .unwrap_or(stream.len());
286    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
287    unsafe {
288        stream.next_slice_unchecked(offset)
289    }
290    #[cfg(not(feature = "unsafe"))]
291    stream.next_slice(offset)
292}
293
/// Byte-level token set for content of basic strings that needs no escaping
///
/// Whitespace, the printable ASCII range without the quotation mark (`0x22`) and the
/// backslash (`0x5C`), and any byte of a non-ASCII character (see [`NON_ASCII`]).
///
/// ```abnf
/// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
/// ```
// The tuple type has to be spelled out for a `const`, which trips the complexity lint
#[allow(clippy::type_complexity)]
const BASIC_UNESCAPED: (
    (u8, u8),
    u8,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
305
/// The byte that introduces an escape sequence in basic strings
///
/// ```abnf
/// escape = %x5C                    ; \
/// ```
const ESCAPE: u8 = b'\\';
310
311/// ```abnf
312/// escape-seq-char =  %x22         ; "    quotation mark  U+0022
313/// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
314/// escape-seq-char =/ %x62         ; b    backspace       U+0008
315/// escape-seq-char =/ %x65         ; e    escape          U+001B
316/// escape-seq-char =/ %x66         ; f    form feed       U+000C
317/// escape-seq-char =/ %x6E         ; n    line feed       U+000A
318/// escape-seq-char =/ %x72         ; r    carriage return U+000D
319/// escape-seq-char =/ %x74         ; t    tab             U+0009
320/// escape-seq-char =/ %x78 2HEXDIG ; xHH                  U+00HH
321/// escape-seq-char =/ %x75 4HEXDIG ; uHHHH                U+HHHH
322/// escape-seq-char =/ %x55 8HEXDIG ; UHHHHHHHH            U+HHHHHHHH
323/// ```
324fn escape_seq_char(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) -> char {
325    const EXPECTED_ESCAPES: &[Expected] = &[
326        Expected::Literal("b"),
327        Expected::Literal("e"),
328        Expected::Literal("f"),
329        Expected::Literal("n"),
330        Expected::Literal("r"),
331        Expected::Literal("\\"),
332        Expected::Literal("\""),
333        Expected::Literal("x"),
334        Expected::Literal("u"),
335        Expected::Literal("U"),
336    ];
337
338    let start = stream.checkpoint();
339    let Some(id) = stream.next_token() else {
340        let offset = stream.offset_from(&raw.as_str());
341        error.report_error(
342            ParseError::new("missing escaped value")
343                .with_context(Span::new_unchecked(0, raw.len()))
344                .with_expected(EXPECTED_ESCAPES)
345                .with_unexpected(Span::new_unchecked(offset, offset)),
346        );
347        return '\\';
348    };
349    match id {
350        'b' => '\u{8}',
351        'e' => '\u{1b}',
352        'f' => '\u{c}',
353        'n' => '\n',
354        'r' => '\r',
355        't' => '\t',
356        'x' => hexescape(stream, 2, raw, error),
357        'u' => hexescape(stream, 4, raw, error),
358        'U' => hexescape(stream, 8, raw, error),
359        '\\' => '\\',
360        '"' => '"',
361        _ => {
362            stream.reset(&start);
363            let offset = stream.offset_from(&raw.as_str());
364            error.report_error(
365                ParseError::new("missing escaped value")
366                    .with_context(Span::new_unchecked(0, raw.len()))
367                    .with_expected(EXPECTED_ESCAPES)
368                    .with_unexpected(Span::new_unchecked(offset, offset)),
369            );
370            '\\'
371        }
372    }
373}
374
375fn hexescape(
376    stream: &mut &str,
377    num_digits: usize,
378    raw: Raw<'_>,
379    error: &mut dyn ErrorSink,
380) -> char {
381    let offset = stream
382        .as_bytes()
383        .offset_for(|b| !HEXDIG.contains_token(b))
384        .unwrap_or_else(|| stream.eof_offset())
385        .min(num_digits);
386    #[cfg(feature = "unsafe")] // SAFETY: HEXDIG ensure `offset` is along UTF-8 boundary
387    let value = unsafe { stream.next_slice_unchecked(offset) };
388    #[cfg(not(feature = "unsafe"))]
389    let value = stream.next_slice(offset);
390
391    if value.len() != num_digits {
392        let offset = stream.offset_from(&raw.as_str());
393        error.report_error(
394            ParseError::new("too few unicode value digits")
395                .with_context(Span::new_unchecked(0, raw.len()))
396                .with_expected(&[Expected::Description("unicode hexadecimal value")])
397                .with_unexpected(Span::new_unchecked(offset, offset)),
398        );
399        return '�';
400    }
401
402    let Some(value) = u32::from_str_radix(value, 16).ok().and_then(char::from_u32) else {
403        let offset = value.offset_from(&raw.as_str());
404        error.report_error(
405            ParseError::new("invalid value")
406                .with_context(Span::new_unchecked(0, raw.len()))
407                .with_expected(&[Expected::Description("unicode hexadecimal value")])
408                .with_unexpected(Span::new_unchecked(offset, offset)),
409        );
410        return '�';
411    };
412
413    value
414}
415
/// Byte-level token set for hexadecimal digits
///
/// ```abnf
/// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
/// ```
///
/// ABNF string literals are case-insensitive, hence the additional lowercase range.
const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) =
    (DIGIT, b'A'..=b'F', b'a'..=b'f');
421
/// Byte-level token set for ASCII decimal digits
///
/// ```abnf
/// DIGIT = %x30-39 ; 0-9
/// ```
const DIGIT: RangeInclusive<u8> = b'0'..=b'9';
426
/// Remove a single leading newline (`\n` or `\r\n`) from `s`, if there is one
///
/// A lone `\r` is not a newline and is left in place.
fn strip_start_newline(s: &str) -> &str {
    // Both newline forms are pure ASCII, so the slice offsets are UTF-8 boundaries
    match s.as_bytes() {
        [b'\n', ..] => &s[1..],
        [b'\r', b'\n', ..] => &s[2..],
        _ => s,
    }
}
432
433/// Parse multi-line basic string
434///
435/// ```abnf
436/// ;; Multiline Basic String
437///
438/// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
439///                   ml-basic-string-delim
440/// ml-basic-string-delim = 3quotation-mark
441/// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
442///
443/// mlb-content = basic-char / newline / mlb-escaped-nl
444/// mlb-quotes = 1*2quotation-mark
445/// ```
446pub(crate) fn decode_ml_basic_string<'i>(
447    raw: Raw<'i>,
448    output: &mut dyn StringBuilder<'i>,
449    error: &mut dyn ErrorSink,
450) {
451    const INVALID_STRING: &str = "invalid multi-line basic string";
452
453    let s = raw.as_str();
454    let s = if let Some(stripped) = s.strip_prefix(ML_BASIC_STRING_DELIM) {
455        stripped
456    } else {
457        error.report_error(
458            ParseError::new(INVALID_STRING)
459                .with_context(Span::new_unchecked(0, raw.len()))
460                .with_expected(&[Expected::Literal("\"")])
461                .with_unexpected(Span::new_unchecked(0, 0)),
462        );
463        s
464    };
465    let s = strip_start_newline(s);
466    let mut s = if let Some(stripped) = s.strip_suffix(ML_BASIC_STRING_DELIM) {
467        stripped
468    } else {
469        error.report_error(
470            ParseError::new(INVALID_STRING)
471                .with_context(Span::new_unchecked(0, raw.len()))
472                .with_expected(&[Expected::Literal("\"")])
473                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
474        );
475        s
476    };
477
478    let segment = mlb_unescaped(&mut s);
479    if !output.push_str(segment) {
480        error.report_error(
481            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
482        );
483    }
484    while !s.is_empty() {
485        if s.starts_with("\\") {
486            let _ = s.next_token();
487
488            if s.as_bytes()
489                .first()
490                .map(|b| (WSCHAR, b'\r', b'\n').contains_token(b))
491                .unwrap_or(false)
492            {
493                mlb_escaped_nl(&mut s, raw, error);
494            } else {
495                let c = escape_seq_char(&mut s, raw, error);
496                if !output.push_char(c) {
497                    error.report_error(
498                        ParseError::new(ALLOCATION_ERROR)
499                            .with_unexpected(Span::new_unchecked(0, raw.len())),
500                    );
501                }
502            }
503        } else if s.starts_with("\r") {
504            let offset = if s.starts_with("\r\n") {
505                "\r\n".len()
506            } else {
507                let start = s.offset_from(&raw.as_str()) + 1;
508                error.report_error(
509                    ParseError::new("carriage return must be followed by newline")
510                        .with_context(Span::new_unchecked(0, raw.len()))
511                        .with_expected(&[Expected::Literal("\n")])
512                        .with_unexpected(Span::new_unchecked(start, start)),
513                );
514                "\r".len()
515            };
516            #[cfg(feature = "unsafe")]
517            // SAFETY: Newlines ensure `offset` is along UTF-8 boundary
518            let newline = unsafe { s.next_slice_unchecked(offset) };
519            #[cfg(not(feature = "unsafe"))]
520            let newline = s.next_slice(offset);
521            if !output.push_str(newline) {
522                let start = newline.offset_from(&raw.as_str());
523                let end = start + newline.len();
524                error.report_error(
525                    ParseError::new(ALLOCATION_ERROR)
526                        .with_unexpected(Span::new_unchecked(start, end)),
527                );
528            }
529        } else {
530            let invalid = mlb_invalid(&mut s);
531            let start = invalid.offset_from(&raw.as_str());
532            let end = start + invalid.len();
533            error.report_error(
534                ParseError::new(INVALID_STRING)
535                    .with_context(Span::new_unchecked(0, raw.len()))
536                    .with_expected(&[Expected::Literal("\\"), Expected::Description("characters")])
537                    .with_unexpected(Span::new_unchecked(start, end)),
538            );
539            let _ = output.push_str(invalid);
540        }
541
542        let segment = mlb_unescaped(&mut s);
543        if !output.push_str(segment) {
544            let start = segment.offset_from(&raw.as_str());
545            let end = start + segment.len();
546            error.report_error(
547                ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)),
548            );
549        }
550    }
551}
552
/// Consume the remainder of a line-ending backslash in a multi-line basic string
///
/// `stream` must be positioned directly after the `\`.  Trailing whitespace on the line, the
/// newline and all whitespace and newlines that follow are consumed; nothing is produced for
/// the output.  A missing newline and carriage returns without a line feed are reported
/// through `error`.
///
/// ```abnf
/// mlb-escaped-nl = escape ws newline *( wschar / newline )
/// ```
fn mlb_escaped_nl(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) {
    const INVALID_STRING: &str = "invalid multi-line basic string";
    // `ws`: whitespace between the backslash and the newline
    let ws_offset = stream
        .as_bytes()
        .offset_for(|b| !WSCHAR.contains_token(b))
        .unwrap_or(stream.len());
    #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary
    unsafe {
        stream.next_slice_unchecked(ws_offset);
    }
    #[cfg(not(feature = "unsafe"))]
    stream.next_slice(ws_offset);

    // `newline`: the mandatory line ending after the optional whitespace
    let start = stream.checkpoint();
    match stream.next_token() {
        Some('\n') => {}
        Some('\r') => {
            if stream.as_bytes().first() == Some(&b'\n') {
                let _ = stream.next_token();
            } else {
                // The `\r` stays consumed; the error points just past it
                let start = stream.offset_from(&raw.as_str());
                let end = start;
                error.report_error(
                    ParseError::new("carriage return must be followed by newline")
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_expected(&[Expected::Literal("\n")])
                        .with_unexpected(Span::new_unchecked(start, end)),
                );
            }
        }
        _ => {
            // Neither a newline nor the end of a `\r\n`: put the token back and report the
            // missing newline at its position
            stream.reset(&start);

            let start = stream.offset_from(&raw.as_str());
            let end = start;
            error.report_error(
                ParseError::new(INVALID_STRING)
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[Expected::Literal("\n")])
                    .with_unexpected(Span::new_unchecked(start, end)),
            );
        }
    }

    // `*( wschar / newline )`: alternate between runs of whitespace / `\n` and single
    // newlines starting with `\r` until an iteration consumes nothing
    loop {
        let start_offset = stream.offset_from(&raw.as_str());

        let offset = stream
            .as_bytes()
            .offset_for(|b| !(WSCHAR, b'\n').contains_token(b))
            .unwrap_or(stream.len());
        #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary
        unsafe {
            stream.next_slice_unchecked(offset);
        }
        #[cfg(not(feature = "unsafe"))]
        stream.next_slice(offset);

        if stream.starts_with("\r") {
            let offset = if stream.starts_with("\r\n") {
                "\r\n".len()
            } else {
                // A lone `\r` is reported (just past it) and still skipped
                let start = stream.offset_from(&raw.as_str()) + 1;
                error.report_error(
                    ParseError::new("carriage return must be followed by newline")
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_expected(&[Expected::Literal("\n")])
                        .with_unexpected(Span::new_unchecked(start, start)),
                );
                "\r".len()
            };
            #[cfg(feature = "unsafe")]
            // SAFETY: Newlines ensure `offset` is along UTF-8 boundary
            let _ = unsafe { stream.next_slice_unchecked(offset) };
            #[cfg(not(feature = "unsafe"))]
            let _ = stream.next_slice(offset);
        }

        let end_offset = stream.offset_from(&raw.as_str());
        if start_offset == end_offset {
            break;
        }
    }
}
640
641/// `mlb-unescaped` extended with `mlb-quotes` and `LF`
642///
643/// This is a specialization of [`basic_unescaped`] to help with multi-line basic strings
644///
645/// **warning:** `newline` is not validated
646///
647/// ```abnf
648/// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
649///
650/// mlb-content = basic-cha / newline / mlb-escaped-nl
651/// mlb-quotes = 1*2quotation-mark
652/// ```
653fn mlb_unescaped<'i>(stream: &mut &'i str) -> &'i str {
654    let offset = stream
655        .as_bytes()
656        .offset_for(|b| !(BASIC_UNESCAPED, b'"', b'\n').contains_token(b))
657        .unwrap_or(stream.len());
658    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
659    unsafe {
660        stream.next_slice_unchecked(offset)
661    }
662    #[cfg(not(feature = "unsafe"))]
663    stream.next_slice(offset)
664}
665
666fn mlb_invalid<'i>(stream: &mut &'i str) -> &'i str {
667    let offset = stream
668        .as_bytes()
669        .offset_for(|b| (BASIC_UNESCAPED, b'"', b'\n', ESCAPE, '\r').contains_token(b))
670        .unwrap_or(stream.len());
671    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
672    unsafe {
673        stream.next_slice_unchecked(offset)
674    }
675    #[cfg(not(feature = "unsafe"))]
676    stream.next_slice(offset)
677}
678
679/// Parse unquoted key
680///
681/// ```abnf
682/// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
683/// ```
684pub(crate) fn decode_unquoted_key<'i>(
685    raw: Raw<'i>,
686    output: &mut dyn StringBuilder<'i>,
687    error: &mut dyn ErrorSink,
688) {
689    let s = raw.as_str();
690
691    if s.is_empty() {
692        error.report_error(
693            ParseError::new("unquoted keys cannot be empty")
694                .with_context(Span::new_unchecked(0, s.len()))
695                .with_expected(&[
696                    Expected::Description("letters"),
697                    Expected::Description("numbers"),
698                    Expected::Literal("-"),
699                    Expected::Literal("_"),
700                ])
701                .with_unexpected(Span::new_unchecked(0, s.len())),
702        );
703    }
704
705    let mut span = None;
706    for (i, _b) in s
707        .as_bytes()
708        .iter()
709        .enumerate()
710        .filter(|(_, b)| !UNQUOTED_CHAR.contains_token(*b))
711    {
712        if let Some((start, end)) = span {
713            if i == end {
714                span = Some((start, i + 1));
715            } else {
716                error.report_error(
717                    ParseError::new("invalid unquoted key")
718                        .with_context(Span::new_unchecked(0, s.len()))
719                        .with_expected(&[
720                            Expected::Description("letters"),
721                            Expected::Description("numbers"),
722                            Expected::Literal("-"),
723                            Expected::Literal("_"),
724                        ])
725                        .with_unexpected(Span::new_unchecked(start, end)),
726                );
727                span = Some((i, i + 1));
728            }
729        } else {
730            span = Some((i, i + 1));
731        }
732    }
733    if let Some((start, end)) = span {
734        error.report_error(
735            ParseError::new("invalid unquoted key")
736                .with_context(Span::new_unchecked(0, s.len()))
737                .with_expected(&[
738                    Expected::Description("letters"),
739                    Expected::Description("numbers"),
740                    Expected::Literal("-"),
741                    Expected::Literal("_"),
742                ])
743                .with_unexpected(Span::new_unchecked(start, end)),
744        );
745    }
746
747    if !output.push_str(s) {
748        error.report_error(
749            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
750        );
751    }
752}
753
/// Byte-level token set for unquoted keys: ASCII letters, ASCII digits, `-` and `_`
///
/// ```abnf
/// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
/// ```
const UNQUOTED_CHAR: (
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    u8,
    u8,
) = (b'A'..=b'Z', b'a'..=b'z', b'0'..=b'9', b'-', b'_');
764
765#[cfg(test)]
766#[cfg(feature = "std")]
767mod test {
768    use super::*;
769    use crate::decoder::Encoding;
770
771    use alloc::borrow::Cow;
772
773    use snapbox::assert_data_eq;
774    use snapbox::prelude::*;
775    use snapbox::str;
776
777    #[test]
778    fn literal_string() {
779        let cases = [
780            (
781                r"'C:\Users\nodejs\templates'",
782                str![[r#"C:\Users\nodejs\templates"#]].raw(),
783                str![[r#"
784[]
785
786"#]]
787                .raw(),
788            ),
789            (
790                r"'\\ServerX\admin$\system32\'",
791                str![[r#"\\ServerX\admin$\system32\"#]].raw(),
792                str![[r#"
793[]
794
795"#]]
796                .raw(),
797            ),
798            (
799                r#"'Tom "Dubs" Preston-Werner'"#,
800                str![[r#"Tom "Dubs" Preston-Werner"#]].raw(),
801                str![[r#"
802[]
803
804"#]]
805                .raw(),
806            ),
807            (
808                r"'<\i\c*\s*>'",
809                str![[r#"<\i\c*\s*>"#]].raw(),
810                str![[r#"
811[]
812
813"#]]
814                .raw(),
815            ),
816        ];
817        for (input, expected, expected_error) in cases {
818            let mut error = Vec::new();
819            let mut actual = Cow::Borrowed("");
820            decode_literal_string(
821                Raw::new_unchecked(input, Some(Encoding::LiteralString), Default::default()),
822                &mut actual,
823                &mut error,
824            );
825            assert_data_eq!(actual.as_ref(), expected);
826            assert_data_eq!(error.to_debug(), expected_error);
827        }
828    }
829
830    #[test]
831    fn ml_literal_string() {
832        let cases = [
833            (
834                r"'''I [dw]on't need \d{2} apples'''",
835                str![[r#"I [dw]on't need \d{2} apples"#]].raw(),
836                str![[r#"
837[]
838
839"#]]
840                .raw(),
841            ),
842            (
843                r#"''''one_quote''''"#,
844                str!["'one_quote'"].raw(),
845                str![[r#"
846[]
847
848"#]]
849                .raw(),
850            ),
851            (
852                r#"'''
853The first newline is
854trimmed in raw strings.
855   All other whitespace
856   is preserved.
857'''"#,
858                str![[r#"
859The first newline is
860trimmed in raw strings.
861   All other whitespace
862   is preserved.
863
864"#]]
865                .raw(),
866                str![[r#"
867[]
868
869"#]]
870                .raw(),
871            ),
872        ];
873        for (input, expected, expected_error) in cases {
874            let mut error = Vec::new();
875            let mut actual = Cow::Borrowed("");
876            decode_ml_literal_string(
877                Raw::new_unchecked(input, Some(Encoding::MlLiteralString), Default::default()),
878                &mut actual,
879                &mut error,
880            );
881            assert_data_eq!(actual.as_ref(), expected);
882            assert_data_eq!(error.to_debug(), expected_error);
883        }
884    }
885
886    #[test]
887    fn basic_string() {
888        let cases = [
889            (
890                r#""""#,
891                str![""].raw(),
892                str![[r#"
893[]
894
895"#]]
896                .raw(),
897            ),
898            (
899                r#""content\"trailing""#,
900                str![[r#"content"trailing"#]].raw(),
901                str![[r#"
902[]
903
904"#]]
905                .raw(),
906            ),
907            (
908                r#""content\""#,
909                str![[r#"content\"#]].raw(),
910                str![[r#"
911[
912    ParseError {
913        context: Some(
914            0..10,
915        ),
916        description: "missing escaped value",
917        expected: Some(
918            [
919                Literal(
920                    "b",
921                ),
922                Literal(
923                    "e",
924                ),
925                Literal(
926                    "f",
927                ),
928                Literal(
929                    "n",
930                ),
931                Literal(
932                    "r",
933                ),
934                Literal(
935                    "\\",
936                ),
937                Literal(
938                    "\"",
939                ),
940                Literal(
941                    "x",
942                ),
943                Literal(
944                    "u",
945                ),
946                Literal(
947                    "U",
948                ),
949            ],
950        ),
951        unexpected: Some(
952            9..9,
953        ),
954    },
955]
956
957"#]]
958                .raw(),
959            ),
960            (
961                r#""content
962trailing""#,
963                str![[r#"
964content
965trailing
966"#]]
967                .raw(),
968                str![[r#"
969[
970    ParseError {
971        context: Some(
972            0..18,
973        ),
974        description: "invalid basic string",
975        expected: Some(
976            [
977                Description(
978                    "non-double-quote visible characters",
979                ),
980                Literal(
981                    "\\",
982                ),
983            ],
984        ),
985        unexpected: Some(
986            8..9,
987        ),
988    },
989]
990
991"#]]
992                .raw(),
993            ),
994            (
995                r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#,
996                str![[r#"
997I'm a string. "You can quote me". Name	José
998Location	SF. 𠜎
999"#]]
1000                .raw(),
1001                str![[r#"
1002[]
1003
1004"#]]
1005                .raw(),
1006            ),
1007        ];
1008        for (input, expected, expected_error) in cases {
1009            let mut error = Vec::new();
1010            let mut actual = Cow::Borrowed("");
1011            decode_basic_string(
1012                Raw::new_unchecked(input, Some(Encoding::BasicString), Default::default()),
1013                &mut actual,
1014                &mut error,
1015            );
1016            assert_data_eq!(actual.as_ref(), expected);
1017            assert_data_eq!(error.to_debug(), expected_error);
1018        }
1019    }
1020
1021    #[test]
1022    fn ml_basic_string() {
1023        let cases = [
1024            (
1025                r#""""
1026Roses are red
1027Violets are blue""""#,
1028                str![[r#"
1029Roses are red
1030Violets are blue
1031"#]]
1032                .raw(),
1033                str![[r#"
1034[]
1035
1036"#]]
1037                .raw(),
1038            ),
1039            (
1040                r#"""" \""" """"#,
1041                str![[r#" """ "#]].raw(),
1042                str![[r#"
1043[]
1044
1045"#]]
1046                .raw(),
1047            ),
1048            (
1049                r#"""" \\""""#,
1050                str![[r#" \"#]].raw(),
1051                str![[r#"
1052[]
1053
1054"#]]
1055                .raw(),
1056            ),
1057            (
1058                r#""""
1059The quick brown \
1060
1061
1062  fox jumps over \
1063    the lazy dog.""""#,
1064                str!["The quick brown fox jumps over the lazy dog."].raw(),
1065                str![[r#"
1066[]
1067
1068"#]]
1069                .raw(),
1070            ),
1071            (
1072                r#""""\
1073       The quick brown \
1074       fox jumps over \
1075       the lazy dog.\
1076       """"#,
1077                str!["The quick brown fox jumps over the lazy dog."].raw(),
1078                str![[r#"
1079[]
1080
1081"#]]
1082                .raw(),
1083            ),
1084            (
1085                r#""""\
1086       """"#,
1087                str![""].raw(),
1088                str![[r#"
1089[]
1090
1091"#]]
1092                .raw(),
1093            ),
1094            (
1095                r#""""
1096\
1097  \
1098""""#,
1099                str![""].raw(),
1100                str![[r#"
1101[]
1102
1103"#]]
1104                .raw(),
1105            ),
1106            (
1107                r#""""  """#,
1108                str![[r#"  """#]].raw(),
1109                str![[r#"
1110[
1111    ParseError {
1112        context: Some(
1113            0..7,
1114        ),
1115        description: "invalid multi-line basic string",
1116        expected: Some(
1117            [
1118                Literal(
1119                    "\"",
1120                ),
1121            ],
1122        ),
1123        unexpected: Some(
1124            7..7,
1125        ),
1126    },
1127]
1128
1129"#]]
1130                .raw(),
1131            ),
1132            (
1133                r#""""  \""""#,
1134                str![[r#"  \"#]].raw(),
1135                str![[r#"
1136[
1137    ParseError {
1138        context: Some(
1139            0..9,
1140        ),
1141        description: "missing escaped value",
1142        expected: Some(
1143            [
1144                Literal(
1145                    "b",
1146                ),
1147                Literal(
1148                    "e",
1149                ),
1150                Literal(
1151                    "f",
1152                ),
1153                Literal(
1154                    "n",
1155                ),
1156                Literal(
1157                    "r",
1158                ),
1159                Literal(
1160                    "\\",
1161                ),
1162                Literal(
1163                    "\"",
1164                ),
1165                Literal(
1166                    "x",
1167                ),
1168                Literal(
1169                    "u",
1170                ),
1171                Literal(
1172                    "U",
1173                ),
1174            ],
1175        ),
1176        unexpected: Some(
1177            6..6,
1178        ),
1179    },
1180]
1181
1182"#]]
1183                .raw(),
1184            ),
1185        ];
1186        for (input, expected, expected_error) in cases {
1187            let mut error = Vec::new();
1188            let mut actual = Cow::Borrowed("");
1189            decode_ml_basic_string(
1190                Raw::new_unchecked(input, Some(Encoding::MlBasicString), Default::default()),
1191                &mut actual,
1192                &mut error,
1193            );
1194            assert_data_eq!(actual.as_ref(), expected);
1195            assert_data_eq!(error.to_debug(), expected_error);
1196        }
1197    }
1198
1199    #[test]
1200    fn unquoted_keys() {
1201        let cases = [
1202            (
1203                "a",
1204                str!["a"].raw(),
1205                str![[r#"
1206[]
1207
1208"#]]
1209                .raw(),
1210            ),
1211            (
1212                "hello",
1213                str!["hello"].raw(),
1214                str![[r#"
1215[]
1216
1217"#]]
1218                .raw(),
1219            ),
1220            (
1221                "-",
1222                str!["-"].raw(),
1223                str![[r#"
1224[]
1225
1226"#]]
1227                .raw(),
1228            ),
1229            (
1230                "_",
1231                str!["_"].raw(),
1232                str![[r#"
1233[]
1234
1235"#]]
1236                .raw(),
1237            ),
1238            (
1239                "-hello-world-",
1240                str!["-hello-world-"].raw(),
1241                str![[r#"
1242[]
1243
1244"#]]
1245                .raw(),
1246            ),
1247            (
1248                "_hello_world_",
1249                str!["_hello_world_"].raw(),
1250                str![[r#"
1251[]
1252
1253"#]]
1254                .raw(),
1255            ),
1256            (
1257                "",
1258                str![""].raw(),
1259                str![[r#"
1260[
1261    ParseError {
1262        context: Some(
1263            0..0,
1264        ),
1265        description: "unquoted keys cannot be empty",
1266        expected: Some(
1267            [
1268                Description(
1269                    "letters",
1270                ),
1271                Description(
1272                    "numbers",
1273                ),
1274                Literal(
1275                    "-",
1276                ),
1277                Literal(
1278                    "_",
1279                ),
1280            ],
1281        ),
1282        unexpected: Some(
1283            0..0,
1284        ),
1285    },
1286]
1287
1288"#]]
1289                .raw(),
1290            ),
1291        ];
1292
1293        for (input, expected, expected_error) in cases {
1294            let mut error = Vec::new();
1295            let mut actual = Cow::Borrowed("");
1296            decode_unquoted_key(
1297                Raw::new_unchecked(input, None, Default::default()),
1298                &mut actual,
1299                &mut error,
1300            );
1301            assert_data_eq!(actual.as_ref(), expected);
1302            assert_data_eq!(error.to_debug(), expected_error);
1303        }
1304    }
1305}