x11rb_protocol/resource_manager/
parser.rs

//! Parser for resource manager data: resource database files (entries, `!` comments and
//! `#include` directives) and resource queries.
2
3use super::{Binding, Component, Entry};
4use alloc::string::{String, ToString};
5use alloc::vec::Vec;
6
7// =======================
8// Common helper functions
9// =======================
10
/// Returns `true` if the byte `c` is one of the ASCII octal digits `0` through `7`.
fn is_octal_digit(c: u8) -> bool {
    matches!(c, b'0'..=b'7')
}
15
/// Split `data` at the first byte for which `matcher` returns `false`.
///
/// Returns `(prefix, remaining)`, where `prefix` is the longest leading run of bytes accepted by
/// `matcher` and `remaining` is everything after it. If every byte is accepted, `remaining` is
/// empty; if the first byte is rejected, `prefix` is empty.
fn parse_with_matcher<M>(data: &[u8], matcher: M) -> (&[u8], &[u8])
where
    M: Fn(u8) -> bool,
{
    let split = data
        .iter()
        .position(|&byte| !matcher(byte))
        .unwrap_or(data.len());
    data.split_at(split)
}
29
/// Returns `true` if the byte `c` may appear in a quark name: an ASCII letter, an ASCII digit,
/// `-` or `_`.
fn allowed_in_quark_name(c: u8) -> bool {
    matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_')
}
34
35/// Find the longest prefix satisfying allowed_in_quark_name().
36/// This returns (Some(prefix), remaining) if a prefix is found, else (None, data).
37fn next_component(data: &[u8]) -> (Option<&[u8]>, &[u8]) {
38    let (prefix, remaining) = parse_with_matcher(data, allowed_in_quark_name);
39    match prefix {
40        [] => (None, remaining),
41        prefix => (Some(prefix), remaining),
42    }
43}
44
45// =========================
46// Parser for resource files
47// =========================
48
49/// Skip to the next end of line in the given data
50fn skip_to_eol(data: &[u8]) -> &[u8] {
51    parse_with_matcher(data, |c| c != b'\n').1
52}
53
54/// Skip all spaces in the given data
55fn skip_spaces(data: &[u8]) -> &[u8] {
56    parse_with_matcher(data, |c| c == b' ').1
57}
58
/// Consume `text` from the start of `data`.
///
/// Returns the bytes following `text`, or `None` if `data` does not begin with `text`.
fn skip_text<'a>(data: &'a [u8], text: &[u8]) -> Option<&'a [u8]> {
    data.strip_prefix(text)
}
67
68/// Parse a single `Component` from the data. This can either be a wildcard ("?") or a
69/// component made up of characters accepted by `allowed_in_quark_name`.
70fn next_component_name(data: &[u8]) -> (Option<Component>, &[u8]) {
71    if data.first() == Some(&b'?') {
72        (Some(Component::Wildcard), &data[1..])
73    } else {
74        let (comp, remaining) = next_component(data);
75        let comp = comp.map(|s| {
76            let s = std::str::from_utf8(s).expect("ascii-only");
77            Component::Normal(s.to_string())
78        });
79        (comp, remaining)
80    }
81}
82
83/// Parse a resource like "foo.?*baz" (wildcards allowed)
84fn parse_components(data: &[u8]) -> (Vec<(Binding, Component)>, &[u8]) {
85    fn parse_binding(mut data: &[u8]) -> (Binding, &[u8]) {
86        let mut binding = Binding::Tight;
87        loop {
88            match data.first() {
89                Some(&b'*') => binding = Binding::Loose,
90                Some(&b'.') => {}
91                _ => break,
92            }
93            data = &data[1..];
94        }
95        (binding, data)
96    }
97
98    let mut data = data;
99    let mut result = Vec::new();
100    loop {
101        let (binding, remaining) = parse_binding(data);
102        if let (Some(component), remaining) = next_component_name(remaining) {
103            data = remaining;
104            result.push((binding, component));
105        } else {
106            break;
107        }
108    }
109    (result, data)
110}
111
112/// Parse a full entry from the data. This begins with components (see `parse_components()`),
113/// then after a colon (":") comes the value. The value may contain escape sequences.
114fn parse_entry(data: &[u8]) -> (Result<Entry, ()>, &[u8]) {
115    let (components, data) = parse_components(data);
116
117    match components.last() {
118        // Empty components are not allowed
119        None => return (Err(()), skip_to_eol(data)),
120        // The last component may not be a wildcard
121        Some((_, Component::Wildcard)) => return (Err(()), skip_to_eol(data)),
122        _ => {}
123    }
124
125    let data = skip_spaces(data);
126
127    // next comes a colon
128    let data = match data.split_first() {
129        Some((&b':', data)) => data,
130        _ => return (Err(()), skip_to_eol(data)),
131    };
132
133    // skip more spaces and let \ escape line breaks
134    let mut data = data;
135    loop {
136        let (_, remaining) = parse_with_matcher(data, |c| c == b' ' || c == b'\t');
137        if remaining.get(..2) == Some(&b"\\\n"[..]) {
138            data = &remaining[2..];
139        } else {
140            data = remaining;
141            break;
142        }
143    }
144
145    // Parse the value, decoding escape sequences. The most complicated case are octal escape
146    // sequences like \123.
147    let mut value = Vec::new();
148    let mut index = 0;
149    let mut octal = None;
150    while let Some(&b) = data.get(index) {
151        index += 1;
152        if b == b'\n' {
153            break;
154        }
155        if let Some(oct) = octal {
156            if is_octal_digit(b) {
157                // We are currently parsing an octal; add the new character
158                match oct {
159                    (x, None) => octal = Some((x, Some(b))),
160                    (x, Some(y)) => {
161                        let (x, y, z) = (x - b'0', y - b'0', b - b'0');
162                        let decoded = (x * 8 + y) * 8 + z;
163                        value.push(decoded);
164                        octal = None;
165                    }
166                }
167                continue;
168            } else {
169                // Not an octal sequence; add the collected characters to the output
170                value.push(b'\\');
171                value.push(oct.0);
172                if let Some(oct2) = oct.1 {
173                    value.push(oct2);
174                }
175                octal = None;
176
177                // Fall through to the parsing code below
178            }
179        }
180        if b != b'\\' {
181            value.push(b);
182        } else {
183            match data.get(index) {
184                None => {
185                    value.push(b);
186                    // Keep index as-is. This is to counter the += 1 below.
187                    index -= 1;
188                }
189                Some(b' ') => value.push(b' '),
190                Some(b'\t') => value.push(b'\t'),
191                Some(b'n') => value.push(b'\n'),
192                Some(b'\\') => value.push(b'\\'),
193                Some(b'\n') => { /* Continue parsing next line */ }
194                Some(&x) if is_octal_digit(x) => octal = Some((x, None)),
195                Some(&x) => {
196                    value.push(b);
197                    value.push(x);
198                }
199            }
200            index += 1;
201        }
202    }
203
204    let entry = Entry { components, value };
205    (Ok(entry), &data[index..])
206}
207
208/// Parse the contents of a database
209pub(crate) fn parse_database<F>(mut data: &[u8], result: &mut Vec<Entry>, mut include_callback: F)
210where
211    for<'r> F: FnMut(&'r [u8], &mut Vec<Entry>),
212{
213    // Iterate over lines
214    while let Some(first) = data.first() {
215        match first {
216            // Skip empty lines
217            b'\n' => data = &data[1..],
218            // Comment, skip the line
219            b'!' => data = skip_to_eol(data),
220            b'#' => {
221                let remaining = skip_spaces(&data[1..]);
222                // Skip to the next line for the next loop iteration. The rest of the code here
223                // tried to parse the line.
224                data = skip_to_eol(remaining);
225
226                // Only #include is defined
227                if let Some(remaining) = skip_text(remaining, b"include") {
228                    let (_, remaining) = parse_with_matcher(remaining, |c| c == b' ');
229                    // Find the text enclosed in quotation marks
230                    if let Some(b'\"') = remaining.first() {
231                        let (file, remaining) =
232                            parse_with_matcher(&remaining[1..], |c| c != b'"' && c != b'\n');
233                        if let Some(b'\"') = remaining.first() {
234                            // Okay, we found a well-formed include directive.
235                            include_callback(file, result);
236                        }
237                    }
238                }
239            }
240            _ => {
241                let (entry, remaining) = parse_entry(data);
242                data = remaining;
243                // Add the entry to the result if we parsed one; ignore errors
244                result.extend(entry.ok());
245            }
246        }
247    }
248}
249
250/// Parse a resource query like "foo.bar.baz" (no wildcards allowed, no bindings allowed)
251pub(crate) fn parse_query(data: &[u8]) -> Option<Vec<String>> {
252    let mut data = data;
253    let mut result = Vec::new();
254    while let (Some(component), remaining) = next_component(data) {
255        data = remaining;
256        while let Some(&b'.') = data.first() {
257            data = &data[1..];
258        }
259        let component = std::str::from_utf8(component).expect("ascii-only");
260        result.push(component.to_string());
261    }
262    if data.is_empty() {
263        Some(result)
264    } else {
265        None
266    }
267}
268
269#[cfg(test)]
270mod test {
271    use super::{parse_database, parse_entry, parse_query, Binding, Component, Entry};
272    use alloc::string::{String, ToString};
273    use alloc::vec;
274    use alloc::vec::Vec;
275    use std::eprintln;
276
277    // Most tests in here are based on [1], which is: Copyright © 2016 Ingo Bürk
278    // [1]: https://github.com/Airblader/xcb-util-xrm/blob/master/tests/tests_parser.c
279
280    #[test]
281    fn test_parse_query_success() {
282        let tests = [
283            (
284                &b"First.second"[..],
285                vec!["First".to_string(), "second".to_string()],
286            ),
287            (b"", Vec::new()),
288            (
289                b"urxvt.scrollBar_right",
290                vec!["urxvt".to_string(), "scrollBar_right".to_string()],
291            ),
292            (
293                b"urxvt.Control-Shift-Up",
294                vec!["urxvt".to_string(), "Control-Shift-Up".to_string()],
295            ),
296        ];
297        for (data, expected) in tests.iter() {
298            let result = parse_query(data);
299            assert_eq!(result.as_ref(), Some(expected), "while parsing {data:?}");
300        }
301    }
302
303    #[test]
304    fn test_parse_query_error() {
305        let tests = [
306            &b"First.second: on"[..],
307            b"First*second",
308            b"First.?.second",
309            b"*second",
310            b"?.second",
311        ];
312        for data in tests.iter() {
313            let result = parse_query(data);
314            assert!(
315                result.is_none(),
316                "Unexpected success parsing '{data:?}': {result:?}"
317            );
318        }
319    }
320
321    #[test]
322    fn test_parse_entry_success() {
323        let tests = [
324            // Basics
325            (
326                &b"First: 1"[..],
327                vec![(Binding::Tight, Component::Normal("First".to_string()))],
328                &b"1"[..],
329            ),
330            (
331                b"First.second: 1",
332                vec![
333                    (Binding::Tight, Component::Normal("First".to_string())),
334                    (Binding::Tight, Component::Normal("second".to_string())),
335                ],
336                b"1",
337            ),
338            (
339                b"First..second: 1",
340                vec![
341                    (Binding::Tight, Component::Normal("First".to_string())),
342                    (Binding::Tight, Component::Normal("second".to_string())),
343                ],
344                b"1",
345            ),
346            // Wildcards
347            (
348                b"?.second: 1",
349                vec![
350                    (Binding::Tight, Component::Wildcard),
351                    (Binding::Tight, Component::Normal("second".to_string())),
352                ],
353                b"1",
354            ),
355            (
356                b"First.?.third: 1",
357                vec![
358                    (Binding::Tight, Component::Normal("First".to_string())),
359                    (Binding::Tight, Component::Wildcard),
360                    (Binding::Tight, Component::Normal("third".to_string())),
361                ],
362                b"1",
363            ),
364            // Loose bindings
365            (
366                b"*second: 1",
367                vec![(Binding::Loose, Component::Normal("second".to_string()))],
368                b"1",
369            ),
370            (
371                b"First*third: 1",
372                vec![
373                    (Binding::Tight, Component::Normal("First".to_string())),
374                    (Binding::Loose, Component::Normal("third".to_string())),
375                ],
376                b"1",
377            ),
378            (
379                b"First**third: 1",
380                vec![
381                    (Binding::Tight, Component::Normal("First".to_string())),
382                    (Binding::Loose, Component::Normal("third".to_string())),
383                ],
384                b"1",
385            ),
386            // Combinations
387            (
388                b"First*?.fourth: 1",
389                vec![
390                    (Binding::Tight, Component::Normal("First".to_string())),
391                    (Binding::Loose, Component::Wildcard),
392                    (Binding::Tight, Component::Normal("fourth".to_string())),
393                ],
394                b"1",
395            ),
396            // Values
397            (
398                b"First: 1337",
399                vec![(Binding::Tight, Component::Normal("First".to_string()))],
400                b"1337",
401            ),
402            (
403                b"First: -1337",
404                vec![(Binding::Tight, Component::Normal("First".to_string()))],
405                b"-1337",
406            ),
407            (
408                b"First: 13.37",
409                vec![(Binding::Tight, Component::Normal("First".to_string()))],
410                b"13.37",
411            ),
412            (
413                b"First: value",
414                vec![(Binding::Tight, Component::Normal("First".to_string()))],
415                b"value",
416            ),
417            (
418                b"First: #abcdef",
419                vec![(Binding::Tight, Component::Normal("First".to_string()))],
420                b"#abcdef",
421            ),
422            (
423                b"First: { key: 'value' }",
424                vec![(Binding::Tight, Component::Normal("First".to_string()))],
425                b"{ key: 'value' }",
426            ),
427            (
428                b"First: x?y",
429                vec![(Binding::Tight, Component::Normal("First".to_string()))],
430                b"x?y",
431            ),
432            (
433                b"First: x*y",
434                vec![(Binding::Tight, Component::Normal("First".to_string()))],
435                b"x*y",
436            ),
437            // Whitespace
438            (
439                b"First:    x",
440                vec![(Binding::Tight, Component::Normal("First".to_string()))],
441                b"x",
442            ),
443            (
444                b"First: x   ",
445                vec![(Binding::Tight, Component::Normal("First".to_string()))],
446                b"x   ",
447            ),
448            (
449                b"First:    x   ",
450                vec![(Binding::Tight, Component::Normal("First".to_string()))],
451                b"x   ",
452            ),
453            (
454                b"First:x",
455                vec![(Binding::Tight, Component::Normal("First".to_string()))],
456                b"x",
457            ),
458            (
459                b"First: \t x",
460                vec![(Binding::Tight, Component::Normal("First".to_string()))],
461                b"x",
462            ),
463            (
464                b"First: \t x \t",
465                vec![(Binding::Tight, Component::Normal("First".to_string()))],
466                b"x \t",
467            ),
468            // Special characters
469            (
470                b"First: \\ x",
471                vec![(Binding::Tight, Component::Normal("First".to_string()))],
472                b" x",
473            ),
474            (
475                b"First: x\\ x",
476                vec![(Binding::Tight, Component::Normal("First".to_string()))],
477                b"x x",
478            ),
479            (
480                b"First: \\\tx",
481                vec![(Binding::Tight, Component::Normal("First".to_string()))],
482                b"\tx",
483            ),
484            (
485                b"First: \\011x",
486                vec![(Binding::Tight, Component::Normal("First".to_string()))],
487                b"\tx",
488            ),
489            (
490                b"First: x\\\\x",
491                vec![(Binding::Tight, Component::Normal("First".to_string()))],
492                b"x\\x",
493            ),
494            (
495                b"First: x\\nx",
496                vec![(Binding::Tight, Component::Normal("First".to_string()))],
497                b"x\nx",
498            ),
499            (
500                b"First: \\080",
501                vec![(Binding::Tight, Component::Normal("First".to_string()))],
502                b"\\080",
503            ),
504            (
505                b"First: \\00a",
506                vec![(Binding::Tight, Component::Normal("First".to_string()))],
507                b"\\00a",
508            ),
509            // Own tests
510            // Some more escape tests, e.g. escape at end of input
511            (
512                b"First: \\",
513                vec![(Binding::Tight, Component::Normal("First".to_string()))],
514                b"\\",
515            ),
516            (
517                b"First: \\xxx",
518                vec![(Binding::Tight, Component::Normal("First".to_string()))],
519                b"\\xxx",
520            ),
521            (
522                b"First: \\1xx",
523                vec![(Binding::Tight, Component::Normal("First".to_string()))],
524                b"\\1xx",
525            ),
526            (
527                b"First: \\10x",
528                vec![(Binding::Tight, Component::Normal("First".to_string()))],
529                b"\\10x",
530            ),
531            (
532                b"First: \\100",
533                vec![(Binding::Tight, Component::Normal("First".to_string()))],
534                b"@",
535            ),
536            (
537                b"First: \\n",
538                vec![(Binding::Tight, Component::Normal("First".to_string()))],
539                b"\n",
540            ),
541        ];
542        for (data, resource, value) in tests.iter() {
543            run_entry_test(data, resource, value);
544        }
545    }
546
547    #[test]
548    fn test_parse_entry_error() {
549        let tests = [
550            &b": 1"[..],
551            b"?: 1",
552            b"First",
553            b"First second",
554            b"First.?: 1",
555            b"F\xc3\xb6rst: 1",
556            b"F~rst: 1",
557        ];
558        for data in tests.iter() {
559            match parse_entry(data) {
560                (Ok(v), _) => panic!("Unexpected success parsing '{:?}': {:?}", data, v),
561                (Err(_), b"") => {}
562                (Err(_), remaining) => panic!(
563                    "Unexpected remaining data parsing '{:?}': {:?}",
564                    data, remaining
565                ),
566            }
567        }
568    }
569
570    #[test]
571    fn test_parse_large_value() {
572        let value = vec![b'x'; 1025];
573        let mut data = b"First: ".to_vec();
574        data.extend(&value);
575        let resource = (Binding::Tight, Component::Normal("First".to_string()));
576        run_entry_test(&data, &[resource], &value);
577    }
578
579    #[test]
580    fn test_parse_large_resource() {
581        let x = vec![b'x'; 1025];
582        let y = vec![b'y'; 1025];
583        let mut data = x.clone();
584        data.push(b'.');
585        data.extend(&y);
586        data.extend(b": 1");
587        let resource = [
588            (
589                Binding::Tight,
590                Component::Normal(String::from_utf8(x).unwrap()),
591            ),
592            (
593                Binding::Tight,
594                Component::Normal(String::from_utf8(y).unwrap()),
595            ),
596        ];
597        run_entry_test(&data, &resource, b"1");
598    }
599
600    #[test]
601    fn test_parse_database() {
602        let expected_entry = Entry {
603            components: vec![(Binding::Tight, Component::Normal("First".to_string()))],
604            value: b"1".to_vec(),
605        };
606        let tests = [
607            (&b"First: 1\n\n\n"[..], vec![expected_entry.clone()]),
608            (b"First: 1\n!Foo", vec![expected_entry.clone()]),
609            (b"!First: 1\nbar\n\n\n", Vec::new()),
610            (b"!bar\nFirst: 1\nbaz", vec![expected_entry.clone()]),
611            (b"First :\\\n \\\n\\\n1\n", vec![expected_entry]),
612            (
613                b"First: \\\n  1\\\n2\n",
614                vec![Entry {
615                    components: vec![(Binding::Tight, Component::Normal("First".to_string()))],
616                    value: b"12".to_vec(),
617                }],
618            ),
619        ];
620        let mut success = true;
621        for (data, expected) in tests.iter() {
622            let mut result = Vec::new();
623            parse_database(data, &mut result, |_, _| unreachable!());
624            if &result != expected {
625                eprintln!("While testing {data:?}");
626                eprintln!("Expected: {expected:?}");
627                eprintln!("Got:      {result:?}");
628                eprintln!();
629                success = false;
630            }
631        }
632        if !success {
633            panic!()
634        }
635    }
636
637    #[test]
638    fn test_include_parsing() {
639        let tests = [
640            (&b"#include\"test\""[..], vec![&b"test"[..]]),
641            (b"#include\"test", Vec::new()),
642            (b"#include\"", Vec::new()),
643            (b"#include", Vec::new()),
644            (b"#includ", Vec::new()),
645            (b"#in", Vec::new()),
646            (b"#  foo", Vec::new()),
647            (
648                b"#  include   \" test \"   \n#include  \"foo\"",
649                vec![b" test ", b"foo"],
650            ),
651        ];
652        let mut success = true;
653        for (data, expected) in tests.iter() {
654            let mut result = Vec::new();
655            let mut calls = Vec::new();
656            parse_database(data, &mut result, |file, _| calls.push(file.to_vec()));
657            if &calls != expected {
658                eprintln!("While testing {data:?}");
659                eprintln!("Expected: {expected:?}");
660                eprintln!("Got:      {calls:?}");
661                eprintln!();
662                success = false;
663            }
664        }
665        if !success {
666            panic!()
667        }
668    }
669
670    #[test]
671    fn test_include_additions() {
672        let entry = Entry {
673            components: Vec::new(),
674            value: b"42".to_vec(),
675        };
676        let mut result = Vec::new();
677        parse_database(b"#include\"test\"", &mut result, |file, result| {
678            assert_eq!(file, b"test");
679            result.push(entry.clone());
680        });
681        assert_eq!(result, [entry]);
682    }
683
684    fn run_entry_test(data: &[u8], resource: &[(Binding, Component)], value: &[u8]) {
685        match parse_entry(data) {
686            (Ok(result), remaining) => {
687                assert_eq!(remaining, b"", "failed to parse {data:?}");
688                assert_eq!(
689                    result.components, resource,
690                    "incorrect components when parsing {data:?}",
691                );
692                assert_eq!(result.value, value, "incorrect value when parsing {data:?}");
693            }
694            (Err(err), _) => panic!("Failed to parse '{:?}': {:?}", data, err),
695        }
696    }
697}