nom_rfc8288/
complete.rs

1use std::fmt::Display;
2
3use itertools::Itertools;
4use nom::{
5    AsChar, IResult, Input, Offset, Parser,
6    branch::alt,
7    character::complete::{char, none_of, satisfy, space0},
8    combinator::{all_consuming, opt, recognize},
9    error::ParseError,
10    multi::{fold_many0, many_m_n, many0, many0_count, many1_count, separated_list1},
11    sequence::{delimited, pair, preceded},
12};
13use nom_language::error::VerboseError;
14
15use thiserror::Error;
16
17use crate::{is_qdtext, is_quoted_pair, is_tchar, optional_parser};
18
19/// ```text
20/// TCHAR = "!" / "#" / "$" / "%" / "&" / "'" / "*"
21///       / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
22///       / DIGIT / ALPHA
23/// ```
24pub fn tchar<I, E>(input: I) -> IResult<I, char, E>
25where
26    I: Input,
27    <I as Input>::Item: AsChar,
28    E: ParseError<I>,
29{
30    satisfy(is_tchar).parse(input)
31}
32
33/// `TOKEN = 1*TCHAR`
34pub fn token<I, E>(input: I) -> IResult<I, I, E>
35where
36    I: Input + Offset,
37    <I as Input>::Item: AsChar,
38    E: ParseError<I>,
39{
40    recognize(many1_count(tchar)).parse(input)
41}
42
43/// `QDTEXT = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text`
44pub fn qdtext<I, E>(input: I) -> IResult<I, char, E>
45where
46    I: Input,
47    <I as Input>::Item: AsChar,
48    E: ParseError<I>,
49{
50    satisfy(is_qdtext).parse(input)
51}
52
53/// `QUOTED-PAIR = "\" ( HTAB / SP / VCHAR / obs-text )`
54fn quoted_pair<I, E>(input: I) -> IResult<I, char, E>
55where
56    I: Input,
57    <I as Input>::Item: AsChar,
58    E: ParseError<I>,
59{
60    preceded(char('\\'), satisfy(is_quoted_pair)).parse(input)
61}
62
63/// `QUOTED-STRING = DQUOTE *( qdtext / quoted-pair ) DQUOTE`
64///
65/// If the parser succeeds, we return the unmodified string (with backslashes included)
66/// to prevent allocation and to make sure that all of the return types are consistent
67/// when using nom combinators
68fn quoted_string<I, E>(input: I) -> IResult<I, I, E>
69where
70    I: Input + Offset,
71    <I as Input>::Item: AsChar,
72    E: ParseError<I>,
73{
74    recognize(delimited(
75        char('"'),
76        recognize(many0_count(alt((quoted_pair, qdtext)))),
77        char('"'),
78    ))
79    .parse(input)
80}
81
82fn quoted_string_alloca<I, E>(input: I) -> IResult<I, String, E>
83where
84    I: Input,
85    <I as Input>::Item: AsChar,
86    E: ParseError<I>,
87{
88    all_consuming(delimited(
89        char('"'),
90        fold_many0(alt((quoted_pair, qdtext)), String::new, |mut acc, item| {
91            acc.push(item);
92            acc
93        }),
94        char('"'),
95    ))
96    .parse(input)
97}
98
99/// ```text
100/// #rule
101/// #element => [ element ] *( OWS "," OWS [ element ] )
102/// ```
103///
104/// RFC 9110 specifies that:
105///
106/// > A recipient MUST parse and ignore a reasonable number of empty list
107/// > elements: enough to handle common mistakes by senders that merge
108/// > values, but not so much that they could be used as a denial-of-
109/// > service mechanism.
110///
111/// However, the RFC does not specify what a "reasonable" value for this is,
112/// so we allow the user to configure such a limit.
113///
114/// In this implementation, empty list elements are represented as `None`
115/// in the returned `Vec`, and non-empty list elements are represented as
116/// `Some<E>`
117pub fn list<I, O, E, L>(
118    reasonable_count: usize,
119    element: L,
120    input: I,
121) -> IResult<I, Vec<Option<O>>, E>
122where
123    L: Parser<I, Output = O, Error = E>,
124    E: ParseError<I>,
125    I: Input + std::marker::Copy,
126    <I as Input>::Item: AsChar,
127{
128    let allow_empty_elements = reasonable_count != 0;
129    separated_list1(
130        many_m_n(1, reasonable_count + 1, (space0, char(','), space0)),
131        optional_parser(allow_empty_elements, element),
132    )
133    .parse(input)
134}
135
/// [`LinkParam`] is used to represent a single parsed parameter of a link in
/// the Link header, i.e. one key/value pair.
#[derive(Debug, PartialEq)]
pub struct LinkParam<'a> {
    /// The parameter name, borrowed from the parsed input.
    pub key: &'a str,
    /// The parameter value, or `None` when the parameter has no value.
    ///
    /// This is an owned `String` because quotes and backslash escapes have to
    /// be stripped from quoted values, which requires an allocation. An enum
    /// that borrows when no unescaping is needed would avoid some allocations,
    /// but ease of use was judged more important.
    pub val: Option<String>,
}
147
148/// The [`LinkData`] struct is used to store the URL provided in the Link header,
149/// as well as optional parameters.
150#[derive(PartialEq, Debug)]
151pub struct LinkData<'a> {
152    pub url: &'a str,
153    pub params: Vec<LinkParam<'a>>,
154}
155
/// Owned counterpart of [`LinkParam`]: the same data, but without borrowing
/// from the parsed input.
#[derive(Debug, PartialEq)]
pub struct LinkParamOwned {
    /// The parameter name.
    pub key: String,
    /// The parameter value, or `None` when the parameter has no value.
    pub val: Option<String>,
}
161
162#[derive(PartialEq, Debug)]
163pub struct LinkDataOwned {
164    pub url: String,
165    pub params: Vec<LinkParamOwned>,
166}
167
168impl LinkParam<'_> {
169    pub fn to_owned(&self) -> LinkParamOwned {
170        LinkParamOwned {
171            key: self.key.to_owned(),
172            val: self.val.to_owned(),
173        }
174    }
175}
176
177impl LinkData<'_> {
178    pub fn to_owned(&self) -> LinkDataOwned {
179        LinkDataOwned {
180            url: self.url.to_owned(),
181            params: self.params.iter().map(|x| x.to_owned()).collect_vec(),
182        }
183    }
184}
185
186#[derive(Error, Debug, Clone, PartialEq)]
187pub enum LinkParseError {
188    #[error("left over data could not be parsed: `{0}`")]
189    IncompleteParse(String),
190    #[error("the data for key `{0}` is not available")]
191    FailedToParse(String),
192}
193
194#[doc(hidden)]
195impl<E> From<nom::Err<VerboseError<E>>> for LinkParseError
196where
197    E: Display,
198{
199    fn from(err: nom::Err<VerboseError<E>>) -> Self {
200        match err {
201            nom::Err::Incomplete(_) => Self::FailedToParse("Incomplete input".into()),
202            nom::Err::Error(err) | nom::Err::Failure(err) => Self::FailedToParse(err.to_string()),
203        }
204    }
205}
206
// Number of extra empty list elements tolerated when parsing a Link header.
// https://datatracker.ietf.org/doc/html/rfc9110#name-recipient-requirements
// requires recipients to cope with at least some empty elements, and this is
// needed in practice: the very first real-world header tried against this
// parser failed because of a trailing comma.
const NUM_EMPTY_ELEMENTS: usize = 2;
210
/// Selects how empty link parameters (as in `a;; b`) are treated.
enum ParseStrictness {
    /// An empty parameter is an error.
    Strict,
    /// An empty parameter is silently skipped.
    Lenient,
}
215
216/// This method will parse a [`&str`] and return an array of [`Option`]s if it can
217/// successfully parse the [`&str`] as a [Link](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link) header.
218/// The reason we return [`Option`]s is because if the Link header has empty elements, we want to show that information
219/// to the user by returning [`None`]s.
220/// ```rust
221/// use nom_rfc8288::complete::{link_strict, LinkData, LinkParam};
222///
223/// let link_data = r#"<https://example.com>; rel="origin"; csv="one,two""#;
224/// let parsed = link_strict(link_data).unwrap();
225///
226/// assert_eq!(
227///     parsed,
228///     vec![
229///         Some(
230///             LinkData {
231///                 url: "https://example.com",
232///                 params: vec![
233///                     LinkParam {
234///                         key: "rel",
235///                         val: Some("origin".to_owned()),
236///                     },
237///                     LinkParam {
238///                         key: "csv",
239///                         val: Some("one,two".to_owned()),
240///                     }
241///                 ],
242///             }
243///         ),
244///     ]
245/// );
246/// ```
247pub fn link_strict<'a, E>(input: &'a str) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
248where
249    E: ParseError<&'a str>,
250    nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
251{
252    link_inner(input, ParseStrictness::Strict)
253}
254
255/// Same as [`link_strict`], except that empty parameters are leniently parsed. They don't result
256/// in an error and are skipped when collecting parameters.
257///
258/// ```rust
259/// use nom_rfc8288::complete::link_lenient;
260///
261/// let link_data = r#"<https://example.com>; rel="origin";; csv="one,two""#;
262/// assert!(link_lenient(link_data).is_ok());
263/// ```
264pub fn link_lenient<'a, E>(input: &'a str) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
265where
266    E: ParseError<&'a str>,
267    nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
268{
269    link_inner(input, ParseStrictness::Lenient)
270}
271
272/// Same as [`link_strict`], but with the original name. When lenient parsing was introduced,
273/// to avoid ambiguity the function was renamed. To avoid breakages, the function still exists,
274/// but instead use [`link_strict`].
275#[deprecated(since = "0.4.0", note = "please use `link_strict` instead")]
276pub fn link<'a, E>(input: &'a str) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
277where
278    E: ParseError<&'a str>,
279    nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
280{
281    link_inner(input, ParseStrictness::Strict)
282}
283
284fn link_inner<'a, E>(
285    input: &'a str,
286    strictness: ParseStrictness,
287) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
288where
289    E: ParseError<&'a str>,
290    nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
291{
292    type ParserOutput<'s> = (
293        &'s str,
294        Vec<Option<(&'s str, Vec<Option<(&'s str, Option<&'s str>)>>)>>,
295    );
296    let parsed = list::<_, _, VerboseError<&str>, _>(
297        NUM_EMPTY_ELEMENTS,
298        (
299            delimited(char('<'), recognize(many0_count(none_of(">"))), char('>')),
300            many0(preceded(
301                (space0, char(';'), space0),
302                opt(pair(
303                    token::<&str, VerboseError<&str>>,
304                    opt(preceded(
305                        pair(char('='), space0),
306                        alt((quoted_string, token)),
307                    )),
308                )),
309            )),
310        ),
311        input,
312    );
313
314    let (remainder, mut output): ParserOutput<'a> = parsed?;
315
316    if !remainder.is_empty() {
317        return Err(LinkParseError::IncompleteParse(remainder.to_owned()));
318    }
319
320    output
321        .drain(..)
322        .map(|parsed_link| {
323            let mut parsed_link = match parsed_link {
324                Some(l) => l,
325                None => return Ok(None),
326            };
327
328            let link_params = parsed_link
329                .1
330                .drain(..)
331                .map(|link_param| {
332                    let Some(link_param) = link_param else {
333                        match strictness {
334                            ParseStrictness::Strict => {
335                                return Err(LinkParseError::IncompleteParse(
336                                    "Empty parameter is disallowed".to_owned(),
337                                ));
338                            }
339                            ParseStrictness::Lenient => {
340                                return Ok(None);
341                            }
342                        }
343                    };
344                    let parsed_link_param_val = match link_param.1 {
345                        None => None,
346                        Some(link_param_val) if link_param_val.starts_with('"') => {
347                            match quoted_string_alloca::<&str, VerboseError<&str>>(link_param_val) {
348                                Ok(s) => Some(s.1),
349                                Err(e) => {
350                                    return Err(e.into());
351                                }
352                            }
353                        }
354                        Some(link_param_val) => Some(link_param_val.to_owned()),
355                    };
356
357                    Ok(Some(LinkParam {
358                        key: link_param.0,
359                        val: parsed_link_param_val,
360                    }))
361                })
362                .fold_ok(Vec::new(), |mut acc, item| {
363                    if let Some(param) = item {
364                        acc.push(param);
365                    }
366                    acc
367                })?;
368
369            Ok(Some(LinkData {
370                url: parsed_link.0,
371                params: link_params,
372            }))
373        })
374        .fold_ok(Vec::new(), |mut acc, item| {
375            acc.push(item);
376            acc
377        })
378}
379
#[cfg(test)]
mod tests {
    use itertools::Itertools;
    use nom::Err as OutCome;
    use nom_language::error::VerboseError;

    use crate::complete::{
        LinkData, LinkDataOwned, LinkParam, LinkParseError, quoted_string, quoted_string_alloca,
        tchar, token,
    };

    use super::{link_lenient, link_strict, list, quoted_pair};

    /// The parse result several tests expect: a single `/terms` link with a
    /// `rel` parameter holding `rel` and an `anchor` parameter holding `#foo`.
    fn terms_link(rel: &str) -> Vec<Option<LinkData<'static>>> {
        let params = vec![
            LinkParam {
                key: "rel",
                val: Some(rel.to_owned()),
            },
            LinkParam {
                key: "anchor",
                val: Some("#foo".to_owned()),
            },
        ];

        vec![Some(LinkData {
            url: "/terms",
            params,
        })]
    }

    #[test]
    fn test_tchar() {
        let letter = tchar::<_, VerboseError<&str>>("mbbb");
        assert_eq!(letter, Ok(("bbb", 'm')));

        let punctuation = tchar::<_, VerboseError<&str>>("!aa");
        assert_eq!(punctuation, Ok(("aa", '!')));

        let comma = tchar::<_, VerboseError<&str>>(",");
        assert!(comma.is_err());
    }

    #[test]
    fn test_token() {
        let empty = token::<_, VerboseError<&str>>("");
        assert!(matches!(empty, Err(OutCome::Error(_))));

        let whole = token::<_, VerboseError<&str>>("mbbb");
        assert_eq!(whole, Ok(("", "mbbb")));

        let prefix = token::<_, VerboseError<&str>>("a,");
        assert_eq!(prefix, Ok((",", "a")));

        let comma = token::<_, VerboseError<&str>>(",");
        assert!(matches!(comma, Err(OutCome::Error(_))));
    }

    #[test]
    fn test_quoted_string() {
        // Inputs that are consumed entirely and returned verbatim.
        for input in [r#""""#, r#""hello""#, r#""\"hello""#] {
            assert_eq!(
                quoted_string::<_, VerboseError<&str>>(input),
                Ok(("", input))
            );
        }

        // A missing closing quote, or anything before the opening quote, is rejected.
        for input in [r#""awd"#, r#" "text""#] {
            assert!(matches!(
                quoted_string::<_, VerboseError<&str>>(input),
                Err(OutCome::Error(_))
            ));
        }

        // Data after the closing quote is left for the caller.
        assert_eq!(
            quoted_string::<_, VerboseError<&str>>(r#""awd"trailing"#),
            Ok(("trailing", r#""awd""#))
        );
    }

    #[test]
    fn test_list_rule() {
        // (input, expected remainder, expected elements)
        let cases = [
            ("a,b,c", "", vec![Some("a"), Some("b"), Some("c")]),
            ("a , b , c", "", vec![Some("a"), Some("b"), Some("c")]),
            ("a , b , ", " , ", vec![Some("a"), Some("b")]),
            ("a , b , ,", " , ,", vec![Some("a"), Some("b")]),
        ];

        for (input, remainder, elements) in cases {
            assert_eq!(
                list::<_, _, VerboseError<&str>, _>(0, token, input),
                Ok((remainder, elements))
            );
        }
    }

    #[test]
    fn test_link() {
        let header = r##"</terms>; rel="copyright"; anchor="#foo""##;

        let parsed = link_strict::<VerboseError<&str>>(header).unwrap();

        assert_eq!(parsed, terms_link("copyright"));
    }

    #[test]
    fn test_empty_param_with_strict_parsing() {
        let header = r##"</terms>; rel="copyright";; anchor="#foo""##;

        let outcome = link_strict::<VerboseError<&str>>(header);

        let expected = LinkParseError::IncompleteParse("Empty parameter is disallowed".to_owned());
        assert_eq!(outcome, Err(expected));
    }

    #[test]
    fn test_empty_param_with_lenient_parsing() {
        let header = r##"</terms>; rel="copyright";; anchor="#foo""##;

        let parsed = link_lenient::<VerboseError<&str>>(header).unwrap();

        assert_eq!(parsed, terms_link("copyright"));
    }

    #[test]
    fn test_quoted_pair() {
        let (_, escaped) = quoted_pair::<_, VerboseError<&str>>(r#"\a"#).unwrap();

        assert_eq!(escaped, 'a');
    }

    #[test]
    fn test_quoted_string_alloca() {
        let (_, contents) = quoted_string_alloca::<_, VerboseError<&str>>(r#""aaaa""#).unwrap();

        assert_eq!(contents, "aaaa".to_owned());
    }

    #[test]
    fn test_quoted_string_alloca_quotes() {
        let (_, contents) = quoted_string_alloca::<_, VerboseError<&str>>(r#""aa\"aa""#).unwrap();

        assert_eq!(contents, "aa\"aa".to_owned());
    }

    #[test]
    fn test_link_quoted_link_param() {
        let header = r##"</terms>; rel="copy\"right"; anchor=#foo"##;

        let parsed = link_strict::<VerboseError<&str>>(header).unwrap();

        assert_eq!(parsed, terms_link("copy\"right"));
    }

    #[test]
    fn test_error_return_ergonomics() -> Result<(), LinkParseError> {
        /// In version 0.2, using the `?` operator on the return value of the link function
        /// caused lifetime errors, because the error was a `VerboseError<&'a str>` wrapped
        /// in a `nom::Err` and therefore borrowed from the input.
        fn function_with_return_val<'a>() -> Result<Vec<Option<LinkData<'a>>>, LinkParseError> {
            let header = r##"</terms>; rel="copy\"right"; anchor=#foo"##;

            link_strict::<VerboseError<&str>>(header)
        }

        function_with_return_val()?;

        Ok(())
    }

    #[test]
    fn test_can_clone_nicely() {
        /// The parsed data borrows from a local `String`, so it can only leave
        /// this function after being converted to its owned form.
        fn function_with_return_val() -> Vec<Option<LinkDataOwned>> {
            let header = r##"</terms>; rel="copy\"right"; anchor=#foo"##.to_owned();

            let parsed = link_strict::<VerboseError<&str>>(&header).unwrap();

            parsed
                .iter()
                .map(|link| link.as_ref().map(|data| data.to_owned()))
                .collect_vec()
        }

        function_with_return_val();
    }
}