1use std::fmt::Display;
2
3use itertools::Itertools;
4use nom::{
5 AsChar, IResult, Input, Offset, Parser,
6 branch::alt,
7 character::complete::{char, none_of, satisfy, space0},
8 combinator::{all_consuming, opt, recognize},
9 error::ParseError,
10 multi::{fold_many0, many_m_n, many0, many0_count, many1_count, separated_list1},
11 sequence::{delimited, pair, preceded},
12};
13use nom_language::error::VerboseError;
14
15use thiserror::Error;
16
17use crate::{is_qdtext, is_quoted_pair, is_tchar, optional_parser};
18
19pub fn tchar<I, E>(input: I) -> IResult<I, char, E>
25where
26 I: Input,
27 <I as Input>::Item: AsChar,
28 E: ParseError<I>,
29{
30 satisfy(is_tchar).parse(input)
31}
32
33pub fn token<I, E>(input: I) -> IResult<I, I, E>
35where
36 I: Input + Offset,
37 <I as Input>::Item: AsChar,
38 E: ParseError<I>,
39{
40 recognize(many1_count(tchar)).parse(input)
41}
42
43pub fn qdtext<I, E>(input: I) -> IResult<I, char, E>
45where
46 I: Input,
47 <I as Input>::Item: AsChar,
48 E: ParseError<I>,
49{
50 satisfy(is_qdtext).parse(input)
51}
52
53fn quoted_pair<I, E>(input: I) -> IResult<I, char, E>
55where
56 I: Input,
57 <I as Input>::Item: AsChar,
58 E: ParseError<I>,
59{
60 preceded(char('\\'), satisfy(is_quoted_pair)).parse(input)
61}
62
63fn quoted_string<I, E>(input: I) -> IResult<I, I, E>
69where
70 I: Input + Offset,
71 <I as Input>::Item: AsChar,
72 E: ParseError<I>,
73{
74 recognize(delimited(
75 char('"'),
76 recognize(many0_count(alt((quoted_pair, qdtext)))),
77 char('"'),
78 ))
79 .parse(input)
80}
81
82fn quoted_string_alloca<I, E>(input: I) -> IResult<I, String, E>
83where
84 I: Input,
85 <I as Input>::Item: AsChar,
86 E: ParseError<I>,
87{
88 all_consuming(delimited(
89 char('"'),
90 fold_many0(alt((quoted_pair, qdtext)), String::new, |mut acc, item| {
91 acc.push(item);
92 acc
93 }),
94 char('"'),
95 ))
96 .parse(input)
97}
98
99pub fn list<I, O, E, L>(
118 reasonable_count: usize,
119 element: L,
120 input: I,
121) -> IResult<I, Vec<Option<O>>, E>
122where
123 L: Parser<I, Output = O, Error = E>,
124 E: ParseError<I>,
125 I: Input + std::marker::Copy,
126 <I as Input>::Item: AsChar,
127{
128 let allow_empty_elements = reasonable_count != 0;
129 separated_list1(
130 many_m_n(1, reasonable_count + 1, (space0, char(','), space0)),
131 optional_parser(allow_empty_elements, element),
132 )
133 .parse(input)
134}
135
/// One parameter of a link, such as `rel="next"`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkParam<'a> {
    /// Parameter name, borrowed from the parsed input.
    pub key: &'a str,
    /// Parameter value, or `None` when the parameter has no `=value` part.
    ///
    /// Owned rather than borrowed because quoted values are unescaped while
    /// parsing and therefore may differ from the raw input.
    pub val: Option<String>,
}

/// A single parsed link: its target and the parameters attached to it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkData<'a> {
    /// The text found between `<` and `>`, borrowed from the parsed input.
    pub url: &'a str,
    /// The link's parameters, in the order they appeared.
    pub params: Vec<LinkParam<'a>>,
}
155
/// Owned counterpart of `LinkParam`: holds its key as a `String` so it does
/// not borrow from the parsed input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkParamOwned {
    /// Parameter name.
    pub key: String,
    /// Parameter value, or `None` when the parameter has no value.
    pub val: Option<String>,
}

/// Owned counterpart of `LinkData`: holds its URL as a `String` so it does
/// not borrow from the parsed input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkDataOwned {
    /// The link target.
    pub url: String,
    /// The link's parameters, in the order they appeared.
    pub params: Vec<LinkParamOwned>,
}
167
168impl LinkParam<'_> {
169 pub fn to_owned(&self) -> LinkParamOwned {
170 LinkParamOwned {
171 key: self.key.to_owned(),
172 val: self.val.to_owned(),
173 }
174 }
175}
176
177impl LinkData<'_> {
178 pub fn to_owned(&self) -> LinkDataOwned {
179 LinkDataOwned {
180 url: self.url.to_owned(),
181 params: self.params.iter().map(|x| x.to_owned()).collect_vec(),
182 }
183 }
184}
185
186#[derive(Error, Debug, Clone, PartialEq)]
187pub enum LinkParseError {
188 #[error("left over data could not be parsed: `{0}`")]
189 IncompleteParse(String),
190 #[error("the data for key `{0}` is not available")]
191 FailedToParse(String),
192}
193
194#[doc(hidden)]
195impl<E> From<nom::Err<VerboseError<E>>> for LinkParseError
196where
197 E: Display,
198{
199 fn from(err: nom::Err<VerboseError<E>>) -> Self {
200 match err {
201 nom::Err::Incomplete(_) => Self::FailedToParse("Incomplete input".into()),
202 nom::Err::Error(err) | nom::Err::Failure(err) => Self::FailedToParse(err.to_string()),
203 }
204 }
205}
206
/// Value passed as `reasonable_count` to [`list`] when parsing links.
///
/// Being non-zero, it makes `list` allow empty list elements and accept up to
/// `NUM_EMPTY_ELEMENTS + 1` consecutive commas as one separator.
const NUM_EMPTY_ELEMENTS: usize = 2;
210
/// How the link parser treats an empty parameter (e.g. the gap in `a;;b`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParseStrictness {
    /// An empty parameter is reported as an error.
    Strict,
    /// An empty parameter is silently skipped.
    Lenient,
}
215
216pub fn link_strict<'a, E>(input: &'a str) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
248where
249 E: ParseError<&'a str>,
250 nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
251{
252 link_inner(input, ParseStrictness::Strict)
253}
254
255pub fn link_lenient<'a, E>(input: &'a str) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
265where
266 E: ParseError<&'a str>,
267 nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
268{
269 link_inner(input, ParseStrictness::Lenient)
270}
271
272#[deprecated(since = "0.4.0", note = "please use `link_strict` instead")]
276pub fn link<'a, E>(input: &'a str) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
277where
278 E: ParseError<&'a str>,
279 nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
280{
281 link_inner(input, ParseStrictness::Strict)
282}
283
284fn link_inner<'a, E>(
285 input: &'a str,
286 strictness: ParseStrictness,
287) -> Result<Vec<Option<LinkData<'a>>>, LinkParseError>
288where
289 E: ParseError<&'a str>,
290 nom::Err<VerboseError<&'a str>>: From<nom::Err<E>>,
291{
292 type ParserOutput<'s> = (
293 &'s str,
294 Vec<Option<(&'s str, Vec<Option<(&'s str, Option<&'s str>)>>)>>,
295 );
296 let parsed = list::<_, _, VerboseError<&str>, _>(
297 NUM_EMPTY_ELEMENTS,
298 (
299 delimited(char('<'), recognize(many0_count(none_of(">"))), char('>')),
300 many0(preceded(
301 (space0, char(';'), space0),
302 opt(pair(
303 token::<&str, VerboseError<&str>>,
304 opt(preceded(
305 pair(char('='), space0),
306 alt((quoted_string, token)),
307 )),
308 )),
309 )),
310 ),
311 input,
312 );
313
314 let (remainder, mut output): ParserOutput<'a> = parsed?;
315
316 if !remainder.is_empty() {
317 return Err(LinkParseError::IncompleteParse(remainder.to_owned()));
318 }
319
320 output
321 .drain(..)
322 .map(|parsed_link| {
323 let mut parsed_link = match parsed_link {
324 Some(l) => l,
325 None => return Ok(None),
326 };
327
328 let link_params = parsed_link
329 .1
330 .drain(..)
331 .map(|link_param| {
332 let Some(link_param) = link_param else {
333 match strictness {
334 ParseStrictness::Strict => {
335 return Err(LinkParseError::IncompleteParse(
336 "Empty parameter is disallowed".to_owned(),
337 ));
338 }
339 ParseStrictness::Lenient => {
340 return Ok(None);
341 }
342 }
343 };
344 let parsed_link_param_val = match link_param.1 {
345 None => None,
346 Some(link_param_val) if link_param_val.starts_with('"') => {
347 match quoted_string_alloca::<&str, VerboseError<&str>>(link_param_val) {
348 Ok(s) => Some(s.1),
349 Err(e) => {
350 return Err(e.into());
351 }
352 }
353 }
354 Some(link_param_val) => Some(link_param_val.to_owned()),
355 };
356
357 Ok(Some(LinkParam {
358 key: link_param.0,
359 val: parsed_link_param_val,
360 }))
361 })
362 .fold_ok(Vec::new(), |mut acc, item| {
363 if let Some(param) = item {
364 acc.push(param);
365 }
366 acc
367 })?;
368
369 Ok(Some(LinkData {
370 url: parsed_link.0,
371 params: link_params,
372 }))
373 })
374 .fold_ok(Vec::new(), |mut acc, item| {
375 acc.push(item);
376 acc
377 })
378}
379
380#[cfg(test)]
381mod tests {
382 use itertools::Itertools;
383 use nom::Err as OutCome;
384 use nom_language::error::VerboseError;
385
386 use crate::complete::{
387 LinkData, LinkDataOwned, LinkParam, LinkParseError, quoted_string, quoted_string_alloca,
388 tchar, token,
389 };
390
391 use super::{link_lenient, link_strict, list, quoted_pair};
392
393 #[test]
394 fn test_tchar() {
395 assert_eq!(tchar::<_, VerboseError<&str>>("mbbb"), Ok(("bbb", 'm')));
396 assert_eq!(tchar::<_, VerboseError<&str>>("!aa"), Ok(("aa", '!')));
397 assert!(tchar::<_, VerboseError<&str>>(",").is_err());
398 }
399
400 #[test]
401 fn test_token() {
402 assert!(matches!(
403 token::<_, VerboseError<&str>>(""),
404 Err(OutCome::Error(_))
405 ));
406 assert_eq!(token::<_, VerboseError<&str>>("mbbb"), Ok(("", "mbbb")));
407 assert_eq!(token::<_, VerboseError<&str>>("a,"), Ok((",", "a")));
408 assert!(matches!(
409 token::<_, VerboseError<&str>>(","),
410 Err(OutCome::Error(_))
411 ));
412 }
413
414 #[test]
415 fn test_quoted_string() {
416 assert_eq!(
417 quoted_string::<_, VerboseError<&str>>(r#""""#),
418 Ok(("", r#""""#))
419 );
420
421 assert_eq!(
422 quoted_string::<_, VerboseError<&str>>(r#""hello""#),
423 Ok(("", r#""hello""#))
424 );
425
426 assert_eq!(
427 quoted_string::<_, VerboseError<&str>>(r#""\"hello""#),
428 Ok(("", r#""\"hello""#))
429 );
430
431 assert!(matches!(
432 quoted_string::<_, VerboseError<&str>>(r#""awd"#),
433 Err(OutCome::Error(_))
434 ));
435
436 assert!(matches!(
437 quoted_string::<_, VerboseError<&str>>(r#" "text""#),
438 Err(OutCome::Error(_))
439 ));
440
441 assert_eq!(
442 quoted_string::<_, VerboseError<&str>>(r#""awd"trailing"#),
443 Ok(("trailing", r#""awd""#))
444 );
445 }
446
447 #[test]
448 fn test_list_rule() {
449 assert_eq!(
450 list::<_, _, VerboseError<&str>, _>(0, token, "a,b,c"),
451 Ok(("", vec![Some("a"), Some("b"), Some("c")]))
452 );
453
454 assert_eq!(
455 list::<_, _, VerboseError<&str>, _>(0, token, "a , b , c"),
456 Ok(("", vec![Some("a"), Some("b"), Some("c")]))
457 );
458
459 assert_eq!(
460 list::<_, _, VerboseError<&str>, _>(0, token, "a , b , "),
461 Ok((" , ", vec![Some("a"), Some("b")]))
462 );
463
464 assert_eq!(
465 list::<_, _, VerboseError<&str>, _>(0, token, "a , b , ,"),
466 Ok((" , ,", vec![Some("a"), Some("b")]))
467 );
468 }
469
470 #[test]
471 fn test_link() {
472 let input = r##"</terms>; rel="copyright"; anchor="#foo""##;
473
474 let res = link_strict::<VerboseError<&str>>(input).unwrap();
475
476 assert_eq!(
477 res,
478 vec![Some(LinkData {
479 url: "/terms",
480 params: vec![
481 LinkParam {
482 key: "rel",
483 val: Some("copyright".into())
484 },
485 LinkParam {
486 key: "anchor",
487 val: Some("#foo".into())
488 }
489 ]
490 })]
491 );
492 }
493
494 #[test]
495 fn test_empty_param_with_strict_parsing() {
496 let input = r##"</terms>; rel="copyright";; anchor="#foo""##;
497
498 let res = link_strict::<VerboseError<&str>>(input);
499
500 assert_eq!(
501 res,
502 Err(LinkParseError::IncompleteParse(
503 "Empty parameter is disallowed".to_owned()
504 ))
505 );
506 }
507
508 #[test]
509 fn test_empty_param_with_lenient_parsing() {
510 let input = r##"</terms>; rel="copyright";; anchor="#foo""##;
511
512 let res = link_lenient::<VerboseError<&str>>(input).unwrap();
513
514 assert_eq!(
515 res,
516 vec![Some(LinkData {
517 url: "/terms",
518 params: vec![
519 LinkParam {
520 key: "rel",
521 val: Some("copyright".into())
522 },
523 LinkParam {
524 key: "anchor",
525 val: Some("#foo".into())
526 }
527 ]
528 })]
529 );
530 }
531
532 #[test]
533 fn test_quoted_pair() {
534 let input = r#"\a"#;
535
536 let res = quoted_pair::<_, VerboseError<&str>>(input).unwrap();
537 assert_eq!(res.1, 'a');
538 }
539
540 #[test]
541 fn test_quoted_string_alloca() {
542 let input = r#""aaaa""#;
543
544 let res = quoted_string_alloca::<_, VerboseError<&str>>(input).unwrap();
545 assert_eq!(res.1, "aaaa".to_owned());
546 }
547
548 #[test]
549 fn test_quoted_string_alloca_quotes() {
550 let input = r#""aa\"aa""#;
551
552 let res = quoted_string_alloca::<_, VerboseError<&str>>(input).unwrap();
553 assert_eq!(res.1, "aa\"aa".to_owned());
554 }
555
556 #[test]
557 fn test_link_quoted_link_param() {
558 let input = r##"</terms>; rel="copy\"right"; anchor=#foo"##;
559
560 let res = link_strict::<VerboseError<&str>>(input).unwrap();
561
562 assert_eq!(
563 res,
564 vec![Some(LinkData {
565 url: "/terms",
566 params: vec![
567 LinkParam {
568 key: "rel",
569 val: Some("copy\"right".into())
570 },
571 LinkParam {
572 key: "anchor",
573 val: Some("#foo".into())
574 }
575 ]
576 })]
577 );
578 }
579
580 #[test]
581 fn test_error_return_ergonomics() -> Result<(), LinkParseError> {
582 fn function_with_return_val<'a>() -> Result<Vec<Option<LinkData<'a>>>, LinkParseError> {
585 let input = r##"</terms>; rel="copy\"right"; anchor=#foo"##;
586
587 link_strict::<VerboseError<&str>>(input)
588 }
589
590 function_with_return_val()?;
591
592 Ok(())
593 }
594
595 #[test]
596 fn test_can_clone_nicely() {
597 fn function_with_return_val<'a>() -> Vec<Option<LinkDataOwned>> {
598 let input = r##"</terms>; rel="copy\"right"; anchor=#foo"##.to_owned();
599
600 link_strict::<VerboseError<&str>>(&input)
601 .unwrap()
602 .iter()
603 .map(|x| match x {
604 Some(x) => Some(x.to_owned()),
605 None => None,
606 })
607 .collect_vec()
608 }
609
610 function_with_return_val();
611 }
612}