winnow/combinator/
expression.rs

1use crate::combinator::empty;
2use crate::combinator::fail;
3use crate::combinator::opt;
4use crate::combinator::trace;
5use crate::error::ParserError;
6use crate::stream::Stream;
7use crate::stream::StreamIsPartial;
8use crate::Parser;
9use crate::Result;
10
11/// Parses an expression based on operator precedence.
12///
13/// It uses a Pratt parsing algorithm, where operators are
14/// associated with a binding power. The higher the power,
15/// the more tightly an operator will bind to its operands.
16///
17/// This method returns an [`Expression`], which configures
18/// the Pratt parser.
19///
20/// Each operator type is configured with [`Prefix`], [`Postfix`],
21/// and [`Infix`]. These describe the operator's binding power,
22/// a function that applies the operator to its operand, and the
23/// operator's associativity (infix only).
24///
25/// For a more full-featured example, look at the [C-style Expression][crate::_topic::arithmetic#c-style-expression]
26/// topic.
27///
28/// # Example
29///
30/// Parsing a simple arithmetic expression without parenthesis.
31///
32/// ```rust
33/// # use winnow::prelude::*;
34/// # use winnow::error::ContextError;
35/// # use winnow::ascii::digit1;
36/// # use winnow::combinator::{dispatch, fail};
37/// # use winnow::token::any;
38/// use winnow::combinator::expression;
39/// use winnow::combinator::{Prefix, Postfix, Infix};
40///
41/// fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
42///     move |i: &mut &str| {
43///         use Infix::*;
44///         expression(digit1.parse_to::<i32>()) // operands are 32-bit integers
45///             .prefix(dispatch! {any;
46///                 '-' => Prefix(12, |_, a: i32| Ok(-a)),
47///                 _ => fail,
48///             })
49///             .infix(dispatch! {any;
50///                 '+' => Left(5, |_, a, b| Ok(a + b)),
51///                 '-' => Left(5, |_, a, b| Ok(a - b)),
52///                 '*' => Left(7, |_, a, b| Ok(a * b)),
53///                 '/' => Left(7, |_, a: i32, b| Ok(a.checked_div(b).unwrap_or_default())),
54///                 _ => fail,
55///             })
56///             .postfix(dispatch! {any;
57///                 '!' => Postfix(15, |_, a| if a < 1 { Ok(1) } else { Ok((1..=a).fold(1, |acc, a| acc*a)) }),
58///                 _ => fail,
59///             })
60///             .parse_next(i)
61///     }
62/// }
63///
64/// assert_eq!(parser().parse("1+1"), Ok(2));
65/// assert_eq!(parser().parse("0!"), Ok(1));
66/// assert_eq!(parser().parse("-1*5*2*10+30/3!"), Ok(-95));
67/// ```
68#[doc(alias = "pratt")]
69#[doc(alias = "separated")]
70#[doc(alias = "shunting_yard")]
71#[doc(alias = "precedence_climbing")]
72#[inline(always)]
73#[allow(clippy::type_complexity)]
74pub fn expression<I, ParseOperand, O, E>(
75    parse_operand: ParseOperand,
76) -> Expression<
77    I,
78    O,
79    ParseOperand,
80    impl Parser<I, Prefix<I, O, E>, E>,
81    impl Parser<I, Postfix<I, O, E>, E>,
82    impl Parser<I, Infix<I, O, E>, E>,
83    E,
84>
85where
86    I: Stream + StreamIsPartial,
87    ParseOperand: Parser<I, O, E>,
88    E: ParserError<I>,
89{
90    Expression {
91        precedence_level: 0,
92        parse_operand,
93        parse_prefix: fail,
94        parse_postfix: fail,
95        parse_infix: fail,
96        i: Default::default(),
97        o: Default::default(),
98        e: Default::default(),
99    }
100}
101
102/// A helper struct for [`expression()`].
103///
104/// Holds the configuration for the Pratt parser, including
105/// the operator and operand parsers. A precedence level can
106/// also be set, which is useful to disambiguate parse trees
107/// based on the parent operator's precedence.
108///
109/// Implements [`Parser`]. When parsing an input, it applies
110/// the Pratt parser.
111pub struct Expression<I, O, ParseOperand, Pre, Post, Pix, E>
112where
113    I: Stream + StreamIsPartial,
114    ParseOperand: Parser<I, O, E>,
115    E: ParserError<I>,
116{
117    precedence_level: i64,
118    parse_operand: ParseOperand,
119    parse_prefix: Pre,
120    parse_postfix: Post,
121    parse_infix: Pix,
122    i: core::marker::PhantomData<I>,
123    o: core::marker::PhantomData<O>,
124    e: core::marker::PhantomData<E>,
125}
126
127impl<I, O, ParseOperand, Pre, Post, Pix, E> Expression<I, O, ParseOperand, Pre, Post, Pix, E>
128where
129    ParseOperand: Parser<I, O, E>,
130    I: Stream + StreamIsPartial,
131    E: ParserError<I>,
132{
133    /// Sets the prefix operator parser.
134    ///
135    /// The parser should parse the input to a [`Prefix`],
136    /// which contains the operator's binding power and
137    /// a fold function which applies the operator to its
138    /// operands.
139    #[inline(always)]
140    pub fn prefix<NewParsePrefix>(
141        self,
142        parser: NewParsePrefix,
143    ) -> Expression<I, O, ParseOperand, NewParsePrefix, Post, Pix, E>
144    where
145        NewParsePrefix: Parser<I, Prefix<I, O, E>, E>,
146    {
147        Expression {
148            precedence_level: self.precedence_level,
149            parse_operand: self.parse_operand,
150            parse_prefix: parser,
151            parse_postfix: self.parse_postfix,
152            parse_infix: self.parse_infix,
153            i: Default::default(),
154            o: Default::default(),
155            e: Default::default(),
156        }
157    }
158
159    /// Sets the prefix operator parser.
160    ///
161    /// The parser should parse the input to a [`Postfix`],
162    /// which contains the operator's binding power and
163    /// a fold function which applies the operator to its
164    /// operands.
165    #[inline(always)]
166    pub fn postfix<NewParsePostfix>(
167        self,
168        parser: NewParsePostfix,
169    ) -> Expression<I, O, ParseOperand, Pre, NewParsePostfix, Pix, E>
170    where
171        NewParsePostfix: Parser<I, Postfix<I, O, E>, E>,
172    {
173        Expression {
174            precedence_level: self.precedence_level,
175            parse_operand: self.parse_operand,
176            parse_prefix: self.parse_prefix,
177            parse_postfix: parser,
178            parse_infix: self.parse_infix,
179            i: Default::default(),
180            o: Default::default(),
181            e: Default::default(),
182        }
183    }
184
185    /// Sets the prefix operator parser.
186    ///
187    /// The parser should parse the input to a [`Infix`],
188    /// which contains the operator's binding power and
189    /// a fold function which applies the operator to its
190    /// operands.
191    #[inline(always)]
192    pub fn infix<NewParseInfix>(
193        self,
194        parser: NewParseInfix,
195    ) -> Expression<I, O, ParseOperand, Pre, Post, NewParseInfix, E>
196    where
197        NewParseInfix: Parser<I, Infix<I, O, E>, E>,
198    {
199        Expression {
200            precedence_level: self.precedence_level,
201            parse_operand: self.parse_operand,
202            parse_prefix: self.parse_prefix,
203            parse_postfix: self.parse_postfix,
204            parse_infix: parser,
205            i: Default::default(),
206            o: Default::default(),
207            e: Default::default(),
208        }
209    }
210
211    /// Sets the precedence level for the current instance of the parser.
212    ///
213    /// It defaults to 0, which is traditionally treated as the "lowest"
214    /// possible precedence when parsing an expression.
215    ///
216    /// This is useful to disambiguate grammars based on the parent operator's
217    /// precedence. This comes up primarily when parsing recursive expressions.
218    ///
219    /// The parsing machinery underpinning [`Expression`] assumes that a "more
220    /// tightly binding" operator is numerically large, while a "more loosely
221    /// binding" operator is numerically small. For example, `13` is a higher
222    /// precedence level than `1` because `13 > 1`.
223    ///
224    /// Other ways of describing this relationship:
225    /// - `13` has a higher precedence compared to `1`
226    /// - `13` has a higher binding power compared to `1`
227    ///
228    /// Note: Binding power and precedence both refer to the same concept and
229    /// may be used interchangeably.
230    ///
231    /// # Motivation
232    ///
233    /// If you don't understand why this is useful to have, this section tries
234    /// to explain in more detail.
235    ///
236    /// The [C-style Expressions][crate::_topic::arithmetic#c-style-expression]
237    /// example has source code for parsing the expression described below, and
238    /// can provide a clearer usage example.
239    ///
240    /// Consider the following expression in the C language:
241    ///
242    /// ```c
243    /// int x = (1 == 1 ? 0 : 1, -123); // <-- let's parse this
244    /// printf("%d\n", x); // -123
245    /// ```
246    ///
247    /// Let's look at the right-hand side of the expression on the first line,
248    /// and replace some of the sub-expressions with symbols:
249    ///
250    /// ```text
251    /// (1 == 1 ? 0 : 1, -123) // rhs
252    /// (a      ? b : c, d  )  // symbolic
253    /// (a ? b : c, d)         // remove whitespace
254    /// (, (? a b c) d)        // prefix notation
255    /// ```
256    ///
257    /// Written symbolically:
258    /// - `a` is the condition, like `1 == 1`
259    /// - `b` is the value when the condition is true
260    /// - `c` is the value when the condition is false
261    /// - `d` is a secondary expression unrelated to the ternary
262    ///
263    /// In prefix notation, it's easier to see the specific operators and what
264    /// they bind to:
265    /// - COMMA (`,`) binds to `(? a b c)` and `d`
266    /// - TERNARY (`?`) binds to `a`, `b`, and `c`
267    ///
268    /// ## Parsing `c` and `d`
269    ///
270    /// Let's focus on parsing the sub-expressions `c` and `d`, as that
271    /// motivates why a parser precedence level is necessary.
272    ///
273    /// To parse `c`, we would really like to re-use the parser produced by
274    /// [`expression()`], because `c` is really *any* valid expression that
275    /// can be parsed by `expression()` already.
276    ///
277    /// However, we can't re-use the parser naively. When parsing `c`, we need
278    /// to "escape" from the inner parser when encountering the comma separating
279    /// `c` from `d`.
280    ///
281    /// The reason we have to "escape" is because of how operator precedence is
282    /// defined in the C language: the comma operator has the lowest precedence
283    /// among all the operators. When we're parsing `c`, we're in the context of
284    /// the ternary operator. We don't want to parse any valid expression! Just
285    /// what the ternary operator captures.
286    ///
287    /// That's where the precedence level comes in: you specify the minimum
288    /// precedence this parser is willing to accept. If you come across an
289    /// expression in the top-level with a lower binding power than the starting
290    /// precedence, you know to stop parsing.
291    ///
292    /// The parsing machinery inside of [`Expression`] handles most of this for
293    /// you, but it can't determine what the precedence level should be for a
294    /// given expression. That's a language-specific detail, and it depends on
295    /// what you want to parse.
296    #[inline(always)]
297    pub fn current_precedence_level(
298        mut self,
299        level: i64,
300    ) -> Expression<I, O, ParseOperand, Pre, Post, Pix, E> {
301        self.precedence_level = level;
302        self
303    }
304}
305
306impl<I, O, Pop, Pre, Post, Pix, E> Parser<I, O, E> for Expression<I, O, Pop, Pre, Post, Pix, E>
307where
308    I: Stream + StreamIsPartial,
309    Pop: Parser<I, O, E>,
310    Pix: Parser<I, Infix<I, O, E>, E>,
311    Pre: Parser<I, Prefix<I, O, E>, E>,
312    Post: Parser<I, Postfix<I, O, E>, E>,
313    E: ParserError<I>,
314{
315    #[inline(always)]
316    fn parse_next(&mut self, input: &mut I) -> Result<O, E> {
317        trace("expression", move |i: &mut I| {
318            expression_impl(
319                i,
320                &mut self.parse_operand,
321                &mut self.parse_prefix,
322                &mut self.parse_postfix,
323                &mut self.parse_infix,
324                self.precedence_level,
325            )
326        })
327        .parse_next(input)
328    }
329}
330
331/// Opaque implementation of the Pratt parser.
332fn expression_impl<I, O, Pop, Pre, Post, Pix, E>(
333    i: &mut I,
334    parse_operand: &mut Pop,
335    prefix: &mut Pre,
336    postfix: &mut Post,
337    infix: &mut Pix,
338    min_power: i64,
339) -> Result<O, E>
340where
341    I: Stream + StreamIsPartial,
342    Pop: Parser<I, O, E>,
343    Pix: Parser<I, Infix<I, O, E>, E>,
344    Pre: Parser<I, Prefix<I, O, E>, E>,
345    Post: Parser<I, Postfix<I, O, E>, E>,
346    E: ParserError<I>,
347{
348    let operand = opt(trace("operand", parse_operand.by_ref())).parse_next(i)?;
349    let mut operand = if let Some(operand) = operand {
350        operand
351    } else {
352        // Prefix unary operators
353        let len = i.eof_offset();
354        let Prefix(power, fold_prefix) = trace("prefix", prefix.by_ref()).parse_next(i)?;
355        // infinite loop check: the parser must always consume
356        if i.eof_offset() == len {
357            return Err(E::assert(i, "`prefix` parsers must always consume"));
358        }
359        let operand = expression_impl(i, parse_operand, prefix, postfix, infix, power)?;
360        fold_prefix(i, operand)?
361    };
362
363    // A variable to stop the `'parse` loop when `Assoc::Neither` with the same
364    // precedence is encountered e.g. `a == b == c`. `Assoc::Neither` has similar
365    // associativity rules as `Assoc::Left`, but we stop parsing when the next operator
366    // is the same as the current one.
367    let mut prev_op_is_neither = None;
368    'parse: while i.eof_offset() > 0 {
369        // Postfix unary operators
370        let start = i.checkpoint();
371        if let Some(Postfix(power, fold_postfix)) =
372            opt(trace("postfix", postfix.by_ref())).parse_next(i)?
373        {
374            // control precedence over the prefix e.g.:
375            // `--(i++)` or `(--i)++`
376            if power < min_power {
377                i.reset(&start);
378                break 'parse;
379            }
380            operand = fold_postfix(i, operand)?;
381
382            continue 'parse;
383        }
384
385        // Infix binary operators
386        let start = i.checkpoint();
387        let parse_result = opt(trace("infix", infix.by_ref())).parse_next(i)?;
388        if let Some(infix_op) = parse_result {
389            let mut is_neither = None;
390            let (lpower, rpower, fold_infix) = match infix_op {
391                Infix::Right(p, f) => (p, p - 1, f),
392                Infix::Left(p, f) => (p, p + 1, f),
393                Infix::Neither(p, f) => {
394                    is_neither = Some(p);
395                    (p, p + 1, f)
396                }
397            };
398            if lpower < min_power
399                // MSRV: `is_some_and`
400                || match prev_op_is_neither {
401                    None => false,
402                    Some(p) => lpower == p,
403                }
404            {
405                i.reset(&start);
406                break 'parse;
407            }
408            prev_op_is_neither = is_neither;
409            let rhs = expression_impl(i, parse_operand, prefix, postfix, infix, rpower)?;
410            operand = fold_infix(i, operand, rhs)?;
411
412            continue 'parse;
413        }
414
415        break 'parse;
416    }
417
418    Ok(operand)
419}
420
421/// Define an [`expression()`]'s prefix operator
422///
423/// It requires an operator binding power, as well as a
424/// fold function which applies the operator.
425pub struct Prefix<I, O, E>(
426    /// Binding power
427    pub i64,
428    /// Unary operator
429    pub fn(&mut I, O) -> Result<O, E>,
430);
431
432impl<I, O, E> Clone for Prefix<I, O, E> {
433    #[inline(always)]
434    fn clone(&self) -> Self {
435        Prefix(self.0, self.1)
436    }
437}
438
439impl<I: Stream, O, E: ParserError<I>> Parser<I, Prefix<I, O, E>, E> for Prefix<I, O, E> {
440    #[inline(always)]
441    fn parse_next(&mut self, input: &mut I) -> Result<Prefix<I, O, E>, E> {
442        empty.value(self.clone()).parse_next(input)
443    }
444}
445
446/// Define an [`expression()`]'s postfix operator
447///
448/// It requires an operator binding power, as well as a
449/// fold function which applies the operator.
450pub struct Postfix<I, O, E>(
451    /// Binding power
452    pub i64,
453    /// Unary operator
454    pub fn(&mut I, O) -> Result<O, E>,
455);
456
457impl<I, O, E> Clone for Postfix<I, O, E> {
458    #[inline(always)]
459    fn clone(&self) -> Self {
460        Postfix(self.0, self.1)
461    }
462}
463
464impl<I: Stream, O, E: ParserError<I>> Parser<I, Postfix<I, O, E>, E> for Postfix<I, O, E> {
465    #[inline(always)]
466    fn parse_next(&mut self, input: &mut I) -> Result<Postfix<I, O, E>, E> {
467        empty.value(self.clone()).parse_next(input)
468    }
469}
470
471/// Define an [`expression()`]'s infix operator
472///
473/// It requires an operator binding power, as well as a
474/// fold function which applies the operator.
475pub enum Infix<I, O, E> {
476    /// Left-associative operator
477    ///
478    /// The operators will bind more tightly to their rightmost operands.
479    ///
480    /// e.g `A op B op C` -> `(A op B) op C`
481    Left(
482        /// Binding power
483        i64,
484        /// Binary operator
485        fn(&mut I, O, O) -> Result<O, E>,
486    ),
487    /// Right-associative operator
488    ///
489    /// The operators will bind more tightly to their leftmost operands.
490    ///
491    /// e.g `A op B op C` -> `A op (B op C)`
492    Right(
493        /// Binding power
494        i64,
495        /// Binary operator
496        fn(&mut I, O, O) -> Result<O, E>,
497    ),
498    /// Neither left or right associative
499    ///
500    /// `Infix::Neither` has similar associativity rules as `Assoc::Left`, but we stop
501    /// parsing when the next operator is the same as the current one.
502    ///
503    /// e.g. `a == b == c` -> `(a == b)`, fail: `(== c)`
504    Neither(
505        /// Binding power
506        i64,
507        /// Binary operator
508        fn(&mut I, O, O) -> Result<O, E>,
509    ),
510}
511
512impl<I, O, E> Clone for Infix<I, O, E> {
513    #[inline(always)]
514    fn clone(&self) -> Self {
515        match self {
516            Infix::Left(p, f) => Infix::Left(*p, *f),
517            Infix::Right(p, f) => Infix::Right(*p, *f),
518            Infix::Neither(p, f) => Infix::Neither(*p, *f),
519        }
520    }
521}
522
523impl<I: Stream, O, E: ParserError<I>> Parser<I, Infix<I, O, E>, E> for Infix<I, O, E> {
524    #[inline(always)]
525    fn parse_next(&mut self, input: &mut I) -> Result<Infix<I, O, E>, E> {
526        empty.value(self.clone()).parse_next(input)
527    }
528}
529
530#[cfg(test)]
531mod tests {
532    use crate::ascii::digit1;
533    use crate::combinator::fail;
534    use crate::dispatch;
535    use crate::error::ContextError;
536    use crate::token::any;
537
538    use super::*;
539
540    fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
541        move |i: &mut &str| {
542            use Infix::*;
543            expression(digit1.parse_to::<i32>())
544                .current_precedence_level(0)
545                .prefix(dispatch! {any;
546                    '+' => Prefix(12, |_, a| Ok(a)),
547                    '-' => Prefix(12, |_, a: i32| Ok(-a)),
548                    _ => fail
549                })
550                .infix(dispatch! {any;
551                   '+' => Left(5, |_, a, b| Ok(a + b)),
552                   '-' => Left(5, |_, a, b| Ok(a - b)),
553                   '*' => Left(7, |_, a, b| Ok(a * b)),
554                   '/' => Left(7, |_, a, b| Ok(a / b)),
555                   '%' => Left(7, |_, a, b| Ok(a % b)),
556                   '^' => Left(9, |_, a, b| Ok(a ^ b)),
557                   _ => fail
558                })
559                .parse_next(i)
560        }
561    }
562
563    #[test]
564    fn test_expression() {
565        assert_eq!(parser().parse("-3+-3*4"), Ok(-15));
566        assert_eq!(parser().parse("+2+3*4"), Ok(14));
567        assert_eq!(parser().parse("2*3+4"), Ok(10));
568    }
569}