winnow/combinator/expression.rs
1use crate::combinator::empty;
2use crate::combinator::fail;
3use crate::combinator::opt;
4use crate::combinator::trace;
5use crate::error::ParserError;
6use crate::stream::Stream;
7use crate::stream::StreamIsPartial;
8use crate::Parser;
9use crate::Result;
10
11/// Parses an expression based on operator precedence.
12///
13/// It uses a Pratt parsing algorithm, where operators are
14/// associated with a binding power. The higher the power,
15/// the more tightly an operator will bind to its operands.
16///
17/// This method returns an [`Expression`], which configures
18/// the Pratt parser.
19///
20/// Each operator type is configured with [`Prefix`], [`Postfix`],
21/// and [`Infix`]. These describe the operator's binding power,
22/// a function that applies the operator to its operand, and the
23/// operator's associativity (infix only).
24///
25/// For a more full-featured example, look at the [C-style Expression][crate::_topic::arithmetic#c-style-expression]
26/// topic.
27///
28/// # Example
29///
30/// Parsing a simple arithmetic expression without parenthesis.
31///
32/// ```rust
33/// # use winnow::prelude::*;
34/// # use winnow::error::ContextError;
35/// # use winnow::ascii::digit1;
36/// # use winnow::combinator::{dispatch, fail};
37/// # use winnow::token::any;
38/// use winnow::combinator::expression;
39/// use winnow::combinator::{Prefix, Postfix, Infix};
40///
41/// fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
42/// move |i: &mut &str| {
43/// use Infix::*;
44/// expression(digit1.parse_to::<i32>()) // operands are 32-bit integers
45/// .prefix(dispatch! {any;
46/// '-' => Prefix(12, |_, a: i32| Ok(-a)),
47/// _ => fail,
48/// })
49/// .infix(dispatch! {any;
50/// '+' => Left(5, |_, a, b| Ok(a + b)),
51/// '-' => Left(5, |_, a, b| Ok(a - b)),
52/// '*' => Left(7, |_, a, b| Ok(a * b)),
53/// '/' => Left(7, |_, a: i32, b| Ok(a.checked_div(b).unwrap_or_default())),
54/// _ => fail,
55/// })
56/// .postfix(dispatch! {any;
57/// '!' => Postfix(15, |_, a| if a < 1 { Ok(1) } else { Ok((1..=a).fold(1, |acc, a| acc*a)) }),
58/// _ => fail,
59/// })
60/// .parse_next(i)
61/// }
62/// }
63///
64/// assert_eq!(parser().parse("1+1"), Ok(2));
65/// assert_eq!(parser().parse("0!"), Ok(1));
66/// assert_eq!(parser().parse("-1*5*2*10+30/3!"), Ok(-95));
67/// ```
68#[doc(alias = "pratt")]
69#[doc(alias = "separated")]
70#[doc(alias = "shunting_yard")]
71#[doc(alias = "precedence_climbing")]
72#[inline(always)]
73#[allow(clippy::type_complexity)]
74pub fn expression<I, ParseOperand, O, E>(
75 parse_operand: ParseOperand,
76) -> Expression<
77 I,
78 O,
79 ParseOperand,
80 impl Parser<I, Prefix<I, O, E>, E>,
81 impl Parser<I, Postfix<I, O, E>, E>,
82 impl Parser<I, Infix<I, O, E>, E>,
83 E,
84>
85where
86 I: Stream + StreamIsPartial,
87 ParseOperand: Parser<I, O, E>,
88 E: ParserError<I>,
89{
90 Expression {
91 precedence_level: 0,
92 parse_operand,
93 parse_prefix: fail,
94 parse_postfix: fail,
95 parse_infix: fail,
96 i: Default::default(),
97 o: Default::default(),
98 e: Default::default(),
99 }
100}
101
102/// A helper struct for [`expression()`].
103///
104/// Holds the configuration for the Pratt parser, including
105/// the operator and operand parsers. A precedence level can
106/// also be set, which is useful to disambiguate parse trees
107/// based on the parent operator's precedence.
108///
109/// Implements [`Parser`]. When parsing an input, it applies
110/// the Pratt parser.
111pub struct Expression<I, O, ParseOperand, Pre, Post, Pix, E>
112where
113 I: Stream + StreamIsPartial,
114 ParseOperand: Parser<I, O, E>,
115 E: ParserError<I>,
116{
117 precedence_level: i64,
118 parse_operand: ParseOperand,
119 parse_prefix: Pre,
120 parse_postfix: Post,
121 parse_infix: Pix,
122 i: core::marker::PhantomData<I>,
123 o: core::marker::PhantomData<O>,
124 e: core::marker::PhantomData<E>,
125}
126
127impl<I, O, ParseOperand, Pre, Post, Pix, E> Expression<I, O, ParseOperand, Pre, Post, Pix, E>
128where
129 ParseOperand: Parser<I, O, E>,
130 I: Stream + StreamIsPartial,
131 E: ParserError<I>,
132{
133 /// Sets the prefix operator parser.
134 ///
135 /// The parser should parse the input to a [`Prefix`],
136 /// which contains the operator's binding power and
137 /// a fold function which applies the operator to its
138 /// operands.
139 #[inline(always)]
140 pub fn prefix<NewParsePrefix>(
141 self,
142 parser: NewParsePrefix,
143 ) -> Expression<I, O, ParseOperand, NewParsePrefix, Post, Pix, E>
144 where
145 NewParsePrefix: Parser<I, Prefix<I, O, E>, E>,
146 {
147 Expression {
148 precedence_level: self.precedence_level,
149 parse_operand: self.parse_operand,
150 parse_prefix: parser,
151 parse_postfix: self.parse_postfix,
152 parse_infix: self.parse_infix,
153 i: Default::default(),
154 o: Default::default(),
155 e: Default::default(),
156 }
157 }
158
159 /// Sets the prefix operator parser.
160 ///
161 /// The parser should parse the input to a [`Postfix`],
162 /// which contains the operator's binding power and
163 /// a fold function which applies the operator to its
164 /// operands.
165 #[inline(always)]
166 pub fn postfix<NewParsePostfix>(
167 self,
168 parser: NewParsePostfix,
169 ) -> Expression<I, O, ParseOperand, Pre, NewParsePostfix, Pix, E>
170 where
171 NewParsePostfix: Parser<I, Postfix<I, O, E>, E>,
172 {
173 Expression {
174 precedence_level: self.precedence_level,
175 parse_operand: self.parse_operand,
176 parse_prefix: self.parse_prefix,
177 parse_postfix: parser,
178 parse_infix: self.parse_infix,
179 i: Default::default(),
180 o: Default::default(),
181 e: Default::default(),
182 }
183 }
184
185 /// Sets the prefix operator parser.
186 ///
187 /// The parser should parse the input to a [`Infix`],
188 /// which contains the operator's binding power and
189 /// a fold function which applies the operator to its
190 /// operands.
191 #[inline(always)]
192 pub fn infix<NewParseInfix>(
193 self,
194 parser: NewParseInfix,
195 ) -> Expression<I, O, ParseOperand, Pre, Post, NewParseInfix, E>
196 where
197 NewParseInfix: Parser<I, Infix<I, O, E>, E>,
198 {
199 Expression {
200 precedence_level: self.precedence_level,
201 parse_operand: self.parse_operand,
202 parse_prefix: self.parse_prefix,
203 parse_postfix: self.parse_postfix,
204 parse_infix: parser,
205 i: Default::default(),
206 o: Default::default(),
207 e: Default::default(),
208 }
209 }
210
211 /// Sets the precedence level for the current instance of the parser.
212 ///
213 /// It defaults to 0, which is traditionally treated as the "lowest"
214 /// possible precedence when parsing an expression.
215 ///
216 /// This is useful to disambiguate grammars based on the parent operator's
217 /// precedence. This comes up primarily when parsing recursive expressions.
218 ///
219 /// The parsing machinery underpinning [`Expression`] assumes that a "more
220 /// tightly binding" operator is numerically large, while a "more loosely
221 /// binding" operator is numerically small. For example, `13` is a higher
222 /// precedence level than `1` because `13 > 1`.
223 ///
224 /// Other ways of describing this relationship:
225 /// - `13` has a higher precedence compared to `1`
226 /// - `13` has a higher binding power compared to `1`
227 ///
228 /// Note: Binding power and precedence both refer to the same concept and
229 /// may be used interchangeably.
230 ///
231 /// # Motivation
232 ///
233 /// If you don't understand why this is useful to have, this section tries
234 /// to explain in more detail.
235 ///
236 /// The [C-style Expressions][crate::_topic::arithmetic#c-style-expression]
237 /// example has source code for parsing the expression described below, and
238 /// can provide a clearer usage example.
239 ///
240 /// Consider the following expression in the C language:
241 ///
242 /// ```c
243 /// int x = (1 == 1 ? 0 : 1, -123); // <-- let's parse this
244 /// printf("%d\n", x); // -123
245 /// ```
246 ///
247 /// Let's look at the right-hand side of the expression on the first line,
248 /// and replace some of the sub-expressions with symbols:
249 ///
250 /// ```text
251 /// (1 == 1 ? 0 : 1, -123) // rhs
252 /// (a ? b : c, d ) // symbolic
253 /// (a ? b : c, d) // remove whitespace
254 /// (, (? a b c) d) // prefix notation
255 /// ```
256 ///
257 /// Written symbolically:
258 /// - `a` is the condition, like `1 == 1`
259 /// - `b` is the value when the condition is true
260 /// - `c` is the value when the condition is false
261 /// - `d` is a secondary expression unrelated to the ternary
262 ///
263 /// In prefix notation, it's easier to see the specific operators and what
264 /// they bind to:
265 /// - COMMA (`,`) binds to `(? a b c)` and `d`
266 /// - TERNARY (`?`) binds to `a`, `b`, and `c`
267 ///
268 /// ## Parsing `c` and `d`
269 ///
270 /// Let's focus on parsing the sub-expressions `c` and `d`, as that
271 /// motivates why a parser precedence level is necessary.
272 ///
273 /// To parse `c`, we would really like to re-use the parser produced by
274 /// [`expression()`], because `c` is really *any* valid expression that
275 /// can be parsed by `expression()` already.
276 ///
277 /// However, we can't re-use the parser naively. When parsing `c`, we need
278 /// to "escape" from the inner parser when encountering the comma separating
279 /// `c` from `d`.
280 ///
281 /// The reason we have to "escape" is because of how operator precedence is
282 /// defined in the C language: the comma operator has the lowest precedence
283 /// among all the operators. When we're parsing `c`, we're in the context of
284 /// the ternary operator. We don't want to parse any valid expression! Just
285 /// what the ternary operator captures.
286 ///
287 /// That's where the precedence level comes in: you specify the minimum
288 /// precedence this parser is willing to accept. If you come across an
289 /// expression in the top-level with a lower binding power than the starting
290 /// precedence, you know to stop parsing.
291 ///
292 /// The parsing machinery inside of [`Expression`] handles most of this for
293 /// you, but it can't determine what the precedence level should be for a
294 /// given expression. That's a language-specific detail, and it depends on
295 /// what you want to parse.
296 #[inline(always)]
297 pub fn current_precedence_level(
298 mut self,
299 level: i64,
300 ) -> Expression<I, O, ParseOperand, Pre, Post, Pix, E> {
301 self.precedence_level = level;
302 self
303 }
304}
305
306impl<I, O, Pop, Pre, Post, Pix, E> Parser<I, O, E> for Expression<I, O, Pop, Pre, Post, Pix, E>
307where
308 I: Stream + StreamIsPartial,
309 Pop: Parser<I, O, E>,
310 Pix: Parser<I, Infix<I, O, E>, E>,
311 Pre: Parser<I, Prefix<I, O, E>, E>,
312 Post: Parser<I, Postfix<I, O, E>, E>,
313 E: ParserError<I>,
314{
315 #[inline(always)]
316 fn parse_next(&mut self, input: &mut I) -> Result<O, E> {
317 trace("expression", move |i: &mut I| {
318 expression_impl(
319 i,
320 &mut self.parse_operand,
321 &mut self.parse_prefix,
322 &mut self.parse_postfix,
323 &mut self.parse_infix,
324 self.precedence_level,
325 )
326 })
327 .parse_next(input)
328 }
329}
330
331/// Opaque implementation of the Pratt parser.
332fn expression_impl<I, O, Pop, Pre, Post, Pix, E>(
333 i: &mut I,
334 parse_operand: &mut Pop,
335 prefix: &mut Pre,
336 postfix: &mut Post,
337 infix: &mut Pix,
338 min_power: i64,
339) -> Result<O, E>
340where
341 I: Stream + StreamIsPartial,
342 Pop: Parser<I, O, E>,
343 Pix: Parser<I, Infix<I, O, E>, E>,
344 Pre: Parser<I, Prefix<I, O, E>, E>,
345 Post: Parser<I, Postfix<I, O, E>, E>,
346 E: ParserError<I>,
347{
348 let operand = opt(trace("operand", parse_operand.by_ref())).parse_next(i)?;
349 let mut operand = if let Some(operand) = operand {
350 operand
351 } else {
352 // Prefix unary operators
353 let len = i.eof_offset();
354 let Prefix(power, fold_prefix) = trace("prefix", prefix.by_ref()).parse_next(i)?;
355 // infinite loop check: the parser must always consume
356 if i.eof_offset() == len {
357 return Err(E::assert(i, "`prefix` parsers must always consume"));
358 }
359 let operand = expression_impl(i, parse_operand, prefix, postfix, infix, power)?;
360 fold_prefix(i, operand)?
361 };
362
363 // A variable to stop the `'parse` loop when `Assoc::Neither` with the same
364 // precedence is encountered e.g. `a == b == c`. `Assoc::Neither` has similar
365 // associativity rules as `Assoc::Left`, but we stop parsing when the next operator
366 // is the same as the current one.
367 let mut prev_op_is_neither = None;
368 'parse: while i.eof_offset() > 0 {
369 // Postfix unary operators
370 let start = i.checkpoint();
371 if let Some(Postfix(power, fold_postfix)) =
372 opt(trace("postfix", postfix.by_ref())).parse_next(i)?
373 {
374 // control precedence over the prefix e.g.:
375 // `--(i++)` or `(--i)++`
376 if power < min_power {
377 i.reset(&start);
378 break 'parse;
379 }
380 operand = fold_postfix(i, operand)?;
381
382 continue 'parse;
383 }
384
385 // Infix binary operators
386 let start = i.checkpoint();
387 let parse_result = opt(trace("infix", infix.by_ref())).parse_next(i)?;
388 if let Some(infix_op) = parse_result {
389 let mut is_neither = None;
390 let (lpower, rpower, fold_infix) = match infix_op {
391 Infix::Right(p, f) => (p, p - 1, f),
392 Infix::Left(p, f) => (p, p + 1, f),
393 Infix::Neither(p, f) => {
394 is_neither = Some(p);
395 (p, p + 1, f)
396 }
397 };
398 if lpower < min_power
399 // MSRV: `is_some_and`
400 || match prev_op_is_neither {
401 None => false,
402 Some(p) => lpower == p,
403 }
404 {
405 i.reset(&start);
406 break 'parse;
407 }
408 prev_op_is_neither = is_neither;
409 let rhs = expression_impl(i, parse_operand, prefix, postfix, infix, rpower)?;
410 operand = fold_infix(i, operand, rhs)?;
411
412 continue 'parse;
413 }
414
415 break 'parse;
416 }
417
418 Ok(operand)
419}
420
421/// Define an [`expression()`]'s prefix operator
422///
423/// It requires an operator binding power, as well as a
424/// fold function which applies the operator.
425pub struct Prefix<I, O, E>(
426 /// Binding power
427 pub i64,
428 /// Unary operator
429 pub fn(&mut I, O) -> Result<O, E>,
430);
431
432impl<I, O, E> Clone for Prefix<I, O, E> {
433 #[inline(always)]
434 fn clone(&self) -> Self {
435 Prefix(self.0, self.1)
436 }
437}
438
439impl<I: Stream, O, E: ParserError<I>> Parser<I, Prefix<I, O, E>, E> for Prefix<I, O, E> {
440 #[inline(always)]
441 fn parse_next(&mut self, input: &mut I) -> Result<Prefix<I, O, E>, E> {
442 empty.value(self.clone()).parse_next(input)
443 }
444}
445
446/// Define an [`expression()`]'s postfix operator
447///
448/// It requires an operator binding power, as well as a
449/// fold function which applies the operator.
450pub struct Postfix<I, O, E>(
451 /// Binding power
452 pub i64,
453 /// Unary operator
454 pub fn(&mut I, O) -> Result<O, E>,
455);
456
457impl<I, O, E> Clone for Postfix<I, O, E> {
458 #[inline(always)]
459 fn clone(&self) -> Self {
460 Postfix(self.0, self.1)
461 }
462}
463
464impl<I: Stream, O, E: ParserError<I>> Parser<I, Postfix<I, O, E>, E> for Postfix<I, O, E> {
465 #[inline(always)]
466 fn parse_next(&mut self, input: &mut I) -> Result<Postfix<I, O, E>, E> {
467 empty.value(self.clone()).parse_next(input)
468 }
469}
470
471/// Define an [`expression()`]'s infix operator
472///
473/// It requires an operator binding power, as well as a
474/// fold function which applies the operator.
475pub enum Infix<I, O, E> {
476 /// Left-associative operator
477 ///
478 /// The operators will bind more tightly to their rightmost operands.
479 ///
480 /// e.g `A op B op C` -> `(A op B) op C`
481 Left(
482 /// Binding power
483 i64,
484 /// Binary operator
485 fn(&mut I, O, O) -> Result<O, E>,
486 ),
487 /// Right-associative operator
488 ///
489 /// The operators will bind more tightly to their leftmost operands.
490 ///
491 /// e.g `A op B op C` -> `A op (B op C)`
492 Right(
493 /// Binding power
494 i64,
495 /// Binary operator
496 fn(&mut I, O, O) -> Result<O, E>,
497 ),
498 /// Neither left or right associative
499 ///
500 /// `Infix::Neither` has similar associativity rules as `Assoc::Left`, but we stop
501 /// parsing when the next operator is the same as the current one.
502 ///
503 /// e.g. `a == b == c` -> `(a == b)`, fail: `(== c)`
504 Neither(
505 /// Binding power
506 i64,
507 /// Binary operator
508 fn(&mut I, O, O) -> Result<O, E>,
509 ),
510}
511
512impl<I, O, E> Clone for Infix<I, O, E> {
513 #[inline(always)]
514 fn clone(&self) -> Self {
515 match self {
516 Infix::Left(p, f) => Infix::Left(*p, *f),
517 Infix::Right(p, f) => Infix::Right(*p, *f),
518 Infix::Neither(p, f) => Infix::Neither(*p, *f),
519 }
520 }
521}
522
523impl<I: Stream, O, E: ParserError<I>> Parser<I, Infix<I, O, E>, E> for Infix<I, O, E> {
524 #[inline(always)]
525 fn parse_next(&mut self, input: &mut I) -> Result<Infix<I, O, E>, E> {
526 empty.value(self.clone()).parse_next(input)
527 }
528}
529
530#[cfg(test)]
531mod tests {
532 use crate::ascii::digit1;
533 use crate::combinator::fail;
534 use crate::dispatch;
535 use crate::error::ContextError;
536 use crate::token::any;
537
538 use super::*;
539
540 fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
541 move |i: &mut &str| {
542 use Infix::*;
543 expression(digit1.parse_to::<i32>())
544 .current_precedence_level(0)
545 .prefix(dispatch! {any;
546 '+' => Prefix(12, |_, a| Ok(a)),
547 '-' => Prefix(12, |_, a: i32| Ok(-a)),
548 _ => fail
549 })
550 .infix(dispatch! {any;
551 '+' => Left(5, |_, a, b| Ok(a + b)),
552 '-' => Left(5, |_, a, b| Ok(a - b)),
553 '*' => Left(7, |_, a, b| Ok(a * b)),
554 '/' => Left(7, |_, a, b| Ok(a / b)),
555 '%' => Left(7, |_, a, b| Ok(a % b)),
556 '^' => Left(9, |_, a, b| Ok(a ^ b)),
557 _ => fail
558 })
559 .parse_next(i)
560 }
561 }
562
563 #[test]
564 fn test_expression() {
565 assert_eq!(parser().parse("-3+-3*4"), Ok(-15));
566 assert_eq!(parser().parse("+2+3*4"), Ok(14));
567 assert_eq!(parser().parse("2*3+4"), Ok(10));
568 }
569}