cssparser/
parser.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use crate::cow_rc_str::CowRcStr;
6use crate::tokenizer::{SourceLocation, SourcePosition, Token, Tokenizer};
7use smallvec::SmallVec;
8use std::fmt;
9use std::ops::BitOr;
10use std::ops::Range;
11
/// A capture of the internal state of a `Parser` (including the position within the input),
/// obtained from the `Parser::state` method.
///
/// Can be used with the `Parser::reset` method to restore that state.
/// Should only be used with the `Parser` instance it came from.
#[derive(Debug, Clone)]
pub struct ParserState {
    /// Offset from the start of the input, counted in UTF-8 bytes
    /// (exposed through `ParserState::position`).
    pub(crate) position: usize,
    /// Offset subtracted from `position` to derive the column in `source_location`.
    pub(crate) current_line_start_position: usize,
    /// Line number reported by `source_location`.
    pub(crate) current_line_number: u32,
    /// Copy of `Parser::at_start_of` at capture time; written back by `Parser::reset`.
    pub(crate) at_start_of: Option<BlockType>,
}
24
25impl ParserState {
26    /// The position from the start of the input, counted in UTF-8 bytes.
27    #[inline]
28    pub fn position(&self) -> SourcePosition {
29        SourcePosition(self.position)
30    }
31
32    /// The line number and column number
33    #[inline]
34    pub fn source_location(&self) -> SourceLocation {
35        SourceLocation {
36            line: self.current_line_number,
37            column: (self.position - self.current_line_start_position + 1) as u32,
38        }
39    }
40}
41
/// When parsing until a given token, sometimes the caller knows that parsing is going to restart
/// at some earlier point, and consuming until we find a top-level delimiter is just wasted work.
///
/// In that case, callers can pass `ParseUntilErrorBehavior::Stop` to avoid doing all that wasted
/// work.
///
/// This is important for things like CSS nesting, where something like:
///
/// ```text
/// foo:is(..) {
///   ...
/// }
/// ```
///
/// would need to scan the whole `{}` block to find a semicolon, only for parsing to be
/// restarted as a qualified rule later.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ParseUntilErrorBehavior {
    /// Consume until we see the relevant delimiter or the end of the stream.
    Consume,
    /// Eagerly error.
    Stop,
}
63
/// Details about a `BasicParseError`.
///
/// The `Display` implementation renders each variant as a short human-readable message.
#[derive(Clone, Debug, PartialEq)]
pub enum BasicParseErrorKind<'i> {
    /// An unexpected token was encountered; carries the offending token.
    UnexpectedToken(Token<'i>),
    /// The end of the input was encountered unexpectedly.
    EndOfInput,
    /// An `@` rule was encountered that was invalid; carries the rule name
    /// (printed after an `@` by `Display`).
    AtRuleInvalid(CowRcStr<'i>),
    /// The body of an `@` rule was invalid.
    AtRuleBodyInvalid,
    /// A qualified rule was encountered that was invalid.
    QualifiedRuleInvalid,
}
78
79impl fmt::Display for BasicParseErrorKind<'_> {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
81        match self {
82            BasicParseErrorKind::UnexpectedToken(token) => {
83                write!(f, "unexpected token: {token:?}")
84            }
85            BasicParseErrorKind::EndOfInput => write!(f, "unexpected end of input"),
86            BasicParseErrorKind::AtRuleInvalid(rule) => {
87                write!(f, "invalid @ rule encountered: '@{rule}'")
88            }
89            BasicParseErrorKind::AtRuleBodyInvalid => write!(f, "invalid @ rule body encountered"),
90            BasicParseErrorKind::QualifiedRuleInvalid => {
91                write!(f, "invalid qualified rule encountered")
92            }
93        }
94    }
95}
96
/// The fundamental parsing errors that can be triggered by built-in parsing routines.
///
/// Converts into any `ParseError<'i, T>` through `From`, becoming its `Basic` kind.
#[derive(Clone, Debug, PartialEq)]
pub struct BasicParseError<'i> {
    /// Details of this error
    pub kind: BasicParseErrorKind<'i>,
    /// Location where this error occurred
    pub location: SourceLocation,
}
105
106impl<'i, T> From<BasicParseError<'i>> for ParseError<'i, T> {
107    #[inline]
108    fn from(this: BasicParseError<'i>) -> ParseError<'i, T> {
109        ParseError {
110            kind: ParseErrorKind::Basic(this.kind),
111            location: this.location,
112        }
113    }
114}
115
116impl SourceLocation {
117    /// Create a new BasicParseError at this location for an unexpected token
118    #[inline]
119    pub fn new_basic_unexpected_token_error(self, token: Token<'_>) -> BasicParseError<'_> {
120        self.new_basic_error(BasicParseErrorKind::UnexpectedToken(token))
121    }
122
123    /// Create a new BasicParseError at this location
124    #[inline]
125    pub fn new_basic_error(self, kind: BasicParseErrorKind<'_>) -> BasicParseError<'_> {
126        BasicParseError {
127            kind,
128            location: self,
129        }
130    }
131
132    /// Create a new ParseError at this location for an unexpected token
133    #[inline]
134    pub fn new_unexpected_token_error<E>(self, token: Token<'_>) -> ParseError<'_, E> {
135        self.new_error(BasicParseErrorKind::UnexpectedToken(token))
136    }
137
138    /// Create a new basic ParseError at the current location
139    #[inline]
140    pub fn new_error<E>(self, kind: BasicParseErrorKind<'_>) -> ParseError<'_, E> {
141        ParseError {
142            kind: ParseErrorKind::Basic(kind),
143            location: self,
144        }
145    }
146
147    /// Create a new custom ParseError at this location
148    #[inline]
149    pub fn new_custom_error<'i, E1: Into<E2>, E2>(self, error: E1) -> ParseError<'i, E2> {
150        ParseError {
151            kind: ParseErrorKind::Custom(error.into()),
152            location: self,
153        }
154    }
155}
156
/// Details of a `ParseError`.
///
/// `T` is the error type supplied by downstream code for the `Custom` variant.
#[derive(Clone, Debug, PartialEq)]
pub enum ParseErrorKind<'i, T: 'i> {
    /// A fundamental parse error from a built-in parsing routine.
    Basic(BasicParseErrorKind<'i>),
    /// A parse error reported by downstream consumer code.
    Custom(T),
}
165
166impl<'i, T> ParseErrorKind<'i, T> {
167    /// Like `std::convert::Into::into`
168    pub fn into<U>(self) -> ParseErrorKind<'i, U>
169    where
170        T: Into<U>,
171    {
172        match self {
173            ParseErrorKind::Basic(basic) => ParseErrorKind::Basic(basic),
174            ParseErrorKind::Custom(custom) => ParseErrorKind::Custom(custom.into()),
175        }
176    }
177}
178
179impl<E: fmt::Display> fmt::Display for ParseErrorKind<'_, E> {
180    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
181        match self {
182            ParseErrorKind::Basic(ref basic) => basic.fmt(f),
183            ParseErrorKind::Custom(ref custom) => custom.fmt(f),
184        }
185    }
186}
187
/// Extensible parse errors that can be encountered by client parsing implementations.
///
/// `E` is the client-defined error type carried by `ParseErrorKind::Custom`.
#[derive(Clone, Debug, PartialEq)]
pub struct ParseError<'i, E> {
    /// Details of this error
    pub kind: ParseErrorKind<'i, E>,
    /// Location where this error occurred
    pub location: SourceLocation,
}
196
197impl<'i, T> ParseError<'i, T> {
198    /// Extract the fundamental parse error from an extensible error.
199    pub fn basic(self) -> BasicParseError<'i> {
200        match self.kind {
201            ParseErrorKind::Basic(kind) => BasicParseError {
202                kind,
203                location: self.location,
204            },
205            ParseErrorKind::Custom(_) => panic!("Not a basic parse error"),
206        }
207    }
208
209    /// Like `std::convert::Into::into`
210    pub fn into<U>(self) -> ParseError<'i, U>
211    where
212        T: Into<U>,
213    {
214        ParseError {
215            kind: self.kind.into(),
216            location: self.location,
217        }
218    }
219}
220
221impl<E: fmt::Display> fmt::Display for ParseError<'_, E> {
222    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
223        self.kind.fmt(f)
224    }
225}
226
/// Marks `ParseError` as a standard error type whenever the custom error type
/// is both displayable and debuggable. No methods are overridden.
impl<E: fmt::Display + fmt::Debug> std::error::Error for ParseError<'_, E> {}
228
/// The owned input for a parser.
///
/// A `Parser` borrows this mutably; it holds the tokenizer and a one-entry token cache.
pub struct ParserInput<'i> {
    /// Tokenizer over the input string.
    tokenizer: Tokenizer<'i>,
    /// Most recently tokenized token, if any, together with where it started and
    /// the tokenizer state right after it.
    cached_token: Option<CachedToken<'i>>,
}
234
/// A token remembered by `ParserInput`, so that asking for the token at the same
/// start position again does not require re-tokenizing.
struct CachedToken<'i> {
    /// The cached token itself.
    token: Token<'i>,
    /// Position in the input at which `token` starts; used as the cache key.
    start_position: SourcePosition,
    /// Tokenizer state captured right after `token` was produced.
    end_state: ParserState,
}
240
241impl<'i> ParserInput<'i> {
242    /// Create a new input for a parser.
243    pub fn new(input: &'i str) -> ParserInput<'i> {
244        ParserInput {
245            tokenizer: Tokenizer::new(input),
246            cached_token: None,
247        }
248    }
249
250    #[inline]
251    fn cached_token_ref(&self) -> &Token<'i> {
252        &self.cached_token.as_ref().unwrap().token
253    }
254}
255
/// A CSS parser that borrows its `&str` input,
/// yields `Token`s,
/// and keeps track of nested blocks and functions.
pub struct Parser<'i, 't> {
    /// The shared tokenizer and token cache this parser reads from.
    input: &'t mut ParserInput<'i>,
    /// If `Some(_)`, .parse_nested_block() can be called.
    ///
    /// Set when the last returned token opened a block or function; if it is still
    /// set when the next token is requested, the rest of that block is skipped.
    at_start_of: Option<BlockType>,
    /// For parsers from `parse_until` or `parse_nested_block`
    ///
    /// Reaching a byte that maps to one of these delimiters is reported as end of input.
    stop_before: Delimiters,
}
266
/// The kind of bracketed block (or function) a token opens or closes.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub(crate) enum BlockType {
    /// Opened by a `Function` or `ParenthesisBlock` token, closed by `CloseParenthesis`.
    Parenthesis,
    /// Opened by `SquareBracketBlock`, closed by `CloseSquareBracket`.
    SquareBracket,
    /// Opened by `CurlyBracketBlock`, closed by `CloseCurlyBracket`.
    CurlyBracket,
}
273
274impl BlockType {
275    fn opening(token: &Token) -> Option<BlockType> {
276        match *token {
277            Token::Function(_) | Token::ParenthesisBlock => Some(BlockType::Parenthesis),
278            Token::SquareBracketBlock => Some(BlockType::SquareBracket),
279            Token::CurlyBracketBlock => Some(BlockType::CurlyBracket),
280            _ => None,
281        }
282    }
283
284    fn closing(token: &Token) -> Option<BlockType> {
285        match *token {
286            Token::CloseParenthesis => Some(BlockType::Parenthesis),
287            Token::CloseSquareBracket => Some(BlockType::SquareBracket),
288            Token::CloseCurlyBracket => Some(BlockType::CurlyBracket),
289            _ => None,
290        }
291    }
292}
293
/// A set of characters, to be used with the `Parser::parse_until*` methods.
///
/// The union of two sets can be obtained with the `|` operator. Example:
///
/// ```rust,ignore
/// input.parse_until_before(Delimiter::CurlyBracketBlock | Delimiter::Semicolon)
/// ```
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Delimiters {
    /// Bit mask with one bit per delimiter character.
    bits: u8,
}

/// `Delimiters` constants.
// The module is deliberately named like a type so that `Delimiter::Comma` reads as an
// enum variant; hence the naming lints are silenced.
#[allow(non_upper_case_globals, non_snake_case)]
pub mod Delimiter {
    use super::Delimiters;

    /// The empty delimiter set
    pub const None: Delimiters = Delimiters { bits: 0 };
    /// The delimiter set with only the `{` opening curly bracket
    pub const CurlyBracketBlock: Delimiters = Delimiters { bits: 1 << 1 };
    /// The delimiter set with only the `;` semicolon
    pub const Semicolon: Delimiters = Delimiters { bits: 1 << 2 };
    /// The delimiter set with only the `!` exclamation point
    pub const Bang: Delimiters = Delimiters { bits: 1 << 3 };
    /// The delimiter set with only the `,` comma
    pub const Comma: Delimiters = Delimiters { bits: 1 << 4 };
}

// Crate-private delimiter sets for closing brackets; they use the remaining high bits
// of the same mask as the public `Delimiter` constants.
#[allow(non_upper_case_globals, non_snake_case)]
mod ClosingDelimiter {
    use super::Delimiters;

    pub const CloseCurlyBracket: Delimiters = Delimiters { bits: 1 << 5 };
    pub const CloseSquareBracket: Delimiters = Delimiters { bits: 1 << 6 };
    pub const CloseParenthesis: Delimiters = Delimiters { bits: 1 << 7 };
}

/// Set union: the result contains every delimiter present in either operand.
impl BitOr<Delimiters> for Delimiters {
    type Output = Delimiters;

    #[inline]
    fn bitor(self, other: Delimiters) -> Delimiters {
        Delimiters {
            bits: self.bits | other.bits,
        }
    }
}

impl Delimiters {
    /// Whether the two sets share at least one delimiter.
    ///
    /// Note that this is an intersection test, not a superset test; the empty set
    /// therefore never "contains" anything and is never contained.
    #[inline]
    fn contains(self, other: Delimiters) -> bool {
        (self.bits & other.bits) != 0
    }

    /// Map the next input byte to the single-delimiter set it represents.
    ///
    /// Returns `Delimiter::None` for bytes that are not delimiters and for `None`
    /// (no next byte).
    #[inline]
    pub(crate) fn from_byte(byte: Option<u8>) -> Delimiters {
        const TABLE: [Delimiters; 256] = {
            let mut table = [Delimiter::None; 256];
            table[b';' as usize] = Delimiter::Semicolon;
            table[b'!' as usize] = Delimiter::Bang;
            table[b',' as usize] = Delimiter::Comma;
            table[b'{' as usize] = Delimiter::CurlyBracketBlock;
            table[b'}' as usize] = ClosingDelimiter::CloseCurlyBracket;
            table[b']' as usize] = ClosingDelimiter::CloseSquareBracket;
            table[b')' as usize] = ClosingDelimiter::CloseParenthesis;
            table
        };

        // A missing byte is looked up as byte 0, so entry 0 must be the empty set.
        // The table is a constant, so verify this once at compile time instead of
        // on every call.
        const _: () = assert!(TABLE[0].bits == Delimiter::None.bits);

        TABLE[usize::from(byte.unwrap_or(0))]
    }
}
367
/// Used in some `fn expect_*` methods
///
/// Takes the parser identifier followed by one or more `match` arms. The next token
/// (from `.next()`, so whitespace and comments are skipped) is matched against those
/// arms; any token they do not cover makes the enclosing function return an
/// "unexpected token" `BasicParseError`.
macro_rules! expect {
    ($parser: ident, $($branches: tt)+) => {
        {
            // Captured before consuming the token so the error is reported at the
            // location the parser was at when the expectation started.
            let start_location = $parser.current_source_location();
            match *$parser.next()? {
                $($branches)+
                // Fallback arm: note the `return`, which exits the calling function.
                ref token => {
                    return Err(start_location.new_basic_unexpected_token_error(token.clone()))
                }
            }
        }
    }
}
382
/// A list of arbitrary substitution function names. Names should be lowercase ASCII.
/// See <https://drafts.csswg.org/css-values-5/#arbitrary-substitution>
pub type ArbitrarySubstitutionFunctions<'a> = &'a [&'static str];
386
387impl<'i: 't, 't> Parser<'i, 't> {
388    /// Create a new parser
389    #[inline]
390    pub fn new(input: &'t mut ParserInput<'i>) -> Parser<'i, 't> {
391        Parser {
392            input,
393            at_start_of: None,
394            stop_before: Delimiter::None,
395        }
396    }
397
398    /// Return the current line that is being parsed.
399    pub fn current_line(&self) -> &'i str {
400        self.input.tokenizer.current_source_line()
401    }
402
403    /// Check whether the input is exhausted. That is, if `.next()` would return a token.
404    ///
405    /// This ignores whitespace and comments.
406    #[inline]
407    pub fn is_exhausted(&mut self) -> bool {
408        self.expect_exhausted().is_ok()
409    }
410
411    /// Check whether the input is exhausted. That is, if `.next()` would return a token.
412    /// Return a `Result` so that the `?` operator can be used: `input.expect_exhausted()?`
413    ///
414    /// This ignores whitespace and comments.
415    #[inline]
416    pub fn expect_exhausted(&mut self) -> Result<(), BasicParseError<'i>> {
417        let start = self.state();
418        let result = match self.next() {
419            Err(BasicParseError {
420                kind: BasicParseErrorKind::EndOfInput,
421                ..
422            }) => Ok(()),
423            Err(e) => unreachable!("Unexpected error encountered: {:?}", e),
424            Ok(t) => Err(start
425                .source_location()
426                .new_basic_unexpected_token_error(t.clone())),
427        };
428        self.reset(&start);
429        result
430    }
431
432    /// Return the current position within the input.
433    ///
434    /// This can be used with the `Parser::slice` and `slice_from` methods.
435    #[inline]
436    pub fn position(&self) -> SourcePosition {
437        self.input.tokenizer.position()
438    }
439
440    /// The current line number and column number.
441    #[inline]
442    pub fn current_source_location(&self) -> SourceLocation {
443        self.input.tokenizer.current_source_location()
444    }
445
446    /// The source map URL, if known.
447    ///
448    /// The source map URL is extracted from a specially formatted
449    /// comment.  The last such comment is used, so this value may
450    /// change as parsing proceeds.
451    pub fn current_source_map_url(&self) -> Option<&str> {
452        self.input.tokenizer.current_source_map_url()
453    }
454
455    /// The source URL, if known.
456    ///
457    /// The source URL is extracted from a specially formatted
458    /// comment.  The last such comment is used, so this value may
459    /// change as parsing proceeds.
460    pub fn current_source_url(&self) -> Option<&str> {
461        self.input.tokenizer.current_source_url()
462    }
463
464    /// Create a new BasicParseError at the current location
465    #[inline]
466    pub fn new_basic_error(&self, kind: BasicParseErrorKind<'i>) -> BasicParseError<'i> {
467        self.current_source_location().new_basic_error(kind)
468    }
469
470    /// Create a new basic ParseError at the current location
471    #[inline]
472    pub fn new_error<E>(&self, kind: BasicParseErrorKind<'i>) -> ParseError<'i, E> {
473        self.current_source_location().new_error(kind)
474    }
475
476    /// Create a new custom BasicParseError at the current location
477    #[inline]
478    pub fn new_custom_error<E1: Into<E2>, E2>(&self, error: E1) -> ParseError<'i, E2> {
479        self.current_source_location().new_custom_error(error)
480    }
481
482    /// Create a new unexpected token BasicParseError at the current location
483    #[inline]
484    pub fn new_basic_unexpected_token_error(&self, token: Token<'i>) -> BasicParseError<'i> {
485        self.new_basic_error(BasicParseErrorKind::UnexpectedToken(token))
486    }
487
488    /// Create a new unexpected token ParseError at the current location
489    #[inline]
490    pub fn new_unexpected_token_error<E>(&self, token: Token<'i>) -> ParseError<'i, E> {
491        self.new_error(BasicParseErrorKind::UnexpectedToken(token))
492    }
493
494    /// Create a new unexpected token or EOF ParseError at the current location
495    #[inline]
496    pub fn new_error_for_next_token<E>(&mut self) -> ParseError<'i, E> {
497        let token = match self.next() {
498            Ok(token) => token.clone(),
499            Err(e) => return e.into(),
500        };
501        self.new_error(BasicParseErrorKind::UnexpectedToken(token))
502    }
503
504    /// Return the current internal state of the parser (including position within the input).
505    ///
506    /// This state can later be restored with the `Parser::reset` method.
507    #[inline]
508    pub fn state(&self) -> ParserState {
509        ParserState {
510            at_start_of: self.at_start_of,
511            ..self.input.tokenizer.state()
512        }
513    }
514
515    /// Advance the input until the next token that’s not whitespace or a comment.
516    #[inline]
517    pub fn skip_whitespace(&mut self) {
518        if let Some(block_type) = self.at_start_of.take() {
519            consume_until_end_of_block(block_type, &mut self.input.tokenizer);
520        }
521
522        self.input.tokenizer.skip_whitespace()
523    }
524
525    #[inline]
526    pub(crate) fn skip_cdc_and_cdo(&mut self) {
527        if let Some(block_type) = self.at_start_of.take() {
528            consume_until_end_of_block(block_type, &mut self.input.tokenizer);
529        }
530
531        self.input.tokenizer.skip_cdc_and_cdo()
532    }
533
534    #[inline]
535    pub(crate) fn next_byte(&self) -> Option<u8> {
536        let byte = self.input.tokenizer.next_byte();
537        if self.stop_before.contains(Delimiters::from_byte(byte)) {
538            return None;
539        }
540        byte
541    }
542
543    /// Restore the internal state of the parser (including position within the input)
544    /// to what was previously saved by the `Parser::position` method.
545    ///
546    /// Should only be used with `SourcePosition` values from the same `Parser` instance.
547    #[inline]
548    pub fn reset(&mut self, state: &ParserState) {
549        self.input.tokenizer.reset(state);
550        self.at_start_of = state.at_start_of;
551    }
552
553    /// Start looking for arbitrary substitution functions like `var()` / `env()` functions.
554    /// (See the `.seen_arbitrary_substitution_functions()` method.)
555    #[inline]
556    pub fn look_for_arbitrary_substitution_functions(
557        &mut self,
558        fns: ArbitrarySubstitutionFunctions<'i>,
559    ) {
560        self.input
561            .tokenizer
562            .look_for_arbitrary_substitution_functions(fns)
563    }
564
565    /// Return whether a relevant function has been seen by the tokenizer since
566    /// `look_for_arbitrary_substitution_functions` was called, and stop looking.
567    #[inline]
568    pub fn seen_arbitrary_substitution_functions(&mut self) -> bool {
569        self.input.tokenizer.seen_arbitrary_substitution_functions()
570    }
571
572    /// The old name of `try_parse`, which requires raw identifiers in the Rust 2018 edition.
573    #[inline]
574    pub fn r#try<F, T, E>(&mut self, thing: F) -> Result<T, E>
575    where
576        F: FnOnce(&mut Parser<'i, 't>) -> Result<T, E>,
577    {
578        self.try_parse(thing)
579    }
580
581    /// Execute the given closure, passing it the parser.
582    /// If the result (returned unchanged) is `Err`,
583    /// the internal state of the parser  (including position within the input)
584    /// is restored to what it was before the call.
585    #[inline]
586    pub fn try_parse<F, T, E>(&mut self, thing: F) -> Result<T, E>
587    where
588        F: FnOnce(&mut Parser<'i, 't>) -> Result<T, E>,
589    {
590        let start = self.state();
591        let result = thing(self);
592        if result.is_err() {
593            self.reset(&start)
594        }
595        result
596    }
597
598    /// Return a slice of the CSS input
599    #[inline]
600    pub fn slice(&self, range: Range<SourcePosition>) -> &'i str {
601        self.input.tokenizer.slice(range)
602    }
603
604    /// Return a slice of the CSS input, from the given position to the current one.
605    #[inline]
606    pub fn slice_from(&self, start_position: SourcePosition) -> &'i str {
607        self.input.tokenizer.slice_from(start_position)
608    }
609
    /// Return the next token in the input that is neither whitespace nor a comment,
    /// and advance the position accordingly.
    ///
    /// After returning a `Function`, `ParenthesisBlock`,
    /// `CurlyBracketBlock`, or `SquareBracketBlock` token,
    /// the next call will skip until after the matching `CloseParenthesis`,
    /// `CloseCurlyBracket`, or `CloseSquareBracket` token.
    ///
    /// See the `Parser::parse_nested_block` method to parse the content of functions or blocks.
    ///
    /// This only returns a closing token when it is unmatched (and therefore an error).
    // Deliberately an inherent method rather than `Iterator::next`: the signature
    // (a borrowed token inside a `Result`) differs from the trait's.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> {
        self.skip_whitespace();
        self.next_including_whitespace_and_comments()
    }
626
627    /// Same as `Parser::next`, but does not skip whitespace tokens.
628    pub fn next_including_whitespace(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> {
629        loop {
630            match self.next_including_whitespace_and_comments() {
631                Err(e) => return Err(e),
632                Ok(&Token::Comment(_)) => {}
633                _ => break,
634            }
635        }
636        Ok(self.input.cached_token_ref())
637    }
638
    /// Same as `Parser::next`, but does not skip whitespace or comment tokens.
    ///
    /// **Note**: This should only be used in contexts like a CSS pre-processor
    /// where comments are preserved.
    /// When parsing higher-level values, per the CSS Syntax specification,
    /// comments should always be ignored between tokens.
    ///
    /// Returns an `EndOfInput` error when the next byte is one of this parser's
    /// stop delimiters, or when the tokenizer has no more tokens.
    pub fn next_including_whitespace_and_comments(
        &mut self,
    ) -> Result<&Token<'i>, BasicParseError<'i>> {
        // If the previously returned token opened a block that is still pending
        // (it was not entered), skip to the end of that block first.
        if let Some(block_type) = self.at_start_of.take() {
            consume_until_end_of_block(block_type, &mut self.input.tokenizer);
        }

        // A delimited parser reports end of input right before one of its delimiters.
        let byte = self.input.tokenizer.next_byte();
        if self.stop_before.contains(Delimiters::from_byte(byte)) {
            return Err(self.new_basic_error(BasicParseErrorKind::EndOfInput));
        }

        // The cache holds a single token keyed by its start position; it is hit when
        // the tokenizer is positioned at that same start again.
        let token_start_position = self.input.tokenizer.position();
        let using_cached_token = self
            .input
            .cached_token
            .as_ref()
            .map_or(false, |cached_token| {
                cached_token.start_position == token_start_position
            });
        let token = if using_cached_token {
            // Cache hit: jump the tokenizer to the state recorded after the token
            // instead of tokenizing again.
            let cached_token = self.input.cached_token.as_ref().unwrap();
            self.input.tokenizer.reset(&cached_token.end_state);
            // Tokenizing was skipped, so report the function name to the tokenizer
            // explicitly (presumably for arbitrary-substitution-function tracking;
            // confirm in `Tokenizer::see_function`).
            if let Token::Function(ref name) = cached_token.token {
                self.input.tokenizer.see_function(name)
            }
            &cached_token.token
        } else {
            // Cache miss: tokenize, then remember the token with its start position
            // and the tokenizer state that follows it.
            let new_token = self
                .input
                .tokenizer
                .next()
                .map_err(|()| self.new_basic_error(BasicParseErrorKind::EndOfInput))?;
            self.input.cached_token = Some(CachedToken {
                token: new_token,
                start_position: token_start_position,
                end_state: self.input.tokenizer.state(),
            });
            self.input.cached_token_ref()
        };

        // Remember that a block was just opened, so that it can either be entered
        // or skipped when the next token is requested.
        if let Some(block_type) = BlockType::opening(token) {
            self.at_start_of = Some(block_type);
        }
        Ok(token)
    }
691
692    /// Have the given closure parse something, then check the the input is exhausted.
693    /// The result is overridden to an `Err(..)` if some input remains.
694    ///
695    /// This can help tell e.g. `color: green;` from `color: green 4px;`
696    #[inline]
697    pub fn parse_entirely<F, T, E>(&mut self, parse: F) -> Result<T, ParseError<'i, E>>
698    where
699        F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ParseError<'i, E>>,
700    {
701        let result = parse(self)?;
702        self.expect_exhausted()?;
703        Ok(result)
704    }
705
706    /// Parse a list of comma-separated values, all with the same syntax.
707    ///
708    /// The given closure is called repeatedly with a "delimited" parser
709    /// (see the `Parser::parse_until_before` method) so that it can over
710    /// consume the input past a comma at this block/function nesting level.
711    ///
712    /// Successful results are accumulated in a vector.
713    ///
714    /// This method returns an`Err(..)` the first time that a closure call does,
715    /// or if a closure call leaves some input before the next comma or the end
716    /// of the input.
717    #[inline]
718    pub fn parse_comma_separated<F, T, E>(
719        &mut self,
720        parse_one: F,
721    ) -> Result<Vec<T>, ParseError<'i, E>>
722    where
723        F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
724    {
725        self.parse_comma_separated_internal(parse_one, /* ignore_errors = */ false)
726    }
727
728    /// Like `parse_comma_separated`, but ignores errors on unknown components,
729    /// rather than erroring out in the whole list.
730    ///
731    /// Caller must deal with the fact that the resulting list might be empty,
732    /// if there's no valid component on the list.
733    #[inline]
734    pub fn parse_comma_separated_ignoring_errors<F, T, E: 'i>(&mut self, parse_one: F) -> Vec<T>
735    where
736        F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
737    {
738        match self.parse_comma_separated_internal(parse_one, /* ignore_errors = */ true) {
739            Ok(values) => values,
740            Err(..) => unreachable!(),
741        }
742    }
743
744    #[inline]
745    fn parse_comma_separated_internal<F, T, E>(
746        &mut self,
747        mut parse_one: F,
748        ignore_errors: bool,
749    ) -> Result<Vec<T>, ParseError<'i, E>>
750    where
751        F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
752    {
753        // Vec grows from 0 to 4 by default on first push().  So allocate with
754        // capacity 1, so in the somewhat common case of only one item we don't
755        // way overallocate.  Note that we always push at least one item if
756        // parsing succeeds.
757        let mut values = Vec::with_capacity(1);
758        loop {
759            self.skip_whitespace(); // Unnecessary for correctness, but may help try() in parse_one rewind less.
760            match self.parse_until_before(Delimiter::Comma, &mut parse_one) {
761                Ok(v) => values.push(v),
762                Err(e) if !ignore_errors => return Err(e),
763                Err(_) => {}
764            }
765            match self.next() {
766                Err(_) => return Ok(values),
767                Ok(&Token::Comma) => continue,
768                Ok(_) => unreachable!(),
769            }
770        }
771    }
772
    /// Parse the content of a block or function.
    ///
    /// This method panics if the last token yielded by this parser
    /// (from one of the `next*` methods)
    /// is not one that marks the start of a block or function:
    /// a `Function`, `ParenthesisBlock`, `CurlyBracketBlock`, or `SquareBracketBlock`.
    ///
    /// The given closure is called with a "delimited" parser
    /// that stops at the end of the block or function (at the matching closing token).
    ///
    /// The result is overridden to an `Err(..)` if the closure leaves some input before that point.
    #[inline]
    pub fn parse_nested_block<F, T, E>(&mut self, parse: F) -> Result<T, ParseError<'i, E>>
    where
        F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
    {
        // Delegates to the free function of the same name in this module.
        parse_nested_block(self, parse)
    }
791
792    /// Limit parsing to until a given delimiter or the end of the input. (E.g.
793    /// a semicolon for a property value.)
794    ///
795    /// The given closure is called with a "delimited" parser
796    /// that stops before the first character at this block/function nesting level
797    /// that matches the given set of delimiters, or at the end of the input.
798    ///
799    /// The result is overridden to an `Err(..)` if the closure leaves some input before that point.
800    #[inline]
801    pub fn parse_until_before<F, T, E>(
802        &mut self,
803        delimiters: Delimiters,
804        parse: F,
805    ) -> Result<T, ParseError<'i, E>>
806    where
807        F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
808    {
809        parse_until_before(self, delimiters, ParseUntilErrorBehavior::Consume, parse)
810    }
811
812    /// Like `parse_until_before`, but also consume the delimiter token.
813    ///
814    /// This can be useful when you don’t need to know which delimiter it was
815    /// (e.g. if these is only one in the given set)
816    /// or if it was there at all (as opposed to reaching the end of the input).
817    #[inline]
818    pub fn parse_until_after<F, T, E>(
819        &mut self,
820        delimiters: Delimiters,
821        parse: F,
822    ) -> Result<T, ParseError<'i, E>>
823    where
824        F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
825    {
826        parse_until_after(self, delimiters, ParseUntilErrorBehavior::Consume, parse)
827    }
828
829    /// Parse a <whitespace-token> and return its value.
830    #[inline]
831    pub fn expect_whitespace(&mut self) -> Result<&'i str, BasicParseError<'i>> {
832        let start_location = self.current_source_location();
833        match *self.next_including_whitespace()? {
834            Token::WhiteSpace(value) => Ok(value),
835            ref t => Err(start_location.new_basic_unexpected_token_error(t.clone())),
836        }
837    }
838
    /// Parse a <ident-token> and return the unescaped value.
    ///
    /// Any other token yields an "unexpected token" error (see the `expect!` macro).
    #[inline]
    pub fn expect_ident(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
        expect! {self,
            Token::Ident(ref value) => Ok(value),
        }
    }
846
847    /// expect_ident, but clone the CowRcStr
848    #[inline]
849    pub fn expect_ident_cloned(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
850        self.expect_ident().cloned()
851    }
852
853    /// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value.
854    #[inline]
855    pub fn expect_ident_matching(
856        &mut self,
857        expected_value: &str,
858    ) -> Result<(), BasicParseError<'i>> {
859        expect! {self,
860            Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
861        }
862    }
863
864    /// Parse a <string-token> and return the unescaped value.
865    #[inline]
866    pub fn expect_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
867        expect! {self,
868            Token::QuotedString(ref value) => Ok(value),
869        }
870    }
871
872    /// expect_string, but clone the CowRcStr
873    #[inline]
874    pub fn expect_string_cloned(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
875        self.expect_string().cloned()
876    }
877
878    /// Parse either a <ident-token> or a <string-token>, and return the unescaped value.
879    #[inline]
880    pub fn expect_ident_or_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
881        expect! {self,
882            Token::Ident(ref value) => Ok(value),
883            Token::QuotedString(ref value) => Ok(value),
884        }
885    }
886
887    /// Parse a <url-token> and return the unescaped value.
888    #[inline]
889    pub fn expect_url(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
890        expect! {self,
891            Token::UnquotedUrl(ref value) => Ok(value.clone()),
892            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
893                self.parse_nested_block(|input| {
894                    input.expect_string().map_err(Into::into).cloned()
895                })
896                .map_err(ParseError::<()>::basic)
897            }
898        }
899    }
900
901    /// Parse either a <url-token> or a <string-token>, and return the unescaped value.
902    #[inline]
903    pub fn expect_url_or_string(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> {
904        expect! {self,
905            Token::UnquotedUrl(ref value) => Ok(value.clone()),
906            Token::QuotedString(ref value) => Ok(value.clone()),
907            Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
908                self.parse_nested_block(|input| {
909                    input.expect_string().map_err(Into::into).cloned()
910                })
911                .map_err(ParseError::<()>::basic)
912            }
913        }
914    }
915
916    /// Parse a <number-token> and return the integer value.
917    #[inline]
918    pub fn expect_number(&mut self) -> Result<f32, BasicParseError<'i>> {
919        expect! {self,
920            Token::Number { value, .. } => Ok(value),
921        }
922    }
923
924    /// Parse a <number-token> that does not have a fractional part, and return the integer value.
925    #[inline]
926    pub fn expect_integer(&mut self) -> Result<i32, BasicParseError<'i>> {
927        expect! {self,
928            Token::Number { int_value: Some(int_value), .. } => Ok(int_value),
929        }
930    }
931
932    /// Parse a <percentage-token> and return the value.
933    /// `0%` and `100%` map to `0.0` and `1.0` (not `100.0`), respectively.
934    #[inline]
935    pub fn expect_percentage(&mut self) -> Result<f32, BasicParseError<'i>> {
936        expect! {self,
937            Token::Percentage { unit_value, .. } => Ok(unit_value),
938        }
939    }
940
941    /// Parse a `:` <colon-token>.
942    #[inline]
943    pub fn expect_colon(&mut self) -> Result<(), BasicParseError<'i>> {
944        expect! {self,
945            Token::Colon => Ok(()),
946        }
947    }
948
949    /// Parse a `;` <semicolon-token>.
950    #[inline]
951    pub fn expect_semicolon(&mut self) -> Result<(), BasicParseError<'i>> {
952        expect! {self,
953            Token::Semicolon => Ok(()),
954        }
955    }
956
957    /// Parse a `,` <comma-token>.
958    #[inline]
959    pub fn expect_comma(&mut self) -> Result<(), BasicParseError<'i>> {
960        expect! {self,
961            Token::Comma => Ok(()),
962        }
963    }
964
965    /// Parse a <delim-token> with the given value.
966    #[inline]
967    pub fn expect_delim(&mut self, expected_value: char) -> Result<(), BasicParseError<'i>> {
968        expect! {self,
969            Token::Delim(value) if value == expected_value => Ok(()),
970        }
971    }
972
973    /// Parse a `{ /* ... */ }` curly brackets block.
974    ///
975    /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
976    #[inline]
977    pub fn expect_curly_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> {
978        expect! {self,
979            Token::CurlyBracketBlock => Ok(()),
980        }
981    }
982
983    /// Parse a `[ /* ... */ ]` square brackets block.
984    ///
985    /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
986    #[inline]
987    pub fn expect_square_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> {
988        expect! {self,
989            Token::SquareBracketBlock => Ok(()),
990        }
991    }
992
993    /// Parse a `( /* ... */ )` parenthesis block.
994    ///
995    /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
996    #[inline]
997    pub fn expect_parenthesis_block(&mut self) -> Result<(), BasicParseError<'i>> {
998        expect! {self,
999            Token::ParenthesisBlock => Ok(()),
1000        }
1001    }
1002
1003    /// Parse a <function> token and return its name.
1004    ///
1005    /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
1006    #[inline]
1007    pub fn expect_function(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> {
1008        expect! {self,
1009            Token::Function(ref name) => Ok(name),
1010        }
1011    }
1012
1013    /// Parse a <function> token whose name is an ASCII-insensitive match for the given value.
1014    ///
1015    /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method.
1016    #[inline]
1017    pub fn expect_function_matching(
1018        &mut self,
1019        expected_name: &str,
1020    ) -> Result<(), BasicParseError<'i>> {
1021        expect! {self,
1022            Token::Function(ref name) if name.eq_ignore_ascii_case(expected_name) => Ok(()),
1023        }
1024    }
1025
1026    /// Parse the input until exhaustion and check that it contains no “error” token.
1027    ///
1028    /// See `Token::is_parse_error`. This also checks nested blocks and functions recursively.
1029    #[inline]
1030    pub fn expect_no_error_token(&mut self) -> Result<(), BasicParseError<'i>> {
1031        loop {
1032            match self.next_including_whitespace_and_comments() {
1033                Ok(&Token::Function(_))
1034                | Ok(&Token::ParenthesisBlock)
1035                | Ok(&Token::SquareBracketBlock)
1036                | Ok(&Token::CurlyBracketBlock) => self
1037                    .parse_nested_block(|input| input.expect_no_error_token().map_err(Into::into))
1038                    .map_err(ParseError::<()>::basic)?,
1039                Ok(t) => {
1040                    // FIXME: maybe these should be separate variants of
1041                    // BasicParseError instead?
1042                    if t.is_parse_error() {
1043                        let token = t.clone();
1044                        return Err(self.new_basic_unexpected_token_error(token));
1045                    }
1046                }
1047                Err(_) => return Ok(()),
1048            }
1049        }
1050    }
1051}
1052
1053pub fn parse_until_before<'i: 't, 't, F, T, E>(
1054    parser: &mut Parser<'i, 't>,
1055    delimiters: Delimiters,
1056    error_behavior: ParseUntilErrorBehavior,
1057    parse: F,
1058) -> Result<T, ParseError<'i, E>>
1059where
1060    F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
1061{
1062    let delimiters = parser.stop_before | delimiters;
1063    let result;
1064    // Introduce a new scope to limit duration of nested_parser’s borrow
1065    {
1066        let mut delimited_parser = Parser {
1067            input: parser.input,
1068            at_start_of: parser.at_start_of.take(),
1069            stop_before: delimiters,
1070        };
1071        result = delimited_parser.parse_entirely(parse);
1072        if error_behavior == ParseUntilErrorBehavior::Stop && result.is_err() {
1073            return result;
1074        }
1075        if let Some(block_type) = delimited_parser.at_start_of {
1076            consume_until_end_of_block(block_type, &mut delimited_parser.input.tokenizer);
1077        }
1078    }
1079    // FIXME: have a special-purpose tokenizer method for this that does less work.
1080    loop {
1081        if delimiters.contains(Delimiters::from_byte(parser.input.tokenizer.next_byte())) {
1082            break;
1083        }
1084        if let Ok(token) = parser.input.tokenizer.next() {
1085            if let Some(block_type) = BlockType::opening(&token) {
1086                consume_until_end_of_block(block_type, &mut parser.input.tokenizer);
1087            }
1088        } else {
1089            break;
1090        }
1091    }
1092    result
1093}
1094
1095pub fn parse_until_after<'i: 't, 't, F, T, E>(
1096    parser: &mut Parser<'i, 't>,
1097    delimiters: Delimiters,
1098    error_behavior: ParseUntilErrorBehavior,
1099    parse: F,
1100) -> Result<T, ParseError<'i, E>>
1101where
1102    F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
1103{
1104    let result = parse_until_before(parser, delimiters, error_behavior, parse);
1105    if error_behavior == ParseUntilErrorBehavior::Stop && result.is_err() {
1106        return result;
1107    }
1108    let next_byte = parser.input.tokenizer.next_byte();
1109    if next_byte.is_some()
1110        && !parser
1111            .stop_before
1112            .contains(Delimiters::from_byte(next_byte))
1113    {
1114        debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte)));
1115        // We know this byte is ASCII.
1116        parser.input.tokenizer.advance(1);
1117        if next_byte == Some(b'{') {
1118            consume_until_end_of_block(BlockType::CurlyBracket, &mut parser.input.tokenizer);
1119        }
1120    }
1121    result
1122}
1123
1124pub fn parse_nested_block<'i: 't, 't, F, T, E>(
1125    parser: &mut Parser<'i, 't>,
1126    parse: F,
1127) -> Result<T, ParseError<'i, E>>
1128where
1129    F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
1130{
1131    let block_type = parser.at_start_of.take().expect(
1132        "\
1133         A nested parser can only be created when a Function, \
1134         ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \
1135         token was just consumed.\
1136         ",
1137    );
1138    let closing_delimiter = match block_type {
1139        BlockType::CurlyBracket => ClosingDelimiter::CloseCurlyBracket,
1140        BlockType::SquareBracket => ClosingDelimiter::CloseSquareBracket,
1141        BlockType::Parenthesis => ClosingDelimiter::CloseParenthesis,
1142    };
1143    let result;
1144    // Introduce a new scope to limit duration of nested_parser’s borrow
1145    {
1146        let mut nested_parser = Parser {
1147            input: parser.input,
1148            at_start_of: None,
1149            stop_before: closing_delimiter,
1150        };
1151        result = nested_parser.parse_entirely(parse);
1152        if let Some(block_type) = nested_parser.at_start_of {
1153            consume_until_end_of_block(block_type, &mut nested_parser.input.tokenizer);
1154        }
1155    }
1156    consume_until_end_of_block(block_type, &mut parser.input.tokenizer);
1157    result
1158}
1159
1160#[inline(never)]
1161#[cold]
1162fn consume_until_end_of_block(block_type: BlockType, tokenizer: &mut Tokenizer) {
1163    let mut stack = SmallVec::<[BlockType; 16]>::new();
1164    stack.push(block_type);
1165
1166    // FIXME: have a special-purpose tokenizer method for this that does less work.
1167    while let Ok(ref token) = tokenizer.next() {
1168        if let Some(b) = BlockType::closing(token) {
1169            if *stack.last().unwrap() == b {
1170                stack.pop();
1171                if stack.is_empty() {
1172                    return;
1173                }
1174            }
1175        }
1176
1177        if let Some(block_type) = BlockType::opening(token) {
1178            stack.push(block_type);
1179        }
1180    }
1181}
cssparser/parser.rs

cssparser/
parser.rs