match_token/
lib.rs

1extern crate proc_macro;
2
3use quote::quote;
4use syn::{braced, Token};
5
6use std::collections::HashSet;
7use syn::ext::IdentExt;
8use syn::parse::{Parse, ParseStream, Result};
9
10/// Implements the `match_token!()` macro for use by the HTML tree builder
11/// in `src/tree_builder/rules.rs`.
12///
13/// ## Example
14///
15/// ```rust,ignore
16/// match_token!(token {
17///     CommentToken(text) => 1,
18///     tag @ <base> <link> <meta> => 2,
19///     </head> => 3,
20///     </body> </html> </br> => else,
21///     tag @ </_> => 4,
22///     token => 5,
23/// })
24/// ```
25///
26/// ## Syntax
27/// Because of the simplistic parser, the macro invocation must
28/// start with exactly `match_token!(token {` (with whitespace as specified)
29/// and end with exactly `})`.
30/// The left-hand side of each match arm is an optional `name @` binding, followed by
31///   - an ordinary Rust pattern that starts with an identifier or an underscore, or
32///   - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
33///     to match an open or close tag respectively, or
34///   - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
35///     respectively.
36///
37/// The right-hand side is either an expression or the keyword `else`.
38/// Note that this syntax does not support guards or pattern alternation like
39/// `Foo | Bar`.  This is not a fundamental limitation; it's done for implementation
40/// simplicity.
41/// ## Semantics
42/// Ordinary Rust patterns match as usual.  If present, the `name @` binding has
43/// the usual meaning.
44/// A sequence of named tags matches any of those tags.  A single sequence can
45/// contain both open and close tags.  If present, the `name @` binding binds (by
46/// move) the `Tag` struct, not the outer `Token`.  That is, a match arm like
47/// ```rust,ignore
48/// tag @ <html> <head> => ...
49/// ```
50/// expands to something like
51/// ```rust,ignore
/// Token::Tag(tag @ Tag { kind: StartTag, name: local_name!("html"), .. })
/// | Token::Tag(tag @ Tag { kind: StartTag, name: local_name!("head"), .. }) => ...
54/// ```
55/// A wildcard tag matches any tag of the appropriate kind, *unless* it was
56/// previously matched with an `else` right-hand side (more on this below).
57/// The expansion of this macro reorders code somewhat, to satisfy various
58/// restrictions arising from moves.  However it provides the semantics of in-order
59/// matching, by enforcing the following restrictions on its input:
60///   - The last pattern must be a variable or the wildcard "_".  In other words
61///     it must match everything.
62///   - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear
63///     after wildcard tag patterns.
64///   - No tag name may appear more than once.
65///   - A wildcard tag pattern may not occur in the same arm as any other tag.
66///     "<_> <html> => ..." and "<_> </_> => ..." are both forbidden.
67///   - The right-hand side "else" may only appear with specific-tag patterns.
68///     It means that these specific tags should be handled by the last,
69///     catch-all case arm, rather than by any wildcard tag arm.  This situation
70///     is common in the HTML5 syntax.
71#[proc_macro]
72pub fn match_token(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
73    let input = proc_macro2::TokenStream::from(input);
74
75    let match_token = syn::parse2::<MatchToken>(input).expect("Parsing match_token! input failed");
76    let output = expand_match_token_macro(match_token);
77
78    proc_macro::TokenStream::from(output)
79}
80
81struct MatchToken {
82    ident: syn::Ident,
83    arms: Vec<MatchTokenArm>,
84}
85
86struct MatchTokenArm {
87    binding: Option<syn::Ident>,
88    lhs: Lhs,
89    rhs: Rhs,
90}
91
92enum Lhs {
93    Tags(Vec<Tag>),
94    Pattern(syn::Pat),
95}
96
97enum Rhs {
98    Expression(syn::Expr),
99    Else,
100}
101
/// Whether a tag pattern refers to an opening tag (`<name>`) or a closing
/// tag (`</name>`).
///
/// The enum carries no data, so it is `Copy` and `Debug` in addition to the
/// comparison and hashing traits needed to store it in a `HashSet`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
enum TagKind {
    /// Pattern written as `<name>` or `<_>`.
    StartTag,
    /// Pattern written as `</name>` or `</_>`.
    EndTag,
}
107
108// Option is None if wildcard
109#[derive(PartialEq, Eq, Hash, Clone)]
110struct Tag {
111    kind: TagKind,
112    name: Option<syn::Ident>,
113}
114
115impl Parse for Tag {
116    fn parse(input: ParseStream) -> Result<Self> {
117        input.parse::<Token![<]>()?;
118        let closing: Option<Token![/]> = input.parse()?;
119        let name = match input.call(syn::Ident::parse_any)? {
120            ref wildcard if wildcard == "_" => None,
121            other => Some(other),
122        };
123        input.parse::<Token![>]>()?;
124        Ok(Tag {
125            kind: if closing.is_some() {
126                TagKind::EndTag
127            } else {
128                TagKind::StartTag
129            },
130            name,
131        })
132    }
133}
134
135impl Parse for Lhs {
136    fn parse(input: ParseStream) -> Result<Self> {
137        if input.peek(Token![<]) {
138            let mut tags = Vec::new();
139            while !input.peek(Token![=>]) {
140                tags.push(input.parse()?);
141            }
142            Ok(Lhs::Tags(tags))
143        } else {
144            let p = input.call(syn::Pat::parse_single)?;
145            Ok(Lhs::Pattern(p))
146        }
147    }
148}
149
150impl Parse for MatchTokenArm {
151    fn parse(input: ParseStream) -> Result<Self> {
152        let binding = if input.peek2(Token![@]) {
153            let binding = input.parse::<syn::Ident>()?;
154            input.parse::<Token![@]>()?;
155            Some(binding)
156        } else {
157            None
158        };
159        let lhs = input.parse::<Lhs>()?;
160        input.parse::<Token![=>]>()?;
161        let rhs = if input.peek(syn::token::Brace) {
162            let block = input.parse::<syn::Block>().unwrap();
163            let block = syn::ExprBlock {
164                attrs: vec![],
165                label: None,
166                block,
167            };
168            input.parse::<Option<Token![,]>>()?;
169            Rhs::Expression(syn::Expr::Block(block))
170        } else if input.peek(Token![else]) {
171            input.parse::<Token![else]>()?;
172            input.parse::<Token![,]>()?;
173            Rhs::Else
174        } else {
175            let expr = input.parse::<syn::Expr>().unwrap();
176            input.parse::<Option<Token![,]>>()?;
177            Rhs::Expression(expr)
178        };
179
180        Ok(MatchTokenArm { binding, lhs, rhs })
181    }
182}
183
184impl Parse for MatchToken {
185    fn parse(input: ParseStream) -> Result<Self> {
186        let ident = input.parse::<syn::Ident>()?;
187        let content;
188        braced!(content in input);
189        let mut arms = vec![];
190        while !content.is_empty() {
191            arms.push(content.parse()?);
192        }
193        Ok(MatchToken { ident, arms })
194    }
195}
196
197fn expand_match_token_macro(match_token: MatchToken) -> proc_macro2::TokenStream {
198    let mut arms = match_token.arms;
199    let to_be_matched = match_token.ident;
200    // Handle the last arm specially at the end.
201    let last_arm = arms.pop().unwrap();
202
203    // Tags we've seen, used for detecting duplicates.
204    let mut seen_tags: HashSet<Tag> = HashSet::new();
205
206    // Case arms for wildcard matching.  We collect these and
207    // emit them later.
208    let mut wildcards_patterns: Vec<proc_macro2::TokenStream> = Vec::new();
209    let mut wildcards_expressions: Vec<syn::Expr> = Vec::new();
210
211    // Tags excluded (by an 'else' RHS) from wildcard matching.
212    let mut wild_excluded_patterns: Vec<proc_macro2::TokenStream> = Vec::new();
213
214    let mut arms_code = Vec::new();
215
216    for MatchTokenArm { binding, lhs, rhs } in arms {
217        // Build Rust syntax for the `name @` binding, if any.
218        let binding = match binding {
219            Some(ident) => quote!(#ident @),
220            None => quote!(),
221        };
222
223        match (lhs, rhs) {
224            (Lhs::Pattern(_), Rhs::Else) => {
225                panic!("'else' may not appear with an ordinary pattern")
226            },
227
228            // ordinary pattern => expression
229            (Lhs::Pattern(pat), Rhs::Expression(expr)) => {
230                if !wildcards_patterns.is_empty() {
231                    panic!("ordinary patterns may not appear after wildcard tags");
232                }
233                arms_code.push(quote!(#binding #pat => #expr,))
234            },
235
236            // <tag> <tag> ... => else
237            (Lhs::Tags(tags), Rhs::Else) => {
238                for tag in tags {
239                    if !seen_tags.insert(tag.clone()) {
240                        panic!("duplicate tag");
241                    }
242                    if tag.name.is_none() {
243                        panic!("'else' may not appear with a wildcard tag");
244                    }
245                    wild_excluded_patterns
246                        .push(make_tag_pattern(&proc_macro2::TokenStream::new(), tag));
247                }
248            },
249
250            // <_> => expression
251            // <tag> <tag> ... => expression
252            (Lhs::Tags(tags), Rhs::Expression(expr)) => {
253                // Is this arm a tag wildcard?
254                // `None` if we haven't processed the first tag yet.
255                let mut wildcard = None;
256                for tag in tags {
257                    if !seen_tags.insert(tag.clone()) {
258                        panic!("duplicate tag");
259                    }
260
261                    match tag.name {
262                        // <tag>
263                        Some(_) => {
264                            if !wildcards_patterns.is_empty() {
265                                panic!("specific tags may not appear after wildcard tags");
266                            }
267
268                            if wildcard == Some(true) {
269                                panic!("wildcard tags must appear alone");
270                            }
271
272                            if wildcard.is_some() {
273                                // Push the delimiter `|` if it's not the first tag.
274                                arms_code.push(quote!( | ))
275                            }
276                            arms_code.push(make_tag_pattern(&binding, tag));
277
278                            wildcard = Some(false);
279                        },
280
281                        // <_>
282                        None => {
283                            if wildcard.is_some() {
284                                panic!("wildcard tags must appear alone");
285                            }
286                            wildcard = Some(true);
287                            wildcards_patterns.push(make_tag_pattern(&binding, tag));
288                            wildcards_expressions.push(expr.clone());
289                        },
290                    }
291                }
292
293                match wildcard {
294                    None => panic!("[internal macro error] tag arm with no tags"),
295                    Some(false) => arms_code.push(quote!( => #expr,)),
296                    Some(true) => {}, // codegen for wildcards is deferred
297                }
298            },
299        }
300    }
301
302    // Time to process the last, catch-all arm.  We will generate something like
303    //
304    //     last_arm_token => {
305    //         let enable_wildcards = match last_arm_token {
306    //             TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
307    //             TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
308    //             // ...
309    //             _ => true,
310    //         };
311    //
312    //         match (enable_wildcards, last_arm_token) {
313    //             (true, TagToken(name @ Tag { kind: StartTag, .. }))
314    //                 => ...,  // wildcard action for start tags
315    //
316    //             (true, TagToken(name @ Tag { kind: EndTag, .. }))
317    //                 => ...,  // wildcard action for end tags
318    //
319    //             (_, token) => ...  // using the pattern from that last arm
320    //         }
321    //     }
322
323    let MatchTokenArm { binding, lhs, rhs } = last_arm;
324
325    let (last_pat, last_expr) = match (binding, lhs, rhs) {
326        (Some(_), _, _) => panic!("the last arm cannot have an @-binding"),
327        (None, Lhs::Tags(_), _) => panic!("the last arm cannot have tag patterns"),
328        (None, _, Rhs::Else) => panic!("the last arm cannot use 'else'"),
329        (None, Lhs::Pattern(p), Rhs::Expression(e)) => (p, e),
330    };
331
332    quote! {
333        match #to_be_matched {
334            #(
335                #arms_code
336            )*
337            last_arm_token => {
338                let enable_wildcards = match last_arm_token {
339                    #(
340                        #wild_excluded_patterns => false,
341                    )*
342                    _ => true,
343                };
344                match (enable_wildcards, last_arm_token) {
345                    #(
346                        (true, #wildcards_patterns) => #wildcards_expressions,
347                    )*
348                    (_, #last_pat) => #last_expr,
349                }
350            }
351        }
352    }
353}
354
355fn make_tag_pattern(binding: &proc_macro2::TokenStream, tag: Tag) -> proc_macro2::TokenStream {
356    let kind = match tag.kind {
357        TagKind::StartTag => quote!(crate::tokenizer::StartTag),
358        TagKind::EndTag => quote!(crate::tokenizer::EndTag),
359    };
360    let name_field = if let Some(name) = tag.name {
361        let name = name.to_string();
362        quote!(name: local_name!(#name),)
363    } else {
364        quote!()
365    };
366    quote! {
367        crate::tree_builder::types::Token::Tag(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. })
368    }
369}