phf_macros/
lib.rs

1//! A set of macros to generate Rust source for PHF data structures at compile time.
2//! See [the `phf` crate's documentation][phf] for details.
3//!
4//! [phf]: https://docs.rs/phf
5
6use phf_generator::HashState;
7use phf_shared::PhfHash;
8use proc_macro::TokenStream;
9use quote::quote;
10use std::collections::HashSet;
11use std::hash::Hasher;
12use syn::parse::{self, Parse, ParseStream};
13use syn::punctuated::Punctuated;
14use syn::{parse_macro_input, BinOp, Error, Expr, ExprLit, Lit, Token, UnOp};
15#[cfg(feature = "uncased")]
16use uncased_::Uncased;
17#[cfg(feature = "unicase")]
18use unicase_::{Ascii, UniCase};
19
/// A key whose value the macro was able to evaluate from its source expression.
///
/// Each variant stores the value of one supported key form so the key can be
/// hashed through `PhfHash` and compared against other keys for duplicates.
#[derive(Hash, PartialEq, Eq, Clone)]
enum ParsedKey {
    /// A string literal.
    Str(String),
    /// A byte string literal (optionally dereferenced) or an array of `u8` literals.
    Binary(Vec<u8>),
    /// A character literal.
    Char(char),
    /// An integer literal with the `i8` suffix.
    I8(i8),
    /// An integer literal with the `i16` suffix.
    I16(i16),
    /// An integer literal with the `i32` suffix, or an unsuffixed integer literal.
    I32(i32),
    /// An integer literal with the `i64` suffix.
    I64(i64),
    /// An integer literal with the `i128` suffix.
    I128(i128),
    /// An integer literal with the `isize` suffix.
    Isize(isize),
    /// An integer literal with the `u8` suffix, or a byte literal.
    U8(u8),
    /// An integer literal with the `u16` suffix.
    U16(u16),
    /// An integer literal with the `u32` suffix.
    U32(u32),
    /// An integer literal with the `u64` suffix.
    U64(u64),
    /// An integer literal with the `u128` suffix.
    U128(u128),
    /// An integer literal with the `usize` suffix.
    Usize(usize),
    /// A boolean literal.
    Bool(bool),
    /// A tuple expression whose elements are all supported keys themselves.
    Tuple(Vec<ParsedKey>),
    /// A `UniCase::unicode("...")` or `UniCase::ascii("...")` call.
    #[cfg(feature = "unicase")]
    UniCase(UniCase<String>),
    /// An `Ascii::new("...")` call.
    #[cfg(feature = "unicase")]
    UniCaseAscii(Ascii<String>),
    /// An `UncasedStr::new("...")` call.
    #[cfg(feature = "uncased")]
    Uncased(Uncased<'static>),
}
46
47impl PhfHash for ParsedKey {
48    fn phf_hash<H>(&self, state: &mut H)
49    where
50        H: Hasher,
51    {
52        match self {
53            ParsedKey::Str(s) => s.phf_hash(state),
54            ParsedKey::Binary(s) => s.phf_hash(state),
55            ParsedKey::Char(s) => s.phf_hash(state),
56            ParsedKey::I8(s) => s.phf_hash(state),
57            ParsedKey::I16(s) => s.phf_hash(state),
58            ParsedKey::I32(s) => s.phf_hash(state),
59            ParsedKey::I64(s) => s.phf_hash(state),
60            ParsedKey::I128(s) => s.phf_hash(state),
61            ParsedKey::Isize(s) => s.phf_hash(state),
62            ParsedKey::U8(s) => s.phf_hash(state),
63            ParsedKey::U16(s) => s.phf_hash(state),
64            ParsedKey::U32(s) => s.phf_hash(state),
65            ParsedKey::U64(s) => s.phf_hash(state),
66            ParsedKey::U128(s) => s.phf_hash(state),
67            ParsedKey::Usize(s) => s.phf_hash(state),
68            ParsedKey::Bool(s) => s.phf_hash(state),
69            ParsedKey::Tuple(elements) => {
70                for element in elements {
71                    element.phf_hash(state);
72                }
73            }
74            #[cfg(feature = "unicase")]
75            ParsedKey::UniCase(s) => s.phf_hash(state),
76            #[cfg(feature = "unicase")]
77            ParsedKey::UniCaseAscii(s) => s.phf_hash(state),
78            #[cfg(feature = "uncased")]
79            ParsedKey::Uncased(s) => s.phf_hash(state),
80        }
81    }
82}
83
84impl ParsedKey {
85    fn from_expr(expr: &Expr) -> Option<ParsedKey> {
86        match expr {
87            Expr::Lit(lit) => match &lit.lit {
88                Lit::Str(s) => Some(ParsedKey::Str(s.value())),
89                Lit::ByteStr(s) => Some(ParsedKey::Binary(s.value())),
90                Lit::Byte(s) => Some(ParsedKey::U8(s.value())),
91                Lit::Char(s) => Some(ParsedKey::Char(s.value())),
92                Lit::Int(s) => match s.suffix() {
93                    // we've lost the sign at this point, so `-128i8` looks like `128i8`,
94                    // which doesn't fit in an `i8`; parse it as a `u8` and cast (to `0i8`),
95                    // which is handled below, by `Unary`
96                    "i8" => Some(ParsedKey::I8(s.base10_parse::<u8>().unwrap() as i8)),
97                    "i16" => Some(ParsedKey::I16(s.base10_parse::<u16>().unwrap() as i16)),
98                    "i32" => Some(ParsedKey::I32(s.base10_parse::<u32>().unwrap() as i32)),
99                    "i64" => Some(ParsedKey::I64(s.base10_parse::<u64>().unwrap() as i64)),
100                    "i128" => Some(ParsedKey::I128(s.base10_parse::<u128>().unwrap() as i128)),
101                    "isize" => Some(ParsedKey::Isize(s.base10_parse::<usize>().unwrap() as isize)),
102                    "u8" => Some(ParsedKey::U8(s.base10_parse::<u8>().unwrap())),
103                    "u16" => Some(ParsedKey::U16(s.base10_parse::<u16>().unwrap())),
104                    "u32" => Some(ParsedKey::U32(s.base10_parse::<u32>().unwrap())),
105                    "u64" => Some(ParsedKey::U64(s.base10_parse::<u64>().unwrap())),
106                    "u128" => Some(ParsedKey::U128(s.base10_parse::<u128>().unwrap())),
107                    "usize" => Some(ParsedKey::Usize(s.base10_parse::<usize>().unwrap())),
108                    // Handle unsuffixed integer literals, default to i32
109                    "" => {
110                        if let Ok(val) = s.base10_parse::<i32>() {
111                            Some(ParsedKey::I32(val))
112                        } else {
113                            None
114                        }
115                    }
116                    _ => None,
117                },
118                Lit::Bool(s) => Some(ParsedKey::Bool(s.value)),
119                _ => None,
120            },
121            Expr::Array(array) => {
122                let mut buf = vec![];
123                for expr in &array.elems {
124                    match expr {
125                        Expr::Lit(lit) => match &lit.lit {
126                            Lit::Int(s) => match s.suffix() {
127                                "u8" | "" => buf.push(s.base10_parse::<u8>().unwrap()),
128                                _ => return None,
129                            },
130                            _ => return None,
131                        },
132                        _ => return None,
133                    }
134                }
135                Some(ParsedKey::Binary(buf))
136            }
137            Expr::Unary(unary) => {
138                // Handle negation for signed integer types
139                // If we received an integer literal (always unsigned) greater than i__::max_value()
140                // then casting it to a signed integer type of the same width will negate it to
141                // the same absolute value so we don't need to negate it here
142                macro_rules! try_negate {
143                    ($val:expr) => {
144                        if $val < 0 {
145                            $val
146                        } else {
147                            -$val
148                        }
149                    };
150                }
151
152                match unary.op {
153                    UnOp::Neg(_) => match ParsedKey::from_expr(&unary.expr)? {
154                        ParsedKey::I8(v) => Some(ParsedKey::I8(try_negate!(v))),
155                        ParsedKey::I16(v) => Some(ParsedKey::I16(try_negate!(v))),
156                        ParsedKey::I32(v) => Some(ParsedKey::I32(try_negate!(v))),
157                        ParsedKey::I64(v) => Some(ParsedKey::I64(try_negate!(v))),
158                        ParsedKey::I128(v) => Some(ParsedKey::I128(try_negate!(v))),
159                        ParsedKey::Isize(v) => Some(ParsedKey::Isize(try_negate!(v))),
160                        _ => None,
161                    },
162                    UnOp::Deref(_) => {
163                        let mut expr = &*unary.expr;
164                        while let Expr::Group(group) = expr {
165                            expr = &*group.expr;
166                        }
167                        match expr {
168                            Expr::Lit(ExprLit {
169                                lit: Lit::ByteStr(s),
170                                ..
171                            }) => Some(ParsedKey::Binary(s.value())),
172                            _ => None,
173                        }
174                    }
175                    _ => None,
176                }
177            }
178            Expr::Tuple(tuple) => {
179                let mut elements = Vec::new();
180                for elem in &tuple.elems {
181                    if let Some(parsed_elem) = ParsedKey::from_expr(elem) {
182                        elements.push(parsed_elem);
183                    } else {
184                        return None;
185                    }
186                }
187                Some(ParsedKey::Tuple(elements))
188            }
189            Expr::Group(group) => ParsedKey::from_expr(&group.expr),
190            Expr::Call(call) if call.args.len() == 1 => {
191                let last;
192                let last_ahead;
193
194                if let Expr::Path(ep) = call.func.as_ref() {
195                    let mut segments = ep.path.segments.iter();
196                    last = segments.next_back()?.ident.to_string();
197                    last_ahead = segments.next_back()?.ident.to_string();
198                } else {
199                    return None;
200                }
201
202                let mut arg = call.args.first().unwrap();
203
204                while let Expr::Group(group) = arg {
205                    arg = &group.expr;
206                }
207
208                let _value = match arg {
209                    Expr::Lit(ExprLit {
210                        attrs: _,
211                        lit: Lit::Str(s),
212                    }) => s.value(),
213                    _ => {
214                        return None;
215                    }
216                };
217
218                match (&*last_ahead, &*last) {
219                    #[cfg(feature = "unicase")]
220                    ("UniCase", "unicode") => Some(ParsedKey::UniCase(UniCase::unicode(_value))),
221                    #[cfg(feature = "unicase")]
222                    ("UniCase", "ascii") => Some(ParsedKey::UniCase(UniCase::ascii(_value))),
223                    #[cfg(feature = "unicase")]
224                    ("Ascii", "new") => Some(ParsedKey::UniCaseAscii(Ascii::new(_value))),
225                    #[cfg(feature = "uncased")]
226                    ("UncasedStr", "new") => Some(ParsedKey::Uncased(Uncased::new(_value))),
227                    _ => None,
228                }
229            }
230            _ => None,
231        }
232    }
233}
234
/// A key as written in the macro input.
///
/// A single written key may be an OR pattern (`a | b`), in which case
/// `parsed` and `expr` hold one element per alternative, in matching order.
#[derive(Clone)]
struct Key {
    /// Evaluated value of each alternative.
    parsed: Vec<ParsedKey>,
    /// Source expression of each alternative, parallel to `parsed`.
    expr: Vec<Expr>,
    /// Outer attributes found directly in front of the key expression.
    attrs: Vec<syn::Attribute>,
}
241
242impl PhfHash for Key {
243    fn phf_hash<H>(&self, state: &mut H)
244    where
245        H: Hasher,
246    {
247        // For OR patterns, we hash the first key (they should all hash to the same value)
248        if let Some(first) = self.parsed.first() {
249            first.phf_hash(state);
250        }
251    }
252}
253
254impl Parse for Key {
255    fn parse(input: ParseStream<'_>) -> parse::Result<Key> {
256        let attrs = input.call(syn::Attribute::parse_outer)?;
257
258        // Parse the expression (which might contain OR patterns)
259        let expr = input.parse::<Expr>()?;
260
261        // Extract all keys from the expression (handling OR patterns)
262        let (exprs, parsed_keys) = extract_keys_from_expr(&expr)?;
263
264        Ok(Key {
265            parsed: parsed_keys,
266            expr: exprs,
267            attrs,
268        })
269    }
270}
271
272/// Extract all keys from an expression, handling OR patterns
273fn extract_keys_from_expr(expr: &Expr) -> parse::Result<(Vec<Expr>, Vec<ParsedKey>)> {
274    match expr {
275        Expr::Binary(binary) => {
276            if let BinOp::BitOr(_) = binary.op {
277                // Handle OR pattern: left | right
278                let (left_exprs, left_keys) = extract_keys_from_expr(&binary.left)?;
279                let (right_exprs, right_keys) = extract_keys_from_expr(&binary.right)?;
280
281                let mut exprs = left_exprs;
282                exprs.extend(right_exprs);
283
284                let mut keys = left_keys;
285                keys.extend(right_keys);
286
287                Ok((exprs, keys))
288            } else {
289                // Single key
290                let parsed = ParsedKey::from_expr(expr)
291                    .ok_or_else(|| Error::new_spanned(expr, "unsupported key expression"))?;
292                Ok((vec![expr.clone()], vec![parsed]))
293            }
294        }
295        _ => {
296            // Single key
297            let parsed = ParsedKey::from_expr(expr)
298                .ok_or_else(|| Error::new_spanned(expr, "unsupported key expression"))?;
299            Ok((vec![expr.clone()], vec![parsed]))
300        }
301    }
302}
303
/// One `key => value` entry of a map, or one key of a set paired with a unit value.
#[derive(Clone)]
struct Entry {
    /// The entry's key.
    key: Key,
    /// The value expression, emitted verbatim into the generated code.
    value: Expr,
    /// Outer attributes attached to the entry; a non-empty list makes the
    /// macros take the conditional code-generation path.
    attrs: Vec<syn::Attribute>,
}
310
311impl PhfHash for Entry {
312    fn phf_hash<H>(&self, state: &mut H)
313    where
314        H: Hasher,
315    {
316        self.key.phf_hash(state)
317    }
318}
319
320impl Parse for Entry {
321    fn parse(input: ParseStream<'_>) -> parse::Result<Entry> {
322        let attrs = input.call(syn::Attribute::parse_outer)?;
323        let key = input.parse()?;
324        input.parse::<Token![=>]>()?;
325        let value = input.parse()?;
326        Ok(Entry { key, value, attrs })
327    }
328}
329
/// Parsed input of the map macros: one entry per key, with OR patterns already
/// expanded into separate entries.
struct Map(Vec<Entry>);
331
332impl Parse for Map {
333    fn parse(input: ParseStream<'_>) -> parse::Result<Map> {
334        let parsed = Punctuated::<Entry, Token![,]>::parse_terminated(input)?;
335        let mut expanded_entries = Vec::new();
336
337        // Expand OR patterns into multiple entries
338        for entry in parsed {
339            for (i, (parsed_key, expr)) in entry
340                .key
341                .parsed
342                .iter()
343                .zip(entry.key.expr.iter())
344                .enumerate()
345            {
346                let expanded_key = Key {
347                    parsed: vec![parsed_key.clone()],
348                    expr: vec![expr.clone()],
349                    attrs: if i == 0 {
350                        entry.key.attrs.clone()
351                    } else {
352                        Vec::new()
353                    },
354                };
355                let expanded_entry = Entry {
356                    key: expanded_key,
357                    value: entry.value.clone(),
358                    attrs: if i == 0 {
359                        entry.attrs.clone()
360                    } else {
361                        Vec::new()
362                    },
363                };
364                expanded_entries.push(expanded_entry);
365            }
366        }
367
368        check_duplicates(&expanded_entries)?;
369        Ok(Map(expanded_entries))
370    }
371}
372
/// Parsed input of the set macros: one entry per key (with a `()` value), with
/// OR patterns already expanded into separate entries.
struct Set(Vec<Entry>);
374
375impl Parse for Set {
376    fn parse(input: ParseStream<'_>) -> parse::Result<Set> {
377        let parsed = Punctuated::<Key, Token![,]>::parse_terminated(input)?;
378        let unit_value: Expr = syn::parse_str("()").expect("Failed to parse unit value");
379
380        let mut expanded_entries = Vec::new();
381
382        // Expand OR patterns into multiple entries
383        for key in parsed {
384            for (i, (parsed_key, expr)) in key.parsed.iter().zip(key.expr.iter()).enumerate() {
385                let expanded_key = Key {
386                    parsed: vec![parsed_key.clone()],
387                    expr: vec![expr.clone()],
388                    attrs: if i == 0 {
389                        key.attrs.clone()
390                    } else {
391                        Vec::new()
392                    },
393                };
394                let expanded_entry = Entry {
395                    key: expanded_key,
396                    value: unit_value.clone(),
397                    attrs: if i == 0 {
398                        key.attrs.clone()
399                    } else {
400                        Vec::new()
401                    },
402                };
403                expanded_entries.push(expanded_entry);
404            }
405        }
406
407        check_duplicates(&expanded_entries)?;
408        Ok(Set(expanded_entries))
409    }
410}
411
412fn check_duplicates(entries: &[Entry]) -> parse::Result<()> {
413    let mut keys = HashSet::new();
414    for entry in entries {
415        if let Some(first) = entry.key.parsed.first() {
416            if !keys.insert(first) {
417                return Err(Error::new_spanned(&entry.key.expr[0], "duplicate key"));
418            }
419        }
420    }
421    Ok(())
422}
423
424fn build_map(entries: &[Entry], state: HashState) -> proc_macro2::TokenStream {
425    let key = state.key;
426    let disps = state.disps.iter().map(|&(d1, d2)| quote!((#d1, #d2)));
427    let entries = state.map.iter().map(|&idx| {
428        let entry = &entries[idx];
429        let key = &entry.key.expr[0]; // Use the first expression
430        let value = &entry.value;
431        // Don't include attributes since we've filtered at macro expansion time
432        quote!((#key, #value))
433    });
434
435    quote! {
436        phf::Map {
437            key: #key,
438            disps: &[#(#disps),*],
439            entries: &[#(#entries),*],
440        }
441    }
442}
443
444fn build_ordered_map(entries: &[Entry], state: HashState) -> proc_macro2::TokenStream {
445    let key = state.key;
446    let disps = state.disps.iter().map(|&(d1, d2)| quote!((#d1, #d2)));
447    let idxs = state.map.iter().map(|idx| quote!(#idx));
448    let entries = entries.iter().map(|entry| {
449        let key = &entry.key.expr[0]; // Use the first expression
450        let value = &entry.value;
451        // Don't include attributes since we've filtered at macro expansion time
452        quote!((#key, #value))
453    });
454
455    quote! {
456        phf::OrderedMap {
457            key: #key,
458            disps: &[#(#disps),*],
459            idxs: &[#(#idxs),*],
460            entries: &[#(#entries),*],
461        }
462    }
463}
464
465#[proc_macro]
466pub fn phf_map(input: TokenStream) -> TokenStream {
467    let map = parse_macro_input!(input as Map);
468
469    // Check if any entries have cfg attributes
470    let has_cfg_attrs = map.0.iter().any(|entry| !entry.attrs.is_empty());
471
472    if !has_cfg_attrs {
473        // No cfg attributes - use the simple approach
474        let state = phf_generator::generate_hash(&map.0);
475        build_map(&map.0, state).into()
476    } else {
477        // Has cfg attributes - need to generate conditional map code
478        build_conditional_phf_map(&map.0).into()
479    }
480}
481
482/// Generate conditional cfg conditions for a given mask and conditional entries
483fn build_cfg_conditions(mask: usize, conditional: &[&Entry]) -> Vec<proc_macro2::TokenStream> {
484    let mut conditions = Vec::new();
485    for (i, &entry) in conditional.iter().enumerate() {
486        let include = (mask & (1 << i)) != 0;
487        if let Some(attr) = entry.attrs.first() {
488            if let Ok(meta) = attr.meta.require_list() {
489                let tokens = &meta.tokens;
490                if include {
491                    conditions.push(quote!(cfg!(#tokens)));
492                } else {
493                    conditions.push(quote!(!cfg!(#tokens)));
494                }
495            }
496        }
497    }
498    conditions
499}
500
501/// Combine multiple conditions into a single condition expression
502fn combine_conditions(conditions: Vec<proc_macro2::TokenStream>) -> proc_macro2::TokenStream {
503    if conditions.is_empty() {
504        quote!(true)
505    } else if conditions.len() == 1 {
506        conditions[0].clone()
507    } else {
508        quote!(#(#conditions)&&*)
509    }
510}
511
512/// Generate nested if-else chain from variants
513fn build_nested_conditional(
514    variants: Vec<(proc_macro2::TokenStream, proc_macro2::TokenStream)>,
515) -> proc_macro2::TokenStream {
516    if variants.is_empty() {
517        return quote!(compile_error!("No valid variants found"));
518    }
519
520    if variants.len() == 1 {
521        return variants[0].1.clone();
522    }
523
524    let mut result = variants.last().unwrap().1.clone();
525    for (condition, tokens) in variants.iter().rev().skip(1) {
526        result = quote! {
527            if #condition {
528                #tokens
529            } else {
530                #result
531            }
532        };
533    }
534    quote! { { #result } }
535}
536
537/// Generic function to build conditional PHF structures
538fn build_conditional_phf<F>(
539    entries: &[Entry],
540    simple_builder: F,
541    empty_structure: proc_macro2::TokenStream,
542) -> proc_macro2::TokenStream
543where
544    F: Fn(&[Entry], HashState) -> proc_macro2::TokenStream,
545{
546    let unconditional: Vec<_> = entries.iter().filter(|e| e.attrs.is_empty()).collect();
547    let conditional: Vec<_> = entries.iter().filter(|e| !e.attrs.is_empty()).collect();
548
549    if conditional.is_empty() {
550        let state = phf_generator::generate_hash(entries);
551        return simple_builder(entries, state);
552    }
553
554    let mut variants = Vec::new();
555    let num_conditional = conditional.len();
556
557    for mask in 0..(1 << num_conditional) {
558        let mut variant_entries = unconditional.clone();
559
560        for (i, &entry) in conditional.iter().enumerate() {
561            if (mask & (1 << i)) != 0 {
562                variant_entries.push(entry);
563            }
564        }
565
566        if variant_entries.is_empty() {
567            continue;
568        }
569
570        let entries_vec: Vec<Entry> = variant_entries.into_iter().cloned().collect();
571        let state = phf_generator::generate_hash(&entries_vec);
572        let structure_tokens = simple_builder(&entries_vec, state);
573
574        let conditions = build_cfg_conditions(mask, &conditional);
575        let condition = combine_conditions(conditions);
576
577        variants.push((condition, structure_tokens));
578    }
579
580    if variants.is_empty() {
581        empty_structure
582    } else {
583        build_nested_conditional(variants)
584    }
585}
586
587fn build_conditional_phf_map(entries: &[Entry]) -> proc_macro2::TokenStream {
588    build_conditional_phf(
589        entries,
590        build_map,
591        quote! {
592            phf::Map {
593                key: 0,
594                disps: &[],
595                entries: &[],
596            }
597        },
598    )
599}
600
601#[proc_macro]
602pub fn phf_set(input: TokenStream) -> TokenStream {
603    let set = parse_macro_input!(input as Set);
604
605    // Check if any entries have cfg attributes
606    let has_cfg_attrs = set.0.iter().any(|entry| !entry.attrs.is_empty());
607
608    if !has_cfg_attrs {
609        // No cfg attributes - use the simple approach
610        let state = phf_generator::generate_hash(&set.0);
611        let map = build_map(&set.0, state);
612        quote!(phf::Set { map: #map }).into()
613    } else {
614        // Has cfg attributes - need to generate conditional set code
615        build_conditional_phf_set(&set.0).into()
616    }
617}
618
619fn build_conditional_phf_set(entries: &[Entry]) -> proc_macro2::TokenStream {
620    // Similar to conditional map but wraps in Set
621    let map_tokens = build_conditional_phf_map(entries);
622    quote!(phf::Set { map: #map_tokens })
623}
624
625#[proc_macro]
626pub fn phf_ordered_map(input: TokenStream) -> TokenStream {
627    let map = parse_macro_input!(input as Map);
628
629    // Check if any entries have cfg attributes
630    let has_cfg_attrs = map.0.iter().any(|entry| !entry.attrs.is_empty());
631
632    if !has_cfg_attrs {
633        // No cfg attributes - use the simple approach
634        let state = phf_generator::generate_hash(&map.0);
635        build_ordered_map(&map.0, state).into()
636    } else {
637        // Has cfg attributes - need to generate conditional ordered map code
638        build_conditional_phf_ordered_map(&map.0).into()
639    }
640}
641
642fn build_conditional_phf_ordered_map(entries: &[Entry]) -> proc_macro2::TokenStream {
643    build_conditional_phf(
644        entries,
645        build_ordered_map,
646        quote! {
647            phf::OrderedMap {
648                key: 0,
649                disps: &[],
650                idxs: &[],
651                entries: &[],
652            }
653        },
654    )
655}
656
657#[proc_macro]
658pub fn phf_ordered_set(input: TokenStream) -> TokenStream {
659    let set = parse_macro_input!(input as Set);
660
661    let has_cfg_attrs = set.0.iter().any(|entry| !entry.attrs.is_empty());
662
663    if !has_cfg_attrs {
664        // No cfg attributes - use the simple approach
665        let state = phf_generator::generate_hash(&set.0);
666        let map = build_ordered_map(&set.0, state);
667        quote!(phf::OrderedSet { map: #map }).into()
668    } else {
669        // Has cfg attributes - need to generate conditional ordered set code
670        build_conditional_phf_ordered_set(&set.0).into()
671    }
672}
673
674fn build_conditional_phf_ordered_set(entries: &[Entry]) -> proc_macro2::TokenStream {
675    // Similar to conditional ordered map but wraps in OrderedSet
676    let map_tokens = build_conditional_phf_ordered_map(entries);
677    quote!(phf::OrderedSet { map: #map_tokens })
678}