1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
//! Parsing and inspecting Rust literal tokens.
//!
//! This library offers functionality to parse Rust literals, i.e. tokens in the
//! Rust programming language that represent fixed values. The grammar for
//! those is defined [here][ref].
//!
//! This kind of functionality already exists in the crate `syn`. However, as
//! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was
//! built. This crate also offers a bit more flexibility compared to `syn`
//! (only regarding literals, of course).
//!
//!
//! # Quick start
//!
//! | **`StringLit::try_from(tt)?.value()`** |
//! | - |
//!
//! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be
//! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]).
//! Calling `value()` returns the value that is represented by the literal.
//!
//! **Mini Example**
//!
//! ```ignore
//! use proc_macro::TokenStream;
//!
//! #[proc_macro]
//! pub fn foo(input: TokenStream) -> TokenStream {
//!      let first_token = input.into_iter().next().unwrap(); // Do proper error handling!
//!      let string_value = match litrs::StringLit::try_from(first_token) {
//!          Ok(string_lit) => string_lit.value(),
//!          Err(e) => return e.to_compile_error(),
//!      };
//!
//!      // `string_value` is the string value with all escapes resolved.
//!      todo!()
//! }
//! ```
//!
//! # Overview
//!
//! The main types of this library are [`Literal`], representing any kind of
//! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a
//! specific kind of literal.
//!
//! There are different ways to obtain such a literal type:
//!
//! - **`parse`**: parses a `&str` or `String` and returns `Result<_,
//!     ParseError>`. For example: [`Literal::parse`] and
//!     [`IntegerLit::parse`].
//!
//! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from
//!     the `proc_macro` crate into a `Literal` from this crate.
//!
//! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a
//!     `proc_macro::Literal` into a specific literal type of this crate. If
//!     the input is a literal of a different kind, `Err(InvalidToken)` is
//!     returned.
//!
//! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a
//!     literal type of this crate. An error is returned if the token tree is
//!     not a literal, or if you are trying to turn it into a specific kind of
//!     literal and the token tree is a different kind of literal.
//!
//! All of the `From` and `TryFrom` conversions also work for references to
//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
//! enabled (which it is by default), all these `From` and `TryFrom` impls also
//! exist for the corresponding `proc_macro2` types.
//!
//! **Note**: `true` and `false` are `Ident`s when passed to your proc macro.
//! The `TryFrom<TokenTree>` impls check for those two special idents and
//! return a [`BoolLit`] appropriately. For that reason, there is also no
//! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal`
//! simply cannot represent bool literals.
//!
//!
//! # Examples
//!
//! In a proc-macro:
//!
//! ```ignore
//! use std::convert::TryFrom;
//! use proc_macro::TokenStream;
//! use litrs::FloatLit;
//!
//! #[proc_macro]
//! pub fn foo(input: TokenStream) -> TokenStream {
//!      let mut input = input.into_iter().collect::<Vec<_>>();
//!      if input.len() != 1 {
//!          // Please do proper error handling in your real code!
//!          panic!("expected exactly one token as input");
//!      }
//!      let token = input.remove(0);
//!
//!      match FloatLit::try_from(token) {
//!          Ok(float_lit) => { /* do something */ }
//!          Err(e) => return e.to_compile_error(),
//!      }
//!
//!      // Dummy output
//!      TokenStream::new()
//! }
//! ```
//!
//! Parsing from string:
//!
//! ```
//! use litrs::{FloatLit, Literal};
//!
//! // Parse a specific kind of literal (float in this case):
//! let float_lit = FloatLit::parse("3.14f32");
//! assert!(float_lit.is_ok());
//! assert_eq!(float_lit.unwrap().suffix(), "f32");
//! assert!(FloatLit::parse("'c'").is_err());
//!
//! // Parse any kind of literal. After parsing, you can inspect the literal
//! // and decide what to do in each case.
//! let lit = Literal::parse("0xff80").expect("failed to parse literal");
//! match lit {
//!     Literal::Integer(lit) => { /* ... */ }
//!     Literal::Float(lit) => { /* ... */ }
//!     Literal::Bool(lit) => { /* ... */ }
//!     Literal::Char(lit) => { /* ... */ }
//!     Literal::String(lit) => { /* ... */ }
//!     Literal::Byte(lit) => { /* ... */ }
//!     Literal::ByteString(lit) => { /* ... */ }
//! }
//! ```
//!
//!
//!
//! # Crate features
//!
//! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of
//!   `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`].
//! - `check_suffix`: if enabled, `parse` functions will exactly verify that the
//!   literal suffix is valid. Adds the dependency `unicode-xid`. If disabled,
//!   only an approximate check (only in ASCII range) is done. If you are
//!   writing a proc macro, you don't need to enable this as the suffix is
//!   already checked by the compiler.
//!
//!
//! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals
//!

#![deny(missing_debug_implementations)]

extern crate proc_macro;

#[cfg(test)]
#[macro_use]
mod test_util;

#[cfg(test)]
mod tests;

mod bool;
mod byte;
mod bytestr;
mod char;
mod err;
mod escape;
mod float;
mod impls;
mod integer;
mod parse;
mod string;


use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}};

pub use self::{
    bool::BoolLit,
    byte::ByteLit,
    bytestr::ByteStringLit,
    char::CharLit,
    err::{InvalidToken, ParseError},
    float::{FloatLit, FloatType},
    integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType},
    string::StringLit,
};


// ==============================================================================================
// ===== `Literal` and type defs
// ==============================================================================================

/// A literal. This is the main type of this library.
///
/// This type is generic over the underlying buffer `B`, which can be `&str` or
/// `String`.
///
/// To create this type, you have to either call [`Literal::parse`] with an
/// input string or use the `From<_>` impls of this type. The impls are only
/// available if the corresponding crate features are enabled (they are enabled
/// by default).
///
/// Each variant wraps the dedicated `*Lit` type for that kind of literal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Literal<B: Buffer> {
    /// A boolean literal, i.e. `true` or `false`. Not generic over the buffer.
    Bool(BoolLit),
    /// An integer literal, e.g. `0xff80`.
    Integer(IntegerLit<B>),
    /// A float literal, e.g. `3.14f32`.
    Float(FloatLit<B>),
    /// A character literal.
    Char(CharLit<B>),
    /// A string literal.
    String(StringLit<B>),
    /// A byte literal.
    Byte(ByteLit<B>),
    /// A byte string literal.
    ByteString(ByteStringLit<B>),
}

impl<B: Buffer> Literal<B> {
    /// Parses `input` as a single Rust literal of any kind.
    ///
    /// On success the matching [`Literal`] variant is returned; otherwise a
    /// [`ParseError`] is returned.
    pub fn parse(input: B) -> Result<Self, ParseError> {
        parse::parse(input)
    }

    /// Returns the suffix of this literal, or `""` if there is none.
    ///
    /// The Rust token grammar permits a suffix on every kind of literal. Most
    /// Rust programmers only know the type suffixes of integers and floats,
    /// e.g. `0u32`, and in normal Rust code any other suffix is an error.
    /// Proc macros, however, can receive literals with arbitrary suffixes:
    ///
    /// ```ignore
    /// some_macro!(3.14f33  16px  '🦊'good_boy  "toph"beifong);
    /// ```
    ///
    /// Boolean literals are not actually literals but idents. They cannot
    /// carry a suffix, so this method always returns `""` for them.
    ///
    /// Edge cases to be aware of:
    /// - An integer suffix must not start with `e` or `E`, because that
    ///   conflicts with the exponent grammar of floats. `0e1` is a float;
    ///   `0eel` is also parsed as a float and results in an error.
    /// - Hexadecimal integers consume digits eagerly, so `0x5abcdefgh` has the
    ///   suffix `gh`.
    /// - A suffix may contain and start with `_`, but for integer and float
    ///   literals `_` is eagerly consumed as part of the number, so `1_x` has
    ///   the suffix `x`.
    /// - The input `55f32` is regarded as an integer literal with the suffix
    ///   `f32`.
    ///
    /// # Example
    ///
    /// ```
    /// use litrs::Literal;
    ///
    /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33");
    /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman");
    /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck");
    /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy");
    /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong");
    /// ```
    pub fn suffix(&self) -> &str {
        match self {
            // Booleans are idents and therefore never have a suffix.
            Self::Bool(_) => "",
            // Every other kind forwards to the wrapped literal type.
            Self::Integer(lit) => lit.suffix(),
            Self::Float(lit) => lit.suffix(),
            Self::Char(lit) => lit.suffix(),
            Self::String(lit) => lit.suffix(),
            Self::Byte(lit) => lit.suffix(),
            Self::ByteString(lit) => lit.suffix(),
        }
    }
}

impl Literal<&str> {
    /// Converts this borrowed literal into a `Literal<String>` that owns a
    /// copy of the underlying buffer.
    pub fn into_owned(self) -> Literal<String> {
        // Each variant is converted by the wrapped literal type itself and
        // then re-wrapped in the same variant of the owned enum.
        match self {
            Self::Bool(lit) => Literal::Bool(lit.to_owned()),
            Self::Integer(lit) => Literal::Integer(lit.to_owned()),
            Self::Float(lit) => Literal::Float(lit.to_owned()),
            Self::Char(lit) => Literal::Char(lit.to_owned()),
            Self::String(lit) => Literal::String(lit.into_owned()),
            Self::Byte(lit) => Literal::Byte(lit.to_owned()),
            Self::ByteString(lit) => Literal::ByteString(lit.into_owned()),
        }
    }
}

impl<B: Buffer> fmt::Display for Literal<B> {
    /// Formats the literal by forwarding to the `fmt` of the wrapped literal
    /// type of the active variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Bool(lit) => lit.fmt(f),
            Self::Integer(lit) => lit.fmt(f),
            Self::Float(lit) => lit.fmt(f),
            Self::Char(lit) => lit.fmt(f),
            Self::String(lit) => lit.fmt(f),
            Self::Byte(lit) => lit.fmt(f),
            Self::ByteString(lit) => lit.fmt(f),
        }
    }
}


// ==============================================================================================
// ===== Buffer
// ==============================================================================================

/// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*.
///
/// This trait is an implementation detail of this library, cannot be
/// implemented in other crates and is not subject to semantic versioning.
/// `litrs` only guarantees that this trait is implemented for `String` and
/// `for<'a> &'a str`.
pub trait Buffer: sealed::Sealed + Deref<Target = str> {
    /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;

    /// Consumes the buffer and converts it into its [`Buffer::Cow`] form.
    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow;

    /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;

    /// Consumes the buffer and converts its bytes into the [`Buffer::ByteCow`]
    /// form.
    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow;

    /// Cuts away everything before `range.start` and everything from
    /// `range.end` onwards, keeping only the part inside `range`. The range is
    /// given in byte offsets and has to be in bounds.
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self;
}

// Private module holding the supertrait of `Buffer`. Because this module is
// not public, other crates cannot name `Sealed` and therefore cannot
// implement `Buffer` themselves.
mod sealed {
    /// Marker supertrait; implemented only for the buffer types of this crate.
    pub trait Sealed {}
}

impl sealed::Sealed for &str {}

// Borrowed buffer: every operation stays a borrow of the original string and
// never allocates.
impl<'a> Buffer for &'a str {
    type Cow = Cow<'a, str>;
    type ByteCow = Cow<'a, [u8]>;

    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self {
        // Plain sub-slicing by byte offsets.
        let Range { start, end } = range;
        &self[start..end]
    }

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        Cow::Borrowed(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Borrowed(self.as_bytes())
    }
}

impl sealed::Sealed for String {}

// Owned buffer: operations reuse the existing allocation instead of copying.
impl Buffer for String {
    type Cow = Cow<'static, str>;
    type ByteCow = Cow<'static, [u8]>;

    #[doc(hidden)]
    fn cut(mut self, range: Range<usize>) -> Self {
        let Range { start, end } = range;

        // Not the most efficient approach, but it works: chop off the tail
        // first, then remove the head. Dropping the `Drain` removes the
        // drained range even though the iterator is never consumed.
        self.truncate(end);
        drop(self.drain(..start));
        self
    }

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        Cow::Owned(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Owned(self.into_bytes())
    }
}