strck_ident/
unicode.rs

1//! Checked strings containing Unicode identifiers according to the
2//! [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/).
3//!
4//! # Examples
5//!
6//! ```rust
7//! use strck_ident::{IntoCk, unicode::UnicodeIdent};
8//!
9//! assert!("foo".ck::<UnicodeIdent>().is_ok());
10//! assert!("struct".ck::<UnicodeIdent>().is_ok());
11//! assert!("Москва".ck::<UnicodeIdent>().is_ok());
12//! assert!("東京".ck::<UnicodeIdent>().is_ok());
13//!
14//! assert!("_identifier".ck::<UnicodeIdent>().is_err());
15//! assert!("r#try".ck::<UnicodeIdent>().is_err());
16//! assert!("👍".ck::<UnicodeIdent>().is_err());
17//! ```
18//!
19//! # Aliases
20//!
21//! This module exposes [`Ident`] and [`IdentBuf`], which alias `Ck<UnicodeIdent>`
22//! and `Check<UnicodeIdent>` respectively. These aliases are preferred to keep
23//! type signatures succinct.
24//!
25//! These are also exported under the root, and can be accessed as
26//! `strck_ident::Ident` and `strck_ident::IdentBuf`.
27use core::fmt;
28use strck::{Check, Ck, Invariant};
29
/// Marker type whose [`Invariant`] implementation accepts identifiers as
/// described by
/// [Unicode Standard Annex #31](https://www.unicode.org/reports/tr31/).
///
/// The type holds no data; it is meant to be used as a type parameter, as
/// done by the [`Ident`] and [`IdentBuf`] aliases.
///
/// # Invariants
///
/// A string passes the check when all of the following hold:
///
/// * It contains at least one character.
/// * Its first character is XID_Start.
/// * Every character after the first is XID_Continue.
///
/// Note that `_` is not XID_Start, so a string such as `_identifier` is
/// rejected.
#[derive(Debug, Clone)]
pub struct UnicodeIdent;
40
41/// Borrowed checked string containing a Unicode identifier.
42///
43/// See [`UnicodeIdent`] for more details.
44pub type Ident = Ck<UnicodeIdent>;
45
46/// Owned checked string containing a Unicode identifier.
47///
48/// See [`UnicodeIdent`] for more details.
49pub type IdentBuf<B = String> = Check<UnicodeIdent, B>;
50
/// Reason a string failed the [`UnicodeIdent`] check.
///
/// Only the first violation found while scanning the string from its start
/// is reported.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Error {
    /// The input contained no characters at all.
    Empty,

    /// The leading character (carried in the variant) isn't XID_Start.
    Start(char),

    /// A character after the first (carried in the variant) isn't
    /// XID_Continue.
    Continue(char),
}
63
64impl std::error::Error for Error {}
65
66impl fmt::Display for Error {
67    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
68        match self {
69            Error::Empty => f.pad("empty"),
70            Error::Start(ch) => write!(f, "invalid start '{ch}'"),
71            Error::Continue(ch) => write!(f, "invalid continue '{ch}'"),
72        }
73    }
74}
75
76impl Invariant for UnicodeIdent {
77    type Error = Error;
78
79    fn check(slice: &str) -> Result<(), Self::Error> {
80        let mut chars = slice.chars();
81        let start = chars.next().ok_or(Error::Empty)?;
82
83        if !unicode_ident::is_xid_start(start) {
84            return Err(Error::Start(start));
85        }
86
87        for ch in chars {
88            if !unicode_ident::is_xid_continue(ch) {
89                return Err(Error::Continue(ch));
90            }
91        }
92        Ok(())
93    }
94}
95
#[cfg(test)]
mod tests {
    use super::{Error, UnicodeIdent};
    use strck::IntoCk;

    /// Inputs that must be rejected, each paired with the exact error expected.
    const INVALID: &[(&str, Error)] = &[
        ("", Error::Empty),
        ("12345", Error::Start('1')),
        ("😂_foo", Error::Start('😂')),
        ("foo_😂", Error::Continue('😂')),
        ("hello.there", Error::Continue('.')),
        ("\\as2mkf", Error::Start('\\')),
        ("the book", Error::Continue(' ')),
        (" book", Error::Start(' ')),
        ("\n", Error::Start('\n')),
        ("_underscore", Error::Start('_')),
        ("r#try", Error::Continue('#')),
    ];

    /// Inputs that must be accepted.
    const VALID: &[&str] = &["a2345", "foo", "snake_case", "impl", "岡林"];

    #[test]
    fn test_invalid() {
        for &(input, expected) in INVALID {
            assert_eq!(
                input.ck::<UnicodeIdent>().unwrap_err(),
                expected,
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn test_valid() {
        for &input in VALID {
            assert!(input.ck::<UnicodeIdent>().is_ok(), "input: {input:?}");
        }
    }
}