unicase/
lib.rs

1#![cfg_attr(test, deny(missing_docs))]
2#![cfg_attr(test, deny(warnings))]
3#![cfg_attr(feature = "nightly", feature(test))]
4#![no_std]
5
6//! # UniCase
7//!
8//! UniCase provides a way of specifying strings that are case-insensitive.
9//!
10//! UniCase supports full [Unicode case
11//! folding](https://www.w3.org/International/wiki/Case_folding). It can also
12//! utilize faster ASCII case comparisons, if both strings are ASCII.
13//!
14//! Using the `UniCase::new()` constructor will check the string to see if it
15//! is all ASCII. When a `UniCase` is compared against another, if both are
16//! ASCII, it will use the faster comparison.
17//!
//! This crate also provides the `Ascii` type, which always uses ASCII case
//! comparisons; use it when the text is already known to be ASCII.
20//!
21//! ## Example
22//!
23//! ```rust
24//! use unicase::UniCase;
25//!
26//! let a = UniCase::new("Maße");
27//! let b = UniCase::new("MASSE");
28//! let c = UniCase::new("mase");
29//!
30//! assert_eq!(a, b);
31//! assert!(b != c);
32//! ```
33//!
34//! ## Ascii
35//!
36//! ```rust
37//! use unicase::Ascii;
38//!
39//! let a = Ascii::new("foobar");
40//! let b = Ascii::new("FoObAr");
41//!
42//! assert_eq!(a, b);
43//! ```
44
45#[cfg(test)]
46extern crate std;
47#[cfg(feature = "nightly")]
48extern crate test;
49
50extern crate alloc;
51use alloc::string::String;
52
53use alloc::borrow::Cow;
54use core::cmp::Ordering;
55use core::fmt;
56use core::hash::{Hash, Hasher};
57use core::ops::{Deref, DerefMut};
58use core::str::FromStr;
59
60use self::unicode::Unicode;
61
62mod ascii;
63mod unicode;
64
65/// Case Insensitive wrapper of strings.
66#[derive(Clone, Copy)]
67pub struct UniCase<S>(Encoding<S>);
68
/// A wrapper around a string-like value that is compared while ignoring
/// ASCII case only.
#[derive(Copy, Clone, Default, Debug)]
pub struct Ascii<S>(S);
72
73/// Compare two string-like types for case-less equality, using unicode folding.
74///
75/// Equivalent to `UniCase::new(left) == UniCase::new(right)`.
76///
77/// Note: This will perform a scan for ASCII characters before doing the
78/// the comparison. See `UniCase` for more information.
79#[inline]
80pub fn eq<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
81    UniCase::new(left) == UniCase::new(right)
82}
83
84/// Compare two string-like types for case-less equality, ignoring ASCII case.
85///
86/// Equivalent to `Ascii::new(left) == Ascii::new(right)`.
87#[inline]
88pub fn eq_ascii<S: AsRef<str> + ?Sized>(left: &S, right: &S) -> bool {
89    Ascii(left) == Ascii(right)
90}
91
92#[derive(Clone, Copy, Debug)]
93enum Encoding<S> {
94    Ascii(Ascii<S>),
95    Unicode(Unicode<S>),
96}
97
// Borrows the wrapped value out of an `Encoding`, whichever variant it is.
//
// `inner!(e)` yields a shared reference to the wrapped value;
// `inner!(mut e)` yields a mutable reference to it.
macro_rules! inner {
    (mut $e:expr) => {{
        match &mut $e {
            Encoding::Ascii(wrapped) => &mut wrapped.0,
            Encoding::Unicode(wrapped) => &mut wrapped.0,
        }
    }};
    ($e:expr) => {{
        match &$e {
            Encoding::Ascii(wrapped) => &wrapped.0,
            Encoding::Unicode(wrapped) => &wrapped.0,
        }
    }};
}
112
113impl<S: AsRef<str> + Default> Default for UniCase<S> {
114    fn default() -> Self {
115        Self::new(Default::default())
116    }
117}
118
119impl<S: AsRef<str>> UniCase<S> {
120    /// Creates a new `UniCase`.
121    ///
122    /// Note: This scans the text to determine if it is all ASCII or not.
123    pub fn new(s: S) -> UniCase<S> {
124        if s.as_ref().is_ascii() {
125            UniCase(Encoding::Ascii(Ascii(s)))
126        } else {
127            UniCase(Encoding::Unicode(Unicode(s)))
128        }
129    }
130
131    /// Returns a copy of this string where each character is mapped to its
132    /// Unicode CaseFolding equivalent.
133    ///
134    /// # Note
135    ///
136    /// Unicode Case Folding is meant for string storage and matching, not for
137    /// display.
138    pub fn to_folded_case(&self) -> String {
139        match self.0 {
140            Encoding::Ascii(ref s) => s.0.as_ref().to_ascii_lowercase(),
141            Encoding::Unicode(ref s) => s.to_folded_case(),
142        }
143    }
144}
145
146impl<S> UniCase<S> {
147    /// Creates a new `UniCase`, skipping the ASCII check.
148    pub const fn unicode(s: S) -> UniCase<S> {
149        UniCase(Encoding::Unicode(Unicode(s)))
150    }
151
152    /// Creates a new `UniCase` which performs only ASCII case folding.
153    pub const fn ascii(s: S) -> UniCase<S> {
154        UniCase(Encoding::Ascii(Ascii(s)))
155    }
156
157    /// Return `true` if this instance will only perform ASCII case folding.
158    pub fn is_ascii(&self) -> bool {
159        match self.0 {
160            Encoding::Ascii(_) => true,
161            Encoding::Unicode(_) => false,
162        }
163    }
164
165    /// Unwraps the inner value held by this `UniCase`.
166    #[inline]
167    pub fn into_inner(self) -> S {
168        match self.0 {
169            Encoding::Ascii(s) => s.0,
170            Encoding::Unicode(s) => s.0,
171        }
172    }
173}
174
175impl<S> Deref for UniCase<S> {
176    type Target = S;
177    #[inline]
178    fn deref<'a>(&'a self) -> &'a S {
179        inner!(self.0)
180    }
181}
182
183impl<S> DerefMut for UniCase<S> {
184    #[inline]
185    fn deref_mut<'a>(&'a mut self) -> &'a mut S {
186        inner!(mut self.0)
187    }
188}
189
190impl<S: AsRef<str>> AsRef<str> for UniCase<S> {
191    #[inline]
192    fn as_ref(&self) -> &str {
193        inner!(self.0).as_ref()
194    }
195}
196
197impl<S: fmt::Debug> fmt::Debug for UniCase<S> {
198    #[inline]
199    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
200        fmt::Debug::fmt(inner!(self.0), fmt)
201    }
202}
203
204impl<S: fmt::Display> fmt::Display for UniCase<S> {
205    #[inline]
206    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
207        fmt::Display::fmt(inner!(self.0), fmt)
208    }
209}
210
211impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<UniCase<S2>> for UniCase<S1> {
212    #[inline]
213    fn eq(&self, other: &UniCase<S2>) -> bool {
214        match (&self.0, &other.0) {
215            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x == y,
216            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x == y,
217            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => &Unicode(x.as_ref()) == y,
218            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => x == &Unicode(y.as_ref()),
219        }
220    }
221}
222
223impl<S: AsRef<str>> Eq for UniCase<S> {}
224
225impl<S: AsRef<str>> Hash for UniCase<S> {
226    #[inline]
227    fn hash<H: Hasher>(&self, hasher: &mut H) {
228        match self.0 {
229            Encoding::Ascii(ref s) => s.hash(hasher),
230            Encoding::Unicode(ref s) => s.hash(hasher),
231        }
232    }
233}
234
235impl<S> From<Ascii<S>> for UniCase<S> {
236    fn from(ascii: Ascii<S>) -> Self {
237        UniCase(Encoding::Ascii(ascii))
238    }
239}
240
// Generates `impl From<$from> for UniCase<$to>`.
//
// `from_impl!(A => B; method)` converts the source with `source.method()`;
// `from_impl!(A => B)` is shorthand for converting with `into()`.
//
// The converted value is wrapped with `UniCase::new`, so ASCII input is
// tagged as ASCII just like it is by the generic `From<S> for UniCase<S>`
// impl. (Wrapping with `UniCase::unicode` here would make
// `UniCase::<String>::from("foobar").is_ascii()` report `false`.)
macro_rules! from_impl {
    ($from:ty => $to:ty; $by:ident) => (
        impl<'a> From<$from> for UniCase<$to> {
            fn from(s: $from) -> Self {
                // The annotation pins the conversion target, so `into()` and
                // `as_ref()` resolve to the intended impl.
                let converted: $to = s.$by();
                UniCase::new(converted)
            }
        }
    );
    ($from:ty => $to:ty) => ( from_impl!($from => $to; into); )
}
251
// Generates `impl Into<$to> for UniCase<$to>`, which unwraps the `UniCase`
// and returns the value it was holding.
macro_rules! into_impl {
    ($to:ty) => {
        impl<'a> Into<$to> for UniCase<$to> {
            fn into(self) -> $to {
                UniCase::into_inner(self)
            }
        }
    };
}
261
262impl<S: AsRef<str>> From<S> for UniCase<S> {
263    fn from(s: S) -> Self {
264        UniCase::new(s)
265    }
266}
267
268from_impl!(&'a str => Cow<'a, str>);
269from_impl!(String => Cow<'a, str>);
270from_impl!(&'a str => String);
271from_impl!(Cow<'a, str> => String; into_owned);
272from_impl!(&'a String => &'a str; as_ref);
273
274into_impl!(&'a str);
275into_impl!(String);
276into_impl!(Cow<'a, str>);
277
278impl<T: AsRef<str>> PartialOrd for UniCase<T> {
279    #[inline]
280    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
281        Some(self.cmp(other))
282    }
283}
284
285impl<T: AsRef<str>> Ord for UniCase<T> {
286    #[inline]
287    fn cmp(&self, other: &Self) -> Ordering {
288        match (&self.0, &other.0) {
289            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref y)) => x.cmp(y),
290            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref y)) => x.cmp(y),
291            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref y)) => {
292                Unicode(x.as_ref()).cmp(&Unicode(y.0.as_ref()))
293            }
294            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref y)) => {
295                Unicode(x.0.as_ref()).cmp(&Unicode(y.as_ref()))
296            }
297        }
298    }
299}
300
301impl<S: FromStr + AsRef<str>> FromStr for UniCase<S> {
302    type Err = <S as FromStr>::Err;
303    fn from_str(s: &str) -> Result<UniCase<S>, Self::Err> {
304        s.parse().map(UniCase::new)
305    }
306}
307
308#[cfg(test)]
309mod tests {
310    use super::UniCase;
311    use alloc::borrow::Cow;
312    use std::borrow::ToOwned;
313    use std::collections::hash_map::DefaultHasher;
314    use std::hash::{Hash, Hasher};
315    use std::string::String;
316
317    fn hash<T: Hash>(t: &T) -> u64 {
318        let mut s = DefaultHasher::new();
319        t.hash(&mut s);
320        s.finish()
321    }
322
323    #[test]
324    fn test_copy_for_refs() {
325        fn foo<T>(_: UniCase<T>) {}
326
327        let a = UniCase::new("foobar");
328        foo(a);
329        foo(a);
330    }
331
332    #[test]
333    fn test_eq_ascii() {
334        let a = UniCase::new("foobar");
335        let b = UniCase::new("FOOBAR");
336        let c = UniCase::ascii("FoObAr");
337        let d = UniCase::<&str>::from("foobar");
338
339        assert_eq!(a, b);
340        assert_eq!(a, c);
341        assert_eq!(a, d);
342
343        assert_eq!(b, a);
344        assert_eq!(b, c);
345        assert_eq!(b, d);
346
347        assert_eq!(c, a);
348        assert_eq!(c, b);
349        assert_eq!(c, d);
350
351        assert_eq!(d, a);
352        assert_eq!(d, b);
353        assert_eq!(d, c);
354
355        assert_eq!(hash(&a), hash(&b));
356        assert_eq!(hash(&a), hash(&c));
357        assert_eq!(hash(&a), hash(&d));
358
359        assert!(a.is_ascii());
360        assert!(b.is_ascii());
361        assert!(c.is_ascii());
362        assert!(d.is_ascii());
363    }
364
365    #[test]
366    fn test_str_ascii() {
367        // https://github.com/seanmonstar/unicase/issues/76
368
369        let a = UniCase::new("foobar");
370        let b = UniCase::<&str>::from("foobar");
371        let c = UniCase::<String>::from(String::from("foobar"));
372        let d = UniCase::<Cow<str>>::from(Cow::from("foobar"));
373
374        assert!(a.is_ascii());
375        assert!(b.is_ascii());
376        assert!(c.is_ascii());
377        assert!(d.is_ascii());
378    }
379
380    #[test]
381    fn test_eq_unicode() {
382        let a = UniCase::new("στιγμας");
383        let b = UniCase::new("στιγμασ");
384        assert_eq!(a, b);
385        assert_eq!(b, a);
386        assert_eq!(hash(&a), hash(&b));
387    }
388
389    #[test]
390    fn test_eq_unicode_left_is_substring() {
391        // https://github.com/seanmonstar/unicase/issues/38
392        let a = UniCase::unicode("foo");
393        let b = UniCase::unicode("foobar");
394
395        assert!(a != b);
396        assert!(b != a);
397    }
398
399    #[cfg(feature = "nightly")]
400    #[bench]
401    fn bench_unicase_ascii(b: &mut ::test::Bencher) {
402        b.bytes = b"foobar".len() as u64;
403        let x = UniCase::new("foobar");
404        let y = UniCase::new("FOOBAR");
405        b.iter(|| assert_eq!(x, y));
406    }
407
408    #[cfg(feature = "nightly")]
409    static SUBJECT: &'static [u8] = b"ffoo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz foo bar baz oo bar baz quux herp derp";
410
411    #[cfg(feature = "nightly")]
412    #[inline(never)]
413    fn is_ascii(bytes: &[u8]) -> bool {
414        #[allow(unused, deprecated)]
415        use std::ascii::AsciiExt;
416        bytes.is_ascii()
417    }
418
419    #[cfg(feature = "nightly")]
420    #[bench]
421    fn bench_is_ascii(b: &mut ::test::Bencher) {
422        b.iter(|| assert!(is_ascii(SUBJECT)));
423    }
424
425    #[cfg(feature = "nightly")]
426    #[bench]
427    fn bench_is_utf8(b: &mut ::test::Bencher) {
428        b.iter(|| assert!(::std::str::from_utf8(SUBJECT).is_ok()));
429    }
430
431    #[test]
432    fn test_case_cmp() {
433        assert!(UniCase::new("a") < UniCase::new("B"));
434
435        assert!(UniCase::new("A") < UniCase::new("b"));
436        assert!(UniCase::new("aa") > UniCase::new("a"));
437
438        assert!(UniCase::new("a") < UniCase::new("aa"));
439        assert!(UniCase::new("a") < UniCase::new("AA"));
440    }
441
442    #[test]
443    fn test_from_impls() {
444        let view: &'static str = "foobar";
445        let _: UniCase<&'static str> = view.into();
446        let _: UniCase<&str> = view.into();
447        let _: UniCase<String> = view.into();
448
449        let owned: String = view.to_owned();
450        let _: UniCase<&str> = (&owned).into();
451        let _: UniCase<String> = owned.into();
452    }
453
454    #[test]
455    fn test_into_impls() {
456        let view: UniCase<&'static str> = UniCase::new("foobar");
457        let _: &'static str = view.into();
458        let _: &str = view.into();
459
460        let owned: UniCase<String> = "foobar".into();
461        let _: String = owned.clone().into();
462        let _: &str = owned.as_ref();
463    }
464
465    #[test]
466    fn test_unicase_unicode_const() {
467        const _UNICASE: UniCase<&'static str> = UniCase::unicode("");
468    }
469}