jiff/util/
utf8.rs

1use core::cmp::Ordering;
2
3/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
4///
5/// If no valid encoding of a codepoint exists at the beginning of the
6/// given byte slice, then a 1-3 byte slice is returned (which is guaranteed
7/// to be a prefix of `bytes`). That byte slice corresponds either to a single
8/// invalid byte, or to a prefix of a valid UTF-8 encoding of a Unicode scalar
9/// value (but which ultimately did not lead to a valid encoding).
10///
11/// This returns `None` if and only if `bytes` is empty.
12///
13/// This never panics.
14///
15/// *WARNING*: This is not designed for performance. If you're looking for a
16/// fast UTF-8 decoder, this is not it. If you feel like you need one in this
17/// crate, then please file an issue and discuss your use case.
18pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, &[u8]>> {
19    crate::shared::util::utf8::decode(bytes)
20}
21
22/// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`.
23#[inline]
24pub(crate) fn cmp_ignore_ascii_case(s1: &str, s2: &str) -> Ordering {
25    cmp_ignore_ascii_case_bytes(s1.as_bytes(), s2.as_bytes())
26}
27
/// Compares two byte slices while ignoring ASCII case.
///
/// This is the ordering analogue of std's `eq_ignore_ascii_case` for
/// `&[u8]`: instead of a `bool`, a full `Ordering` is returned.
///
/// Each byte is ASCII-lowercased before being compared, so non-ASCII bytes
/// are compared as-is. The first position at which the lowercased bytes
/// differ determines the result. If one slice is a (case-insensitive) prefix
/// of the other, then the shorter slice orders first.
#[inline]
pub(crate) fn cmp_ignore_ascii_case_bytes(s1: &[u8], s2: &[u8]) -> Ordering {
    // Historical note: this was once written as `Iterator::cmp` over two
    // lowercasing iterators, and was replaced with a hand-written loop
    // because that did better in microbenchmarks.
    //
    // NOTE(review): the loop below has not been benchmarked against the
    // index-based loop it replaces. Re-measure if this shows up in a profile.
    for (&lhs, &rhs) in s1.iter().zip(s2) {
        let (lhs, rhs) = (lhs.to_ascii_lowercase(), rhs.to_ascii_lowercase());
        if lhs != rhs {
            return lhs.cmp(&rhs);
        }
    }
    // Everything in the common prefix matched, so only the lengths can
    // distinguish the two slices: the one that ran out first is the lesser.
    s1.len().cmp(&s2.len())
}