jiff/shared/util/
utf8.rs

/// Decodes one UTF-8 encoded Unicode scalar value from the front of `bytes`.
///
/// The outcome is one of:
///
/// * `None` when `bytes` is empty, and only then.
/// * `Some(Ok(ch))` when `bytes` begins with a valid encoding of `ch`.
/// * `Some(Err(prefix))` when it does not. Here, `prefix` is the first 1-3
///   bytes of `bytes`: either a single invalid byte, or the leading bytes
///   of an encoding that started out plausibly but ultimately did not turn
///   out to be valid.
///
/// This never panics.
///
/// *WARNING*: This is not designed for performance. If you're looking for
/// a fast UTF-8 decoder, this is not it. If you feel like you need one in
/// this crate, then please file an issue and discuss your use case.
pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, &[u8]>> {
    if bytes.is_empty() {
        return None;
    }
    // A single scalar value occupies at most 4 bytes in UTF-8, so there is
    // no reason to validate anything beyond that.
    let window = &bytes[..bytes.len().min(4)];
    let valid = match core::str::from_utf8(window) {
        Ok(valid) => valid,
        Err(err) => {
            let valid_len = err.valid_up_to();
            if valid_len == 0 {
                // Nothing at the front decodes. Hand back the offending
                // bytes. When no error length is reported, the input ended
                // in the middle of an encoding, and so all of `bytes` is
                // the (incomplete) prefix.
                let invalid_len = match err.error_len() {
                    Some(len) => len,
                    None => bytes.len(),
                };
                return Some(Err(&bytes[..invalid_len]));
            }
            // The window starts with at least one well formed codepoint
            // and goes wrong somewhere after it. Keep only the part that
            // was reported as valid, which is why this cannot fail.
            core::str::from_utf8(&bytes[..valid_len]).unwrap()
        }
    };
    // `valid` is non-empty on every path that reaches this point, so its
    // `chars` iterator yields at least one Unicode scalar value.
    Some(Ok(valid.chars().next().unwrap()))
}