jiff/util/
parse.rs

1use crate::error::util::{ParseFractionError, ParseIntError};
2
3/// Parses an `i64` number from the beginning to the end of the given slice of
4/// ASCII digit characters.
5///
6/// If any byte in the given slice is not `[0-9]`, then this returns an error.
7/// Similarly, if the number parsed does not fit into a `i64`, then this
8/// returns an error. Notably, this routine does not permit parsing a negative
9/// integer. (We use `i64` because everything in this crate uses signed
10/// integers, and because a higher level routine might want to parse the sign
11/// and then apply it to the result of this routine.)
12#[cfg_attr(feature = "perf-inline", inline(always))]
13pub(crate) fn i64(bytes: &[u8]) -> Result<i64, ParseIntError> {
14    if bytes.is_empty() {
15        return Err(ParseIntError::NoDigitsFound);
16    }
17    let mut n: i64 = 0;
18    for &byte in bytes {
19        if !(b'0' <= byte && byte <= b'9') {
20            return Err(ParseIntError::InvalidDigit(byte));
21        }
22        let digit = i64::from(byte - b'0');
23        n = n
24            .checked_mul(10)
25            .and_then(|n| n.checked_add(digit))
26            .ok_or(ParseIntError::TooBig)?;
27    }
28    Ok(n)
29}
30
31/// Parsed an optional `u64` that is a prefix of `bytes`.
32///
33/// If no digits (`[0-9]`) were found at the beginning of `bytes`, then `None`
34/// is returned.
35///
36/// Note that this is safe to call on untrusted input. It will not attempt
37/// to consume more input than could possibly fit into a parsed integer.
38///
39/// Since this returns a `u64`, it is possible that an integer that cannot
40/// fit into an `i64` is returned. Callers should handle this. (Indeed,
41/// `DurationUnits` handles this case.)
42///
43/// # Errors
44///
45/// When the parsed integer cannot fit into a `u64`.
46#[cfg_attr(feature = "perf-inline", inline(always))]
47pub(crate) fn u64_prefix(
48    bytes: &[u8],
49) -> Result<(Option<u64>, &[u8]), ParseIntError> {
50    // Discovered via `u64::MAX.to_string().len()`.
51    const MAX_U64_DIGITS: usize = 20;
52
53    let mut digit_count = 0;
54    let mut n: u64 = 0;
55    while digit_count <= MAX_U64_DIGITS {
56        let Some(&byte) = bytes.get(digit_count) else { break };
57        if !byte.is_ascii_digit() {
58            break;
59        }
60        digit_count += 1;
61        // OK because we confirmed `byte` is an ASCII digit.
62        let digit = u64::from(byte - b'0');
63        n = n
64            .checked_mul(10)
65            .and_then(|n| n.checked_add(digit))
66            .ok_or(ParseIntError::TooBig)?;
67    }
68    if digit_count == 0 {
69        return Ok((None, bytes));
70    }
71    Ok((Some(n), &bytes[digit_count..]))
72}
73
74/// Parses a `u32` fractional number from the beginning to the end of the given
75/// slice of ASCII digit characters.
76///
77/// The fraction's maximum precision is always 9 digits. The returned integer
78/// will always be in units of `10^{max_precision}`. For example, this
79/// will parse a fractional amount of seconds with a maximum precision of
80/// nanoseconds.
81///
82/// If any byte in the given slice is not `[0-9]`, then this returns an error.
83/// Notably, this routine does not permit parsing a negative integer.
84pub(crate) fn fraction(bytes: &[u8]) -> Result<u32, ParseFractionError> {
85    if bytes.is_empty() {
86        return Err(ParseFractionError::NoDigitsFound);
87    } else if bytes.len() > ParseFractionError::MAX_PRECISION {
88        return Err(ParseFractionError::TooManyDigits);
89    }
90    let mut n: u32 = 0;
91    for &byte in bytes {
92        let digit = match byte.checked_sub(b'0') {
93            None => {
94                return Err(ParseFractionError::InvalidDigit(byte));
95            }
96            Some(digit) if digit > 9 => {
97                return Err(ParseFractionError::InvalidDigit(byte));
98            }
99            Some(digit) => {
100                debug_assert!((0..=9).contains(&digit));
101                u32::from(digit)
102            }
103        };
104        n = n
105            .checked_mul(10)
106            .and_then(|n| n.checked_add(digit))
107            .ok_or_else(|| ParseFractionError::TooBig)?;
108    }
109    for _ in bytes.len()..ParseFractionError::MAX_PRECISION {
110        n = n.checked_mul(10).ok_or_else(|| ParseFractionError::TooBig)?;
111    }
112    Ok(n)
113}
114
/// Borrows the given `OsStr`-like value as a `&str`.
///
/// This behaves like `OsStr::to_str`, except that a failed conversion is
/// reported as an `OsStrUtf8Error` built from the offending `OsStr`
/// instead of a bare `None`, which makes for a better error message.
///
/// # Errors
///
/// Returns an error when `OsStr::to_str` yields `None` for the input.
#[cfg(any(feature = "tz-system", feature = "tzdb-zoneinfo"))]
pub(crate) fn os_str_utf8<'o, O>(
    os_str: &'o O,
) -> Result<&'o str, crate::error::util::OsStrUtf8Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str: &'o std::ffi::OsStr = os_str.as_ref();
    match os_str.to_str() {
        Some(valid) => Ok(valid),
        None => Err(crate::error::util::OsStrUtf8Error::from(os_str)),
    }
}
131
/// Borrows the given `OsStr`-like value as a byte slice.
///
/// Unlike `OsStr::to_str`, this does no validation on Unix targets: the
/// underlying bytes are handed back as-is via `OsStrExt::as_bytes`. On
/// every other target, the value is first converted with `os_str_utf8`
/// and the bytes of the resulting `&str` are returned.
///
/// # Errors
///
/// On non-Unix targets, returns whatever error `os_str_utf8` reports.
/// On Unix targets, this always returns `Ok`.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(
    os_str: &'o O,
) -> Result<&'o [u8], crate::error::util::OsStrUtf8Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str: &'o std::ffi::OsStr = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // The UTF-8 validation done here is discarded by returning raw
        // bytes, so a caller that goes on to need a `&str` will validate
        // a second time. Should that ever matter, a `&str`-returning API
        // can be added. It is unlikely to, since an `OsStr` in this crate
        // is usually just an environment variable.
        os_str_utf8(os_str).map(str::as_bytes)
    }
}
161
/// Divides `input` into the pair `(input[..at], input[at..])`.
///
/// This is a non-panicking counterpart to `slice::split_at`: when `at`
/// exceeds `input.len()`, `None` is returned instead. An `at` equal to
/// the length is valid and yields an empty second slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    let in_bounds = at <= input.len();
    in_bounds.then(|| input.split_at(at))
}
174
/// Builds a closure that reports how far a slice begins past `start`.
///
/// The returned closure takes an ending point and subtracts the address
/// of `start` from the address of that ending point, yielding an offset
/// relative to `start`. The ending point is expected to begin at or after
/// the beginning of `start`.
///
/// Parsing routines that work by repeatedly narrowing a slice can use
/// this to turn "what is left" back into a position for reporting.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    // Only the starting address is needed by the closure, so compute it
    // once up front.
    let base = start.as_ptr() as usize;
    move |end| {
        let end_addr = end.as_ptr() as usize;
        end_addr - base
    }
}
193
194/// Returns a function that converts two slices to the slice between them.
195///
196/// This takes a starting point as input and returns a function that, when
197/// given an ending point (greater than or equal to the starting point), it
198/// returns a slice beginning at the starting point and ending just at the
199/// ending point.
200///
201/// This is useful as a helper function in parsing routines.
202///
203/// # Panics
204///
205/// This may panic if the ending point is not a suffix slice of `start`.
206pub(crate) fn slicer<'a>(
207    start: &'a [u8],
208) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
209    let mkoffset = offseter(start);
210    move |end| {
211        let offset = mkoffset(end);
212        &start[..offset]
213    }
214}