jiff/util/
parse.rs

1use crate::{
2    error::{err, Error},
3    util::escape::{Byte, Bytes},
4};
5
6/// Parses an `i64` number from the beginning to the end of the given slice of
7/// ASCII digit characters.
8///
9/// If any byte in the given slice is not `[0-9]`, then this returns an error.
10/// Similarly, if the number parsed does not fit into a `i64`, then this
11/// returns an error. Notably, this routine does not permit parsing a negative
12/// integer. (We use `i64` because everything in this crate uses signed
13/// integers, and because a higher level routine might want to parse the sign
14/// and then apply it to the result of this routine.)
15#[cfg_attr(feature = "perf-inline", inline(always))]
16pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> {
17    if bytes.is_empty() {
18        return Err(err!("invalid number, no digits found"));
19    }
20    let mut n: i64 = 0;
21    for &byte in bytes {
22        let digit = match byte.checked_sub(b'0') {
23            None => {
24                return Err(err!(
25                    "invalid digit, expected 0-9 but got {}",
26                    Byte(byte),
27                ));
28            }
29            Some(digit) if digit > 9 => {
30                return Err(err!(
31                    "invalid digit, expected 0-9 but got {}",
32                    Byte(byte),
33                ))
34            }
35            Some(digit) => {
36                debug_assert!((0..=9).contains(&digit));
37                i64::from(digit)
38            }
39        };
40        n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
41            || {
42                err!(
43                    "number '{}' too big to parse into 64-bit integer",
44                    Bytes(bytes),
45                )
46            },
47        )?;
48    }
49    Ok(n)
50}
51
52/// Parsed an optional `u64` that is a prefix of `bytes`.
53///
54/// If no digits (`[0-9]`) were found at the beginning of `bytes`, then `None`
55/// is returned.
56///
57/// Note that this is safe to call on untrusted input. It will not attempt
58/// to consume more input than could possibly fit into a parsed integer.
59///
60/// Since this returns a `u64`, it is possible that an integer that cannot
61/// fit into an `i64` is returned. Callers should handle this. (Indeed,
62/// `DurationUnits` handles this case.)
63///
64/// # Errors
65///
66/// When the parsed integer cannot fit into a `u64`.
67#[cfg_attr(feature = "perf-inline", inline(always))]
68pub(crate) fn u64_prefix(bytes: &[u8]) -> Result<(Option<u64>, &[u8]), Error> {
69    // Discovered via `u64::MAX.to_string().len()`.
70    const MAX_U64_DIGITS: usize = 20;
71
72    let mut digit_count = 0;
73    let mut n: u64 = 0;
74    while digit_count <= MAX_U64_DIGITS {
75        let Some(&byte) = bytes.get(digit_count) else { break };
76        if !byte.is_ascii_digit() {
77            break;
78        }
79        digit_count += 1;
80        // OK because we confirmed `byte` is an ASCII digit.
81        let digit = u64::from(byte - b'0');
82        n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
83            #[inline(never)]
84            || {
85                err!(
86                    "number `{}` too big to parse into 64-bit integer",
87                    Bytes(&bytes[..digit_count]),
88                )
89            },
90        )?;
91    }
92    if digit_count == 0 {
93        return Ok((None, bytes));
94    }
95    Ok((Some(n), &bytes[digit_count..]))
96}
97
98/// Parses a `u32` fractional number from the beginning to the end of the given
99/// slice of ASCII digit characters.
100///
101/// The fraction's maximum precision is always 9 digits. The returned integer
102/// will always be in units of `10^{max_precision}`. For example, this
103/// will parse a fractional amount of seconds with a maximum precision of
104/// nanoseconds.
105///
106/// If any byte in the given slice is not `[0-9]`, then this returns an error.
107/// Notably, this routine does not permit parsing a negative integer.
108pub(crate) fn fraction(bytes: &[u8]) -> Result<u32, Error> {
109    const MAX_PRECISION: usize = 9;
110
111    if bytes.is_empty() {
112        return Err(err!("invalid fraction, no digits found"));
113    } else if bytes.len() > MAX_PRECISION {
114        return Err(err!(
115            "invalid fraction, too many digits \
116             (at most {MAX_PRECISION} are allowed"
117        ));
118    }
119    let mut n: u32 = 0;
120    for &byte in bytes {
121        let digit = match byte.checked_sub(b'0') {
122            None => {
123                return Err(err!(
124                    "invalid fractional digit, expected 0-9 but got {}",
125                    Byte(byte),
126                ));
127            }
128            Some(digit) if digit > 9 => {
129                return Err(err!(
130                    "invalid fractional digit, expected 0-9 but got {}",
131                    Byte(byte),
132                ))
133            }
134            Some(digit) => {
135                debug_assert!((0..=9).contains(&digit));
136                u32::from(digit)
137            }
138        };
139        n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
140            || {
141                err!(
142                    "fractional '{}' too big to parse into 64-bit integer",
143                    Bytes(bytes),
144                )
145            },
146        )?;
147    }
148    for _ in bytes.len()..MAX_PRECISION {
149        n = n.checked_mul(10).ok_or_else(|| {
150            err!(
151                "fractional '{}' too big to parse into 64-bit integer \
152                 (too much precision supported)",
153                Bytes(bytes)
154            )
155        })?;
156    }
157    Ok(n)
158}
159
/// Converts anything that can be viewed as an `OsStr` into a `&str`.
///
/// This behaves like `OsStr::to_str`, except that a failed conversion is
/// reported as an `Error` whose message includes the offending value.
///
/// # Errors
///
/// When the given value is not valid UTF-8.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(valid) => Ok(valid),
        None => {
            Err(err!("environment value {os_str:?} is not valid UTF-8"))
        }
    }
}
174
/// Converts anything that can be viewed as an `OsStr` into a `&[u8]`.
///
/// On Unix targets, this hands back the underlying bytes of the `OsStr`
/// directly and never fails. On all other targets, the value must first be
/// converted to a `&str`, which requires it to be valid UTF-8.
///
/// # Errors
///
/// On non-Unix targets, when the given value is not valid UTF-8.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // The UTF-8 validation done here is forgotten as soon as the bytes
        // are returned, so a caller that ultimately wants a `&str` pays for
        // a second check. If that ever matters, a sibling API returning
        // `&str` could be added. It probably won't, since an `OsStr` in this
        // crate is usually just an environment variable.
        match os_str.to_str() {
            Some(string) => Ok(string.as_bytes()),
            None => {
                Err(err!("environment value {os_str:?} is not valid UTF-8"))
            }
        }
    }
}
205
/// Divides `input` into the pair `(input[..at], input[at..])`.
///
/// Returns `None` when `at` exceeds the length of `input`. Splitting exactly
/// at the end of `input` is permitted and yields an empty second slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // The closure only runs when `at` is in bounds, so `split_at` can't
    // panic here.
    (at <= input.len()).then(|| input.split_at(at))
}
218
/// Builds a closure that reports how far a slice starts from `start`.
///
/// The returned closure accepts an ending point and subtracts the address of
/// `start` from the address of that ending point, giving the ending point's
/// offset relative to `start`. The ending point is expected to begin at or
/// after the beginning of `start`.
///
/// This is a convenience for parsing routines that work by shrinking a
/// slice but want to report offsets.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    // The base address never changes, so it is computed just once.
    let base = start.as_ptr() as usize;
    move |end| {
        let end_addr = end.as_ptr() as usize;
        end_addr - base
    }
}
237
238/// Returns a function that converts two slices to the slice between them.
239///
240/// This takes a starting point as input and returns a function that, when
241/// given an ending point (greater than or equal to the starting point), it
242/// returns a slice beginning at the starting point and ending just at the
243/// ending point.
244///
245/// This is useful as a helper function in parsing routines.
246///
247/// # Panics
248///
249/// This may panic if the ending point is not a suffix slice of `start`.
250pub(crate) fn slicer<'a>(
251    start: &'a [u8],
252) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
253    let mkoffset = offseter(start);
254    move |end| {
255        let offset = mkoffset(end);
256        &start[..offset]
257    }
258}