jiff/util/parse.rs
1use crate::error::util::{ParseFractionError, ParseIntError};
2
3/// Parses an `i64` number from the beginning to the end of the given slice of
4/// ASCII digit characters.
5///
6/// If any byte in the given slice is not `[0-9]`, then this returns an error.
7/// Similarly, if the number parsed does not fit into a `i64`, then this
8/// returns an error. Notably, this routine does not permit parsing a negative
9/// integer. (We use `i64` because everything in this crate uses signed
10/// integers, and because a higher level routine might want to parse the sign
11/// and then apply it to the result of this routine.)
12#[cfg_attr(feature = "perf-inline", inline(always))]
13pub(crate) fn i64(bytes: &[u8]) -> Result<i64, ParseIntError> {
14 if bytes.is_empty() {
15 return Err(ParseIntError::NoDigitsFound);
16 }
17 let mut n: i64 = 0;
18 for &byte in bytes {
19 if !(b'0' <= byte && byte <= b'9') {
20 return Err(ParseIntError::InvalidDigit(byte));
21 }
22 let digit = i64::from(byte - b'0');
23 n = n
24 .checked_mul(10)
25 .and_then(|n| n.checked_add(digit))
26 .ok_or(ParseIntError::TooBig)?;
27 }
28 Ok(n)
29}
30
31/// Parsed an optional `u64` that is a prefix of `bytes`.
32///
33/// If no digits (`[0-9]`) were found at the beginning of `bytes`, then `None`
34/// is returned.
35///
36/// Note that this is safe to call on untrusted input. It will not attempt
37/// to consume more input than could possibly fit into a parsed integer.
38///
39/// Since this returns a `u64`, it is possible that an integer that cannot
40/// fit into an `i64` is returned. Callers should handle this. (Indeed,
41/// `DurationUnits` handles this case.)
42///
43/// # Errors
44///
45/// When the parsed integer cannot fit into a `u64`.
46#[cfg_attr(feature = "perf-inline", inline(always))]
47pub(crate) fn u64_prefix(
48 bytes: &[u8],
49) -> Result<(Option<u64>, &[u8]), ParseIntError> {
50 // Discovered via `u64::MAX.to_string().len()`.
51 const MAX_U64_DIGITS: usize = 20;
52
53 let mut digit_count = 0;
54 let mut n: u64 = 0;
55 while digit_count <= MAX_U64_DIGITS {
56 let Some(&byte) = bytes.get(digit_count) else { break };
57 if !byte.is_ascii_digit() {
58 break;
59 }
60 digit_count += 1;
61 // OK because we confirmed `byte` is an ASCII digit.
62 let digit = u64::from(byte - b'0');
63 n = n
64 .checked_mul(10)
65 .and_then(|n| n.checked_add(digit))
66 .ok_or(ParseIntError::TooBig)?;
67 }
68 if digit_count == 0 {
69 return Ok((None, bytes));
70 }
71 Ok((Some(n), &bytes[digit_count..]))
72}
73
74/// Parses a `u32` fractional number from the beginning to the end of the given
75/// slice of ASCII digit characters.
76///
77/// The fraction's maximum precision is always 9 digits. The returned integer
78/// will always be in units of `10^{max_precision}`. For example, this
79/// will parse a fractional amount of seconds with a maximum precision of
80/// nanoseconds.
81///
82/// If any byte in the given slice is not `[0-9]`, then this returns an error.
83/// Notably, this routine does not permit parsing a negative integer.
84pub(crate) fn fraction(bytes: &[u8]) -> Result<u32, ParseFractionError> {
85 if bytes.is_empty() {
86 return Err(ParseFractionError::NoDigitsFound);
87 } else if bytes.len() > ParseFractionError::MAX_PRECISION {
88 return Err(ParseFractionError::TooManyDigits);
89 }
90 let mut n: u32 = 0;
91 for &byte in bytes {
92 let digit = match byte.checked_sub(b'0') {
93 None => {
94 return Err(ParseFractionError::InvalidDigit(byte));
95 }
96 Some(digit) if digit > 9 => {
97 return Err(ParseFractionError::InvalidDigit(byte));
98 }
99 Some(digit) => {
100 debug_assert!((0..=9).contains(&digit));
101 u32::from(digit)
102 }
103 };
104 n = n
105 .checked_mul(10)
106 .and_then(|n| n.checked_add(digit))
107 .ok_or_else(|| ParseFractionError::TooBig)?;
108 }
109 for _ in bytes.len()..ParseFractionError::MAX_PRECISION {
110 n = n.checked_mul(10).ok_or_else(|| ParseFractionError::TooBig)?;
111 }
112 Ok(n)
113}
114
/// Converts an `OsStr` into a `&str` when `&[u8]` isn't easily available.
///
/// This is effectively `OsStr::to_str`, except that a failed conversion is
/// reported as an `OsStrUtf8Error` built from the original `OsStr` (giving a
/// slightly better error message) instead of as a bare `None`.
///
/// # Errors
///
/// When `OsStr::to_str` returns `None` for the given value.
#[cfg(any(feature = "tz-system", feature = "tzdb-zoneinfo"))]
pub(crate) fn os_str_utf8<'o, O>(
    os_str: &'o O,
) -> Result<&'o str, crate::error::util::OsStrUtf8Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(string) => Ok(string),
        None => Err(crate::error::util::OsStrUtf8Error::from(os_str)),
    }
}
131
/// Converts an `OsStr` into a `&[u8]`.
///
/// The main difference between this and `OsStr::to_str` is that this is a
/// zero-cost conversion to `&[u8]` on Unix platforms. On other platforms
/// (e.g., Windows), this does UTF-8 validation via `os_str_utf8` and returns
/// the bytes of the resulting `&str`.
///
/// # Errors
///
/// On non-Unix platforms, when the given value is not valid UTF-8. On Unix
/// platforms, this never returns an error.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(
    os_str: &'o O,
) -> Result<&'o [u8], crate::error::util::OsStrUtf8Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str: &'o std::ffi::OsStr = os_str.as_ref();

    #[cfg(unix)]
    let bytes = std::os::unix::ffi::OsStrExt::as_bytes(os_str);

    // It is suspect that we're doing UTF-8 validation and then throwing
    // away the fact that we did UTF-8 validation. So this could lead
    // to an extra UTF-8 check if the caller ultimately needs UTF-8. If
    // that's important, we can add a new API that returns a `&str`. But it
    // probably won't matter because an `OsStr` in this crate is usually
    // just an environment variable.
    #[cfg(not(unix))]
    let bytes = os_str_utf8(os_str)?.as_bytes();

    Ok(bytes)
}
161
/// Splits `input` into the pair `(input[..at], input[at..])`.
///
/// This is a non-panicking version of `slice::split_at`: if `at` is greater
/// than the length of `input`, then `None` is returned. Splitting exactly at
/// the end of the input is permitted and yields an empty second slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // Both lookups fail in exactly the same case: `at > input.len()`.
    let head = input.get(..at)?;
    let tail = input.get(at..)?;
    Some((head, tail))
}
174
/// Returns a function that reports how far a slice begins past `start`.
///
/// The address of the beginning of `start` is recorded up front. The
/// returned function, when given an ending point (whose beginning is greater
/// than or equal to the starting point), subtracts the recorded address from
/// the address of the beginning of the ending point and returns the
/// difference, i.e., an offset in bytes relative to the starting point.
///
/// This is useful as a helper function in parsing routines that use slices
/// but want to report offsets.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    let base = start.as_ptr() as usize;
    move |end: &'a [u8]| {
        let position = end.as_ptr() as usize;
        position - base
    }
}
193
194/// Returns a function that converts two slices to the slice between them.
195///
196/// This takes a starting point as input and returns a function that, when
197/// given an ending point (greater than or equal to the starting point), it
198/// returns a slice beginning at the starting point and ending just at the
199/// ending point.
200///
201/// This is useful as a helper function in parsing routines.
202///
203/// # Panics
204///
205/// This may panic if the ending point is not a suffix slice of `start`.
206pub(crate) fn slicer<'a>(
207 start: &'a [u8],
208) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
209 let mkoffset = offseter(start);
210 move |end| {
211 let offset = mkoffset(end);
212 &start[..offset]
213 }
214}