jiff/util/parse.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
use crate::{
error::{err, Error},
util::escape::{Byte, Bytes},
};
/// Parses an `i64` number from the beginning to the end of the given slice of
/// ASCII digit characters.
///
/// If any byte in the given slice is not `[0-9]`, then this returns an error.
/// Similarly, if the number parsed does not fit into a `i64`, then this
/// returns an error. Notably, this routine does not permit parsing a negative
/// integer. (We use `i64` because everything in this crate uses signed
/// integers, and because a higher level routine might want to parse the sign
/// and then apply it to the result of this routine.)
#[inline(always)]
pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> {
if bytes.is_empty() {
return Err(err!("invalid number, no digits found"));
}
let mut n: i64 = 0;
for &byte in bytes {
let digit = match byte.checked_sub(b'0') {
None => {
return Err(err!(
"invalid digit, expected 0-9 but got {}",
Byte(byte),
));
}
Some(digit) if digit > 9 => {
return Err(err!(
"invalid digit, expected 0-9 but got {}",
Byte(byte),
))
}
Some(digit) => {
debug_assert!((0..=9).contains(&digit));
i64::from(digit)
}
};
n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
|| {
err!(
"number '{}' too big to parse into 64-bit integer",
Bytes(bytes),
)
},
)?;
}
Ok(n)
}
/// Parses an `i64` fractional number from the beginning to the end of the
/// given slice of ASCII digit characters.
///
/// The fraction's maximum precision must be provided. The returned integer
/// will always be in units of `10^{max_precision}`. For example, to parse a
/// fractional amount of seconds with a maximum precision of nanoseconds, then
/// use `max_precision=9`.
///
/// If any byte in the given slice is not `[0-9]`, then this returns an error.
/// Similarly, if the fraction parsed does not fit into a `i64`, then this
/// returns an error. Notably, this routine does not permit parsing a negative
/// integer. (We use `i64` because everything in this crate uses signed
/// integers, and because a higher level routine might want to parse the sign
/// and then apply it to the result of this routine.)
pub(crate) fn fraction(
bytes: &[u8],
max_precision: usize,
) -> Result<i64, Error> {
if bytes.is_empty() {
return Err(err!("invalid fraction, no digits found"));
} else if bytes.len() > max_precision {
return Err(err!(
"invalid fraction, too many digits \
(at most {max_precision} are allowed"
));
}
let mut n: i64 = 0;
for &byte in bytes {
let digit = match byte.checked_sub(b'0') {
None => {
return Err(err!(
"invalid fractional digit, expected 0-9 but got {}",
Byte(byte),
));
}
Some(digit) if digit > 9 => {
return Err(err!(
"invalid fractional digit, expected 0-9 but got {}",
Byte(byte),
))
}
Some(digit) => {
debug_assert!((0..=9).contains(&digit));
i64::from(digit)
}
};
n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
|| {
err!(
"fractional '{}' too big to parse into 64-bit integer",
Bytes(bytes),
)
},
)?;
}
for _ in bytes.len()..max_precision {
n = n.checked_mul(10).ok_or_else(|| {
err!(
"fractional '{}' too big to parse into 64-bit integer \
(too much precision supported)",
Bytes(bytes)
)
})?;
}
Ok(n)
}
/// Parses an `OsStr` into a `&str` when `&[u8]` isn't easily available.
///
/// This is effectively `OsStr::to_str`, but with a slightly better error
/// message.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
O: ?Sized + AsRef<std::ffi::OsStr>,
{
let os_str = os_str.as_ref();
os_str
.to_str()
.ok_or_else(|| err!("environment value {os_str:?} is not valid UTF-8"))
}
/// Parses an `OsStr` into a `&str` when `&[u8]` isn't easily available.
///
/// The main difference between this and `OsStr::to_str` is that this will
/// be a zero-cost conversion on Unix platforms to `&[u8]`. On Windows, this
/// will do UTF-8 validation and return an error if it's invalid UTF-8.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
O: ?Sized + AsRef<std::ffi::OsStr>,
{
let os_str = os_str.as_ref();
#[cfg(unix)]
{
use std::os::unix::ffi::OsStrExt;
Ok(os_str.as_bytes())
}
#[cfg(not(unix))]
{
let string = os_str.to_str().ok_or_else(|| {
err!("environment value {os_str:?} is not valid UTF-8")
})?;
// It is suspect that we're doing UTF-8 validation and then throwing
// away the fact that we did UTF-8 validation. So this could lead
// to an extra UTF-8 check if the caller ultimately needs UTF-8. If
// that's important, we can add a new API that returns a `&str`. But it
// probably won't matter because an `OsStr` in this crate is usually
// just an environment variable.
Ok(string.as_bytes())
}
}
/// Splits the given input into two slices at the given position.
///
/// If the position is greater than the length of the slice given, then this
/// returns `None`.
#[inline(always)]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
if at > input.len() {
None
} else {
Some(input.split_at(at))
}
}
/// Returns a function that converts two slices to an offset.
///
/// It takes the starting point as input and returns a function that, when
/// given an ending point (greater than or equal to the starting point), then
/// the corresponding pointers are subtracted and an offset relative to the
/// starting point is returned.
///
/// This is useful as a helper function in parsing routines that use slices
/// but want to report offsets.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn offseter<'a>(
start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
move |end| (end.as_ptr() as usize) - (start.as_ptr() as usize)
}
/// Returns a function that converts two slices to the slice between them.
///
/// This takes a starting point as input and returns a function that, when
/// given an ending point (greater than or equal to the starting point), it
/// returns a slice beginning at the starting point and ending just at the
/// ending point.
///
/// This is useful as a helper function in parsing routines.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn slicer<'a>(
start: &'a [u8],
) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
let mkoffset = offseter(start);
move |end| {
let offset = mkoffset(end);
&start[..offset]
}
}