jiff/util/
parse.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
use crate::{
    error::{err, Error},
    util::escape::{Byte, Bytes},
};

/// Converts a slice consisting solely of ASCII digits into an `i64`.
///
/// The entire slice is consumed: every byte must be in `[0-9]`, otherwise an
/// error is returned. An empty slice is an error, as is a number that does
/// not fit into an `i64`. No sign is accepted by this routine, so the value
/// returned is never negative. (`i64` is used because this crate works with
/// signed integers throughout, and because a higher level routine may want
/// to parse a sign itself and then apply it to the value returned here.)
#[inline(always)]
pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> {
    if bytes.is_empty() {
        return Err(err!("invalid number, no digits found"));
    }
    let mut total: i64 = 0;
    for &byte in bytes {
        // Anything outside of `b'0'..=b'9'` is rejected, whether it sorts
        // below `b'0'` or above `b'9'`.
        if !byte.is_ascii_digit() {
            return Err(err!(
                "invalid digit, expected 0-9 but got {}",
                Byte(byte),
            ));
        }
        let digit = i64::from(byte - b'0');
        // Shift the running total one decimal place and append the new
        // digit, bailing out as soon as either step would overflow.
        let next = total
            .checked_mul(10)
            .and_then(|shifted| shifted.checked_add(digit));
        total = match next {
            Some(value) => value,
            None => {
                return Err(err!(
                    "number '{}' too big to parse into 64-bit integer",
                    Bytes(bytes),
                ));
            }
        };
    }
    Ok(total)
}

/// Parses an `i64` fractional number from the beginning to the end of the
/// given slice of ASCII digit characters.
///
/// The fraction's maximum precision must be provided. The returned integer
/// will always be in units of `10^{max_precision}`. For example, to parse a
/// fractional amount of seconds with a maximum precision of nanoseconds, then
/// use `max_precision=9`. With that precision, the digits `5` are parsed as
/// `500_000_000`, since missing trailing digits are treated as zeros.
///
/// Notably, this routine does not permit parsing a negative integer. (We use
/// `i64` because everything in this crate uses signed integers, and because a
/// higher level routine might want to parse the sign and then apply it to the
/// result of this routine.)
///
/// # Errors
///
/// This returns an error when:
///
/// * The given slice is empty.
/// * The given slice has more than `max_precision` digits.
/// * Any byte in the given slice is not `[0-9]`.
/// * The fraction, once scaled to `max_precision` digits, does not fit into
///   an `i64`.
pub(crate) fn fraction(
    bytes: &[u8],
    max_precision: usize,
) -> Result<i64, Error> {
    if bytes.is_empty() {
        return Err(err!("invalid fraction, no digits found"));
    } else if bytes.len() > max_precision {
        return Err(err!(
            "invalid fraction, too many digits \
             (at most {max_precision} are allowed)"
        ));
    }
    let mut n: i64 = 0;
    for &byte in bytes {
        let digit = match byte.checked_sub(b'0') {
            // The byte sorts before `b'0'`, so it cannot be a digit.
            None => {
                return Err(err!(
                    "invalid fractional digit, expected 0-9 but got {}",
                    Byte(byte),
                ));
            }
            // The byte sorts after `b'9'`, so it cannot be a digit either.
            Some(digit) if digit > 9 => {
                return Err(err!(
                    "invalid fractional digit, expected 0-9 but got {}",
                    Byte(byte),
                ))
            }
            Some(digit) => {
                debug_assert!((0..=9).contains(&digit));
                i64::from(digit)
            }
        };
        n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
            || {
                err!(
                    "fractional '{}' too big to parse into 64-bit integer",
                    Bytes(bytes),
                )
            },
        )?;
    }
    // Pad with implicit trailing zeros so that the result is always expressed
    // in units of `10^{max_precision}`, regardless of how many digits were
    // actually given.
    for _ in bytes.len()..max_precision {
        n = n.checked_mul(10).ok_or_else(|| {
            err!(
                "fractional '{}' too big to parse into 64-bit integer \
                 (too much precision supported)",
                Bytes(bytes)
            )
        })?;
    }
    Ok(n)
}

/// Borrows the given `OsStr`-like value as a `&str`.
///
/// This behaves like `OsStr::to_str`, except that a value that is not valid
/// UTF-8 results in an error with a descriptive message instead of `None`.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(string) => Ok(string),
        None => Err(err!("environment value {os_str:?} is not valid UTF-8")),
    }
}

/// Borrows the given `OsStr`-like value as a `&[u8]`.
///
/// On Unix platforms, the underlying bytes are returned directly and no
/// validation is performed. On every other platform, the value must be valid
/// UTF-8, and an error is returned when it is not.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // Note that the UTF-8 validation done here is forgotten once we hand
        // back raw bytes, so a caller that ultimately wants a `&str` ends up
        // validating a second time. If that ever matters, a sibling API
        // returning `&str` could be added. It probably won't, since an
        // `OsStr` in this crate is usually just an environment variable.
        match os_str.to_str() {
            Some(string) => Ok(string.as_bytes()),
            None => {
                Err(err!("environment value {os_str:?} is not valid UTF-8"))
            }
        }
    }
}

/// Divides `input` into the bytes before position `at` and the bytes from
/// `at` onward.
///
/// `at` may be equal to the length of `input`, in which case the second
/// slice is empty. When `at` exceeds the length of `input`, `None` is
/// returned instead of panicking.
#[inline(always)]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // `split_at` panics for out-of-bounds positions, so only call it once
    // the position is known to be valid.
    (at <= input.len()).then(|| input.split_at(at))
}

/// Builds a closure that reports how far a slice begins past `start`.
///
/// The closure returned accepts an ending point, which is expected to be a
/// slice beginning at or after the beginning of `start`. It subtracts the
/// address at which `start` begins from the address at which the ending point
/// begins, and returns the difference as an offset relative to `start`.
///
/// This is handy in parsing routines that advance through their input by
/// re-slicing it, but that want to report positions as offsets.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    // The starting address never changes, so compute it just once.
    let base = start.as_ptr() as usize;
    move |end| {
        let position = end.as_ptr() as usize;
        position - base
    }
}

/// Builds a closure that returns the portion of `start` preceding a given
/// ending point.
///
/// The closure returned accepts an ending point, which is expected to be a
/// slice beginning at or after the beginning of `start`. It returns the
/// prefix of `start` that stops right where the ending point begins.
///
/// This is handy in parsing routines that want to recover the input consumed
/// so far from the input that remains.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`.
pub(crate) fn slicer<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
    let offset_of = offseter(start);
    // Everything before the offset of the ending point is what lies between
    // the two slices.
    move |end| &start[..offset_of(end)]
}