script/dom/bindings/
str.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::sync::LazyLock;
6
7use num_traits::Zero;
8use regex::Regex;
9pub use script_bindings::str::*;
10use time::{Date, Month, OffsetDateTime, Time, Weekday};
11
/// <https://html.spec.whatwg.org/multipage/#parse-a-month-component>
///
/// Parses the leading `YYYY-MM` portion of `value` and returns `(year, month)`.
///
/// The year must consist of at least four ASCII digits and be greater than zero; the
/// month must be exactly two ASCII digits in the range `1..=12`. Anything following
/// the month (for example the `-DD` part of a date) is ignored here and left to the
/// caller to validate. Returns `None` if any of these requirements is not met.
fn parse_month_component(value: &str) -> Option<(i32, u32)> {
    // `str::parse` on integer types accepts a leading `+`, but the specification only
    // collects ASCII digits, so every field is checked explicitly before parsing.
    let is_ascii_digits = |field: &str| field.bytes().all(|byte| byte.is_ascii_digit());

    // Step 3
    let mut fields = value.split('-');
    let year = fields.next()?;
    let month = fields.next()?;

    // Step 1, 2
    if year.len() < 4 || !is_ascii_digits(year) {
        return None;
    }
    // Years that do not fit into an `i32` fail to parse and are rejected as well.
    let year_int = year.parse::<i32>().ok()?;
    if year_int == 0 {
        return None;
    }

    // Step 4, 5
    if month.len() != 2 || !is_ascii_digits(month) {
        return None;
    }
    let month_int = month.parse::<u32>().ok()?;
    if !(1..=12).contains(&month_int) {
        return None;
    }

    // Step 6
    Some((year_int, month_int))
}
34
35/// <https://html.spec.whatwg.org/multipage/#parse-a-date-component>
36fn parse_date_component(value: &str) -> Option<(i32, u32, u32)> {
37    // Step 1
38    let (year_int, month_int) = parse_month_component(value)?;
39
40    // Step 3, 4
41    let day = value.split('-').nth(2)?;
42    let day_int = day.parse::<u32>().ok()?;
43    if day.len() != 2 {
44        return None;
45    }
46
47    // Step 2, 5
48    let max_day = max_day_in_month(year_int, month_int)?;
49    if day_int == 0 || day_int > max_day {
50        return None;
51    }
52
53    // Step 6
54    Some((year_int, month_int, day_int))
55}
56
/// <https://html.spec.whatwg.org/multipage/#parse-a-time-component>
///
/// Parses `value` as `HH:MM`, `HH:MM:SS` or `HH:MM:SS.f` (with one to three fraction
/// digits) and returns `(hour, minute, second, millisecond)`.
///
/// Every field must consist solely of ASCII digits. The hour must be in `0..=23`, the
/// minute and second in `0..=59`. Missing seconds or fraction default to zero. The
/// whole of `value` has to be consumed: additional `:` or `.` separated parts make
/// the parse fail. Returns `None` on any violation.
fn parse_time_component(value: &str) -> Option<(u8, u8, u8, u16)> {
    // Parses a field made of exactly two ASCII digits. The explicit digit check is
    // needed because `str::parse` also accepts a leading `+` (e.g. "+5").
    fn two_digits(field: &str) -> Option<u8> {
        if field.len() != 2 || !field.bytes().all(|byte| byte.is_ascii_digit()) {
            return None;
        }
        field.parse().ok()
    }

    let mut fields = value.split(':');

    // Step 1: Collect a sequence of code points that are ASCII digits from input given
    // position. If the collected sequence is not exactly two characters long, then fail.
    // Otherwise, interpret the resulting sequence as a base-ten integer. Let that number
    // be the hour.
    // Step 2: If hour is not a number in the range 0 ≤ hour ≤ 23, then fail.
    let hour = two_digits(fields.next()?).filter(|hour| *hour <= 23)?;

    // Step 3: If position is beyond the end of input or if the character at position is
    // not a U+003A COLON character, then fail. Otherwise, move position forwards one
    // character.
    // Step 4: Collect a sequence of code points that are ASCII digits from input given
    // position. If the collected sequence is not exactly two characters long, then fail.
    // Otherwise, interpret the resulting sequence as a base-ten integer. Let that number
    // be the minute.
    // Step 5: If minute is not a number in the range 0 ≤ minute ≤ 59, then fail.
    let minute = two_digits(fields.next()?).filter(|minute| *minute <= 59)?;

    // Step 6, 7: Asks us to parse the seconds as a floating point number, but below this
    // is done as integral parts in order to avoid floating point precision issues.
    let seconds_field = fields.next();

    // Anything after the seconds field (a fourth `:` separated part) is trailing garbage.
    if fields.next().is_some() {
        return None;
    }
    let Some(seconds_field) = seconds_field else {
        return Some((hour, minute, 0, 0));
    };

    // Split off the optional fraction at the first `.`; a second `.` ends up inside
    // the fraction and is rejected by the digit check below.
    let (second, fraction) = match seconds_field.split_once('.') {
        Some((second, fraction)) => (second, Some(fraction)),
        None => (seconds_field, None),
    };
    let second = two_digits(second).filter(|second| *second <= 59)?;
    let Some(fraction) = fraction else {
        return Some((hour, minute, second, 0));
    };

    // The fraction has between one and three digits. Scale it according to its length
    // so that ".5" becomes 500 milliseconds and ".005" becomes 5 milliseconds.
    if !(1..=3).contains(&fraction.len()) || !fraction.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    let scale = 10_u16.pow(3 - fraction.len() as u32);
    let millisecond = fraction.parse::<u16>().ok()? * scale;

    // Step 8: Return hour, minute, and second (and in our case the milliseconds due to the
    // note above about floating point precision).
    Some((hour, minute, second, millisecond))
}
121
122fn max_day_in_month(year_num: i32, month_num: u32) -> Option<u32> {
123    match month_num {
124        1 | 3 | 5 | 7 | 8 | 10 | 12 => Some(31),
125        4 | 6 | 9 | 11 => Some(30),
126        2 => {
127            if is_leap_year(year_num) {
128                Some(29)
129            } else {
130                Some(28)
131            }
132        },
133        _ => None,
134    }
135}
136
137/// <https://html.spec.whatwg.org/multipage/#week-number-of-the-last-day>
138///
139/// > A week-year with a number year has 53 weeks if it corresponds to either a year year
140/// > in the proleptic Gregorian calendar that has a Thursday as its first day (January
141/// > 1st), or a year year in the proleptic Gregorian calendar that has a Wednesday as its
142/// > first day (January 1st) and where year is a number divisible by 400, or a number
143/// > divisible by 4 but not by 100. All other week-years have 52 weeks.
144fn max_week_in_year(year: i32) -> u32 {
145    let Ok(date) = Date::from_calendar_date(year, Month::January, 1) else {
146        return 52;
147    };
148
149    match OffsetDateTime::new_utc(date, Time::MIDNIGHT).weekday() {
150        Weekday::Thursday => 53,
151        Weekday::Wednesday if is_leap_year(year) => 53,
152        _ => 52,
153    }
154}
155
/// Returns whether `year` is a leap year in the proleptic Gregorian calendar:
/// century years must be divisible by 400, all other years by 4.
#[inline]
fn is_leap_year(year: i32) -> bool {
    if year % 100 == 0 {
        year % 400 == 0
    } else {
        year % 4 == 0
    }
}
160
/// Serialization of a point in time into the string formats used as values of the
/// date and time related `<input>` types.
pub(crate) trait ToInputValueString {
    /// Serializes the date part as a date string, e.g. `2017-01-31`.
    fn to_date_string(&self) -> String;

    /// Serializes the year and month as a month string, e.g. `2017-01`.
    fn to_month_string(&self) -> String;

    /// Serializes the week as a week string, e.g. `2017-W05`.
    fn to_week_string(&self) -> String;

    /// Serializes the time of day as a time string, e.g. `12:30`.
    fn to_time_string(&self) -> String;

    /// Serializes date and time as a normalized local date and time string.
    ///
    /// A valid normalized local date and time string should be "{date}T{time}"
    /// where date and time are both valid, and the time string must be as short as possible
    /// <https://html.spec.whatwg.org/multipage/#valid-normalised-local-date-and-time-string>
    fn to_local_date_time_string(&self) -> String;
}
172
173impl ToInputValueString for OffsetDateTime {
174    fn to_date_string(&self) -> String {
175        format!(
176            "{:04}-{:02}-{:02}",
177            self.year(),
178            self.month() as u8,
179            self.day()
180        )
181    }
182
183    fn to_month_string(&self) -> String {
184        format!("{:04}-{:02}", self.year(), self.month() as u8)
185    }
186
187    fn to_week_string(&self) -> String {
188        // NB: The ISO week year might be different than the year of the day.
189        let (year, week, _) = self.to_iso_week_date();
190        format!("{:04}-W{:02}", year, week)
191    }
192
193    fn to_time_string(&self) -> String {
194        if self.second().is_zero() && self.millisecond().is_zero() {
195            format!("{:02}:{:02}", self.hour(), self.minute())
196        } else {
197            // This needs to trim off the zero parts of the milliseconds.
198            format!(
199                "{:02}:{:02}:{:02}.{:03}",
200                self.hour(),
201                self.minute(),
202                self.second(),
203                self.millisecond()
204            )
205            .trim_end_matches(['.', '0'])
206            .to_owned()
207        }
208    }
209
210    fn to_local_date_time_string(&self) -> String {
211        format!("{}T{}", self.to_date_string(), self.to_time_string())
212    }
213}
214
215pub(crate) trait FromInputValueString {
216    /// <https://html.spec.whatwg.org/multipage/#parse-a-date-string>
217    ///
218    /// Parse the date string and return an [`OffsetDateTime`] on midnight of the
219    /// given date in UTC.
220    ///
221    /// A valid date string should be "YYYY-MM-DD"
222    /// YYYY must be four or more digits, MM and DD both must be two digits
223    /// <https://html.spec.whatwg.org/multipage/#valid-date-string>
224    fn parse_date_string(&self) -> Option<OffsetDateTime>;
225
226    /// <https://html.spec.whatwg.org/multipage/#parse-a-month-string>
227    ///
228    /// Parse the month and return an [`OffsetDateTime`] on midnight of UTC of the morning of
229    /// the first day of the parsed month.
230    ///
231    /// A valid month string should be "YYYY-MM" YYYY must be four or more digits, MM both
232    /// must be two digits <https://html.spec.whatwg.org/multipage/#valid-month-string>
233    fn parse_month_string(&self) -> Option<OffsetDateTime>;
234
235    /// <https://html.spec.whatwg.org/multipage/#parse-a-week-string>
236    ///
237    /// Parse the week string, returning an [`OffsetDateTime`] on the Monday of the parsed
238    /// week.
239    ///
240    /// A valid week string should be like {YYYY}-W{WW}, such as "2017-W52" YYYY must be
241    /// four or more digits, WW both must be two digits
242    /// <https://html.spec.whatwg.org/multipage/#valid-week-string>
243    fn parse_week_string(&self) -> Option<OffsetDateTime>;
244
245    /// Parse this value as a time string according to
246    /// <https://html.spec.whatwg.org/multipage/#valid-time-string>.
247    fn parse_time_string(&self) -> Option<OffsetDateTime>;
248
249    /// <https://html.spec.whatwg.org/multipage/#parse-a-local-date-and-time-string>
250    ///
251    /// Parse the local date and time, returning an [`OffsetDateTime`] in UTC or None.
252    fn parse_local_date_time_string(&self) -> Option<OffsetDateTime>;
253
254    /// Validates whether or not this value is a valid date string according to
255    /// <https://html.spec.whatwg.org/multipage/#valid-date-string>.
256    fn is_valid_date_string(&self) -> bool {
257        self.parse_date_string().is_some()
258    }
259
260    /// Validates whether or not this value is a valid month string according to
261    /// <https://html.spec.whatwg.org/multipage/#valid-month-string>.
262    fn is_valid_month_string(&self) -> bool {
263        self.parse_month_string().is_some()
264    }
265    /// Validates whether or not this value is a valid week string according to
266    /// <https://html.spec.whatwg.org/multipage/#valid-week-string>.
267    fn is_valid_week_string(&self) -> bool {
268        self.parse_week_string().is_some()
269    }
270    /// Validates whether or not this value is a valid time string according to
271    /// <https://html.spec.whatwg.org/multipage/#valid-time-string>.
272    fn is_valid_time_string(&self) -> bool;
273
274    /// Validates whether or not this value is a valid local date time string according to
275    /// <https://html.spec.whatwg.org/multipage/#valid-week-string>.
276    fn is_valid_local_date_time_string(&self) -> bool {
277        self.parse_local_date_time_string().is_some()
278    }
279
280    /// <https://html.spec.whatwg.org/multipage/#valid-simple-colour>
281    fn is_valid_simple_color_string(&self) -> bool;
282
283    /// <https://html.spec.whatwg.org/multipage/#valid-e-mail-address>
284    fn is_valid_email_address_string(&self) -> bool;
285}
286
287impl FromInputValueString for &str {
288    fn parse_date_string(&self) -> Option<OffsetDateTime> {
289        // Step 1, 2, 3
290        let (year_int, month_int, day_int) = parse_date_component(self)?;
291
292        // Step 4
293        if self.split('-').nth(3).is_some() {
294            return None;
295        }
296
297        // Step 5, 6
298        let month = (month_int as u8).try_into().ok()?;
299        let date = Date::from_calendar_date(year_int, month, day_int as u8).ok()?;
300        Some(OffsetDateTime::new_utc(date, Time::MIDNIGHT))
301    }
302
303    fn parse_month_string(&self) -> Option<OffsetDateTime> {
304        // Step 1, 2, 3
305        let (year_int, month_int) = parse_month_component(self)?;
306
307        // Step 4
308        if self.split('-').nth(2).is_some() {
309            return None;
310        }
311        // Step 5
312        let month = (month_int as u8).try_into().ok()?;
313        let date = Date::from_calendar_date(year_int, month, 1).ok()?;
314        Some(OffsetDateTime::new_utc(date, Time::MIDNIGHT))
315    }
316
317    fn parse_week_string(&self) -> Option<OffsetDateTime> {
318        // Step 1, 2, 3
319        let mut iterator = self.split('-');
320        let year = iterator.next()?;
321
322        // Step 4
323        let year_int = year.parse::<i32>().ok()?;
324        if year.len() < 4 || year_int == 0 {
325            return None;
326        }
327
328        // Step 5, 6
329        let week = iterator.next()?;
330        let (week_first, week_last) = week.split_at(1);
331        if week_first != "W" {
332            return None;
333        }
334
335        // Step 7
336        let week_int = week_last.parse::<u32>().ok()?;
337        if week_last.len() != 2 {
338            return None;
339        }
340
341        // Step 8
342        let max_week = max_week_in_year(year_int);
343
344        // Step 9
345        if week_int < 1 || week_int > max_week {
346            return None;
347        }
348
349        // Step 10
350        if iterator.next().is_some() {
351            return None;
352        }
353
354        // Step 11
355        let date = Date::from_iso_week_date(year_int, week_int as u8, Weekday::Monday).ok()?;
356        Some(OffsetDateTime::new_utc(date, Time::MIDNIGHT))
357    }
358
359    fn parse_time_string(&self) -> Option<OffsetDateTime> {
360        // Step 1, 2, 3
361        let (hour, minute, second, millisecond) = parse_time_component(self)?;
362
363        // Step 4
364        if self.split(':').nth(3).is_some() {
365            return None;
366        }
367
368        // Step 5, 6
369        let time = Time::from_hms_milli(hour, minute, second, millisecond).ok()?;
370        Some(OffsetDateTime::new_utc(
371            OffsetDateTime::UNIX_EPOCH.date(),
372            time,
373        ))
374    }
375
376    fn parse_local_date_time_string(&self) -> Option<OffsetDateTime> {
377        // Step 1, 2, 4
378        let mut iterator = if self.contains('T') {
379            self.split('T')
380        } else {
381            self.split(' ')
382        };
383
384        // Step 3
385        let date = iterator.next()?;
386        let (year, month, day) = parse_date_component(date)?;
387
388        // Step 5
389        let time = iterator.next()?;
390        let (hour, minute, second, millisecond) = parse_time_component(time)?;
391
392        // Step 6
393        if iterator.next().is_some() {
394            return None;
395        }
396
397        // Step 7, 8, 9
398        // TODO: Is this supposed to know the locale's daylight-savings-time rules?
399        let month = (month as u8).try_into().ok()?;
400        let date = Date::from_calendar_date(year, month, day as u8).ok()?;
401        let time = Time::from_hms_milli(hour, minute, second, millisecond).ok()?;
402        Some(OffsetDateTime::new_utc(date, time))
403    }
404
405    fn is_valid_time_string(&self) -> bool {
406        enum State {
407            HourHigh,
408            HourLow09,
409            HourLow03,
410            MinuteColon,
411            MinuteHigh,
412            MinuteLow,
413            SecondColon,
414            SecondHigh,
415            SecondLow,
416            MilliStop,
417            MilliHigh,
418            MilliMiddle,
419            MilliLow,
420            Done,
421            Error,
422        }
423        let next_state =
424            |valid: bool, next: State| -> State { if valid { next } else { State::Error } };
425
426        let state = self.chars().fold(State::HourHigh, |state, c| {
427            match state {
428                // Step 1 "HH"
429                State::HourHigh => match c {
430                    '0' | '1' => State::HourLow09,
431                    '2' => State::HourLow03,
432                    _ => State::Error,
433                },
434                State::HourLow09 => next_state(c.is_ascii_digit(), State::MinuteColon),
435                State::HourLow03 => next_state(c.is_digit(4), State::MinuteColon),
436
437                // Step 2 ":"
438                State::MinuteColon => next_state(c == ':', State::MinuteHigh),
439
440                // Step 3 "mm"
441                State::MinuteHigh => next_state(c.is_digit(6), State::MinuteLow),
442                State::MinuteLow => next_state(c.is_ascii_digit(), State::SecondColon),
443
444                // Step 4.1 ":"
445                State::SecondColon => next_state(c == ':', State::SecondHigh),
446                // Step 4.2 "ss"
447                State::SecondHigh => next_state(c.is_digit(6), State::SecondLow),
448                State::SecondLow => next_state(c.is_ascii_digit(), State::MilliStop),
449
450                // Step 4.3.1 "."
451                State::MilliStop => next_state(c == '.', State::MilliHigh),
452                // Step 4.3.2 "SSS"
453                State::MilliHigh => next_state(c.is_ascii_digit(), State::MilliMiddle),
454                State::MilliMiddle => next_state(c.is_ascii_digit(), State::MilliLow),
455                State::MilliLow => next_state(c.is_ascii_digit(), State::Done),
456
457                _ => State::Error,
458            }
459        });
460
461        match state {
462            State::Done |
463            // Step 4 (optional)
464            State::SecondColon |
465            // Step 4.3 (optional)
466            State::MilliStop |
467            // Step 4.3.2 (only 1 digit required)
468            State::MilliMiddle | State::MilliLow => true,
469            _ => false
470        }
471    }
472
473    fn is_valid_simple_color_string(&self) -> bool {
474        let mut chars = self.chars();
475        if self.len() == 7 && chars.next() == Some('#') {
476            chars.all(|c| c.is_ascii_hexdigit())
477        } else {
478            false
479        }
480    }
481
482    fn is_valid_email_address_string(&self) -> bool {
483        static RE: LazyLock<Regex> = LazyLock::new(|| {
484            Regex::new(concat!(
485                r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?",
486                r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
487            ))
488            .unwrap()
489        });
490        RE.is_match(self)
491    }
492}