script/dom/bindings/
str.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::ops::Deref;
6use std::sync::LazyLock;
7
8use num_traits::Zero;
9use regex::Regex;
10pub use script_bindings::str::*;
11use time::{Date, Month, OffsetDateTime, Time, Weekday};
12
/// <https://html.spec.whatwg.org/multipage/#parse-a-month-component>
///
/// Parses the leading `YYYY-MM` part of `value` and returns `(year, month)`.
///
/// Only the first two `-`-separated segments are inspected; any further segments are
/// left for the caller to validate. Returns `None` when the year is not made of at
/// least four ASCII digits, is zero or does not fit an `i32`, or when the month is not
/// exactly two ASCII digits in the range `01..=12`.
fn parse_month_component(value: &str) -> Option<(i32, u32)> {
    // Step 3: the year and the month are separated by a U+002D HYPHEN-MINUS.
    let mut segments = value.split('-');
    let year = segments.next()?;
    let month = segments.next()?;

    // Step 1, 2: at least four ASCII digits, greater than zero.
    // `str::parse` also accepts a leading `+`, so the digits are checked explicitly.
    if year.len() < 4 || !year.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    let year_int = year.parse::<i32>().ok()?;
    if year_int == 0 {
        return None;
    }

    // Step 4, 5: exactly two ASCII digits, in the range 1 ≤ month ≤ 12.
    if month.len() != 2 || !month.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    let month_int = month.parse::<u32>().ok()?;
    if !(1..=12).contains(&month_int) {
        return None;
    }

    // Step 6
    Some((year_int, month_int))
}
35
36/// <https://html.spec.whatwg.org/multipage/#parse-a-date-component>
37fn parse_date_component(value: &str) -> Option<(i32, u32, u32)> {
38    // Step 1
39    let (year_int, month_int) = parse_month_component(value)?;
40
41    // Step 3, 4
42    let day = value.split('-').nth(2)?;
43    let day_int = day.parse::<u32>().ok()?;
44    if day.len() != 2 {
45        return None;
46    }
47
48    // Step 2, 5
49    let max_day = max_day_in_month(year_int, month_int)?;
50    if day_int == 0 || day_int > max_day {
51        return None;
52    }
53
54    // Step 6
55    Some((year_int, month_int, day_int))
56}
57
/// <https://html.spec.whatwg.org/multipage/#parse-a-time-component>
///
/// Parses `HH:MM`, `HH:MM:SS` or `HH:MM:SS.f{1,3}` from the start of `value` and returns
/// `(hour, minute, second, millisecond)`.
///
/// Only the first three `:`-separated segments are inspected; any further segments are
/// left for the caller to validate. Returns `None` when a field has the wrong number of
/// digits, contains anything but ASCII digits, or is out of range.
fn parse_time_component(value: &str) -> Option<(u8, u8, u8, u16)> {
    /// Parses a field of exactly two ASCII digits that must not exceed `max`.
    ///
    /// `str::parse` also accepts a leading `+`, so the digits are checked explicitly.
    fn two_digits(field: &str, max: u8) -> Option<u8> {
        if field.len() != 2 || !field.bytes().all(|byte| byte.is_ascii_digit()) {
            return None;
        }
        field.parse::<u8>().ok().filter(|&number| number <= max)
    }

    let mut fields = value.split(':');

    // Step 1: Collect a sequence of code points that are ASCII digits from input given
    // position. If the collected sequence is not exactly two characters long, then fail.
    // Otherwise, interpret the resulting sequence as a base-ten integer. Let that number
    // be the hour.
    // Step 2: If hour is not a number in the range 0 ≤ hour ≤ 23, then fail.
    let hour_int = two_digits(fields.next()?, 23)?;

    // Step 3: If position is beyond the end of input or if the character at position is
    // not a U+003A COLON character, then fail. Otherwise, move position forwards one
    // character.
    // Step 4: Collect a sequence of code points that are ASCII digits from input given
    // position. If the collected sequence is not exactly two characters long, then fail.
    // Otherwise, interpret the resulting sequence as a base-ten integer. Let that number
    // be the minute.
    // Step 5: If minute is not a number in the range 0 ≤ minute ≤ 59, then fail.
    let minute_int = two_digits(fields.next()?, 59)?;

    // Step 6, 7: Asks us to parse the seconds as a floating point number, but below this
    // is done as integral parts in order to avoid floating point precision issues.
    let Some(seconds_and_milliseconds) = fields.next() else {
        return Some((hour_int, minute_int, 0, 0));
    };

    // Parse the seconds portion, which has to be in the range 0 ≤ second < 60.
    let mut second_fields = seconds_and_milliseconds.split('.');
    let second_int = two_digits(second_fields.next()?, 59)?;

    // Parse the fractional portion as a u16 (at most 999 milliseconds) and scale it
    // according to how many digits were given: ".5" is 500ms, ".05" is 50ms.
    let Some(millisecond) = second_fields.next() else {
        return Some((hour_int, minute_int, second_int, 0));
    };
    // More than one U+002E FULL STOP is not allowed.
    if second_fields.next().is_some() {
        return None;
    }
    let millisecond_length = millisecond.len() as u32;
    if !(1..=3).contains(&millisecond_length) ||
        !millisecond.bytes().all(|byte| byte.is_ascii_digit())
    {
        return None;
    }
    let millisecond_int =
        millisecond.parse::<u16>().ok()? * 10_u16.pow(3 - millisecond_length);

    // Step 8: Return hour, minute, and second (and in our case the milliseconds due to the note
    // above about floating point precision).
    Some((hour_int, minute_int, second_int, millisecond_int))
}
122
123fn max_day_in_month(year_num: i32, month_num: u32) -> Option<u32> {
124    match month_num {
125        1 | 3 | 5 | 7 | 8 | 10 | 12 => Some(31),
126        4 | 6 | 9 | 11 => Some(30),
127        2 => {
128            if is_leap_year(year_num) {
129                Some(29)
130            } else {
131                Some(28)
132            }
133        },
134        _ => None,
135    }
136}
137
138/// <https://html.spec.whatwg.org/multipage/#week-number-of-the-last-day>
139///
140/// > A week-year with a number year has 53 weeks if it corresponds to either a year year
141/// > in the proleptic Gregorian calendar that has a Thursday as its first day (January
142/// > 1st), or a year year in the proleptic Gregorian calendar that has a Wednesday as its
143/// > first day (January 1st) and where year is a number divisible by 400, or a number
144/// > divisible by 4 but not by 100. All other week-years have 52 weeks.
145fn max_week_in_year(year: i32) -> u32 {
146    let Ok(date) = Date::from_calendar_date(year, Month::January, 1) else {
147        return 52;
148    };
149
150    match OffsetDateTime::new_utc(date, Time::MIDNIGHT).weekday() {
151        Weekday::Thursday => 53,
152        Weekday::Wednesday if is_leap_year(year) => 53,
153        _ => 52,
154    }
155}
156
/// Returns whether `year` is a leap year in the proleptic Gregorian calendar:
/// century years must be divisible by 400, all other years by 4.
#[inline]
fn is_leap_year(year: i32) -> bool {
    if year % 100 == 0 {
        year % 400 == 0
    } else {
        year % 4 == 0
    }
}
161
/// Serialization of a point in time into the string forms used by the date and time
/// related `<input>` value microsyntaxes.
pub(crate) trait ToInputValueString {
    /// Serializes the calendar date as `YYYY-MM-DD`.
    fn to_date_string(&self) -> String;

    /// Serializes the year and month as `YYYY-MM`.
    fn to_month_string(&self) -> String;

    /// Serializes the week as `YYYY-Www`.
    fn to_week_string(&self) -> String;

    /// Serializes the time of day, omitting seconds and fractional seconds when they
    /// are zero.
    fn to_time_string(&self) -> String;

    /// Serializes as a normalized local date and time string, `{date}T{time}`, where
    /// both parts are valid and the time part is as short as possible.
    /// <https://html.spec.whatwg.org/multipage/#valid-normalised-local-date-and-time-string>
    fn to_local_date_time_string(&self) -> String;
}
173
174impl ToInputValueString for OffsetDateTime {
175    fn to_date_string(&self) -> String {
176        format!(
177            "{:04}-{:02}-{:02}",
178            self.year(),
179            self.month() as u8,
180            self.day()
181        )
182    }
183
184    fn to_month_string(&self) -> String {
185        format!("{:04}-{:02}", self.year(), self.month() as u8)
186    }
187
188    fn to_week_string(&self) -> String {
189        // NB: The ISO week year might be different than the year of the day.
190        let (year, week, _) = self.to_iso_week_date();
191        format!("{:04}-W{:02}", year, week)
192    }
193
194    fn to_time_string(&self) -> String {
195        if self.second().is_zero() && self.millisecond().is_zero() {
196            format!("{:02}:{:02}", self.hour(), self.minute())
197        } else {
198            // This needs to trim off the zero parts of the milliseconds.
199            format!(
200                "{:02}:{:02}:{:02}.{:03}",
201                self.hour(),
202                self.minute(),
203                self.second(),
204                self.millisecond()
205            )
206            .trim_end_matches(['.', '0'])
207            .to_owned()
208        }
209    }
210
211    fn to_local_date_time_string(&self) -> String {
212        format!("{}T{}", self.to_date_string(), self.to_time_string())
213    }
214}
215
216pub(crate) trait FromInputValueString {
217    /// <https://html.spec.whatwg.org/multipage/#parse-a-date-string>
218    ///
219    /// Parse the date string and return an [`OffsetDateTime`] on midnight of the
220    /// given date in UTC.
221    ///
222    /// A valid date string should be "YYYY-MM-DD"
223    /// YYYY must be four or more digits, MM and DD both must be two digits
224    /// <https://html.spec.whatwg.org/multipage/#valid-date-string>
225    fn parse_date_string(&self) -> Option<OffsetDateTime>;
226
227    /// <https://html.spec.whatwg.org/multipage/#parse-a-month-string>
228    ///
229    /// Parse the month and return an [`OffsetDateTime`] on midnight of UTC of the morning of
230    /// the first day of the parsed month.
231    ///
232    /// A valid month string should be "YYYY-MM" YYYY must be four or more digits, MM both
233    /// must be two digits <https://html.spec.whatwg.org/multipage/#valid-month-string>
234    fn parse_month_string(&self) -> Option<OffsetDateTime>;
235
236    /// <https://html.spec.whatwg.org/multipage/#parse-a-week-string>
237    ///
238    /// Parse the week string, returning an [`OffsetDateTime`] on the Monday of the parsed
239    /// week.
240    ///
241    /// A valid week string should be like {YYYY}-W{WW}, such as "2017-W52" YYYY must be
242    /// four or more digits, WW both must be two digits
243    /// <https://html.spec.whatwg.org/multipage/#valid-week-string>
244    fn parse_week_string(&self) -> Option<OffsetDateTime>;
245
246    /// Parse this value as a time string according to
247    /// <https://html.spec.whatwg.org/multipage/#valid-time-string>.
248    fn parse_time_string(&self) -> Option<OffsetDateTime>;
249
250    /// <https://html.spec.whatwg.org/multipage/#parse-a-local-date-and-time-string>
251    ///
252    /// Parse the local date and time, returning an [`OffsetDateTime`] in UTC or None.
253    fn parse_local_date_time_string(&self) -> Option<OffsetDateTime>;
254
255    /// Validates whether or not this value is a valid date string according to
256    /// <https://html.spec.whatwg.org/multipage/#valid-date-string>.
257    fn is_valid_date_string(&self) -> bool {
258        self.parse_date_string().is_some()
259    }
260
261    /// Validates whether or not this value is a valid month string according to
262    /// <https://html.spec.whatwg.org/multipage/#valid-month-string>.
263    fn is_valid_month_string(&self) -> bool {
264        self.parse_month_string().is_some()
265    }
266    /// Validates whether or not this value is a valid week string according to
267    /// <https://html.spec.whatwg.org/multipage/#valid-week-string>.
268    fn is_valid_week_string(&self) -> bool {
269        self.parse_week_string().is_some()
270    }
271    /// Validates whether or not this value is a valid time string according to
272    /// <https://html.spec.whatwg.org/multipage/#valid-time-string>.
273    fn is_valid_time_string(&self) -> bool;
274
275    /// Validates whether or not this value is a valid local date time string according to
276    /// <https://html.spec.whatwg.org/multipage/#valid-week-string>.
277    fn is_valid_local_date_time_string(&self) -> bool {
278        self.parse_local_date_time_string().is_some()
279    }
280
281    /// <https://html.spec.whatwg.org/multipage/#valid-simple-colour>
282    fn is_valid_simple_color_string(&self) -> bool;
283
284    /// <https://html.spec.whatwg.org/multipage/#valid-e-mail-address>
285    fn is_valid_email_address_string(&self) -> bool;
286}
287
288impl<T: Deref<Target = str>> FromInputValueString for T {
289    fn parse_date_string(&self) -> Option<OffsetDateTime> {
290        // Step 1, 2, 3
291        let (year_int, month_int, day_int) = parse_date_component(self)?;
292
293        // Step 4
294        if self.split('-').nth(3).is_some() {
295            return None;
296        }
297
298        // Step 5, 6
299        let month = (month_int as u8).try_into().ok()?;
300        let date = Date::from_calendar_date(year_int, month, day_int as u8).ok()?;
301        Some(OffsetDateTime::new_utc(date, Time::MIDNIGHT))
302    }
303
304    fn parse_month_string(&self) -> Option<OffsetDateTime> {
305        // Step 1, 2, 3
306        let (year_int, month_int) = parse_month_component(self)?;
307
308        // Step 4
309        if self.split('-').nth(2).is_some() {
310            return None;
311        }
312        // Step 5
313        let month = (month_int as u8).try_into().ok()?;
314        let date = Date::from_calendar_date(year_int, month, 1).ok()?;
315        Some(OffsetDateTime::new_utc(date, Time::MIDNIGHT))
316    }
317
318    fn parse_week_string(&self) -> Option<OffsetDateTime> {
319        // Step 1, 2, 3
320        let mut iterator = self.split('-');
321        let year = iterator.next()?;
322
323        // Step 4
324        let year_int = year.parse::<i32>().ok()?;
325        if year.len() < 4 || year_int == 0 {
326            return None;
327        }
328
329        // Step 5, 6
330        let week = iterator.next()?;
331        let (week_first, week_last) = week.split_at(1);
332        if week_first != "W" {
333            return None;
334        }
335
336        // Step 7
337        let week_int = week_last.parse::<u32>().ok()?;
338        if week_last.len() != 2 {
339            return None;
340        }
341
342        // Step 8
343        let max_week = max_week_in_year(year_int);
344
345        // Step 9
346        if week_int < 1 || week_int > max_week {
347            return None;
348        }
349
350        // Step 10
351        if iterator.next().is_some() {
352            return None;
353        }
354
355        // Step 11
356        let date = Date::from_iso_week_date(year_int, week_int as u8, Weekday::Monday).ok()?;
357        Some(OffsetDateTime::new_utc(date, Time::MIDNIGHT))
358    }
359
360    fn parse_time_string(&self) -> Option<OffsetDateTime> {
361        // Step 1, 2, 3
362        let (hour, minute, second, millisecond) = parse_time_component(self)?;
363
364        // Step 4
365        if self.split(':').nth(3).is_some() {
366            return None;
367        }
368
369        // Step 5, 6
370        let time = Time::from_hms_milli(hour, minute, second, millisecond).ok()?;
371        Some(OffsetDateTime::new_utc(
372            OffsetDateTime::UNIX_EPOCH.date(),
373            time,
374        ))
375    }
376
377    fn parse_local_date_time_string(&self) -> Option<OffsetDateTime> {
378        // Step 1, 2, 4
379        let mut iterator = if self.contains('T') {
380            self.split('T')
381        } else {
382            self.split(' ')
383        };
384
385        // Step 3
386        let date = iterator.next()?;
387        let (year, month, day) = parse_date_component(date)?;
388
389        // Step 5
390        let time = iterator.next()?;
391        let (hour, minute, second, millisecond) = parse_time_component(time)?;
392
393        // Step 6
394        if iterator.next().is_some() {
395            return None;
396        }
397
398        // Step 7, 8, 9
399        // TODO: Is this supposed to know the locale's daylight-savings-time rules?
400        let month = (month as u8).try_into().ok()?;
401        let date = Date::from_calendar_date(year, month, day as u8).ok()?;
402        let time = Time::from_hms_milli(hour, minute, second, millisecond).ok()?;
403        Some(OffsetDateTime::new_utc(date, time))
404    }
405
406    fn is_valid_time_string(&self) -> bool {
407        enum State {
408            HourHigh,
409            HourLow09,
410            HourLow03,
411            MinuteColon,
412            MinuteHigh,
413            MinuteLow,
414            SecondColon,
415            SecondHigh,
416            SecondLow,
417            MilliStop,
418            MilliHigh,
419            MilliMiddle,
420            MilliLow,
421            Done,
422            Error,
423        }
424        let next_state =
425            |valid: bool, next: State| -> State { if valid { next } else { State::Error } };
426
427        let state = self.chars().fold(State::HourHigh, |state, c| {
428            match state {
429                // Step 1 "HH"
430                State::HourHigh => match c {
431                    '0' | '1' => State::HourLow09,
432                    '2' => State::HourLow03,
433                    _ => State::Error,
434                },
435                State::HourLow09 => next_state(c.is_ascii_digit(), State::MinuteColon),
436                State::HourLow03 => next_state(c.is_digit(4), State::MinuteColon),
437
438                // Step 2 ":"
439                State::MinuteColon => next_state(c == ':', State::MinuteHigh),
440
441                // Step 3 "mm"
442                State::MinuteHigh => next_state(c.is_digit(6), State::MinuteLow),
443                State::MinuteLow => next_state(c.is_ascii_digit(), State::SecondColon),
444
445                // Step 4.1 ":"
446                State::SecondColon => next_state(c == ':', State::SecondHigh),
447                // Step 4.2 "ss"
448                State::SecondHigh => next_state(c.is_digit(6), State::SecondLow),
449                State::SecondLow => next_state(c.is_ascii_digit(), State::MilliStop),
450
451                // Step 4.3.1 "."
452                State::MilliStop => next_state(c == '.', State::MilliHigh),
453                // Step 4.3.2 "SSS"
454                State::MilliHigh => next_state(c.is_ascii_digit(), State::MilliMiddle),
455                State::MilliMiddle => next_state(c.is_ascii_digit(), State::MilliLow),
456                State::MilliLow => next_state(c.is_ascii_digit(), State::Done),
457
458                _ => State::Error,
459            }
460        });
461
462        match state {
463            State::Done |
464            // Step 4 (optional)
465            State::SecondColon |
466            // Step 4.3 (optional)
467            State::MilliStop |
468            // Step 4.3.2 (only 1 digit required)
469            State::MilliMiddle | State::MilliLow => true,
470            _ => false
471        }
472    }
473
474    fn is_valid_simple_color_string(&self) -> bool {
475        let mut chars = self.chars();
476        if self.len() == 7 && chars.next() == Some('#') {
477            chars.all(|c| c.is_ascii_hexdigit())
478        } else {
479            false
480        }
481    }
482
483    fn is_valid_email_address_string(&self) -> bool {
484        static RE: LazyLock<Regex> = LazyLock::new(|| {
485            Regex::new(concat!(
486                r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?",
487                r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
488            ))
489            .unwrap()
490        });
491        RE.is_match(self)
492    }
493}