urlpattern/
constructor_parser.rs

1// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
2
3use crate::error::Error;
4use crate::regexp::RegExp;
5use crate::tokenizer::Token;
6use crate::tokenizer::TokenType;
7use crate::UrlPatternInit;
8
// Ref: https://wicg.github.io/urlpattern/#constructor-string-parser-state
/// State of the constructor string parser's state machine.
///
/// Except for `Init`, `Authority` and `Done`, each state is named after the
/// URL component whose text is recorded when the parser leaves that state.
#[derive(Debug, PartialEq, Eq)]
enum ConstructorStringParserState {
  /// Starting state; nothing is recorded when leaving it.
  Init,
  /// The protocol component is being scanned.
  Protocol,
  /// Intermediate state after the protocol; the parser rewinds from here into
  /// either `Username` or `Hostname`, and records nothing itself.
  Authority,
  /// The username component is being scanned.
  Username,
  /// The password component is being scanned.
  Password,
  /// The hostname component is being scanned.
  Hostname,
  /// The port component is being scanned.
  Port,
  /// The pathname component is being scanned.
  Pathname,
  /// The search component is being scanned.
  Search,
  /// The hash component is being scanned.
  Hash,
  /// Terminal state, entered once the end token has been handled.
  Done,
}
24
25// Ref: https://wicg.github.io/urlpattern/#constructor-string-parser
26struct ConstructorStringParser<'a> {
27  input: &'a str,
28  token_list: Vec<Token>,
29  result: UrlPatternInit,
30  component_start: usize,
31  token_index: usize,
32  token_increment: usize,
33  group_depth: usize,
34  hostname_ipv6_bracket_depth: usize,
35  protocol_matches_special_scheme: bool,
36  state: ConstructorStringParserState,
37}
38
39impl<'a> ConstructorStringParser<'a> {
40  // Ref: https://wicg.github.io/urlpattern/#rewind
41  #[inline]
42  fn rewind(&mut self) {
43    self.token_index = self.component_start;
44    self.token_increment = 0;
45  }
46
47  // Ref: https://wicg.github.io/urlpattern/#is-a-hash-prefix
48  #[inline]
49  fn is_hash_prefix(&self) -> bool {
50    self.is_non_special_pattern_char(self.token_index, "#")
51  }
52
53  // Ref: https://wicg.github.io/urlpattern/#is-a-protocol-suffix
54  #[inline]
55  fn is_protocol_suffix(&self) -> bool {
56    self.is_non_special_pattern_char(self.token_index, ":")
57  }
58
59  // Ref: https://wicg.github.io/urlpattern/#is-a-search-prefix
60  fn is_search_prefix(&self) -> bool {
61    if self.is_non_special_pattern_char(self.token_index, "?") {
62      return true;
63    }
64    if self.token_list[self.token_index].value != "?" {
65      return false;
66    }
67    if self.token_index == 0 {
68      return true;
69    }
70    let previous_token = self.get_safe_token(self.token_index - 1);
71    !matches!(
72      previous_token.kind,
73      TokenType::Name
74        | TokenType::Regexp
75        | TokenType::Close
76        | TokenType::Asterisk
77    )
78  }
79
80  // Ref: https://wicg.github.io/urlpattern/#is-a-password-prefix
81  #[inline]
82  fn is_password_prefix(&self) -> bool {
83    self.is_non_special_pattern_char(self.token_index, ":")
84  }
85
86  // Ref: https://wicg.github.io/urlpattern/#is-a-port-prefix
87  #[inline]
88  fn is_port_prefix(&self) -> bool {
89    self.is_non_special_pattern_char(self.token_index, ":")
90  }
91
92  // Ref: https://wicg.github.io/urlpattern/#is-a-pathname-start
93  #[inline]
94  fn is_pathname_start(&self) -> bool {
95    self.is_non_special_pattern_char(self.token_index, "/")
96  }
97
98  // Ref: https://wicg.github.io/urlpattern/#is-an-identity-terminator
99  #[inline]
100  fn is_identity_terminator(&self) -> bool {
101    self.is_non_special_pattern_char(self.token_index, "@")
102  }
103
104  // Ref: https://wicg.github.io/urlpattern/#is-a-non-special-pattern-char
105  fn is_non_special_pattern_char(&self, index: usize, value: &str) -> bool {
106    let token = self.get_safe_token(index);
107    if token.value != value {
108      false
109    } else {
110      matches!(
111        token.kind,
112        TokenType::Char | TokenType::EscapedChar | TokenType::InvalidChar
113      )
114    }
115  }
116
117  // Ref: https://wicg.github.io/urlpattern/#get-a-safe-token
118  fn get_safe_token(&self, index: usize) -> &Token {
119    if index < self.token_list.len() {
120      &self.token_list[index]
121    } else {
122      assert!(!self.token_list.is_empty());
123      let token = self.token_list.last().unwrap();
124      assert!(token.kind == TokenType::End);
125      token
126    }
127  }
128
129  // Ref: https://wicg.github.io/urlpattern/#change-state
130  fn change_state(
131    &mut self,
132    new_state: ConstructorStringParserState,
133    skip: usize,
134  ) {
135    match self.state {
136      ConstructorStringParserState::Protocol => {
137        self.result.protocol = Some(self.make_component_string())
138      }
139      ConstructorStringParserState::Username => {
140        self.result.username = Some(self.make_component_string())
141      }
142      ConstructorStringParserState::Password => {
143        self.result.password = Some(self.make_component_string())
144      }
145      ConstructorStringParserState::Hostname => {
146        self.result.hostname = Some(self.make_component_string())
147      }
148      ConstructorStringParserState::Port => {
149        self.result.port = Some(self.make_component_string())
150      }
151      ConstructorStringParserState::Pathname => {
152        self.result.pathname = Some(self.make_component_string())
153      }
154      ConstructorStringParserState::Search => {
155        self.result.search = Some(self.make_component_string())
156      }
157      ConstructorStringParserState::Hash => {
158        self.result.hash = Some(self.make_component_string())
159      }
160      ConstructorStringParserState::Init
161      | ConstructorStringParserState::Authority
162      | ConstructorStringParserState::Done => {}
163    }
164
165    if self.state != ConstructorStringParserState::Init
166      && new_state != ConstructorStringParserState::Done
167    {
168      if matches!(
169        self.state,
170        ConstructorStringParserState::Protocol
171          | ConstructorStringParserState::Authority
172          | ConstructorStringParserState::Username
173          | ConstructorStringParserState::Password
174      ) && matches!(
175        new_state,
176        ConstructorStringParserState::Port
177          | ConstructorStringParserState::Pathname
178          | ConstructorStringParserState::Search
179          | ConstructorStringParserState::Hash
180      ) && self.result.hostname.is_none()
181      {
182        self.result.hostname = Some(String::new());
183      }
184
185      if matches!(
186        self.state,
187        ConstructorStringParserState::Protocol
188          | ConstructorStringParserState::Authority
189          | ConstructorStringParserState::Username
190          | ConstructorStringParserState::Password
191          | ConstructorStringParserState::Hostname
192          | ConstructorStringParserState::Port
193      ) && matches!(
194        new_state,
195        ConstructorStringParserState::Search
196          | ConstructorStringParserState::Hash
197      ) && self.result.pathname.is_none()
198      {
199        if self.protocol_matches_special_scheme {
200          self.result.pathname = Some(String::from("/"));
201        } else {
202          self.result.pathname = Some(String::new());
203        }
204      }
205
206      if matches!(
207        self.state,
208        ConstructorStringParserState::Protocol
209          | ConstructorStringParserState::Authority
210          | ConstructorStringParserState::Username
211          | ConstructorStringParserState::Password
212          | ConstructorStringParserState::Hostname
213          | ConstructorStringParserState::Port
214          | ConstructorStringParserState::Pathname
215      ) && new_state == ConstructorStringParserState::Hash
216        && self.result.search.is_none()
217      {
218        self.result.search = Some(String::new());
219      }
220    }
221
222    self.state = new_state;
223    self.token_index += skip;
224    self.component_start = self.token_index;
225    self.token_increment = 0;
226  }
227
228  // Ref: https://wicg.github.io/urlpattern/#make-a-component-string
229  fn make_component_string(&self) -> String {
230    assert!(self.token_index < self.token_list.len());
231    let token = &self.token_list[self.token_index];
232    let component_start_index = self.get_safe_token(self.component_start).index;
233
234    self
235      .input
236      .chars()
237      .skip(component_start_index)
238      .take(token.index - component_start_index)
239      .collect()
240  }
241
242  // Ref: https://wicg.github.io/urlpattern/#rewind-and-set-state
243  #[inline]
244  fn rewind_and_set_state(&mut self, state: ConstructorStringParserState) {
245    self.rewind();
246    self.state = state;
247  }
248
249  // Ref: https://wicg.github.io/urlpattern/#is-a-group-open
250  #[inline]
251  fn is_group_open(&self) -> bool {
252    self.token_list[self.token_index].kind == TokenType::Open
253  }
254
255  // Ref: https://wicg.github.io/urlpattern/#is-a-group-close
256  #[inline]
257  fn is_group_close(&self) -> bool {
258    self.token_list[self.token_index].kind == TokenType::Close
259  }
260
261  // Ref: https://wicg.github.io/urlpattern/#compute-protocol-matches-a-special-scheme-flag
262  fn compute_protocol_matches_special_scheme<R: RegExp>(
263    &mut self,
264  ) -> Result<(), Error> {
265    let protocol_string = self.make_component_string();
266    let protocol_component = crate::component::Component::<R>::compile(
267      Some(&protocol_string),
268      crate::canonicalize_and_process::canonicalize_protocol,
269      Default::default(),
270    )?;
271    if protocol_component.protocol_component_matches_special_scheme() {
272      self.protocol_matches_special_scheme = true;
273    }
274    Ok(())
275  }
276
277  // Ref: https://wicg.github.io/urlpattern/#next-is-authority-slashes
278  #[inline]
279  fn next_is_authority_slashes(&self) -> bool {
280    if !self.is_non_special_pattern_char(self.token_index + 1, "/") {
281      false
282    } else {
283      self.is_non_special_pattern_char(self.token_index + 2, "/")
284    }
285  }
286
287  // Ref: https://wicg.github.io/urlpattern/#is-an-ipv6-open
288  #[inline]
289  fn is_ipv6_open(&self) -> bool {
290    self.is_non_special_pattern_char(self.token_index, "[")
291  }
292
293  // Ref: https://wicg.github.io/urlpattern/#is-an-ipv6-close
294  #[inline]
295  fn is_ipv6_close(&self) -> bool {
296    self.is_non_special_pattern_char(self.token_index, "]")
297  }
298}
299
300// Ref: https://wicg.github.io/urlpattern/#parse-a-constructor-string
301pub(crate) fn parse_constructor_string<R: RegExp>(
302  input: &str,
303) -> Result<UrlPatternInit, Error> {
304  let token_list = crate::tokenizer::tokenize(
305    input,
306    crate::tokenizer::TokenizePolicy::Lenient,
307  )?;
308
309  let mut parser = ConstructorStringParser {
310    input,
311    token_list,
312    result: UrlPatternInit {
313      protocol: None,
314      username: None,
315      password: None,
316      hostname: None,
317      port: None,
318      pathname: None,
319      search: None,
320      hash: None,
321      base_url: None,
322    },
323    component_start: 0,
324    token_index: 0,
325    token_increment: 1,
326    group_depth: 0,
327    hostname_ipv6_bracket_depth: 0,
328    protocol_matches_special_scheme: false,
329    state: ConstructorStringParserState::Init,
330  };
331
332  while parser.token_index < parser.token_list.len() {
333    parser.token_increment = 1;
334    if parser.token_list[parser.token_index].kind == TokenType::End {
335      if parser.state == ConstructorStringParserState::Init {
336        parser.rewind();
337        if parser.is_hash_prefix() {
338          parser.change_state(ConstructorStringParserState::Hash, 1);
339        } else if parser.is_search_prefix() {
340          parser.change_state(ConstructorStringParserState::Search, 1);
341        } else {
342          parser.change_state(ConstructorStringParserState::Pathname, 0);
343        }
344        parser.token_index += parser.token_increment;
345        continue;
346      }
347      if parser.state == ConstructorStringParserState::Authority {
348        parser.rewind_and_set_state(ConstructorStringParserState::Hostname);
349        parser.token_index += parser.token_increment;
350        continue;
351      }
352      parser.change_state(ConstructorStringParserState::Done, 0);
353      break;
354    }
355    if parser.is_group_open() {
356      parser.group_depth += 1;
357      parser.token_index += parser.token_increment;
358      continue;
359    }
360    if parser.group_depth > 0 {
361      if parser.is_group_close() {
362        parser.group_depth -= 1;
363      } else {
364        parser.token_index += parser.token_increment;
365        continue;
366      }
367    }
368    match parser.state {
369      ConstructorStringParserState::Init => {
370        if parser.is_protocol_suffix() {
371          parser.rewind_and_set_state(ConstructorStringParserState::Protocol);
372        }
373      }
374      ConstructorStringParserState::Protocol => {
375        if parser.is_protocol_suffix() {
376          parser.compute_protocol_matches_special_scheme::<R>()?;
377          let mut next_state = ConstructorStringParserState::Pathname;
378          let mut skip = 1;
379          if parser.next_is_authority_slashes() {
380            next_state = ConstructorStringParserState::Authority;
381            skip = 3;
382          } else if parser.protocol_matches_special_scheme {
383            next_state = ConstructorStringParserState::Authority;
384          }
385          parser.change_state(next_state, skip);
386        }
387      }
388      ConstructorStringParserState::Authority => {
389        if parser.is_identity_terminator() {
390          parser.rewind_and_set_state(ConstructorStringParserState::Username);
391        } else if parser.is_pathname_start()
392          || parser.is_search_prefix()
393          || parser.is_hash_prefix()
394        {
395          parser.rewind_and_set_state(ConstructorStringParserState::Hostname);
396        }
397      }
398      ConstructorStringParserState::Username => {
399        if parser.is_password_prefix() {
400          parser.change_state(ConstructorStringParserState::Password, 1);
401        } else if parser.is_identity_terminator() {
402          parser.change_state(ConstructorStringParserState::Hostname, 1);
403        }
404      }
405      ConstructorStringParserState::Password => {
406        if parser.is_identity_terminator() {
407          parser.change_state(ConstructorStringParserState::Hostname, 1);
408        }
409      }
410      ConstructorStringParserState::Hostname => {
411        if parser.is_ipv6_open() {
412          parser.hostname_ipv6_bracket_depth += 1;
413        } else if parser.is_ipv6_close() {
414          parser.hostname_ipv6_bracket_depth -= 1;
415        } else if parser.is_port_prefix()
416          && parser.hostname_ipv6_bracket_depth == 0
417        {
418          parser.change_state(ConstructorStringParserState::Port, 1);
419        } else if parser.is_pathname_start() {
420          parser.change_state(ConstructorStringParserState::Pathname, 0);
421        } else if parser.is_search_prefix() {
422          parser.change_state(ConstructorStringParserState::Search, 1);
423        } else if parser.is_hash_prefix() {
424          parser.change_state(ConstructorStringParserState::Hash, 1);
425        }
426      }
427      ConstructorStringParserState::Port => {
428        if parser.is_pathname_start() {
429          parser.change_state(ConstructorStringParserState::Pathname, 0);
430        } else if parser.is_search_prefix() {
431          parser.change_state(ConstructorStringParserState::Search, 1);
432        } else if parser.is_hash_prefix() {
433          parser.change_state(ConstructorStringParserState::Hash, 1);
434        }
435      }
436      ConstructorStringParserState::Pathname => {
437        if parser.is_search_prefix() {
438          parser.change_state(ConstructorStringParserState::Search, 1);
439        } else if parser.is_hash_prefix() {
440          parser.change_state(ConstructorStringParserState::Hash, 1);
441        }
442      }
443      ConstructorStringParserState::Search => {
444        if parser.is_hash_prefix() {
445          parser.change_state(ConstructorStringParserState::Hash, 1);
446        }
447      }
448      ConstructorStringParserState::Hash => {}
449      ConstructorStringParserState::Done => unreachable!(),
450    }
451    parser.token_index += parser.token_increment;
452  }
453
454  if parser.result.hostname.is_some() && parser.result.port.is_none() {
455    parser.result.port = Some(String::new());
456  }
457
458  Ok(parser.result)
459}