1use crate::error::Error;
4use crate::regexp::RegExp;
5use crate::tokenizer::Token;
6use crate::tokenizer::TokenType;
7use crate::UrlPatternInit;
8
/// The phase the constructor-string parser is currently in.
///
/// Each variant other than `Init`, `Authority` and `Done` corresponds to one
/// URL component whose text is recorded when the parser leaves that state.
#[derive(Debug, PartialEq, Eq)]
enum ConstructorStringParserState {
  /// Starting state: no component boundary has been recognised yet.
  Init,
  /// Scanning the protocol component.
  Protocol,
  /// Scanning the authority to decide whether it starts with user
  /// information or directly with the hostname.
  Authority,
  /// Scanning the username component.
  Username,
  /// Scanning the password component.
  Password,
  /// Scanning the hostname component.
  Hostname,
  /// Scanning the port component.
  Port,
  /// Scanning the pathname component.
  Pathname,
  /// Scanning the search (query) component.
  Search,
  /// Scanning the hash (fragment) component.
  Hash,
  /// The end token has been consumed; parsing is finished.
  Done,
}
24
25struct ConstructorStringParser<'a> {
27 input: &'a str,
28 token_list: Vec<Token>,
29 result: UrlPatternInit,
30 component_start: usize,
31 token_index: usize,
32 token_increment: usize,
33 group_depth: usize,
34 hostname_ipv6_bracket_depth: usize,
35 protocol_matches_special_scheme: bool,
36 state: ConstructorStringParserState,
37}
38
39impl<'a> ConstructorStringParser<'a> {
40 #[inline]
42 fn rewind(&mut self) {
43 self.token_index = self.component_start;
44 self.token_increment = 0;
45 }
46
47 #[inline]
49 fn is_hash_prefix(&self) -> bool {
50 self.is_non_special_pattern_char(self.token_index, "#")
51 }
52
53 #[inline]
55 fn is_protocol_suffix(&self) -> bool {
56 self.is_non_special_pattern_char(self.token_index, ":")
57 }
58
59 fn is_search_prefix(&self) -> bool {
61 if self.is_non_special_pattern_char(self.token_index, "?") {
62 return true;
63 }
64 if self.token_list[self.token_index].value != "?" {
65 return false;
66 }
67 if self.token_index == 0 {
68 return true;
69 }
70 let previous_token = self.get_safe_token(self.token_index - 1);
71 !matches!(
72 previous_token.kind,
73 TokenType::Name
74 | TokenType::Regexp
75 | TokenType::Close
76 | TokenType::Asterisk
77 )
78 }
79
80 #[inline]
82 fn is_password_prefix(&self) -> bool {
83 self.is_non_special_pattern_char(self.token_index, ":")
84 }
85
86 #[inline]
88 fn is_port_prefix(&self) -> bool {
89 self.is_non_special_pattern_char(self.token_index, ":")
90 }
91
92 #[inline]
94 fn is_pathname_start(&self) -> bool {
95 self.is_non_special_pattern_char(self.token_index, "/")
96 }
97
98 #[inline]
100 fn is_identity_terminator(&self) -> bool {
101 self.is_non_special_pattern_char(self.token_index, "@")
102 }
103
104 fn is_non_special_pattern_char(&self, index: usize, value: &str) -> bool {
106 let token = self.get_safe_token(index);
107 if token.value != value {
108 false
109 } else {
110 matches!(
111 token.kind,
112 TokenType::Char | TokenType::EscapedChar | TokenType::InvalidChar
113 )
114 }
115 }
116
117 fn get_safe_token(&self, index: usize) -> &Token {
119 if index < self.token_list.len() {
120 &self.token_list[index]
121 } else {
122 assert!(!self.token_list.is_empty());
123 let token = self.token_list.last().unwrap();
124 assert!(token.kind == TokenType::End);
125 token
126 }
127 }
128
129 fn change_state(
131 &mut self,
132 new_state: ConstructorStringParserState,
133 skip: usize,
134 ) {
135 match self.state {
136 ConstructorStringParserState::Protocol => {
137 self.result.protocol = Some(self.make_component_string())
138 }
139 ConstructorStringParserState::Username => {
140 self.result.username = Some(self.make_component_string())
141 }
142 ConstructorStringParserState::Password => {
143 self.result.password = Some(self.make_component_string())
144 }
145 ConstructorStringParserState::Hostname => {
146 self.result.hostname = Some(self.make_component_string())
147 }
148 ConstructorStringParserState::Port => {
149 self.result.port = Some(self.make_component_string())
150 }
151 ConstructorStringParserState::Pathname => {
152 self.result.pathname = Some(self.make_component_string())
153 }
154 ConstructorStringParserState::Search => {
155 self.result.search = Some(self.make_component_string())
156 }
157 ConstructorStringParserState::Hash => {
158 self.result.hash = Some(self.make_component_string())
159 }
160 ConstructorStringParserState::Init
161 | ConstructorStringParserState::Authority
162 | ConstructorStringParserState::Done => {}
163 }
164
165 if self.state != ConstructorStringParserState::Init
166 && new_state != ConstructorStringParserState::Done
167 {
168 if matches!(
169 self.state,
170 ConstructorStringParserState::Protocol
171 | ConstructorStringParserState::Authority
172 | ConstructorStringParserState::Username
173 | ConstructorStringParserState::Password
174 ) && matches!(
175 new_state,
176 ConstructorStringParserState::Port
177 | ConstructorStringParserState::Pathname
178 | ConstructorStringParserState::Search
179 | ConstructorStringParserState::Hash
180 ) && self.result.hostname.is_none()
181 {
182 self.result.hostname = Some(String::new());
183 }
184
185 if matches!(
186 self.state,
187 ConstructorStringParserState::Protocol
188 | ConstructorStringParserState::Authority
189 | ConstructorStringParserState::Username
190 | ConstructorStringParserState::Password
191 | ConstructorStringParserState::Hostname
192 | ConstructorStringParserState::Port
193 ) && matches!(
194 new_state,
195 ConstructorStringParserState::Search
196 | ConstructorStringParserState::Hash
197 ) && self.result.pathname.is_none()
198 {
199 if self.protocol_matches_special_scheme {
200 self.result.pathname = Some(String::from("/"));
201 } else {
202 self.result.pathname = Some(String::new());
203 }
204 }
205
206 if matches!(
207 self.state,
208 ConstructorStringParserState::Protocol
209 | ConstructorStringParserState::Authority
210 | ConstructorStringParserState::Username
211 | ConstructorStringParserState::Password
212 | ConstructorStringParserState::Hostname
213 | ConstructorStringParserState::Port
214 | ConstructorStringParserState::Pathname
215 ) && new_state == ConstructorStringParserState::Hash
216 && self.result.search.is_none()
217 {
218 self.result.search = Some(String::new());
219 }
220 }
221
222 self.state = new_state;
223 self.token_index += skip;
224 self.component_start = self.token_index;
225 self.token_increment = 0;
226 }
227
228 fn make_component_string(&self) -> String {
230 assert!(self.token_index < self.token_list.len());
231 let token = &self.token_list[self.token_index];
232 let component_start_index = self.get_safe_token(self.component_start).index;
233
234 self
235 .input
236 .chars()
237 .skip(component_start_index)
238 .take(token.index - component_start_index)
239 .collect()
240 }
241
242 #[inline]
244 fn rewind_and_set_state(&mut self, state: ConstructorStringParserState) {
245 self.rewind();
246 self.state = state;
247 }
248
249 #[inline]
251 fn is_group_open(&self) -> bool {
252 self.token_list[self.token_index].kind == TokenType::Open
253 }
254
255 #[inline]
257 fn is_group_close(&self) -> bool {
258 self.token_list[self.token_index].kind == TokenType::Close
259 }
260
261 fn compute_protocol_matches_special_scheme<R: RegExp>(
263 &mut self,
264 ) -> Result<(), Error> {
265 let protocol_string = self.make_component_string();
266 let protocol_component = crate::component::Component::<R>::compile(
267 Some(&protocol_string),
268 crate::canonicalize_and_process::canonicalize_protocol,
269 Default::default(),
270 )?;
271 if protocol_component.protocol_component_matches_special_scheme() {
272 self.protocol_matches_special_scheme = true;
273 }
274 Ok(())
275 }
276
277 #[inline]
279 fn next_is_authority_slashes(&self) -> bool {
280 if !self.is_non_special_pattern_char(self.token_index + 1, "/") {
281 false
282 } else {
283 self.is_non_special_pattern_char(self.token_index + 2, "/")
284 }
285 }
286
287 #[inline]
289 fn is_ipv6_open(&self) -> bool {
290 self.is_non_special_pattern_char(self.token_index, "[")
291 }
292
293 #[inline]
295 fn is_ipv6_close(&self) -> bool {
296 self.is_non_special_pattern_char(self.token_index, "]")
297 }
298}
299
300pub(crate) fn parse_constructor_string<R: RegExp>(
302 input: &str,
303) -> Result<UrlPatternInit, Error> {
304 let token_list = crate::tokenizer::tokenize(
305 input,
306 crate::tokenizer::TokenizePolicy::Lenient,
307 )?;
308
309 let mut parser = ConstructorStringParser {
310 input,
311 token_list,
312 result: UrlPatternInit {
313 protocol: None,
314 username: None,
315 password: None,
316 hostname: None,
317 port: None,
318 pathname: None,
319 search: None,
320 hash: None,
321 base_url: None,
322 },
323 component_start: 0,
324 token_index: 0,
325 token_increment: 1,
326 group_depth: 0,
327 hostname_ipv6_bracket_depth: 0,
328 protocol_matches_special_scheme: false,
329 state: ConstructorStringParserState::Init,
330 };
331
332 while parser.token_index < parser.token_list.len() {
333 parser.token_increment = 1;
334 if parser.token_list[parser.token_index].kind == TokenType::End {
335 if parser.state == ConstructorStringParserState::Init {
336 parser.rewind();
337 if parser.is_hash_prefix() {
338 parser.change_state(ConstructorStringParserState::Hash, 1);
339 } else if parser.is_search_prefix() {
340 parser.change_state(ConstructorStringParserState::Search, 1);
341 } else {
342 parser.change_state(ConstructorStringParserState::Pathname, 0);
343 }
344 parser.token_index += parser.token_increment;
345 continue;
346 }
347 if parser.state == ConstructorStringParserState::Authority {
348 parser.rewind_and_set_state(ConstructorStringParserState::Hostname);
349 parser.token_index += parser.token_increment;
350 continue;
351 }
352 parser.change_state(ConstructorStringParserState::Done, 0);
353 break;
354 }
355 if parser.is_group_open() {
356 parser.group_depth += 1;
357 parser.token_index += parser.token_increment;
358 continue;
359 }
360 if parser.group_depth > 0 {
361 if parser.is_group_close() {
362 parser.group_depth -= 1;
363 } else {
364 parser.token_index += parser.token_increment;
365 continue;
366 }
367 }
368 match parser.state {
369 ConstructorStringParserState::Init => {
370 if parser.is_protocol_suffix() {
371 parser.rewind_and_set_state(ConstructorStringParserState::Protocol);
372 }
373 }
374 ConstructorStringParserState::Protocol => {
375 if parser.is_protocol_suffix() {
376 parser.compute_protocol_matches_special_scheme::<R>()?;
377 let mut next_state = ConstructorStringParserState::Pathname;
378 let mut skip = 1;
379 if parser.next_is_authority_slashes() {
380 next_state = ConstructorStringParserState::Authority;
381 skip = 3;
382 } else if parser.protocol_matches_special_scheme {
383 next_state = ConstructorStringParserState::Authority;
384 }
385 parser.change_state(next_state, skip);
386 }
387 }
388 ConstructorStringParserState::Authority => {
389 if parser.is_identity_terminator() {
390 parser.rewind_and_set_state(ConstructorStringParserState::Username);
391 } else if parser.is_pathname_start()
392 || parser.is_search_prefix()
393 || parser.is_hash_prefix()
394 {
395 parser.rewind_and_set_state(ConstructorStringParserState::Hostname);
396 }
397 }
398 ConstructorStringParserState::Username => {
399 if parser.is_password_prefix() {
400 parser.change_state(ConstructorStringParserState::Password, 1);
401 } else if parser.is_identity_terminator() {
402 parser.change_state(ConstructorStringParserState::Hostname, 1);
403 }
404 }
405 ConstructorStringParserState::Password => {
406 if parser.is_identity_terminator() {
407 parser.change_state(ConstructorStringParserState::Hostname, 1);
408 }
409 }
410 ConstructorStringParserState::Hostname => {
411 if parser.is_ipv6_open() {
412 parser.hostname_ipv6_bracket_depth += 1;
413 } else if parser.is_ipv6_close() {
414 parser.hostname_ipv6_bracket_depth -= 1;
415 } else if parser.is_port_prefix()
416 && parser.hostname_ipv6_bracket_depth == 0
417 {
418 parser.change_state(ConstructorStringParserState::Port, 1);
419 } else if parser.is_pathname_start() {
420 parser.change_state(ConstructorStringParserState::Pathname, 0);
421 } else if parser.is_search_prefix() {
422 parser.change_state(ConstructorStringParserState::Search, 1);
423 } else if parser.is_hash_prefix() {
424 parser.change_state(ConstructorStringParserState::Hash, 1);
425 }
426 }
427 ConstructorStringParserState::Port => {
428 if parser.is_pathname_start() {
429 parser.change_state(ConstructorStringParserState::Pathname, 0);
430 } else if parser.is_search_prefix() {
431 parser.change_state(ConstructorStringParserState::Search, 1);
432 } else if parser.is_hash_prefix() {
433 parser.change_state(ConstructorStringParserState::Hash, 1);
434 }
435 }
436 ConstructorStringParserState::Pathname => {
437 if parser.is_search_prefix() {
438 parser.change_state(ConstructorStringParserState::Search, 1);
439 } else if parser.is_hash_prefix() {
440 parser.change_state(ConstructorStringParserState::Hash, 1);
441 }
442 }
443 ConstructorStringParserState::Search => {
444 if parser.is_hash_prefix() {
445 parser.change_state(ConstructorStringParserState::Hash, 1);
446 }
447 }
448 ConstructorStringParserState::Hash => {}
449 ConstructorStringParserState::Done => unreachable!(),
450 }
451 parser.token_index += parser.token_increment;
452 }
453
454 if parser.result.hostname.is_some() && parser.result.port.is_none() {
455 parser.result.port = Some(String::new());
456 }
457
458 Ok(parser.result)
459}