urlpattern/
component.rs

1// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
2
3use crate::canonicalize_and_process::escape_pattern_string;
4use crate::matcher::InnerMatcher;
5use crate::matcher::Matcher;
6use crate::parser::Options;
7use crate::parser::Part;
8use crate::parser::PartModifier;
9use crate::parser::PartType;
10use crate::parser::FULL_WILDCARD_REGEXP_VALUE;
11use crate::regexp::RegExp;
12use crate::tokenizer::is_valid_name_codepoint;
13use crate::Error;
14use std::fmt::Write;
15
// Ref: https://wicg.github.io/urlpattern/#component
/// One compiled component of a URL pattern, as built by `Component::compile`.
#[derive(Debug)]
pub(crate) struct Component<R: RegExp> {
  /// Pattern string regenerated from the parsed part list by
  /// `generate_pattern_string`.
  pub pattern_string: String,
  /// Outcome of compiling the generated regular expression source with `R`.
  /// A compile failure is kept here as `Err` instead of failing `compile`.
  pub regexp: Result<R, Error>,
  /// Names of all non-fixed-text parts, in part-list order. Zipped
  /// positionally with the regexp exec result in `create_match_result`.
  pub group_name_list: Vec<String>,
  /// Matcher produced from the same part list by `generate_matcher`.
  pub matcher: Matcher<R>,
  /// `true` when at least one parsed part has kind `PartType::Regexp`.
  pub has_regexp_group: bool,
}
25
26impl<R: RegExp> Component<R> {
27  // Ref: https://wicg.github.io/urlpattern/#compile-a-component
28  pub(crate) fn compile<F>(
29    input: Option<&str>,
30    encoding_callback: F,
31    options: Options,
32  ) -> Result<Self, Error>
33  where
34    F: Fn(&str) -> Result<String, Error>,
35  {
36    let part_list = crate::parser::parse_pattern_string(
37      input.unwrap_or("*"),
38      &options,
39      encoding_callback,
40    )?;
41    let part_list = part_list.iter().collect::<Vec<_>>();
42    let (regexp_string, name_list) =
43      generate_regular_expression_and_name_list(&part_list, &options);
44    let flags = if options.ignore_case { "ui" } else { "u" };
45    let regexp = R::parse(&regexp_string, flags).map_err(Error::RegExp);
46    let pattern_string = generate_pattern_string(&part_list, &options);
47    let matcher = generate_matcher::<R>(&part_list, &options, flags);
48    Ok(Component {
49      pattern_string,
50      regexp,
51      group_name_list: name_list,
52      matcher,
53      has_regexp_group: part_list
54        .iter()
55        .any(|part| part.kind == PartType::Regexp),
56    })
57  }
58
59  // Ref: https://wicg.github.io/urlpattern/#protocol-component-matches-a-special-scheme
60  pub(crate) fn protocol_component_matches_special_scheme(&self) -> bool {
61    const SPECIAL_SCHEMES: [&str; 6] =
62      ["ftp", "file", "http", "https", "ws", "wss"];
63    if let Ok(regex) = &self.regexp {
64      for scheme in SPECIAL_SCHEMES {
65        if regex.matches(scheme).is_some() {
66          return true;
67        }
68      }
69    }
70    false
71  }
72
73  // Ref: https://wicg.github.io/urlpattern/#create-a-component-match-result
74  pub(crate) fn create_match_result(
75    &self,
76    input: String,
77    exec_result: Vec<Option<&str>>,
78  ) -> crate::UrlPatternComponentResult {
79    let groups = self
80      .group_name_list
81      .clone()
82      .into_iter()
83      .zip(exec_result.into_iter().map(|s| s.map(str::to_owned)))
84      .collect();
85    crate::UrlPatternComponentResult { input, groups }
86  }
87
88  pub(crate) fn optionally_transpose_regex_error(
89    mut self,
90    do_transpose: bool,
91  ) -> Result<Self, Error> {
92    if do_transpose {
93      self.regexp = Ok(self.regexp?);
94    }
95    Ok(self)
96  }
97}
98
99// Ref: https://wicg.github.io/urlpattern/#generate-a-regular-expression-and-name-list
100fn generate_regular_expression_and_name_list(
101  part_list: &[&Part],
102  options: &Options,
103) -> (String, Vec<String>) {
104  let mut result = String::from("^");
105  let mut name_list = vec![];
106  for part in part_list {
107    if part.kind == PartType::FixedText {
108      if part.modifier == PartModifier::None {
109        result.push_str(&options.escape_regexp_string(&part.value));
110      } else {
111        write!(
112          result,
113          "(?:{}){}",
114          options.escape_regexp_string(&part.value),
115          part.modifier
116        )
117        .unwrap();
118      }
119      continue;
120    }
121
122    assert!(!part.name.is_empty());
123    name_list.push(part.name.clone());
124    let regexp_value = if part.kind == PartType::SegmentWildcard {
125      options.generate_segment_wildcard_regexp()
126    } else if part.kind == PartType::FullWildcard {
127      FULL_WILDCARD_REGEXP_VALUE.to_string()
128    } else {
129      part.value.clone()
130    };
131
132    if part.prefix.is_empty() && part.suffix.is_empty() {
133      if matches!(part.modifier, PartModifier::None | PartModifier::Optional) {
134        write!(result, "({}){}", regexp_value, part.modifier).unwrap();
135      } else {
136        write!(result, "((?:{}){})", regexp_value, part.modifier).unwrap();
137      }
138      continue;
139    }
140    if matches!(part.modifier, PartModifier::None | PartModifier::Optional) {
141      write!(
142        result,
143        "(?:{}({}){}){}",
144        options.escape_regexp_string(&part.prefix),
145        regexp_value,
146        options.escape_regexp_string(&part.suffix),
147        part.modifier
148      )
149      .unwrap();
150      continue;
151    }
152    assert!(!part.prefix.is_empty() || !part.suffix.is_empty());
153    write!(
154      result,
155      "(?:{}((?:{})(?:{}{}(?:{}))*){}){}",
156      options.escape_regexp_string(&part.prefix),
157      regexp_value,
158      options.escape_regexp_string(&part.suffix),
159      options.escape_regexp_string(&part.prefix),
160      regexp_value,
161      options.escape_regexp_string(&part.suffix),
162      if part.modifier == PartModifier::ZeroOrMore {
163        "?"
164      } else {
165        ""
166      }
167    )
168    .unwrap();
169  }
170  result.push('$');
171  (result, name_list)
172}
173
174// Ref: https://wicg.github.io/urlpattern/#generate-a-pattern-string
175fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String {
176  let mut result = String::new();
177  for (i, part) in part_list.iter().enumerate() {
178    let prev_part: Option<&Part> = if i == 0 {
179      None
180    } else {
181      part_list.get(i - 1).copied()
182    };
183    let next_part: Option<&Part> = part_list.get(i + 1).copied();
184    if part.kind == PartType::FixedText {
185      if part.modifier == PartModifier::None {
186        result.push_str(&escape_pattern_string(&part.value));
187        continue;
188      }
189      write!(
190        result,
191        "{{{}}}{}",
192        escape_pattern_string(&part.value),
193        part.modifier
194      )
195      .unwrap();
196      continue;
197    }
198    let custom_name = !part.name.chars().next().unwrap().is_ascii_digit();
199    let mut needs_grouping = !part.suffix.is_empty()
200      || (!part.prefix.is_empty() && part.prefix != options.prefix_code_point);
201    if !needs_grouping
202      && custom_name
203      && part.kind == PartType::SegmentWildcard
204      && part.modifier == PartModifier::None
205      && matches!(next_part, Some(Part { prefix, suffix, .. }) if prefix.is_empty() && suffix.is_empty())
206    {
207      let next_part = next_part.unwrap();
208      if next_part.kind == PartType::FixedText {
209        needs_grouping = is_valid_name_codepoint(
210          next_part.value.chars().next().unwrap_or_default(),
211          false,
212        );
213      } else {
214        needs_grouping =
215          next_part.name.chars().next().unwrap().is_ascii_digit();
216      }
217    }
218    if !needs_grouping
219      && part.prefix.is_empty()
220      && matches!(
221        prev_part,
222        Some(Part {
223          kind: PartType::FixedText,
224          value,
225          ..
226        }) if value.chars().last().unwrap().to_string() == options.prefix_code_point
227      )
228    {
229      needs_grouping = true;
230    }
231    assert!(!part.name.is_empty());
232    if needs_grouping {
233      result.push('{');
234    }
235    result.push_str(&escape_pattern_string(&part.prefix));
236    if custom_name {
237      result.push(':');
238      result.push_str(&part.name);
239    }
240    match part.kind {
241      PartType::FixedText => unreachable!(),
242      PartType::Regexp => write!(result, "({})", part.value).unwrap(),
243      PartType::SegmentWildcard if !custom_name => {
244        write!(result, "({})", options.generate_segment_wildcard_regexp())
245          .unwrap()
246      }
247      PartType::SegmentWildcard => {}
248      PartType::FullWildcard => {
249        if !custom_name
250          && (prev_part.is_none()
251            || prev_part.unwrap().kind == PartType::FixedText
252            || prev_part.unwrap().modifier != PartModifier::None
253            || needs_grouping
254            || !part.prefix.is_empty())
255        {
256          result.push('*');
257        } else {
258          result.push_str(&format!("({FULL_WILDCARD_REGEXP_VALUE})"));
259        }
260      }
261    }
262    if part.kind == PartType::SegmentWildcard
263      && custom_name
264      && !part.suffix.is_empty()
265      && is_valid_name_codepoint(part.suffix.chars().next().unwrap(), false)
266    {
267      result.push('\\');
268    }
269    result.push_str(&escape_pattern_string(&part.suffix));
270    if needs_grouping {
271      result.push('}');
272    }
273    result.push_str(&part.modifier.to_string());
274  }
275  result
276}
277
278/// This function generates a matcher for a given parts list.
279fn generate_matcher<R: RegExp>(
280  mut part_list: &[&Part],
281  options: &Options,
282  flags: &str,
283) -> Matcher<R> {
284  fn is_literal(part: &Part) -> bool {
285    part.kind == PartType::FixedText && part.modifier == PartModifier::None
286  }
287
288  // If the first part is a fixed string, we can use it as a literal prefix.
289  let mut prefix = match part_list.first() {
290    Some(part) if is_literal(part) => {
291      part_list = &part_list[1..];
292      part.value.clone()
293    }
294    _ => "".into(),
295  };
296  // If the last part is a fixed string, we can use it as a literal suffix.
297  let mut suffix = match part_list.last() {
298    Some(part) if is_literal(part) => {
299      part_list = &part_list[..part_list.len() - 1];
300      part.value.clone()
301    }
302    _ => "".into(),
303  };
304
305  // If there are no more parts, we must have a prefix and/or a suffix. We can
306  // combine these into a single fixed text literal matcher.
307  if part_list.is_empty() {
308    return Matcher {
309      prefix: "".to_string(),
310      suffix: "".to_string(),
311      inner: InnerMatcher::Literal {
312        literal: format!("{prefix}{suffix}"),
313      },
314      ignore_case: options.ignore_case,
315    };
316  }
317
318  let inner = match part_list {
319    // If there is only one part, and it is a simple full wildcard with no
320    // prefix or suffix, we can use a simple wildcard matcher.
321    [part]
322      if part.kind == PartType::FullWildcard
323        && part.modifier == PartModifier::None =>
324    {
325      prefix += &part.prefix;
326      if !part.suffix.is_empty() {
327        suffix = format!("{}{suffix}", part.suffix);
328      }
329      InnerMatcher::SingleCapture {
330        filter: None,
331        allow_empty: true,
332      }
333    }
334    // If there is only one part, and it is a simple segment wildcard with no
335    // prefix or suffix, we can use a simple wildcard matcher.
336    [part]
337      if part.kind == PartType::SegmentWildcard
338        && part.modifier == PartModifier::None =>
339    {
340      prefix += &part.prefix;
341      if !part.suffix.is_empty() {
342        suffix = format!("{}{suffix}", part.suffix);
343      }
344      InnerMatcher::SingleCapture {
345        filter: options.delimiter_code_point,
346        allow_empty: false,
347      }
348    }
349    // For all other cases, we fall back to a regexp matcher.
350    part_list => {
351      let (regexp_string, _) =
352        generate_regular_expression_and_name_list(part_list, options);
353      let regexp = R::parse(&regexp_string, flags).map_err(Error::RegExp);
354      InnerMatcher::RegExp { regexp }
355    }
356  };
357
358  Matcher {
359    prefix,
360    suffix,
361    inner,
362    ignore_case: options.ignore_case,
363  }
364}