1use crate::canonicalize_and_process::escape_pattern_string;
4use crate::matcher::InnerMatcher;
5use crate::matcher::Matcher;
6use crate::parser::Options;
7use crate::parser::Part;
8use crate::parser::PartModifier;
9use crate::parser::PartType;
10use crate::parser::FULL_WILDCARD_REGEXP_VALUE;
11use crate::regexp::RegExp;
12use crate::tokenizer::is_valid_name_codepoint;
13use crate::Error;
14use std::fmt::Write;
15
16#[derive(Debug)]
18pub(crate) struct Component<R: RegExp> {
19 pub pattern_string: String,
20 pub regexp: Result<R, Error>,
21 pub group_name_list: Vec<String>,
22 pub matcher: Matcher<R>,
23 pub has_regexp_group: bool,
24}
25
26impl<R: RegExp> Component<R> {
27 pub(crate) fn compile<F>(
29 input: Option<&str>,
30 encoding_callback: F,
31 options: Options,
32 ) -> Result<Self, Error>
33 where
34 F: Fn(&str) -> Result<String, Error>,
35 {
36 let part_list = crate::parser::parse_pattern_string(
37 input.unwrap_or("*"),
38 &options,
39 encoding_callback,
40 )?;
41 let part_list = part_list.iter().collect::<Vec<_>>();
42 let (regexp_string, name_list) =
43 generate_regular_expression_and_name_list(&part_list, &options);
44 let flags = if options.ignore_case { "ui" } else { "u" };
45 let regexp = R::parse(®exp_string, flags).map_err(Error::RegExp);
46 let pattern_string = generate_pattern_string(&part_list, &options);
47 let matcher = generate_matcher::<R>(&part_list, &options, flags);
48 Ok(Component {
49 pattern_string,
50 regexp,
51 group_name_list: name_list,
52 matcher,
53 has_regexp_group: part_list
54 .iter()
55 .any(|part| part.kind == PartType::Regexp),
56 })
57 }
58
59 pub(crate) fn protocol_component_matches_special_scheme(&self) -> bool {
61 const SPECIAL_SCHEMES: [&str; 6] =
62 ["ftp", "file", "http", "https", "ws", "wss"];
63 if let Ok(regex) = &self.regexp {
64 for scheme in SPECIAL_SCHEMES {
65 if regex.matches(scheme).is_some() {
66 return true;
67 }
68 }
69 }
70 false
71 }
72
73 pub(crate) fn create_match_result(
75 &self,
76 input: String,
77 exec_result: Vec<Option<&str>>,
78 ) -> crate::UrlPatternComponentResult {
79 let groups = self
80 .group_name_list
81 .clone()
82 .into_iter()
83 .zip(exec_result.into_iter().map(|s| s.map(str::to_owned)))
84 .collect();
85 crate::UrlPatternComponentResult { input, groups }
86 }
87
88 pub(crate) fn optionally_transpose_regex_error(
89 mut self,
90 do_transpose: bool,
91 ) -> Result<Self, Error> {
92 if do_transpose {
93 self.regexp = Ok(self.regexp?);
94 }
95 Ok(self)
96 }
97}
98
99fn generate_regular_expression_and_name_list(
101 part_list: &[&Part],
102 options: &Options,
103) -> (String, Vec<String>) {
104 let mut result = String::from("^");
105 let mut name_list = vec![];
106 for part in part_list {
107 if part.kind == PartType::FixedText {
108 if part.modifier == PartModifier::None {
109 result.push_str(&options.escape_regexp_string(&part.value));
110 } else {
111 write!(
112 result,
113 "(?:{}){}",
114 options.escape_regexp_string(&part.value),
115 part.modifier
116 )
117 .unwrap();
118 }
119 continue;
120 }
121
122 assert!(!part.name.is_empty());
123 name_list.push(part.name.clone());
124 let regexp_value = if part.kind == PartType::SegmentWildcard {
125 options.generate_segment_wildcard_regexp()
126 } else if part.kind == PartType::FullWildcard {
127 FULL_WILDCARD_REGEXP_VALUE.to_string()
128 } else {
129 part.value.clone()
130 };
131
132 if part.prefix.is_empty() && part.suffix.is_empty() {
133 if matches!(part.modifier, PartModifier::None | PartModifier::Optional) {
134 write!(result, "({}){}", regexp_value, part.modifier).unwrap();
135 } else {
136 write!(result, "((?:{}){})", regexp_value, part.modifier).unwrap();
137 }
138 continue;
139 }
140 if matches!(part.modifier, PartModifier::None | PartModifier::Optional) {
141 write!(
142 result,
143 "(?:{}({}){}){}",
144 options.escape_regexp_string(&part.prefix),
145 regexp_value,
146 options.escape_regexp_string(&part.suffix),
147 part.modifier
148 )
149 .unwrap();
150 continue;
151 }
152 assert!(!part.prefix.is_empty() || !part.suffix.is_empty());
153 write!(
154 result,
155 "(?:{}((?:{})(?:{}{}(?:{}))*){}){}",
156 options.escape_regexp_string(&part.prefix),
157 regexp_value,
158 options.escape_regexp_string(&part.suffix),
159 options.escape_regexp_string(&part.prefix),
160 regexp_value,
161 options.escape_regexp_string(&part.suffix),
162 if part.modifier == PartModifier::ZeroOrMore {
163 "?"
164 } else {
165 ""
166 }
167 )
168 .unwrap();
169 }
170 result.push('$');
171 (result, name_list)
172}
173
174fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String {
176 let mut result = String::new();
177 for (i, part) in part_list.iter().enumerate() {
178 let prev_part: Option<&Part> = if i == 0 {
179 None
180 } else {
181 part_list.get(i - 1).copied()
182 };
183 let next_part: Option<&Part> = part_list.get(i + 1).copied();
184 if part.kind == PartType::FixedText {
185 if part.modifier == PartModifier::None {
186 result.push_str(&escape_pattern_string(&part.value));
187 continue;
188 }
189 write!(
190 result,
191 "{{{}}}{}",
192 escape_pattern_string(&part.value),
193 part.modifier
194 )
195 .unwrap();
196 continue;
197 }
198 let custom_name = !part.name.chars().next().unwrap().is_ascii_digit();
199 let mut needs_grouping = !part.suffix.is_empty()
200 || (!part.prefix.is_empty() && part.prefix != options.prefix_code_point);
201 if !needs_grouping
202 && custom_name
203 && part.kind == PartType::SegmentWildcard
204 && part.modifier == PartModifier::None
205 && matches!(next_part, Some(Part { prefix, suffix, .. }) if prefix.is_empty() && suffix.is_empty())
206 {
207 let next_part = next_part.unwrap();
208 if next_part.kind == PartType::FixedText {
209 needs_grouping = is_valid_name_codepoint(
210 next_part.value.chars().next().unwrap_or_default(),
211 false,
212 );
213 } else {
214 needs_grouping =
215 next_part.name.chars().next().unwrap().is_ascii_digit();
216 }
217 }
218 if !needs_grouping
219 && part.prefix.is_empty()
220 && matches!(
221 prev_part,
222 Some(Part {
223 kind: PartType::FixedText,
224 value,
225 ..
226 }) if value.chars().last().unwrap().to_string() == options.prefix_code_point
227 )
228 {
229 needs_grouping = true;
230 }
231 assert!(!part.name.is_empty());
232 if needs_grouping {
233 result.push('{');
234 }
235 result.push_str(&escape_pattern_string(&part.prefix));
236 if custom_name {
237 result.push(':');
238 result.push_str(&part.name);
239 }
240 match part.kind {
241 PartType::FixedText => unreachable!(),
242 PartType::Regexp => write!(result, "({})", part.value).unwrap(),
243 PartType::SegmentWildcard if !custom_name => {
244 write!(result, "({})", options.generate_segment_wildcard_regexp())
245 .unwrap()
246 }
247 PartType::SegmentWildcard => {}
248 PartType::FullWildcard => {
249 if !custom_name
250 && (prev_part.is_none()
251 || prev_part.unwrap().kind == PartType::FixedText
252 || prev_part.unwrap().modifier != PartModifier::None
253 || needs_grouping
254 || !part.prefix.is_empty())
255 {
256 result.push('*');
257 } else {
258 result.push_str(&format!("({FULL_WILDCARD_REGEXP_VALUE})"));
259 }
260 }
261 }
262 if part.kind == PartType::SegmentWildcard
263 && custom_name
264 && !part.suffix.is_empty()
265 && is_valid_name_codepoint(part.suffix.chars().next().unwrap(), false)
266 {
267 result.push('\\');
268 }
269 result.push_str(&escape_pattern_string(&part.suffix));
270 if needs_grouping {
271 result.push('}');
272 }
273 result.push_str(&part.modifier.to_string());
274 }
275 result
276}
277
278fn generate_matcher<R: RegExp>(
280 mut part_list: &[&Part],
281 options: &Options,
282 flags: &str,
283) -> Matcher<R> {
284 fn is_literal(part: &Part) -> bool {
285 part.kind == PartType::FixedText && part.modifier == PartModifier::None
286 }
287
288 let mut prefix = match part_list.first() {
290 Some(part) if is_literal(part) => {
291 part_list = &part_list[1..];
292 part.value.clone()
293 }
294 _ => "".into(),
295 };
296 let mut suffix = match part_list.last() {
298 Some(part) if is_literal(part) => {
299 part_list = &part_list[..part_list.len() - 1];
300 part.value.clone()
301 }
302 _ => "".into(),
303 };
304
305 if part_list.is_empty() {
308 return Matcher {
309 prefix: "".to_string(),
310 suffix: "".to_string(),
311 inner: InnerMatcher::Literal {
312 literal: format!("{prefix}{suffix}"),
313 },
314 ignore_case: options.ignore_case,
315 };
316 }
317
318 let inner = match part_list {
319 [part]
322 if part.kind == PartType::FullWildcard
323 && part.modifier == PartModifier::None =>
324 {
325 prefix += &part.prefix;
326 if !part.suffix.is_empty() {
327 suffix = format!("{}{suffix}", part.suffix);
328 }
329 InnerMatcher::SingleCapture {
330 filter: None,
331 allow_empty: true,
332 }
333 }
334 [part]
337 if part.kind == PartType::SegmentWildcard
338 && part.modifier == PartModifier::None =>
339 {
340 prefix += &part.prefix;
341 if !part.suffix.is_empty() {
342 suffix = format!("{}{suffix}", part.suffix);
343 }
344 InnerMatcher::SingleCapture {
345 filter: options.delimiter_code_point,
346 allow_empty: false,
347 }
348 }
349 part_list => {
351 let (regexp_string, _) =
352 generate_regular_expression_and_name_list(part_list, options);
353 let regexp = R::parse(®exp_string, flags).map_err(Error::RegExp);
354 InnerMatcher::RegExp { regexp }
355 }
356 };
357
358 Matcher {
359 prefix,
360 suffix,
361 inner,
362 ignore_case: options.ignore_case,
363 }
364}