1use crate::error::ParserError;
4use crate::tokenizer::Token;
5use crate::tokenizer::TokenType;
6use crate::Error;
7
/// Regular expression source used for a full wildcard.
///
/// The pattern parser expands an `*` wildcard token to this string, and any
/// part whose regexp value equals it is classified as
/// [`PartType::FullWildcard`].
pub const FULL_WILDCARD_REGEXP_VALUE: &str = ".*";
10
/// Regular expression dialect that generated regexp fragments are written
/// for.
///
/// Within this module the dialect only influences
/// [`Options::escape_regexp_string`], which additionally escapes `/` when the
/// dialect is [`RegexSyntax::EcmaScript`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegexSyntax {
  /// Dialect selected by every `Options` constructor in this module; `/` is
  /// left unescaped.
  Rust,
  /// ECMAScript dialect; `/` is escaped as `\/`.
  EcmaScript,
}
23
/// Settings that control how a pattern string is parsed and how regexp
/// fragments are generated for it.
#[derive(Debug, Clone)]
pub struct Options {
  /// Character that separates segments. When set, the segment wildcard is
  /// `[^<delimiter>]+?` (with the delimiter escaped); when `None` it is `.+?`.
  pub delimiter_code_point: Option<char>,
  /// Text that is treated as the implicit prefix of an ungrouped name, regexp
  /// or wildcard when it is the `Char` token directly in front of it. Any
  /// other leading character is kept as fixed text instead.
  pub prefix_code_point: String,
  /// Regular expression dialect used when escaping text for a regexp.
  pub regex_syntax: RegexSyntax,
  /// Not read anywhere in this module.
  /// NOTE(review): presumably consumed when the generated regexp is compiled
  /// -- confirm against the caller.
  pub ignore_case: bool,
}
32
33impl std::default::Default for Options {
34 #[inline]
36 fn default() -> Self {
37 Options {
38 delimiter_code_point: None,
39 prefix_code_point: String::new(),
40 regex_syntax: RegexSyntax::Rust,
41 ignore_case: false,
42 }
43 }
44}
45
46impl Options {
47 #[inline]
49 pub fn hostname() -> Self {
50 Options {
51 delimiter_code_point: Some('.'),
52 prefix_code_point: String::new(),
53 regex_syntax: RegexSyntax::Rust,
54 ignore_case: false,
55 }
56 }
57
58 #[inline]
60 pub fn pathname() -> Self {
61 Options {
62 delimiter_code_point: Some('/'),
63 prefix_code_point: String::from("/"),
64 regex_syntax: RegexSyntax::Rust,
65 ignore_case: false,
66 }
67 }
68
69 pub fn escape_regexp_string(&self, input: &str) -> String {
71 assert!(input.is_ascii());
72 let mut result = String::new();
73 for char in input.chars() {
74 if matches!(
75 char,
76 '.'
77 | '+'
78 | '*'
79 | '?'
80 | '^'
81 | '$'
82 | '{'
83 | '}'
84 | '('
85 | ')'
86 | '['
87 | ']'
88 | '|'
89 | '\\'
91 ) || (char == '/' && self.regex_syntax == RegexSyntax::EcmaScript)
92 {
93 result.push('\\');
94 }
95 result.push(char);
96 }
97 result
98 }
99
100 #[inline]
102 pub fn generate_segment_wildcard_regexp(&self) -> String {
103 if let Some(code_point) = self.delimiter_code_point {
107 let mut buffer = [0; 4];
108 format!(
109 "[^{}]+?",
110 self.escape_regexp_string(code_point.encode_utf8(&mut buffer))
111 )
112 } else {
113 ".+?".to_owned()
114 }
115 }
116}
117
/// Classification of a parsed [`Part`].
#[derive(Debug, Eq, PartialEq)]
pub enum PartType {
  /// Literal text; the part's `value` holds the encoded text.
  FixedText,
  /// Custom regular expression; the part's `value` holds its source.
  Regexp,
  /// Matches a single segment. Assigned when the regexp value equals the
  /// parser's segment wildcard regexp; the part's `value` is left empty.
  SegmentWildcard,
  /// Matches anything. Assigned when the regexp value equals
  /// [`FULL_WILDCARD_REGEXP_VALUE`]; the part's `value` is left empty.
  FullWildcard,
}
126
/// Modifier attached to a [`Part`], parsed from a trailing `?`, `*` or `+`
/// token.
#[derive(Debug, Eq, PartialEq)]
pub enum PartModifier {
  /// No modifier token followed the part; displayed as the empty string.
  None,
  /// Parsed from `?`.
  Optional,
  /// Parsed from `*`.
  ZeroOrMore,
  /// Parsed from `+`.
  OneOrMore,
}
135
136impl std::fmt::Display for PartModifier {
137 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139 f.write_str(match self {
140 PartModifier::None => "",
141 PartModifier::Optional => "?",
142 PartModifier::ZeroOrMore => "*",
143 PartModifier::OneOrMore => "+",
144 })
145 }
146}
147
/// One component of a parsed pattern string.
#[derive(Debug)]
pub struct Part {
  /// What this part matches.
  pub kind: PartType,
  /// Encoded text for `FixedText`, regexp source for `Regexp`, and empty for
  /// the two wildcard kinds.
  pub value: String,
  /// Modifier that followed the part in the pattern string.
  pub modifier: PartModifier,
  /// Name of the part: the name token's value, an automatically assigned
  /// number for unnamed regexps/wildcards, or empty for fixed text.
  pub name: String,
  /// Encoded text before the matching portion; empty for fixed text.
  pub prefix: String,
  /// Encoded text after the matching portion; empty for fixed text.
  pub suffix: String,
}
158
159impl Part {
160 fn new(kind: PartType, value: String, modifier: PartModifier) -> Self {
161 Part {
162 kind,
163 value,
164 modifier,
165 name: String::new(),
166 prefix: String::new(),
167 suffix: String::new(),
168 }
169 }
170}
171
/// State of a single pattern-string parse.
struct PatternParser<F>
where
  F: Fn(&str) -> Result<String, Error>,
{
  /// Tokens being parsed.
  token_list: Vec<Token>,
  /// Callback applied to fixed text, prefixes and suffixes before they are
  /// stored in a part.
  encoding_callback: F,
  /// Regexp source that identifies a segment wildcard.
  segment_wildcard_regexp: String,
  /// Parts produced so far, in pattern order.
  part_list: Vec<Part>,
  /// Fixed text accumulated but not yet turned into a part.
  pending_fixed_value: String,
  /// Index into `token_list` of the next token to consume.
  index: usize,
  /// Name to assign to the next unnamed regexp or wildcard part.
  next_numeric_name: usize,
}
185
186impl<F> PatternParser<F>
187where
188 F: Fn(&str) -> Result<String, Error>,
189{
190 fn try_consume_token(&mut self, kind: TokenType) -> Option<Token> {
192 assert!(self.index < self.token_list.len());
193 let next_token = self.token_list[self.index].clone();
194 if next_token.kind != kind {
195 None
196 } else {
197 self.index += 1;
198 Some(next_token)
199 }
200 }
201
202 #[inline]
204 fn try_consume_regexp_or_wildcard_token(
205 &mut self,
206 name_token_is_none: bool,
207 ) -> Option<Token> {
208 let token = self.try_consume_token(TokenType::Regexp);
209 if name_token_is_none && token.is_none() {
210 self.try_consume_token(TokenType::Asterisk)
211 } else {
212 token
213 }
214 }
215
216 #[inline]
218 fn try_consume_modifier_token(&mut self) -> Option<Token> {
219 self
220 .try_consume_token(TokenType::OtherModifier)
221 .or_else(|| self.try_consume_token(TokenType::Asterisk))
222 }
223
224 #[inline]
226 fn maybe_add_part_from_pending_fixed_value(&mut self) -> Result<(), Error> {
227 if self.pending_fixed_value.is_empty() {
228 return Ok(());
229 }
230 let encoded_value = (self.encoding_callback)(&self.pending_fixed_value)?;
231 self.pending_fixed_value = String::new();
232 self.part_list.push(Part::new(
233 PartType::FixedText,
234 encoded_value,
235 PartModifier::None,
236 ));
237
238 Ok(())
239 }
240
241 fn add_part(
243 &mut self,
244 prefix: &str,
245 name_token: Option<Token>,
246 regexp_or_wildcard_token: Option<Token>,
247 suffix: &str,
248 modifier_token: Option<Token>,
249 ) -> Result<(), Error> {
250 let mut modifier = PartModifier::None;
251 if let Some(modifier_token) = modifier_token {
252 modifier = match modifier_token.value.as_ref() {
253 "?" => PartModifier::Optional,
254 "*" => PartModifier::ZeroOrMore,
255 "+" => PartModifier::OneOrMore,
256 _ => unreachable!(),
257 };
258 }
259 if name_token.is_none()
260 && regexp_or_wildcard_token.is_none()
261 && modifier == PartModifier::None
262 {
263 self.pending_fixed_value.push_str(prefix);
264 return Ok(());
265 }
266 self.maybe_add_part_from_pending_fixed_value()?;
267 if name_token.is_none() && regexp_or_wildcard_token.is_none() {
268 assert!(suffix.is_empty());
269 if prefix.is_empty() {
270 return Ok(());
271 }
272 let encoded_value = (self.encoding_callback)(prefix)?;
273 self.part_list.push(Part::new(
274 PartType::FixedText,
275 encoded_value,
276 modifier,
277 ));
278 return Ok(());
279 }
280
281 let mut regexp_value = match ®exp_or_wildcard_token {
282 None => self.segment_wildcard_regexp.to_owned(),
283 Some(regexp_or_wildcard_token) => {
284 if regexp_or_wildcard_token.kind == TokenType::Asterisk {
285 FULL_WILDCARD_REGEXP_VALUE.to_string()
286 } else {
287 regexp_or_wildcard_token.value.to_owned()
288 }
289 }
290 };
291
292 let mut kind = PartType::Regexp;
293 if regexp_value == self.segment_wildcard_regexp {
294 kind = PartType::SegmentWildcard;
295 regexp_value = String::new();
296 } else if regexp_value == FULL_WILDCARD_REGEXP_VALUE {
297 kind = PartType::FullWildcard;
298 regexp_value = String::new();
299 }
300
301 let mut name = String::new();
302 if let Some(name_token) = name_token {
303 name = name_token.value;
304 } else if regexp_or_wildcard_token.is_some() {
305 name = self.next_numeric_name.to_string();
306 self.next_numeric_name += 1;
307 }
308 if self.is_duplicate_name(&name) {
309 return Err(Error::Parser(ParserError::DuplicateName(name)));
310 }
311 let encoded_prefix = (self.encoding_callback)(prefix)?;
312 let encoded_suffix = (self.encoding_callback)(suffix)?;
313 self.part_list.push(Part {
314 kind,
315 value: regexp_value,
316 modifier,
317 name,
318 prefix: encoded_prefix,
319 suffix: encoded_suffix,
320 });
321
322 Ok(())
323 }
324
325 fn is_duplicate_name(&self, name: &str) -> bool {
327 self.part_list.iter().any(|p| p.name == name)
328 }
329
330 fn consume_text(&mut self) -> String {
332 let mut result = String::new();
333 loop {
334 let mut token = self.try_consume_token(TokenType::Char);
335 if token.is_none() {
336 token = self.try_consume_token(TokenType::EscapedChar);
337 }
338 if token.is_none() {
339 break;
340 }
341 result.push_str(&token.unwrap().value);
342 }
343 result
344 }
345
346 #[inline]
348 fn consume_required_token(
349 &mut self,
350 kind: TokenType,
351 ) -> Result<Token, Error> {
352 self.try_consume_token(kind.clone()).ok_or_else(|| {
353 Error::Parser(ParserError::ExpectedToken(
354 kind,
355 self.token_list[self.index].kind.clone(),
356 self.token_list[self.index].value.clone(),
357 ))
358 })
359 }
360}
361
362pub fn parse_pattern_string<F>(
364 input: &str,
365 options: &Options,
366 encoding_callback: F,
367) -> Result<Vec<Part>, Error>
368where
369 F: Fn(&str) -> Result<String, Error>,
370{
371 let token_list = crate::tokenizer::tokenize(
372 input,
373 crate::tokenizer::TokenizePolicy::Strict,
374 )?;
375
376 let mut parser = PatternParser {
377 token_list,
378 encoding_callback,
379 segment_wildcard_regexp: options.generate_segment_wildcard_regexp(),
380 part_list: vec![],
381 pending_fixed_value: String::new(),
382 index: 0,
383 next_numeric_name: 0,
384 };
385
386 while parser.index < parser.token_list.len() {
387 let char_token = parser.try_consume_token(TokenType::Char);
388 let mut name_token = parser.try_consume_token(TokenType::Name);
389 let mut regexp_or_wildcard_token =
390 parser.try_consume_regexp_or_wildcard_token(name_token.is_none());
391 if name_token.is_some() || regexp_or_wildcard_token.is_some() {
392 let mut prefix = String::new();
393 if let Some(char_token) = char_token {
394 char_token.value.clone_into(&mut prefix);
395 }
396 if !prefix.is_empty() && prefix != options.prefix_code_point {
397 parser.pending_fixed_value.push_str(&prefix);
398 prefix = String::new();
399 }
400 parser.maybe_add_part_from_pending_fixed_value()?;
401 let modifier_token = parser.try_consume_modifier_token();
402 parser.add_part(
403 &prefix,
404 name_token,
405 regexp_or_wildcard_token,
406 "",
407 modifier_token,
408 )?;
409 continue;
410 }
411 let mut fixed_token = char_token;
412 if fixed_token.is_none() {
413 fixed_token = parser.try_consume_token(TokenType::EscapedChar);
414 }
415 if let Some(fixed_token) = fixed_token {
416 parser.pending_fixed_value.push_str(&fixed_token.value);
417 continue;
418 }
419 let open_token = parser.try_consume_token(TokenType::Open);
420 if open_token.is_some() {
421 let prefix = parser.consume_text();
422 name_token = parser.try_consume_token(TokenType::Name);
423 regexp_or_wildcard_token =
424 parser.try_consume_regexp_or_wildcard_token(name_token.is_none());
425 let suffix = parser.consume_text();
426 parser.consume_required_token(TokenType::Close)?;
427 let modifier_token = parser.try_consume_modifier_token();
428 parser.add_part(
429 &prefix,
430 name_token,
431 regexp_or_wildcard_token,
432 &suffix,
433 modifier_token,
434 )?;
435 continue;
436 }
437 parser.maybe_add_part_from_pending_fixed_value()?;
438 parser.consume_required_token(TokenType::End)?;
439 }
440
441 Ok(parser.part_list)
442}