urlpattern/
canonicalize_and_process.rs

1// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
2
3// NOTE to all: the code in this crate sometimes slightly diverges from the
4// precise wording of the spec, because rust-url does not expose all the
5// routines exactly as the spec wants. The end behaviour should be identical.
6
7use crate::Error;
8
9// https://wicg.github.io/urlpattern/#canon-encoding-callbacks
10
11// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-protocol
12pub fn canonicalize_protocol(value: &str) -> Result<String, Error> {
13  if value.is_empty() {
14    return Ok(String::new());
15  }
16  url::Url::parse(&format!("{value}://dummy.test"))
17    .map(|url| url.scheme().to_owned())
18    .map_err(Error::Url)
19}
20
21// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-username
22pub fn canonicalize_username(value: &str) -> Result<String, Error> {
23  if value.is_empty() {
24    return Ok(String::new());
25  }
26  let mut url = url::Url::parse("http://dummy.test").unwrap();
27  // Note: unwrap is safe, because this is a HTTP url that supports username.
28  url.set_username(value).unwrap();
29  Ok(url.username().to_string())
30}
31
32// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-password
33pub fn canonicalize_password(value: &str) -> Result<String, Error> {
34  if value.is_empty() {
35    return Ok(String::new());
36  }
37  let mut url = url::Url::parse("http://dummy.test").unwrap();
38  // Note: unwrap is safe, because this is a HTTP url that supports password.
39  url.set_password(Some(value)).unwrap();
40  Ok(url.password().unwrap().to_string())
41}
42
43// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-hostname
44pub fn canonicalize_hostname(value: &str) -> Result<String, Error> {
45  if value.is_empty() {
46    return Ok(String::new());
47  }
48  let mut url = url::Url::parse("http://dummy.test").unwrap();
49  url.set_host(Some(value)).map_err(Error::Url)?;
50  Ok(url::quirks::hostname(&url).to_string())
51}
52
53// Ref: https://wicg.github.io/urlpattern/#canonicalize-an-ipv6-hostname
54pub fn canonicalize_ipv6_hostname(value: &str) -> Result<String, Error> {
55  let valid_ipv6 = value
56    .chars()
57    .all(|c| c.is_ascii_hexdigit() || matches!(c, '[' | ']' | ':'));
58  if !valid_ipv6 {
59    Err(Error::Url(url::ParseError::InvalidIpv6Address))
60  } else {
61    Ok(value.to_ascii_lowercase())
62  }
63}
64
65// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-port
66pub fn canonicalize_port(
67  value: &str,
68  mut protocol: Option<&str>,
69) -> Result<String, Error> {
70  if value.is_empty() {
71    return Ok(String::new());
72  }
73  if let Some("") = protocol {
74    protocol = None;
75  }
76  let port = value
77    .parse::<u16>()
78    .map_err(|_| Error::Url(url::ParseError::InvalidPort))?;
79  // Note: this unwrap is safe, because the protocol was previously parsed to be
80  // valid.
81  let mut url =
82    url::Url::parse(&format!("{}://dummy.test", protocol.unwrap_or("dummy")))
83      .unwrap();
84  url.set_port(Some(port)).unwrap(); // TODO: dont unwrap, instead ParseError
85  Ok(url::quirks::port(&url).to_string())
86}
87
88// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-pathname
89pub fn canonicalize_pathname(value: &str) -> Result<String, Error> {
90  if value.is_empty() {
91    return Ok(String::new());
92  }
93  let leading_slash = value.starts_with('/');
94  let modified_value = if !leading_slash {
95    format!("/-{value}")
96  } else {
97    value.to_string()
98  };
99  let mut url = url::Url::parse("http://dummy.test").unwrap();
100  url.set_path(&modified_value);
101  let mut pathname = url::quirks::pathname(&url);
102  if !leading_slash {
103    pathname = &pathname[2..];
104  }
105  Ok(pathname.to_string())
106}
107
108// Ref: https://wicg.github.io/urlpattern/#canonicalize-an-opaque-pathname
109pub fn canonicalize_an_opaque_pathname(value: &str) -> Result<String, Error> {
110  if value.is_empty() {
111    return Ok(String::new());
112  }
113  let mut url = url::Url::parse("data:dummy,test").unwrap();
114  url.set_path(value);
115  Ok(url::quirks::pathname(&url).to_string())
116}
117
118// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-search
119pub fn canonicalize_search(value: &str) -> Result<String, Error> {
120  if value.is_empty() {
121    return Ok(String::new());
122  }
123  let mut url = url::Url::parse("http://dummy.test").unwrap();
124  url.set_query(Some(value));
125  Ok(url.query().unwrap_or("").to_string())
126}
127
128// Ref: https://wicg.github.io/urlpattern/#canonicalize-a-search
129pub fn canonicalize_hash(value: &str) -> Result<String, Error> {
130  if value.is_empty() {
131    return Ok(String::new());
132  }
133  let mut url = url::Url::parse("http://dummy.test").unwrap();
134  url.set_fragment(Some(value));
135  Ok(url.fragment().unwrap_or("").to_string())
136}
137
/// Selects how the `process_*` functions in this module treat a component.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ProcessType {
  /// The component is part of a pattern: it is passed through (after any
  /// delimiter stripping, or pattern escaping for base URLs) without
  /// canonicalization.
  Pattern,
  /// The component is part of a URL: it is canonicalized.
  Url,
}
143
144// Ref: https://wicg.github.io/urlpattern/#process-protocol-for-init
145pub fn process_protocol_init(
146  value: &str,
147  kind: &ProcessType,
148) -> Result<String, Error> {
149  let stripped_value = value.strip_suffix(':').unwrap_or(value);
150  if kind == &ProcessType::Pattern {
151    Ok(stripped_value.to_string())
152  } else {
153    canonicalize_protocol(stripped_value)
154  }
155}
156
157// Ref: https://wicg.github.io/urlpattern/#process-username-for-init
158pub fn process_username_init(
159  value: &str,
160  kind: &ProcessType,
161) -> Result<String, Error> {
162  if kind == &ProcessType::Pattern {
163    Ok(value.to_string())
164  } else {
165    canonicalize_username(value)
166  }
167}
168
169// Ref: https://wicg.github.io/urlpattern/#process-password-for-init
170pub fn process_password_init(
171  value: &str,
172  kind: &ProcessType,
173) -> Result<String, Error> {
174  if kind == &ProcessType::Pattern {
175    Ok(value.to_string())
176  } else {
177    canonicalize_password(value)
178  }
179}
180
181// Ref: https://wicg.github.io/urlpattern/#process-hostname-for-init
182pub fn process_hostname_init(
183  value: &str,
184  kind: &ProcessType,
185) -> Result<String, Error> {
186  if kind == &ProcessType::Pattern {
187    Ok(value.to_string())
188  } else {
189    canonicalize_hostname(value)
190  }
191}
192
193// Ref: https://wicg.github.io/urlpattern/#process-port-for-init
194pub fn process_port_init(
195  port_value: &str,
196  protocol_value: Option<&str>,
197  kind: &ProcessType,
198) -> Result<String, Error> {
199  if kind == &ProcessType::Pattern {
200    Ok(port_value.to_string())
201  } else {
202    canonicalize_port(port_value, protocol_value)
203  }
204}
205
206// Ref: https://wicg.github.io/urlpattern/#process-pathname-for-init
207pub fn process_pathname_init(
208  pathname_value: &str,
209  protocol_value: Option<&str>,
210  kind: &ProcessType,
211) -> Result<String, Error> {
212  if kind == &ProcessType::Pattern {
213    Ok(pathname_value.to_string())
214  } else {
215    match protocol_value {
216      Some(protocol) if protocol.is_empty() || is_special_scheme(protocol) => {
217        canonicalize_pathname(pathname_value)
218      }
219      _ => canonicalize_an_opaque_pathname(pathname_value),
220    }
221  }
222}
223
224// Ref: https://wicg.github.io/urlpattern/#process-search-for-init
225pub fn process_search_init(
226  value: &str,
227  kind: &ProcessType,
228) -> Result<String, Error> {
229  let stripped_value = if value.starts_with('?') {
230    value.get(1..).unwrap()
231  } else {
232    value
233  };
234  if kind == &ProcessType::Pattern {
235    Ok(stripped_value.to_string())
236  } else {
237    canonicalize_search(stripped_value)
238  }
239}
240
241// Ref: https://wicg.github.io/urlpattern/#process-hash-for-init
242pub fn process_hash_init(
243  value: &str,
244  kind: &ProcessType,
245) -> Result<String, Error> {
246  let stripped_value = if value.starts_with('#') {
247    value.get(1..).unwrap()
248  } else {
249    value
250  };
251  if kind == &ProcessType::Pattern {
252    Ok(stripped_value.to_string())
253  } else {
254    canonicalize_hash(stripped_value)
255  }
256}
257
/// Returns `true` when `scheme` is exactly one of `http`, `https`, `ws`,
/// `wss`, `ftp` or `file`. The comparison is case-sensitive.
pub fn is_special_scheme(scheme: &str) -> bool {
  const SPECIAL_SCHEMES: [&str; 6] =
    ["http", "https", "ws", "wss", "ftp", "file"];
  SPECIAL_SCHEMES.contains(&scheme)
}
261
/// Returns the default port, as a decimal string, for the special schemes
/// that have one: `80` for `http`/`ws`, `443` for `https`/`wss` and `21` for
/// `ftp`. Every other scheme, including `file`, yields `None`.
pub fn special_scheme_default_port(scheme: &str) -> Option<&'static str> {
  match scheme {
    "http" | "ws" => Some("80"),
    "https" | "wss" => Some("443"),
    "ftp" => Some("21"),
    // `file` is a special scheme but has no default port.
    _ => None,
  }
}
273
274// Ref: https://urlpattern.spec.whatwg.org/#process-a-base-url-string
275pub fn process_base_url(input: &str, kind: &ProcessType) -> String {
276  if kind != &ProcessType::Pattern {
277    input.to_string()
278  } else {
279    escape_pattern_string(input)
280  }
281}
282
/// Escapes `input` so it can be embedded literally in a pattern string.
///
/// Each of the characters `+ * ? : { } ( ) \` is prefixed with a backslash;
/// all other characters are copied through unchanged.
///
/// # Panics
///
/// Panics if `input` contains non-ASCII characters.
///
/// Ref: <https://wicg.github.io/urlpattern/#escape-a-pattern-string>
pub fn escape_pattern_string(input: &str) -> String {
  assert!(input.is_ascii());
  input.chars().fold(String::new(), |mut escaped, c| {
    if "+*?:{}()\\".contains(c) {
      escaped.push('\\');
    }
    escaped.push(c);
    escaped
  })
}