net_traits/
pub_domains.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5//! Implementation of public domain matching.
6//!
//! The list is a file located in the `resources` folder and is loaded once on first need.
8//!
9//! The list can be updated with `./mach update-pub-domains` from this source:
10//! <https://publicsuffix.org/list/>
11//!
//! This implementation does not strictly follow the specification of the list. The specification
//! does not restrict wildcards to the leftmost position, whereas this code only recognises a
//! leading `*.`; the current list has no other cases, so we don't need to make the code more
//! complex for it. The `mach` update command makes sure that those cases are not present.
16
17use std::iter::FromIterator;
18use std::sync::LazyLock;
19
20use embedder_traits::resources::{self, Resource};
21use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
22use malloc_size_of_derive::MallocSizeOf;
23use rustc_hash::FxHashSet;
24use servo_url::{Host, ImmutableOrigin, ServoUrl};
25
// We can use FxHash here.
// The list is given by publicsuffix.org so an attack is highly unlikely
/// The rules of a public suffix list, split into three sets by rule kind.
///
/// The sets are filled by the `FromIterator<&str>` implementation: a line starting with `!`
/// goes into `exceptions`, a line starting with `*.` goes into `wildcards`, and every other
/// line goes into `rules`.
#[derive(Clone, Debug, Default, MallocSizeOf)]
pub struct PubDomainRules {
    /// Plain rules, stored as the full rule text (e.g. `co.uk`).
    rules: FxHashSet<String>,
    /// Wildcard rules, stored without their leading `*.` (e.g. `*.ck` is stored as `ck`).
    wildcards: FxHashSet<String>,
    /// Exception rules, stored without their leading `!` (e.g. `!www.ck` is stored as `www.ck`).
    exceptions: FxHashSet<String>,
}
34
/// Process-wide rule set, built by `load_pub_domains` the first time it is accessed.
static PUB_DOMAINS: LazyLock<PubDomainRules> = LazyLock::new(load_pub_domains);
36
/// Returns the size of the global public suffix list as reported by its `MallocSizeOf`
/// implementation, using `ops` for the measurement.
///
/// Accessing the global forces it to be loaded if that has not happened yet.
pub fn public_suffix_list_size_of(ops: &mut MallocSizeOfOps) -> usize {
    PUB_DOMAINS.size_of(ops)
}
40
41impl<'a> FromIterator<&'a str> for PubDomainRules {
42    fn from_iter<T>(iter: T) -> Self
43    where
44        T: IntoIterator<Item = &'a str>,
45    {
46        let mut result = PubDomainRules::default();
47        for item in iter {
48            if let Some(stripped) = item.strip_prefix('!') {
49                result.exceptions.insert(String::from(stripped));
50            } else if let Some(stripped) = item.strip_prefix("*.") {
51                result.wildcards.insert(String::from(stripped));
52            } else {
53                result.rules.insert(String::from(item));
54            }
55        }
56        result
57    }
58}
59
60impl PubDomainRules {
61    pub fn parse(content: &str) -> PubDomainRules {
62        content
63            .lines()
64            .map(str::trim)
65            .filter(|s| !s.is_empty())
66            .filter(|s| !s.starts_with("//"))
67            .collect()
68    }
69    fn suffix_pair<'a>(&self, domain: &'a str) -> (&'a str, &'a str) {
70        let domain = domain.trim_start_matches('.');
71        let mut suffix = domain;
72        let mut prev_suffix = domain;
73        for (index, _) in domain.match_indices('.') {
74            let next_suffix = &domain[index + 1..];
75            if self.exceptions.contains(suffix) {
76                return (next_suffix, suffix);
77            }
78            if self.wildcards.contains(next_suffix) || self.rules.contains(suffix) {
79                return (suffix, prev_suffix);
80            }
81            prev_suffix = suffix;
82            suffix = next_suffix;
83        }
84        (suffix, prev_suffix)
85    }
86    pub fn public_suffix<'a>(&self, domain: &'a str) -> &'a str {
87        let (public, _) = self.suffix_pair(domain);
88        public
89    }
90    pub fn registrable_suffix<'a>(&self, domain: &'a str) -> &'a str {
91        let (_, registrable) = self.suffix_pair(domain);
92        registrable
93    }
94    pub fn is_public_suffix(&self, domain: &str) -> bool {
95        // Speeded-up version of
96        // domain != "" &&
97        // self.public_suffix(domain) == domain.
98        let domain = domain.trim_start_matches('.');
99        match domain.find('.') {
100            None => !domain.is_empty(),
101            Some(index) => {
102                !self.exceptions.contains(domain) && self.wildcards.contains(&domain[index + 1..]) ||
103                    self.rules.contains(domain)
104            },
105        }
106    }
107    pub fn is_registrable_suffix(&self, domain: &str) -> bool {
108        // Speeded-up version of
109        // self.public_suffix(domain) != domain &&
110        // self.registrable_suffix(domain) == domain.
111        let domain = domain.trim_start_matches('.');
112        match domain.find('.') {
113            None => false,
114            Some(index) => {
115                self.exceptions.contains(domain) ||
116                    !self.wildcards.contains(&domain[index + 1..]) &&
117                        !self.rules.contains(domain) &&
118                        self.is_public_suffix(&domain[index + 1..])
119            },
120        }
121    }
122}
123
/// Reads the `Resource::DomainList` resource as a string and parses it into a rule set.
fn load_pub_domains() -> PubDomainRules {
    PubDomainRules::parse(&resources::read_string(Resource::DomainList))
}
127
/// Returns the public suffix of `domain` according to the global list.
///
/// The result is a slice of `domain`; see `PubDomainRules::public_suffix`.
pub fn pub_suffix(domain: &str) -> &str {
    PUB_DOMAINS.public_suffix(domain)
}
131
/// Returns the registrable suffix of `domain` according to the global list.
///
/// The result is a slice of `domain`; see `PubDomainRules::registrable_suffix`.
pub fn reg_suffix(domain: &str) -> &str {
    PUB_DOMAINS.registrable_suffix(domain)
}
135
/// Tells whether `domain` is a public suffix according to the global list.
///
/// See `PubDomainRules::is_public_suffix`.
pub fn is_pub_domain(domain: &str) -> bool {
    PUB_DOMAINS.is_public_suffix(domain)
}
139
/// Tells whether `domain` is a registrable suffix according to the global list.
///
/// See `PubDomainRules::is_registrable_suffix`.
pub fn is_reg_domain(domain: &str) -> bool {
    PUB_DOMAINS.is_registrable_suffix(domain)
}
143
144/// <https://html.spec.whatwg.org/multipage/#same-site>
145pub fn is_same_site(site_a: &ImmutableOrigin, site_b: &ImmutableOrigin) -> bool {
146    // First steps are for
147    // https://html.spec.whatwg.org/multipage/#concept-site-same-site
148    //
149    // Step 1. If A and B are the same opaque origin, then return true.
150    if !site_a.is_tuple() && !site_b.is_tuple() && site_a == site_b {
151        return true;
152    }
153
154    // Step 2. If A or B is an opaque origin, then return false.
155    let ImmutableOrigin::Tuple(scheme_a, host_a, _) = site_a else {
156        return false;
157    };
158    let ImmutableOrigin::Tuple(scheme_b, host_b, _) = site_b else {
159        return false;
160    };
161
162    // Step 3. If A's and B's scheme values are different, then return false.
163    if scheme_a != scheme_b {
164        return false;
165    }
166
167    // Step 4. If A's and B's host values are not equal, then return false.
168    // Includes the steps of https://html.spec.whatwg.org/multipage/#obtain-a-site
169    if let (Host::Domain(domain_a), Host::Domain(domain_b)) = (host_a, host_b) {
170        if reg_suffix(domain_a) != reg_suffix(domain_b) {
171            return false;
172        }
173    } else if host_a != host_b {
174        return false;
175    }
176
177    // Step 5. Return true.
178    true
179}
180
181/// The registered domain name (aka eTLD+1) for a URL.
182/// Returns None if the URL has no host name.
183/// Returns the registered suffix for the host name if it is a domain.
184/// Leaves the host name alone if it is an IP address.
185pub fn registered_domain_name(url: &ServoUrl) -> Option<Host> {
186    match url.origin() {
187        ImmutableOrigin::Tuple(_, Host::Domain(domain), _) => {
188            Some(Host::Domain(String::from(reg_suffix(&domain))))
189        },
190        ImmutableOrigin::Tuple(_, ip, _) => Some(ip),
191        ImmutableOrigin::Opaque(_) => None,
192    }
193}