net_traits/
pub_domains.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5//! Implementation of public domain matching.
6//!
//! The list is a file located in the `resources` folder and loaded once on first need.
8//!
9//! The list can be updated with `./mach update-pub-domains` from this source:
10//! <https://publicsuffix.org/list/>
11//!
//! This implementation does not strictly follow the specification of the list: the specification
//! does not restrict wildcards to the leftmost position, but only a leading `*.` is handled here.
//! The current list has no such cases, so we don't need to make the code more complex for it.
//! The `mach` update command makes sure that those cases are not present.
16
17use std::collections::HashSet;
18use std::iter::FromIterator;
19use std::sync::LazyLock;
20
21use embedder_traits::resources::{self, Resource};
22use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
23use malloc_size_of_derive::MallocSizeOf;
24use servo_url::{Host, ImmutableOrigin, ServoUrl};
25
/// The public suffix list, split into three sets by rule kind.
///
/// The sets are filled by the `FromIterator<&str>` implementation, which classifies each
/// entry by its prefix.
#[derive(Clone, Debug, Default, MallocSizeOf)]
pub struct PubDomainRules {
    /// Entries with neither a `!` nor a `*.` prefix, stored verbatim.
    rules: HashSet<String>,
    /// Entries that started with `*.`, stored without that prefix.
    wildcards: HashSet<String>,
    /// Entries that started with `!`, stored without that prefix.
    exceptions: HashSet<String>,
}
32
/// Global rule set backing the free functions of this module; it is built by
/// `load_pub_domains` the first time it is accessed.
static PUB_DOMAINS: LazyLock<PubDomainRules> = LazyLock::new(load_pub_domains);
34
/// Returns the result of measuring the global `PUB_DOMAINS` rule set with `ops`.
///
/// Accessing `PUB_DOMAINS` initializes it if that has not happened yet.
pub fn public_suffix_list_size_of(ops: &mut MallocSizeOfOps) -> usize {
    // NOTE(review): this presumably resolves, via auto-deref through `LazyLock`, to the derived
    // `MallocSizeOf` impl of `PubDomainRules` — confirm before changing the receiver expression.
    PUB_DOMAINS.size_of(ops)
}
38
39impl<'a> FromIterator<&'a str> for PubDomainRules {
40    fn from_iter<T>(iter: T) -> Self
41    where
42        T: IntoIterator<Item = &'a str>,
43    {
44        let mut result = PubDomainRules::default();
45        for item in iter {
46            if let Some(stripped) = item.strip_prefix('!') {
47                result.exceptions.insert(String::from(stripped));
48            } else if let Some(stripped) = item.strip_prefix("*.") {
49                result.wildcards.insert(String::from(stripped));
50            } else {
51                result.rules.insert(String::from(item));
52            }
53        }
54        result
55    }
56}
57
58impl PubDomainRules {
59    pub fn parse(content: &str) -> PubDomainRules {
60        content
61            .lines()
62            .map(str::trim)
63            .filter(|s| !s.is_empty())
64            .filter(|s| !s.starts_with("//"))
65            .collect()
66    }
67    fn suffix_pair<'a>(&self, domain: &'a str) -> (&'a str, &'a str) {
68        let domain = domain.trim_start_matches('.');
69        let mut suffix = domain;
70        let mut prev_suffix = domain;
71        for (index, _) in domain.match_indices('.') {
72            let next_suffix = &domain[index + 1..];
73            if self.exceptions.contains(suffix) {
74                return (next_suffix, suffix);
75            }
76            if self.wildcards.contains(next_suffix) || self.rules.contains(suffix) {
77                return (suffix, prev_suffix);
78            }
79            prev_suffix = suffix;
80            suffix = next_suffix;
81        }
82        (suffix, prev_suffix)
83    }
84    pub fn public_suffix<'a>(&self, domain: &'a str) -> &'a str {
85        let (public, _) = self.suffix_pair(domain);
86        public
87    }
88    pub fn registrable_suffix<'a>(&self, domain: &'a str) -> &'a str {
89        let (_, registrable) = self.suffix_pair(domain);
90        registrable
91    }
92    pub fn is_public_suffix(&self, domain: &str) -> bool {
93        // Speeded-up version of
94        // domain != "" &&
95        // self.public_suffix(domain) == domain.
96        let domain = domain.trim_start_matches('.');
97        match domain.find('.') {
98            None => !domain.is_empty(),
99            Some(index) => {
100                !self.exceptions.contains(domain) && self.wildcards.contains(&domain[index + 1..]) ||
101                    self.rules.contains(domain)
102            },
103        }
104    }
105    pub fn is_registrable_suffix(&self, domain: &str) -> bool {
106        // Speeded-up version of
107        // self.public_suffix(domain) != domain &&
108        // self.registrable_suffix(domain) == domain.
109        let domain = domain.trim_start_matches('.');
110        match domain.find('.') {
111            None => false,
112            Some(index) => {
113                self.exceptions.contains(domain) ||
114                    !self.wildcards.contains(&domain[index + 1..]) &&
115                        !self.rules.contains(domain) &&
116                        self.is_public_suffix(&domain[index + 1..])
117            },
118        }
119    }
120}
121
122fn load_pub_domains() -> PubDomainRules {
123    PubDomainRules::parse(&resources::read_string(Resource::DomainList))
124}
125
126pub fn pub_suffix(domain: &str) -> &str {
127    PUB_DOMAINS.public_suffix(domain)
128}
129
130pub fn reg_suffix(domain: &str) -> &str {
131    PUB_DOMAINS.registrable_suffix(domain)
132}
133
134pub fn is_pub_domain(domain: &str) -> bool {
135    PUB_DOMAINS.is_public_suffix(domain)
136}
137
138pub fn is_reg_domain(domain: &str) -> bool {
139    PUB_DOMAINS.is_registrable_suffix(domain)
140}
141
142/// The registered domain name (aka eTLD+1) for a URL.
143/// Returns None if the URL has no host name.
144/// Returns the registered suffix for the host name if it is a domain.
145/// Leaves the host name alone if it is an IP address.
146pub fn reg_host(url: &ServoUrl) -> Option<Host> {
147    match url.origin() {
148        ImmutableOrigin::Tuple(_, Host::Domain(domain), _) => {
149            Some(Host::Domain(String::from(reg_suffix(&domain))))
150        },
151        ImmutableOrigin::Tuple(_, ip, _) => Some(ip),
152        ImmutableOrigin::Opaque(_) => None,
153    }
154}