1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

//! Implementation of public domain matching.
//!
//! The list is a file located on the `resources` folder and loaded once on first need.
//!
//! The list can be updated with `./mach update-pub-domains` from this source:
//! <https://publicsuffix.org/list/>
//!
//! This implementation is not strictly following the specification of the list. Wildcards are not
//! restricted to appear only in the leftmost position, but the current list has no such cases so
//! we don't need to make the code more complex for it. The `mach` update command makes sure that
//! those cases are not present.

use std::collections::HashSet;
use std::iter::FromIterator;
use std::sync::LazyLock;

use embedder_traits::resources::{self, Resource};
use servo_url::{Host, ImmutableOrigin, ServoUrl};

#[derive(Clone, Debug, Default)]
pub struct PubDomainRules {
    rules: HashSet<String>,
    wildcards: HashSet<String>,
    exceptions: HashSet<String>,
}

static PUB_DOMAINS: LazyLock<PubDomainRules> = LazyLock::new(load_pub_domains);

impl<'a> FromIterator<&'a str> for PubDomainRules {
    fn from_iter<T>(iter: T) -> Self
    where
        T: IntoIterator<Item = &'a str>,
    {
        let mut result = PubDomainRules::default();
        for item in iter {
            if let Some(stripped) = item.strip_prefix('!') {
                result.exceptions.insert(String::from(stripped));
            } else if let Some(stripped) = item.strip_prefix("*.") {
                result.wildcards.insert(String::from(stripped));
            } else {
                result.rules.insert(String::from(item));
            }
        }
        result
    }
}

impl PubDomainRules {
    pub fn parse(content: &str) -> PubDomainRules {
        content
            .lines()
            .map(str::trim)
            .filter(|s| !s.is_empty())
            .filter(|s| !s.starts_with("//"))
            .collect()
    }
    fn suffix_pair<'a>(&self, domain: &'a str) -> (&'a str, &'a str) {
        let domain = domain.trim_start_matches('.');
        let mut suffix = domain;
        let mut prev_suffix = domain;
        for (index, _) in domain.match_indices('.') {
            let next_suffix = &domain[index + 1..];
            if self.exceptions.contains(suffix) {
                return (next_suffix, suffix);
            }
            if self.wildcards.contains(next_suffix) || self.rules.contains(suffix) {
                return (suffix, prev_suffix);
            }
            prev_suffix = suffix;
            suffix = next_suffix;
        }
        (suffix, prev_suffix)
    }
    pub fn public_suffix<'a>(&self, domain: &'a str) -> &'a str {
        let (public, _) = self.suffix_pair(domain);
        public
    }
    pub fn registrable_suffix<'a>(&self, domain: &'a str) -> &'a str {
        let (_, registrable) = self.suffix_pair(domain);
        registrable
    }
    pub fn is_public_suffix(&self, domain: &str) -> bool {
        // Speeded-up version of
        // domain != "" &&
        // self.public_suffix(domain) == domain.
        let domain = domain.trim_start_matches('.');
        match domain.find('.') {
            None => !domain.is_empty(),
            Some(index) => {
                !self.exceptions.contains(domain) && self.wildcards.contains(&domain[index + 1..]) ||
                    self.rules.contains(domain)
            },
        }
    }
    pub fn is_registrable_suffix(&self, domain: &str) -> bool {
        // Speeded-up version of
        // self.public_suffix(domain) != domain &&
        // self.registrable_suffix(domain) == domain.
        let domain = domain.trim_start_matches('.');
        match domain.find('.') {
            None => false,
            Some(index) => {
                self.exceptions.contains(domain) ||
                    !self.wildcards.contains(&domain[index + 1..]) &&
                        !self.rules.contains(domain) &&
                        self.is_public_suffix(&domain[index + 1..])
            },
        }
    }
}

fn load_pub_domains() -> PubDomainRules {
    PubDomainRules::parse(&resources::read_string(Resource::DomainList))
}

pub fn pub_suffix(domain: &str) -> &str {
    PUB_DOMAINS.public_suffix(domain)
}

pub fn reg_suffix(domain: &str) -> &str {
    PUB_DOMAINS.registrable_suffix(domain)
}

pub fn is_pub_domain(domain: &str) -> bool {
    PUB_DOMAINS.is_public_suffix(domain)
}

pub fn is_reg_domain(domain: &str) -> bool {
    PUB_DOMAINS.is_registrable_suffix(domain)
}

/// The registered domain name (aka eTLD+1) for a URL.
/// Returns None if the URL has no host name.
/// Returns the registered suffix for the host name if it is a domain.
/// Leaves the host name alone if it is an IP address.
pub fn reg_host(url: &ServoUrl) -> Option<Host> {
    match url.origin() {
        ImmutableOrigin::Tuple(_, Host::Domain(domain), _) => {
            Some(Host::Domain(String::from(reg_suffix(&domain))))
        },
        ImmutableOrigin::Tuple(_, ip, _) => Some(ip),
        ImmutableOrigin::Opaque(_) => None,
    }
}