xpath/
value.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::ToOwned;
6use std::collections::HashSet;
7use std::mem;
8
9use crate::Node;
10
11/// The primary types of values that an XPath expression returns as a result.
12#[derive(Debug)]
13pub enum Value<N: Node> {
14    Boolean(bool),
15    /// A IEEE-754 double-precision floating point number.
16    Number(f64),
17    String(String),
18    NodeSet(NodeSet<N>),
19}
20
21#[derive(Debug)]
22pub struct NodeSet<N: Node> {
23    nodes: Vec<N>,
24    is_sorted: bool,
25}
26
27impl<N: Node> Default for NodeSet<N> {
28    fn default() -> Self {
29        Self {
30            nodes: Default::default(),
31            is_sorted: false,
32        }
33    }
34}
35
36impl<N: Node> NodeSet<N> {
37    pub(crate) fn len(&self) -> usize {
38        self.nodes.len()
39    }
40
41    fn is_empty(&self) -> bool {
42        self.nodes.is_empty()
43    }
44
45    pub(crate) fn push(&mut self, node: N) {
46        self.is_sorted = false;
47        self.nodes.push(node);
48    }
49
50    pub(crate) fn extend<I>(&mut self, iter: I)
51    where
52        I: IntoIterator<Item = N>,
53    {
54        self.nodes.extend(iter);
55    }
56
57    /// Whether this set is known to be sorted in tree order.
58    ///
59    /// This method is pessimistic and will never look at the elements in the set.
60    /// As such, it *may* return `false` even if the set happens to be sorted.
61    pub(crate) fn is_sorted(&self) -> bool {
62        self.is_sorted || self.nodes.len() < 2
63    }
64
65    /// Assume that this set is sorted, without actually sorting it.
66    pub(crate) fn assume_sorted(&mut self) {
67        debug_assert!(
68            self.nodes
69                .is_sorted_by(|a, b| a.compare_tree_order(b).is_le())
70        );
71        self.is_sorted = true;
72    }
73
74    pub(crate) fn sort(&mut self) {
75        if self.is_sorted() {
76            return;
77        }
78
79        // Using sort_unstable_by here is fine because duplicates won't appear in the final
80        // result anyways.
81        self.nodes.sort_unstable_by(|a, b| a.compare_tree_order(b));
82    }
83
84    pub(crate) fn iter(&self) -> impl Iterator<Item = &N> {
85        self.nodes.iter()
86    }
87
88    /// Return the first node in tree order that appears within this set.
89    ///
90    /// This method will not sort the set itself.
91    pub(crate) fn first(&self) -> Option<N> {
92        if self.is_sorted() {
93            return self.nodes.first().cloned();
94        }
95
96        self.iter().min_by(|a, b| a.compare_tree_order(b)).cloned()
97    }
98
99    pub(crate) fn deduplicate(&mut self) {
100        let mut seen = HashSet::new();
101        self.nodes = mem::take(&mut self.nodes)
102            .into_iter()
103            .filter_map(|node| {
104                let opaque = node.to_opaque();
105                seen.insert(opaque).then_some(node)
106            })
107            .collect();
108    }
109
110    /// Retains only the elements specified by the predicate.
111    ///
112    /// In other words, remove all elements `e` for which `f(&e)` returns `false`.
113    /// This method operates in place, visiting each element exactly once in the
114    /// original order, and preserves the order of the retained elements.
115    pub(crate) fn retain<F>(&mut self, f: F)
116    where
117        F: FnMut(&N) -> bool,
118    {
119        self.nodes.retain(f)
120    }
121
122    pub(crate) fn reverse(&mut self) {
123        self.nodes = mem::take(&mut self.nodes).into_iter().rev().collect();
124    }
125}
126
127impl<N: Node> IntoIterator for NodeSet<N> {
128    type IntoIter = <Vec<N> as IntoIterator>::IntoIter;
129    type Item = <Vec<N> as IntoIterator>::Item;
130
131    fn into_iter(self) -> Self::IntoIter {
132        self.nodes.into_iter()
133    }
134}
135
136impl<N: Node> FromIterator<N> for NodeSet<N> {
137    fn from_iter<T: IntoIterator<Item = N>>(iter: T) -> Self {
138        Self {
139            nodes: iter.into_iter().collect(),
140            is_sorted: false,
141        }
142    }
143}
144
145pub(crate) fn parse_number_from_string(string: &str) -> f64 {
146    // https://www.w3.org/TR/1999/REC-xpath-19991116/#function-number:
147    // > a string that consists of optional whitespace followed by an optional minus sign followed
148    // > by a Number followed by whitespace is converted to the IEEE 754 number that is nearest
149    // > (according to the IEEE 754 round-to-nearest rule) to the mathematical value represented
150    // > by the string; any other string is converted to NaN
151
152    // The specification does not define what "whitespace" means exactly, we choose to trim only ascii whitespace,
153    // as that seems to be what other browsers do.
154    string.trim_ascii().parse().unwrap_or(f64::NAN)
155}
156
157/// Helper for `PartialEq<Value>` implementations
158fn num_vals<N: Node>(nodes: &NodeSet<N>) -> Vec<f64> {
159    nodes
160        .iter()
161        .map(|node| parse_number_from_string(&node.text_content()))
162        .collect()
163}
164
165impl<N: Node> PartialEq<Value<N>> for Value<N> {
166    fn eq(&self, other: &Value<N>) -> bool {
167        match (self, other) {
168            (Value::NodeSet(left_nodes), Value::NodeSet(right_nodes)) => {
169                let left_strings: HashSet<String> =
170                    left_nodes.iter().map(|node| node.text_content()).collect();
171                let right_strings: HashSet<String> =
172                    right_nodes.iter().map(|node| node.text_content()).collect();
173                !left_strings.is_disjoint(&right_strings)
174            },
175            (&Value::NodeSet(ref nodes), &Value::Number(val)) |
176            (&Value::Number(val), &Value::NodeSet(ref nodes)) => {
177                let numbers = num_vals(nodes);
178                numbers.contains(&val)
179            },
180            (&Value::NodeSet(ref nodes), &Value::String(ref string)) |
181            (&Value::String(ref string), &Value::NodeSet(ref nodes)) => nodes
182                .iter()
183                .map(|node| node.text_content())
184                .any(|text_content| &text_content == string),
185            (&Value::Boolean(_), _) | (_, &Value::Boolean(_)) => {
186                self.convert_to_boolean() == other.convert_to_boolean()
187            },
188            (&Value::Number(_), _) | (_, &Value::Number(_)) => {
189                self.convert_to_number() == other.convert_to_number()
190            },
191            _ => self.convert_to_string() == other.convert_to_string(),
192        }
193    }
194}
195
196impl<N: Node> Value<N> {
197    /// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-boolean>
198    pub fn convert_to_boolean(&self) -> bool {
199        match self {
200            Value::Boolean(boolean) => *boolean,
201            Value::Number(number) => *number != 0.0 && !number.is_nan(),
202            Value::String(string) => !string.is_empty(),
203            Value::NodeSet(nodeset) => !nodeset.is_empty(),
204        }
205    }
206
207    /// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-number>
208    pub fn convert_to_number(&self) -> f64 {
209        match self {
210            Value::Boolean(boolean) => {
211                if *boolean {
212                    1.0
213                } else {
214                    0.0
215                }
216            },
217            Value::Number(number) => *number,
218            Value::String(string) => parse_number_from_string(string),
219            Value::NodeSet(_) => parse_number_from_string(&self.convert_to_string()),
220        }
221    }
222
223    /// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-string>
224    pub fn convert_to_string(&self) -> String {
225        match self {
226            Value::Boolean(value) => value.to_string(),
227            Value::Number(number) => {
228                if number.is_infinite() {
229                    if number.is_sign_negative() {
230                        "-Infinity".to_owned()
231                    } else {
232                        "Infinity".to_owned()
233                    }
234                } else if *number == 0.0 {
235                    // catches -0.0 also
236                    "0".into()
237                } else {
238                    number.to_string()
239                }
240            },
241            Value::String(string) => string.to_owned(),
242            Value::NodeSet(nodes) => nodes
243                .first()
244                .as_ref()
245                .map(Node::text_content)
246                .unwrap_or_default(),
247        }
248    }
249}
250
251macro_rules! from_impl {
252    ($raw:ty, $variant:expr) => {
253        impl<N: Node> From<$raw> for Value<N> {
254            fn from(other: $raw) -> Self {
255                $variant(other)
256            }
257        }
258    };
259}
260
261from_impl!(bool, Value::Boolean);
262from_impl!(f64, Value::Number);
263from_impl!(String, Value::String);
264impl<'a, N: Node> From<&'a str> for Value<N> {
265    fn from(other: &'a str) -> Self {
266        Value::String(other.into())
267    }
268}
269
270macro_rules! partial_eq_impl {
271    ($raw:ty, $variant:pat => $b:expr) => {
272        impl<N: Node> PartialEq<$raw> for Value<N> {
273            fn eq(&self, other: &$raw) -> bool {
274                match *self {
275                    $variant => $b == other,
276                    _ => false,
277                }
278            }
279        }
280
281        impl<N: Node> PartialEq<Value<N>> for $raw {
282            fn eq(&self, other: &Value<N>) -> bool {
283                match *other {
284                    $variant => $b == self,
285                    _ => false,
286                }
287            }
288        }
289    };
290}
291
292partial_eq_impl!(bool, Value::Boolean(ref v) => v);
293partial_eq_impl!(f64, Value::Number(ref v) => v);
294partial_eq_impl!(String, Value::String(ref v) => v);
295partial_eq_impl!(&str, Value::String(ref v) => v);
296
297#[cfg(test)]
298mod tests {
299    use std::f64;
300
301    use crate::dummy_implementation;
302
303    type Value = super::Value<dummy_implementation::DummyNode>;
304
305    #[test]
306    fn string_value_to_number() {
307        assert_eq!(Value::String("42.123".into()).convert_to_number(), 42.123);
308        assert_eq!(Value::String(" 42\n".into()).convert_to_number(), 42.);
309        assert!(
310            Value::String("totally-invalid".into())
311                .convert_to_number()
312                .is_nan()
313        );
314
315        // U+2004 is non-ascii whitespace, which should be rejected
316        assert!(
317            Value::String("\u{2004}42".into())
318                .convert_to_number()
319                .is_nan()
320        );
321    }
322
323    #[test]
324    fn number_value_to_string() {
325        assert_eq!(Value::Number(f64::NAN).convert_to_string(), "NaN");
326        assert_eq!(Value::Number(0.).convert_to_string(), "0");
327        assert_eq!(Value::Number(-0.).convert_to_string(), "0");
328        assert_eq!(Value::Number(f64::INFINITY).convert_to_string(), "Infinity");
329        assert_eq!(
330            Value::Number(f64::NEG_INFINITY).convert_to_string(),
331            "-Infinity"
332        );
333        assert_eq!(Value::Number(42.0).convert_to_string(), "42");
334        assert_eq!(Value::Number(-42.0).convert_to_string(), "-42");
335        assert_eq!(Value::Number(0.75).convert_to_string(), "0.75");
336        assert_eq!(Value::Number(-0.75).convert_to_string(), "-0.75");
337    }
338
339    #[test]
340    fn boolean_value_to_string() {
341        assert_eq!(Value::Boolean(false).convert_to_string(), "false");
342        assert_eq!(Value::Boolean(true).convert_to_string(), "true");
343    }
344}