// xpath/lib.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5mod ast;
6mod context;
7mod eval;
8mod functions;
9mod parser;
10mod tokenizer;
11mod value;
12
13use std::fmt;
14use std::hash::Hash;
15
16pub use ast::Expression;
17use ast::QName;
18use context::EvaluationCtx;
19use markup5ever::{LocalName, Namespace, Prefix};
20pub use parser::{Error as ParserError, parse};
21pub use value::{NodeSet, Value};
22
23pub trait Dom {
24    type Node: Node;
25    type NamespaceResolver: NamespaceResolver;
26}
27
28/// A handle to a DOM node exposing all functionality needed by xpath.
29pub trait Node: Eq + Clone + fmt::Debug {
30    type ProcessingInstruction: ProcessingInstruction;
31    type Document: Document<Node = Self>;
32    type Attribute: Attribute<Node = Self>;
33    type Element: Element<Node = Self>;
34    type Opaque: Eq + Hash + 'static;
35
36    fn is_comment(&self) -> bool;
37    fn is_text(&self) -> bool;
38    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
39    fn text_content(&self) -> String;
40    /// <https://html.spec.whatwg.org/multipage/#language>
41    fn language(&self) -> Option<String>;
42    fn parent(&self) -> Option<Self>;
43    fn children(&self) -> impl Iterator<Item = Self>;
44    /// <https://dom.spec.whatwg.org/#concept-tree-order>
45    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
46    /// A non-shadow-including preorder traversal.
47    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
48    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
49
50    /// Return an iterator over all nodes that come before `self` in [tree order],
51    /// excluding any ancestors and attribute nodes.
52    ///
53    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
54    fn preceding_nodes(&self) -> impl Iterator<Item = Self>;
55
56    /// Return an iterator over all nodes that come after `self` in [tree order],
57    /// excluding any descendants and attribute nodes.
58    ///
59    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
60    fn following_nodes(&self) -> impl Iterator<Item = Self>;
61    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
62    fn following_siblings(&self) -> impl Iterator<Item = Self>;
63    fn owner_document(&self) -> Self::Document;
64    fn to_opaque(&self) -> Self::Opaque;
65    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
66    fn as_attribute(&self) -> Option<Self::Attribute>;
67    fn as_element(&self) -> Option<Self::Element>;
68    fn get_root_node(&self) -> Self;
69}
70
/// Maps namespace prefixes used in an XPath expression to namespaces.
pub trait NamespaceResolver: Clone {
    /// Look up the namespace bound to `prefix`.
    ///
    /// NOTE(review): presumably returns the namespace URI, and `None` when the prefix
    /// is not bound - confirm with implementors.
    fn resolve_namespace_prefix(&self, prefix: &str) -> Option<String>;
}
74
/// A handle to a processing instruction node.
pub trait ProcessingInstruction {
    /// Return the target of this processing instruction.
    fn target(&self) -> String;
}
78
79pub trait Document {
80    type Node: Node<Document = Self>;
81
82    /// Return an iterator over elements with the given ID in tree order.
83    fn get_elements_with_id(&self, id: &str)
84    -> impl Iterator<Item = <Self::Node as Node>::Element>;
85}
86
87pub trait Element {
88    type Node: Node<Element = Self>;
89    type Attribute: Attribute<Node = Self::Node>;
90
91    fn as_node(&self) -> Self::Node;
92    fn prefix(&self) -> Option<Prefix>;
93    fn namespace(&self) -> Namespace;
94    fn local_name(&self) -> LocalName;
95    fn attributes(&self) -> impl Iterator<Item = Self::Attribute>;
96    fn is_html_element_in_html_document(&self) -> bool;
97}
98
99pub trait Attribute {
100    type Node: Node<Attribute = Self>;
101
102    fn as_node(&self) -> Self::Node;
103    fn prefix(&self) -> Option<Prefix>;
104    fn namespace(&self) -> Namespace;
105    fn local_name(&self) -> LocalName;
106}
107
108/// Evaluate an already-parsed XPath expression
109pub fn evaluate_parsed_xpath<D: Dom>(
110    expr: &Expression,
111    context_node: D::Node,
112) -> Result<Value<D::Node>, Error> {
113    let context = EvaluationCtx::<D>::new(context_node);
114    match expr.evaluate(&context) {
115        Ok(mut value) => {
116            if let Value::NodeSet(node_set) = &mut value {
117                node_set.deduplicate();
118                node_set.sort();
119            }
120
121            log::debug!("Evaluated XPath: {value:?}");
122            Ok(value)
123        },
124        Err(error) => {
125            log::debug!("Unable to evaluate XPath: {error:?}");
126            Err(error)
127        },
128    }
129}
130
131#[derive(Clone, Debug)]
132pub enum Error {
133    NotANodeset,
134    /// It is not clear where variables used in XPath expression should come from.
135    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
136    /// an empty result. We also error out.
137    ///
138    /// See <https://github.com/whatwg/dom/issues/67>
139    CannotUseVariables,
140    InvalidQName {
141        qname: QName,
142    },
143    Internal {
144        msg: String,
145    },
146}
147
/// Returns `true` if `c` may be the first character of an XML name.
///
/// The accepted characters are exactly those of the `NameStartChar` production:
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
fn is_valid_start(c: char) -> bool {
    matches!(c, ':' |
        'A'..='Z' |
        '_' |
        'a'..='z' |
        '\u{C0}'..='\u{D6}' |
        '\u{D8}'..='\u{F6}' |
        '\u{F8}'..='\u{2FF}' |
        '\u{370}'..='\u{37D}' |
        '\u{37F}'..='\u{1FFF}' |
        '\u{200C}'..='\u{200D}' |
        '\u{2070}'..='\u{218F}' |
        '\u{2C00}'..='\u{2FEF}' |
        '\u{3001}'..='\u{D7FF}' |
        '\u{F900}'..='\u{FDCF}' |
        '\u{FDF0}'..='\u{FFFD}' |
        '\u{10000}'..='\u{EFFFF}')
}
167
168/// <https://www.w3.org/TR/xml/#NT-NameChar>
169fn is_valid_continuation(c: char) -> bool {
170    is_valid_start(c) ||
171        matches!(c,
172            '-' |
173            '.' |
174            '0'..='9' |
175            '\u{B7}' |
176            '\u{300}'..='\u{36F}' |
177            '\u{203F}'..='\u{2040}')
178}
179
#[cfg(test)]
/// Provides a dummy DOM to be used for tests.
///
/// Every type here is a unit struct and every method returns a fixed value, so the
/// module is only useful for code paths that need *some* implementation of the DOM
/// traits without inspecting a real tree.
mod dummy_implementation {
    use std::{cmp, iter};

    use markup5ever::{LocalName, ns};

    use super::*;

    // FIXME: Expand this as more features are required
    /// Stateless node: all instances compare equal.
    #[derive(Clone, Eq, Debug, PartialEq)]
    pub(crate) struct DummyNode;
    /// Stateless processing instruction with an empty target.
    pub(crate) struct DummyProcessingInstruction;
    /// Stateless document that contains no elements.
    pub(crate) struct DummyDocument;
    /// Stateless attribute with an empty name.
    pub(crate) struct DummyAttribute;
    /// Stateless element with an empty name and no attributes.
    pub(crate) struct DummyElement;

    impl Node for DummyNode {
        type ProcessingInstruction = DummyProcessingInstruction;
        type Document = DummyDocument;
        type Attribute = DummyAttribute;
        type Element = DummyElement;
        type Opaque = usize;

        fn is_comment(&self) -> bool {
            false
        }
        fn is_text(&self) -> bool {
            false
        }
        fn text_content(&self) -> String {
            String::new()
        }
        fn language(&self) -> Option<String> {
            None
        }
        fn parent(&self) -> Option<Self> {
            None
        }
        fn children(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        // Fixed answer regardless of the operands; there is no real tree to consult.
        fn compare_tree_order(&self, _: &Self) -> cmp::Ordering {
            cmp::Ordering::Greater
        }
        fn traverse_preorder(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn inclusive_ancestors(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn owner_document(&self) -> Self::Document {
            DummyDocument
        }
        // Every dummy node maps to the same opaque value.
        fn to_opaque(&self) -> Self::Opaque {
            0
        }
        fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction> {
            None
        }
        fn as_attribute(&self) -> Option<Self::Attribute> {
            None
        }
        fn as_element(&self) -> Option<Self::Element> {
            None
        }
        fn get_root_node(&self) -> Self {
            self.clone()
        }
    }

    impl ProcessingInstruction for DummyProcessingInstruction {
        fn target(&self) -> String {
            String::new()
        }
    }

    impl Document for DummyDocument {
        type Node = DummyNode;

        fn get_elements_with_id(
            &self,
            _: &str,
        ) -> impl Iterator<Item = <Self::Node as Node>::Element> {
            iter::empty()
        }
    }

    impl Element for DummyElement {
        type Node = DummyNode;
        type Attribute = DummyAttribute;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
        fn attributes(&self) -> impl Iterator<Item = Self::Attribute> {
            iter::empty()
        }
        fn is_html_element_in_html_document(&self) -> bool {
            true
        }
    }

    impl Attribute for DummyAttribute {
        type Node = DummyNode;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
    }
}