xpath/
lib.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
mod ast;
mod context;
mod eval;
mod eval_function;
mod parser;
mod tokenizer;
mod value;

use std::fmt;
use std::hash::Hash;

pub use ast::Expression;
use ast::QName;
use context::EvaluationCtx;
use markup5ever::{LocalName, Namespace, Prefix};
pub use parser::{Error as ParserError, parse};
pub use value::{NodesetHelpers, Value};
22
23pub trait Dom {
24    type Node: Node;
25    type NamespaceResolver: NamespaceResolver;
26}
27
28/// A handle to a DOM node exposing all functionality needed by xpath.
29pub trait Node: Eq + Clone + fmt::Debug {
30    type ProcessingInstruction: ProcessingInstruction;
31    type Document: Document<Node = Self>;
32    type Attribute: Attribute<Node = Self>;
33    type Element: Element<Node = Self>;
34
35    fn is_comment(&self) -> bool;
36    fn is_text(&self) -> bool;
37    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
38    fn text_content(&self) -> String;
39    /// <https://html.spec.whatwg.org/multipage/#language>
40    fn language(&self) -> Option<String>;
41    fn parent(&self) -> Option<Self>;
42    fn children(&self) -> impl Iterator<Item = Self>;
43    /// <https://dom.spec.whatwg.org/#concept-tree-order>
44    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
45    /// A non-shadow-including preorder traversal.
46    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
47    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
48    fn preceding_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
49    fn following_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
50    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
51    fn following_siblings(&self) -> impl Iterator<Item = Self>;
52    fn owner_document(&self) -> Self::Document;
53    fn to_opaque(&self) -> impl Eq + Hash;
54    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
55    fn as_attribute(&self) -> Option<Self::Attribute>;
56    fn as_element(&self) -> Option<Self::Element>;
57    fn get_root_node(&self) -> Self;
58}
59
/// Resolves namespace prefixes.
pub trait NamespaceResolver: Clone {
    /// Looks up `prefix` and returns the associated string, or `None` when the
    /// resolver has no answer for it.
    ///
    // NOTE(review): the returned string is presumably the namespace URI bound
    // to `prefix`; confirm against the implementors.
    fn resolve_namespace_prefix(&self, prefix: &str) -> Option<String>;
}
63
/// A handle to a processing-instruction node.
pub trait ProcessingInstruction {
    /// Returns the target of this processing instruction as an owned string.
    fn target(&self) -> String;
}
67
68pub trait Document {
69    type Node: Node<Document = Self>;
70
71    fn get_elements_with_id(&self, id: &str)
72    -> impl Iterator<Item = <Self::Node as Node>::Element>;
73}
74
75pub trait Element {
76    type Node: Node<Element = Self>;
77    type Attribute: Attribute<Node = Self::Node>;
78
79    fn as_node(&self) -> Self::Node;
80    fn prefix(&self) -> Option<Prefix>;
81    fn namespace(&self) -> Namespace;
82    fn local_name(&self) -> LocalName;
83    fn attributes(&self) -> impl Iterator<Item = Self::Attribute>;
84    fn is_html_element_in_html_document(&self) -> bool;
85}
86
87pub trait Attribute {
88    type Node: Node<Attribute = Self>;
89
90    fn as_node(&self) -> Self::Node;
91    fn prefix(&self) -> Option<Prefix>;
92    fn namespace(&self) -> Namespace;
93    fn local_name(&self) -> LocalName;
94}
95
96/// Evaluate an already-parsed XPath expression
97pub fn evaluate_parsed_xpath<D: Dom>(
98    expr: &Expression,
99    context_node: D::Node,
100) -> Result<Value<D::Node>, Error> {
101    let context = EvaluationCtx::<D>::new(context_node);
102    match expr.evaluate(&context) {
103        Ok(value) => {
104            log::debug!("Evaluated XPath: {value:?}");
105            Ok(value)
106        },
107        Err(error) => {
108            log::debug!("Unable to evaluate XPath: {error:?}");
109            Err(error)
110        },
111    }
112}
113
114#[derive(Clone, Debug)]
115pub enum Error {
116    NotANodeset,
117    /// It is not clear where variables used in XPath expression should come from.
118    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
119    /// an empty result. We also error out.
120    ///
121    /// See <https://github.com/whatwg/dom/issues/67>
122    CannotUseVariables,
123    InvalidQName {
124        qname: QName,
125    },
126    Internal {
127        msg: String,
128    },
129}
130
/// Returns `true` if `c` is allowed as the first character of an XML name.
///
/// The accepted characters are the alternatives of the `NameStartChar`
/// production, listed here in the same order as in the specification.
///
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
fn is_valid_start(c: char) -> bool {
    matches!(c,
        ':' |
        'A'..='Z' |
        '_' |
        'a'..='z' |
        '\u{C0}'..='\u{D6}' |
        '\u{D8}'..='\u{F6}' |
        '\u{F8}'..='\u{2FF}' |
        '\u{370}'..='\u{37D}' |
        '\u{37F}'..='\u{1FFF}' |
        '\u{200C}'..='\u{200D}' |
        '\u{2070}'..='\u{218F}' |
        '\u{2C00}'..='\u{2FEF}' |
        '\u{3001}'..='\u{D7FF}' |
        '\u{F900}'..='\u{FDCF}' |
        '\u{FDF0}'..='\u{FFFD}' |
        '\u{10000}'..='\u{EFFFF}')
}
150
151/// <https://www.w3.org/TR/xml/#NT-NameChar>
152fn is_valid_continuation(c: char) -> bool {
153    is_valid_start(c) ||
154        matches!(c,
155            '-' |
156            '.' |
157            '0'..='9' |
158            '\u{B7}' |
159            '\u{300}'..='\u{36F}' |
160            '\u{203F}'..='\u{2040}')
161}
162
#[cfg(test)]
/// Provides a dummy DOM to be used for tests.
///
/// All types are field-less unit structs and every trait method returns a
/// fixed placeholder value, so the "tree" consists of a single node without
/// parent, children, siblings or attributes.
mod dummy_implementation {
    use std::{cmp, iter};

    use markup5ever::{LocalName, ns};

    use super::*;

    // FIXME: Expand this as more features are required
    /// Placeholder node; all values of this type compare equal.
    #[derive(Clone, Eq, Debug, PartialEq)]
    pub(crate) struct DummyNode;
    /// Placeholder processing instruction with an empty target.
    pub(crate) struct DummyProcessingInstruction;
    /// Placeholder document that contains no elements.
    pub(crate) struct DummyDocument;
    /// Placeholder attribute with an empty name.
    pub(crate) struct DummyAttribute;
    /// Placeholder element with an empty name and no attributes.
    pub(crate) struct DummyElement;

    impl Node for DummyNode {
        type ProcessingInstruction = DummyProcessingInstruction;
        type Document = DummyDocument;
        type Attribute = DummyAttribute;
        type Element = DummyElement;

        fn is_comment(&self) -> bool {
            false
        }
        fn is_text(&self) -> bool {
            false
        }
        fn text_content(&self) -> String {
            String::new()
        }
        fn language(&self) -> Option<String> {
            None
        }
        fn parent(&self) -> Option<Self> {
            None
        }
        fn children(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn compare_tree_order(&self, _: &Self) -> cmp::Ordering {
            // Every `DummyNode` is `==` to every other one (derived
            // `PartialEq` on a unit struct), so `Equal` is the only answer
            // that forms a consistent ordering. Always answering `Greater`
            // would claim `a > b` and `b > a` at the same time.
            cmp::Ordering::Equal
        }
        fn traverse_preorder(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn inclusive_ancestors(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_nodes(&self, _: &Self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_nodes(&self, _: &Self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn owner_document(&self) -> Self::Document {
            DummyDocument
        }
        fn to_opaque(&self) -> impl Eq + Hash {
            // A constant key: all dummy nodes are considered the same node.
            0
        }
        fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction> {
            None
        }
        fn as_attribute(&self) -> Option<Self::Attribute> {
            None
        }
        fn as_element(&self) -> Option<Self::Element> {
            None
        }
        fn get_root_node(&self) -> Self {
            // The node has no parent, so it is its own root.
            self.clone()
        }
    }

    impl ProcessingInstruction for DummyProcessingInstruction {
        fn target(&self) -> String {
            String::new()
        }
    }

    impl Document for DummyDocument {
        type Node = DummyNode;

        fn get_elements_with_id(
            &self,
            _: &str,
        ) -> impl Iterator<Item = <Self::Node as Node>::Element> {
            iter::empty()
        }
    }

    impl Element for DummyElement {
        type Node = DummyNode;
        type Attribute = DummyAttribute;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
        fn attributes(&self) -> impl Iterator<Item = Self::Attribute> {
            iter::empty()
        }
        fn is_html_element_in_html_document(&self) -> bool {
            true
        }
    }

    impl Attribute for DummyAttribute {
        type Node = DummyNode;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
    }
}