xpath/
lib.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5mod ast;
6mod context;
7mod eval;
8mod eval_function;
9mod parser;
10mod value;
11
12use std::fmt::Debug;
13use std::hash::Hash;
14
15pub use ast::Expression;
16use ast::QName;
17use context::EvaluationCtx;
18use markup5ever::{LocalName, Namespace, Prefix};
19use parser::{OwnedParserError, parse as parse_impl};
20pub use value::{NodesetHelpers, Value};
21
22pub trait Dom {
23    type Node: Node;
24    /// An exception that can occur during JS evaluation.
25    type JsError: Debug;
26    type NamespaceResolver: NamespaceResolver<Self::JsError>;
27}
28
29/// A handle to a DOM node exposing all functionality needed by xpath.
30pub trait Node: Eq + Clone + Debug {
31    type ProcessingInstruction: ProcessingInstruction;
32    type Document: Document<Node = Self>;
33    type Attribute: Attribute<Node = Self>;
34    type Element: Element<Node = Self>;
35
36    fn is_comment(&self) -> bool;
37    fn is_text(&self) -> bool;
38    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
39    fn text_content(&self) -> String;
40    /// <https://html.spec.whatwg.org/multipage/#language>
41    fn language(&self) -> Option<String>;
42    fn parent(&self) -> Option<Self>;
43    fn children(&self) -> impl Iterator<Item = Self>;
44    /// <https://dom.spec.whatwg.org/#concept-tree-order>
45    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
46    /// A non-shadow-including preorder traversal.
47    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
48    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
49    fn preceding_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
50    fn following_nodes(&self, root: &Self) -> impl Iterator<Item = Self>;
51    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
52    fn following_siblings(&self) -> impl Iterator<Item = Self>;
53    fn owner_document(&self) -> Self::Document;
54    fn to_opaque(&self) -> impl Eq + Hash;
55    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
56    fn as_attribute(&self) -> Option<Self::Attribute>;
57    fn as_element(&self) -> Option<Self::Element>;
58    fn lookup_namespace_uri(&self, uri: Option<&str>) -> Option<String>;
59}
60
/// Resolves namespace prefixes on behalf of an XPath evaluation.
///
/// `E` is the error type produced when the resolution itself fails.
pub trait NamespaceResolver<E>: Clone {
    /// Resolve `prefix` (or the absent prefix when `None`).
    ///
    /// Returns `Ok(None)` when the resolver has no answer and `Err` when the
    /// lookup itself failed. The returned string is presumably the namespace
    /// URI bound to the prefix — confirm with implementors.
    fn resolve_namespace_prefix(&self, prefix: Option<&str>) -> Result<Option<String>, E>;
}
64
/// The view of a node as a processing instruction.
pub trait ProcessingInstruction {
    /// The target of this processing instruction, as an owned string.
    fn target(&self) -> String;
}
68
69pub trait Document {
70    type Node: Node<Document = Self>;
71
72    fn is_html_document(&self) -> bool;
73    fn get_elements_with_id(&self, id: &str)
74    -> impl Iterator<Item = <Self::Node as Node>::Element>;
75}
76
77pub trait Element {
78    type Node: Node<Element = Self>;
79    type Attribute: Attribute<Node = Self::Node>;
80
81    fn as_node(&self) -> Self::Node;
82    fn prefix(&self) -> Option<Prefix>;
83    fn namespace(&self) -> Namespace;
84    fn local_name(&self) -> LocalName;
85    fn attributes(&self) -> impl Iterator<Item = Self::Attribute>;
86}
87
88pub trait Attribute {
89    type Node: Node<Attribute = Self>;
90
91    fn as_node(&self) -> Self::Node;
92    fn prefix(&self) -> Option<Prefix>;
93    fn namespace(&self) -> Namespace;
94    fn local_name(&self) -> LocalName;
95}
96
97/// Parse an XPath expression from a string
98pub fn parse<E>(xpath: &str) -> Result<Expression, Error<E>> {
99    match parse_impl(xpath) {
100        Ok(expression) => {
101            log::debug!("Parsed XPath: {expression:?}");
102            Ok(expression)
103        },
104        Err(error) => {
105            log::debug!("Unable to parse XPath: {error}");
106            Err(Error::Parsing(error))
107        },
108    }
109}
110
111/// Evaluate an already-parsed XPath expression
112pub fn evaluate_parsed_xpath<D: Dom>(
113    expr: &Expression,
114    context_node: D::Node,
115    resolver: Option<D::NamespaceResolver>,
116) -> Result<Value<D::Node>, Error<D::JsError>> {
117    let context = EvaluationCtx::<D>::new(context_node, resolver);
118    match expr.evaluate(&context) {
119        Ok(value) => {
120            log::debug!("Evaluated XPath: {value:?}");
121            Ok(value)
122        },
123        Err(error) => {
124            log::debug!("Unable to evaluate XPath: {error:?}");
125            Err(error)
126        },
127    }
128}
129
130#[derive(Clone, Debug)]
131pub enum Error<JsError> {
132    NotANodeset,
133    /// It is not clear where variables used in XPath expression should come from.
134    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
135    /// an empty result. We also error out.
136    ///
137    /// See <https://github.com/whatwg/dom/issues/67>
138    CannotUseVariables,
139    InvalidQName {
140        qname: QName,
141    },
142    Internal {
143        msg: String,
144    },
145    /// A JS exception that needs to be propagated to the caller.
146    JsException(JsError),
147    Parsing(OwnedParserError),
148}
149
/// Returns `true` if `c` is allowed as the first character of an XML name.
///
/// Implements the `NameStartChar` production:
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
fn is_valid_start(c: char) -> bool {
    // Inclusive `(first, last)` ranges accepted by `NameStartChar`; single
    // characters are written as one-element ranges.
    const NAME_START_RANGES: &[(char, char)] = &[
        (':', ':'),
        ('A', 'Z'),
        ('_', '_'),
        ('a', 'z'),
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{2FF}'),
        ('\u{370}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];

    NAME_START_RANGES
        .iter()
        .any(|&(first, last)| (first..=last).contains(&c))
}
169
170/// <https://www.w3.org/TR/xml/#NT-NameChar>
171fn is_valid_continuation(c: char) -> bool {
172    is_valid_start(c) ||
173        matches!(c,
174            '-' |
175            '.' |
176            '0'..='9' |
177            '\u{B7}' |
178            '\u{300}'..='\u{36F}' |
179            '\u{203F}'..='\u{2040}')
180}
181
#[cfg(test)]
/// Provides a dummy DOM to be used for tests.
///
/// Every type here is a unit struct and every method returns a fixed,
/// minimal answer (no parent, no children, empty strings, ...). This is just
/// enough to instantiate the generic XPath machinery in unit tests.
mod dummy_implementation {
    use std::{cmp, iter};

    use markup5ever::{LocalName, ns};

    use super::*;

    // FIXME: Expand this as more features are required
    /// A node with no content and no relatives. All values compare equal.
    #[derive(Clone, Eq, Debug, PartialEq)]
    pub(crate) struct DummyNode;
    /// A processing instruction with an empty target.
    pub(crate) struct DummyProcessingInstruction;
    /// A document that reports itself as HTML and contains no elements.
    pub(crate) struct DummyDocument;
    /// An attribute with empty name parts.
    pub(crate) struct DummyAttribute;
    /// An element with empty name parts and no attributes.
    pub(crate) struct DummyElement;

    impl Node for DummyNode {
        type ProcessingInstruction = DummyProcessingInstruction;
        type Document = DummyDocument;
        type Attribute = DummyAttribute;
        type Element = DummyElement;

        fn is_comment(&self) -> bool {
            false
        }
        fn is_text(&self) -> bool {
            false
        }
        fn text_content(&self) -> String {
            String::new()
        }
        fn language(&self) -> Option<String> {
            None
        }
        fn parent(&self) -> Option<Self> {
            None
        }
        fn children(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn compare_tree_order(&self, _: &Self) -> cmp::Ordering {
            // All `DummyNode`s are equal according to the derived `PartialEq`,
            // so the only answer that forms a consistent total order is
            // `Equal`. Unconditionally answering `Greater` would claim
            // `a > b` and `b > a` at once, which std sorting routines are
            // allowed to reject with a panic.
            cmp::Ordering::Equal
        }
        fn traverse_preorder(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn inclusive_ancestors(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_nodes(&self, _: &Self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_nodes(&self, _: &Self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn owner_document(&self) -> Self::Document {
            DummyDocument
        }
        fn to_opaque(&self) -> impl Eq + Hash {
            // Every dummy node shares the same opaque identity.
            0
        }
        fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction> {
            None
        }
        fn as_attribute(&self) -> Option<Self::Attribute> {
            None
        }
        fn as_element(&self) -> Option<Self::Element> {
            None
        }
        fn lookup_namespace_uri(&self, _: Option<&str>) -> Option<String> {
            None
        }
    }

    impl ProcessingInstruction for DummyProcessingInstruction {
        fn target(&self) -> String {
            String::new()
        }
    }

    impl Document for DummyDocument {
        type Node = DummyNode;

        fn is_html_document(&self) -> bool {
            true
        }
        fn get_elements_with_id(
            &self,
            _: &str,
        ) -> impl Iterator<Item = <Self::Node as Node>::Element> {
            iter::empty()
        }
    }

    impl Element for DummyElement {
        type Node = DummyNode;
        type Attribute = DummyAttribute;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            // `ns!()` with no argument is the empty namespace.
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
        fn attributes(&self) -> impl Iterator<Item = Self::Attribute> {
            iter::empty()
        }
    }

    impl Attribute for DummyAttribute {
        type Node = DummyNode;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            // `ns!()` with no argument is the empty namespace.
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
    }
}