// servo_xpath/lib.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

mod ast;
mod context;
mod eval;
mod functions;
mod parser;
mod tokenizer;
mod value;

use std::fmt;
use std::hash::Hash;

pub use ast::Expression;
use ast::QName;
use context::EvaluationCtx;
use markup5ever::{LocalName, Namespace, Prefix};
pub use parser::{Error as ParserError, parse};
pub use value::{NodeSet, Value};

23pub trait Dom {
24    type Context;
25
26    type Node: Node<Context = Self::Context>;
27    type NamespaceResolver: NamespaceResolver<Context = Self::Context>;
28}
30/// A handle to a DOM node exposing all functionality needed by xpath.
31pub trait Node: Eq + Clone + fmt::Debug {
32    type Context;
33
34    type ProcessingInstruction: ProcessingInstruction;
35    type Document: Document<Node = Self>;
36    type Attribute: Attribute<Node = Self>;
37    type Element: Element<Node = Self, Context = Self::Context>;
38    type Opaque: Eq + Hash + 'static;
39
40    fn is_comment(&self) -> bool;
41    fn is_text(&self) -> bool;
42    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
43    fn text_content(&self) -> String;
44    /// <https://html.spec.whatwg.org/multipage/#language>
45    fn language(&self) -> Option<String>;
46    fn parent(&self) -> Option<Self>;
47    fn children(&self) -> impl Iterator<Item = Self>;
48    /// <https://dom.spec.whatwg.org/#concept-tree-order>
49    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
50    /// A non-shadow-including preorder traversal.
51    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
52    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
53
54    /// Return an iterator over all nodes that come before `self` in [tree order],
55    /// excluding any ancestors and attribute nodes.
56    ///
57    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
58    fn preceding_nodes(&self) -> impl Iterator<Item = Self>;
59
60    /// Return an iterator over all nodes that come after `self` in [tree order],
61    /// excluding any descendants and attribute nodes.
62    ///
63    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
64    fn following_nodes(&self) -> impl Iterator<Item = Self>;
65    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
66    fn following_siblings(&self) -> impl Iterator<Item = Self>;
67    fn owner_document(&self) -> Self::Document;
68    fn to_opaque(&self) -> Self::Opaque;
69    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
70    fn as_attribute(&self) -> Option<Self::Attribute>;
71    fn as_element(&self) -> Option<Self::Element>;
72    fn get_root_node(&self) -> Self;
73}
/// Maps namespace prefixes to namespaces on behalf of the evaluator.
pub trait NamespaceResolver: Clone {
    /// Embedder-defined context type handed to the resolver on each lookup.
    type Context;

    /// Resolve `prefix`, returning the associated namespace if there is one.
    ///
    /// The precise meaning of `None` (e.g. "unbound prefix") is up to the
    /// implementor and the caller; it is not fixed by this trait.
    fn resolve_namespace_prefix(&self, cx: &mut Self::Context, prefix: &str) -> Option<String>;
}
/// A handle to a processing instruction node.
pub trait ProcessingInstruction {
    /// The target of this processing instruction, as an owned string.
    fn target(&self) -> String;
}
85pub trait Document {
86    type Node: Node<Document = Self>;
87
88    /// Return an iterator over elements with the given ID in tree order.
89    fn get_elements_with_id(
90        &self,
91        cx: &mut <Self::Node as Node>::Context,
92        id: &str,
93    ) -> impl Iterator<Item = <Self::Node as Node>::Element>;
94}
96pub trait Element {
97    type Context;
98
99    type Node: Node<Element = Self>;
100    type Attribute: Attribute<Node = Self::Node>;
101
102    fn as_node(&self) -> Self::Node;
103    fn prefix(&self) -> Option<Prefix>;
104    fn namespace(&self) -> Namespace;
105    fn local_name(&self) -> LocalName;
106    fn attributes(&self, cx: &mut Self::Context) -> impl Iterator<Item = Self::Attribute>;
107    fn is_html_element_in_html_document(&self) -> bool;
108}
110pub trait Attribute {
111    type Node: Node<Attribute = Self>;
112
113    fn as_node(&self) -> Self::Node;
114    fn prefix(&self) -> Option<Prefix>;
115    fn namespace(&self) -> Namespace;
116    fn local_name(&self) -> LocalName;
117}
119/// Evaluate an already-parsed XPath expression
120pub fn evaluate_parsed_xpath<D: Dom>(
121    cx: &mut D::Context,
122    expr: &Expression,
123    context_node: D::Node,
124) -> Result<Value<D::Node>, Error> {
125    let context = EvaluationCtx::<D>::new(context_node);
126    match expr.evaluate(cx, &context) {
127        Ok(mut value) => {
128            if let Value::NodeSet(node_set) = &mut value {
129                node_set.deduplicate();
130                node_set.sort();
131            }
132
133            log::debug!("Evaluated XPath: {value:?}");
134            Ok(value)
135        },
136        Err(error) => {
137            log::debug!("Unable to evaluate XPath: {error:?}");
138            Err(error)
139        },
140    }
141}
143#[derive(Clone, Debug)]
144pub enum Error {
145    NotANodeset,
146    /// It is not clear where variables used in XPath expression should come from.
147    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
148    /// an empty result. We also error out.
149    ///
150    /// See <https://github.com/whatwg/dom/issues/67>
151    CannotUseVariables,
152    InvalidQName {
153        qname: QName,
154    },
155    Internal {
156        msg: String,
157    },
158}
/// Returns `true` if `c` may begin an XML name, i.e. matches the
/// `NameStartChar` production.
///
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
fn is_valid_start(c: char) -> bool {
    matches!(
        c,
        ':' | 'A'..='Z'
            | '_'
            | 'a'..='z'
            | '\u{C0}'..='\u{D6}'
            | '\u{D8}'..='\u{F6}'
            | '\u{F8}'..='\u{2FF}'
            | '\u{370}'..='\u{37D}'
            | '\u{37F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
180/// <https://www.w3.org/TR/xml/#NT-NameChar>
181fn is_valid_continuation(c: char) -> bool {
182    is_valid_start(c) ||
183        matches!(c,
184            '-' |
185            '.' |
186            '0'..='9' |
187            '\u{B7}' |
188            '\u{300}'..='\u{36F}' |
189            '\u{203F}'..='\u{2040}')
190}
#[cfg(test)]
/// Provides a dummy DOM to be used for tests.
///
/// Every type here is a unit struct and every method returns a fixed, empty
/// or default value; the module exists so that code generic over the DOM
/// traits can be instantiated in tests.
mod dummy_implementation {
    use std::{cmp, iter};

    use markup5ever::{LocalName, ns};

    use super::*;

    // FIXME: Expand this as more features are required
    #[derive(Clone, Eq, Debug, PartialEq)]
    pub(crate) struct DummyNode;
    pub(crate) struct DummyProcessingInstruction;
    pub(crate) struct DummyDocument;
    pub(crate) struct DummyAttribute;
    pub(crate) struct DummyElement;

    /// A node with no parent, no children and no content.
    impl Node for DummyNode {
        type Context = ();
        type ProcessingInstruction = DummyProcessingInstruction;
        type Document = DummyDocument;
        type Attribute = DummyAttribute;
        type Element = DummyElement;
        type Opaque = usize;

        fn is_comment(&self) -> bool {
            false
        }
        fn is_text(&self) -> bool {
            false
        }
        fn text_content(&self) -> String {
            String::new()
        }
        fn language(&self) -> Option<String> {
            None
        }
        fn parent(&self) -> Option<Self> {
            None
        }
        fn children(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn compare_tree_order(&self, _: &Self) -> cmp::Ordering {
            // `DummyNode` is a unit struct, so all values are equal under `Eq`.
            // Report `Equal` to stay consistent with that: a comparator that
            // always answers `Greater` is not a valid total order, which
            // sorting routines are allowed to reject.
            cmp::Ordering::Equal
        }
        fn traverse_preorder(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn inclusive_ancestors(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn owner_document(&self) -> Self::Document {
            DummyDocument
        }
        fn to_opaque(&self) -> Self::Opaque {
            // All dummy nodes are indistinguishable, so they share one key.
            0
        }
        fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction> {
            None
        }
        fn as_attribute(&self) -> Option<Self::Attribute> {
            None
        }
        fn as_element(&self) -> Option<Self::Element> {
            None
        }
        fn get_root_node(&self) -> Self {
            self.clone()
        }
    }

    /// A processing instruction with an empty target.
    impl ProcessingInstruction for DummyProcessingInstruction {
        fn target(&self) -> String {
            String::new()
        }
    }

    /// A document that contains no elements.
    impl Document for DummyDocument {
        type Node = DummyNode;

        fn get_elements_with_id(
            &self,
            _: &mut (),
            _: &str,
        ) -> impl Iterator<Item = <Self::Node as Node>::Element> {
            iter::empty()
        }
    }

    /// An element with an empty name, the empty namespace and no attributes.
    impl Element for DummyElement {
        type Context = ();
        type Node = DummyNode;
        type Attribute = DummyAttribute;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
        fn attributes(&self, _: &mut ()) -> impl Iterator<Item = Self::Attribute> {
            iter::empty()
        }
        fn is_html_element_in_html_document(&self) -> bool {
            true
        }
    }

    /// An attribute with an empty name and the empty namespace.
    impl Attribute for DummyAttribute {
        type Node = DummyNode;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
    }
}