xpath/
eval.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use markup5ever::{LocalName, Namespace, Prefix, QualName, local_name, namespace_prefix, ns};
6
7use crate::ast::{
8    Axis, BinaryOperator, Expression, FilterExpression, KindTest, Literal, LocationStepExpression,
9    NodeTest, PathExpression, PredicateListExpression,
10};
11use crate::context::PredicateCtx;
12use crate::{
13    Attribute, Document, Dom, Element, Error, EvaluationCtx, Node, ProcessingInstruction, Value,
14};
15
16pub(crate) fn try_extract_nodeset<E, N: Node>(v: Value<N>) -> Result<Vec<N>, Error<E>> {
17    match v {
18        Value::Nodeset(ns) => Ok(ns),
19        _ => Err(Error::NotANodeset),
20    }
21}
22
23impl Expression {
24    pub(crate) fn evaluate<D: Dom>(
25        &self,
26        context: &EvaluationCtx<D>,
27    ) -> Result<Value<D::Node>, Error<D::JsError>> {
28        match self {
29            // And/Or expression are seperated because they can sometimes be evaluated
30            // without evaluating both operands.
31            Expression::Binary(left, BinaryOperator::And, right) => {
32                let left_bool = left.evaluate(context)?.boolean();
33                let v = left_bool && right.evaluate(context)?.boolean();
34                Ok(Value::Boolean(v))
35            },
36            Expression::Binary(left, BinaryOperator::Or, right) => {
37                let left_bool = left.evaluate(context)?.boolean();
38                let v = left_bool || right.evaluate(context)?.boolean();
39                Ok(Value::Boolean(v))
40            },
41            Expression::Binary(left, binary_operator, right) => {
42                let left_value = left.evaluate(context)?;
43                let right_value = right.evaluate(context)?;
44
45                let value = match binary_operator {
46                    BinaryOperator::Equal => (left_value == right_value).into(),
47                    BinaryOperator::NotEqual => (left_value != right_value).into(),
48                    BinaryOperator::LessThan => (left_value.number() < right_value.number()).into(),
49                    BinaryOperator::GreaterThan => {
50                        (left_value.number() > right_value.number()).into()
51                    },
52                    BinaryOperator::LessThanOrEqual => {
53                        (left_value.number() <= right_value.number()).into()
54                    },
55                    BinaryOperator::GreaterThanOrEqual => {
56                        (left_value.number() >= right_value.number()).into()
57                    },
58                    BinaryOperator::Add => (left_value.number() + right_value.number()).into(),
59                    BinaryOperator::Subtract => (left_value.number() - right_value.number()).into(),
60                    BinaryOperator::Multiply => (left_value.number() * right_value.number()).into(),
61                    BinaryOperator::Divide => (left_value.number() / right_value.number()).into(),
62                    BinaryOperator::Modulo => (left_value.number() % right_value.number()).into(),
63                    BinaryOperator::Union => {
64                        let as_nodes =
65                            |e: &Expression| e.evaluate(context).and_then(try_extract_nodeset);
66                        let mut left_nodes = as_nodes(left)?;
67                        let right_nodes = as_nodes(right)?;
68
69                        left_nodes.extend(right_nodes);
70                        Value::Nodeset(left_nodes)
71                    },
72                    _ => unreachable!("And/Or were handled above"),
73                };
74
75                Ok(value)
76            },
77            Expression::Negate(expr) => {
78                let value = -expr.evaluate(context)?.number();
79                Ok(value.into())
80            },
81            Expression::Path(path_expr) => path_expr.evaluate(context),
82            Expression::LocationStep(location_step_expression) => {
83                location_step_expression.evaluate(context)
84            },
85            Expression::Filter(filter_expression) => filter_expression.evaluate(context),
86            Expression::Literal(literal) => Ok(literal.evaluate::<D>()),
87            Expression::Function(function) => function.evaluate(context),
88            Expression::ContextItem => Ok(Value::Nodeset(vec![context.context_node.clone()])),
89            Expression::Variable(_) => Err(Error::CannotUseVariables),
90        }
91    }
92}
93
94impl PathExpression {
95    fn evaluate<D: Dom>(
96        &self,
97        context: &EvaluationCtx<D>,
98    ) -> Result<Value<D::Node>, Error<D::JsError>> {
99        // Use starting_node for absolute paths, context_node otherwise
100        let mut current_nodes = if self.is_absolute {
101            vec![context.starting_node.clone()]
102        } else {
103            vec![context.context_node.clone()]
104        };
105
106        // If path starts with '//', add an implicit descendant-or-self::node() step
107        if self.has_implicit_descendant_or_self_step {
108            current_nodes = current_nodes
109                .iter()
110                .flat_map(|node| node.traverse_preorder())
111                .collect();
112        }
113
114        log::trace!("[PathExpr] Evaluating path expr: {:?}", self);
115
116        let have_multiple_steps = self.steps.len() > 1;
117
118        for step in &self.steps {
119            let mut next_nodes = Vec::new();
120            for node in current_nodes {
121                let step_context = context.subcontext_for_node(node.clone());
122                let step_result = step.evaluate(&step_context)?;
123                match (have_multiple_steps, step_result) {
124                    (_, Value::Nodeset(mut nodes)) => {
125                        // as long as we evaluate to nodesets, keep going
126                        next_nodes.append(&mut nodes);
127                    },
128                    (false, value) => {
129                        log::trace!("[PathExpr] Got single primitive value: {:?}", value);
130                        return Ok(value);
131                    },
132                    (true, value) => {
133                        log::error!(
134                            "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
135                            value,
136                            node,
137                            step
138                        );
139                        return Ok(value);
140                    },
141                }
142            }
143            current_nodes = next_nodes;
144        }
145
146        log::trace!("[PathExpr] Got nodes: {:?}", current_nodes);
147
148        Ok(Value::Nodeset(current_nodes))
149    }
150}
151
/// Selects how the namespace of a name test is compared against a node's namespace.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum NameTestComparisonMode {
    /// Namespaces must match exactly
    XHtml,
    /// Missing namespace information is treated as the HTML namespace
    Html,
}
159
160pub(crate) fn element_name_test(
161    expected_name: QualName,
162    element_qualname: QualName,
163    comparison_mode: NameTestComparisonMode,
164) -> bool {
165    let is_wildcard = expected_name.local == local_name!("*");
166
167    let test_prefix = expected_name
168        .prefix
169        .clone()
170        .unwrap_or(namespace_prefix!(""));
171    let test_ns_uri = match test_prefix {
172        namespace_prefix!("*") => ns!(*),
173        namespace_prefix!("html") => ns!(html),
174        namespace_prefix!("xml") => ns!(xml),
175        namespace_prefix!("xlink") => ns!(xlink),
176        namespace_prefix!("svg") => ns!(svg),
177        namespace_prefix!("mathml") => ns!(mathml),
178        namespace_prefix!("") => {
179            if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
180                ns!()
181            } else {
182                ns!(html)
183            }
184        },
185        _ => {
186            // We don't support custom namespaces, use fallback or panic depending on strictness
187            if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
188                panic!("Unrecognized namespace prefix: {}", test_prefix)
189            } else {
190                ns!(html)
191            }
192        },
193    };
194
195    if is_wildcard {
196        test_ns_uri == element_qualname.ns
197    } else {
198        test_ns_uri == element_qualname.ns && expected_name.local == element_qualname.local
199    }
200}
201
202fn apply_node_test<D: Dom>(
203    context: &EvaluationCtx<D>,
204    test: &NodeTest,
205    node: &D::Node,
206) -> Result<bool, Error<D::JsError>> {
207    let result = match test {
208        NodeTest::Name(qname) => {
209            let namespace = context
210                .resolve_namespace(qname.prefix.as_deref())
211                .map_err(Error::JsException)?
212                .map(Namespace::from)
213                .unwrap_or_default();
214
215            let wanted_name = QualName {
216                prefix: qname.prefix.as_deref().map(Prefix::from),
217                ns: namespace,
218                local: LocalName::from(qname.local_part.as_str()),
219            };
220
221            if let Some(element) = node.as_element() {
222                let comparison_mode = if node.owner_document().is_html_document() {
223                    NameTestComparisonMode::Html
224                } else {
225                    NameTestComparisonMode::XHtml
226                };
227                let element_qualname = QualName::new(
228                    element.prefix(),
229                    element.namespace().clone(),
230                    element.local_name().clone(),
231                );
232                element_name_test(wanted_name, element_qualname, comparison_mode)
233            } else if let Some(attribute) = node.as_attribute() {
234                let attr_qualname = QualName::new(
235                    attribute.prefix(),
236                    attribute.namespace().clone(),
237                    attribute.local_name().clone(),
238                );
239                // attributes are always compared with strict namespace matching
240                let comparison_mode = NameTestComparisonMode::XHtml;
241                element_name_test(wanted_name, attr_qualname, comparison_mode)
242            } else {
243                false
244            }
245        },
246        NodeTest::Wildcard => node.as_element().is_some(),
247        NodeTest::Kind(kind) => match kind {
248            KindTest::PI(target) => {
249                if let Some(processing_instruction) = node.as_processing_instruction() {
250                    match (target, processing_instruction.target()) {
251                        (Some(target_name), node_target_name)
252                            if target_name == &node_target_name.to_string() =>
253                        {
254                            true
255                        },
256                        (Some(_), _) => false,
257                        (None, _) => true,
258                    }
259                } else {
260                    false
261                }
262            },
263            KindTest::Comment => node.is_comment(),
264            KindTest::Text => node.is_text(),
265            KindTest::Node => true,
266        },
267    };
268    Ok(result)
269}
270
271impl LocationStepExpression {
272    fn evaluate<D: Dom>(
273        &self,
274        context: &EvaluationCtx<D>,
275    ) -> Result<Value<D::Node>, Error<D::JsError>> {
276        let nodes: Vec<D::Node> = match self.axis {
277            Axis::Child => context.context_node.children().collect(),
278            Axis::Descendant => context.context_node.traverse_preorder().skip(1).collect(),
279            Axis::Parent => vec![context.context_node.parent()]
280                .into_iter()
281                .flatten()
282                .collect(),
283            Axis::Ancestor => context.context_node.inclusive_ancestors().skip(1).collect(),
284            Axis::Following => context
285                .context_node
286                .following_nodes(&context.context_node)
287                .skip(1)
288                .collect(),
289            Axis::Preceding => context
290                .context_node
291                .preceding_nodes(&context.context_node)
292                .skip(1)
293                .collect(),
294            Axis::FollowingSibling => context.context_node.following_siblings().collect(),
295            Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(),
296            Axis::Attribute => {
297                if let Some(element) = context.context_node.as_element() {
298                    element
299                        .attributes()
300                        .map(|attribute| attribute.as_node())
301                        .collect()
302                } else {
303                    vec![]
304                }
305            },
306            Axis::Self_ => vec![context.context_node.clone()],
307            Axis::DescendantOrSelf => context.context_node.traverse_preorder().collect(),
308            Axis::AncestorOrSelf => context.context_node.inclusive_ancestors().collect(),
309            Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented
310        };
311
312        log::trace!("[StepExpr] Axis {:?} got nodes {:?}", self.axis, nodes);
313
314        // Filter nodes according to the step's node_test. Will error out if any NodeTest
315        // application errors out.
316        let filtered_nodes: Vec<D::Node> = nodes
317            .into_iter()
318            .map(|node| {
319                apply_node_test(context, &self.node_test, &node)
320                    .map(|matches| matches.then_some(node))
321            })
322            .collect::<Result<Vec<_>, _>>()?
323            .into_iter()
324            .flatten()
325            .collect();
326
327        log::trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
328
329        if self.predicate_list.predicates.is_empty() {
330            log::trace!(
331                "[StepExpr] No predicates, returning nodes {:?}",
332                filtered_nodes
333            );
334            Ok(Value::Nodeset(filtered_nodes))
335        } else {
336            // Apply predicates
337            self.predicate_list
338                .evaluate(context, filtered_nodes.clone())
339        }
340    }
341}
342
343impl PredicateListExpression {
344    fn evaluate<D: Dom>(
345        &self,
346        context: &EvaluationCtx<D>,
347        mut matched_nodes: Vec<D::Node>,
348    ) -> Result<Value<D::Node>, Error<D::JsError>> {
349        for predicate_expr in &self.predicates {
350            let size = matched_nodes.len();
351            let mut new_matched = Vec::new();
352
353            for (i, node) in matched_nodes.iter().enumerate() {
354                // 1-based position, per XPath spec
355                let predicate_ctx: EvaluationCtx<D> = EvaluationCtx {
356                    starting_node: context.starting_node.clone(),
357                    context_node: node.clone(),
358                    predicate_ctx: Some(PredicateCtx { index: i + 1, size }),
359                    resolver: context.resolver.clone(),
360                };
361
362                let eval_result = predicate_expr.evaluate(&predicate_ctx);
363
364                let keep = match eval_result {
365                    Ok(Value::Number(number)) => (i + 1) as f64 == number,
366                    Ok(Value::Boolean(boolean)) => boolean,
367                    Ok(value) => value.boolean(),
368                    Err(_) => false,
369                };
370
371                if keep {
372                    new_matched.push(node.clone());
373                }
374            }
375
376            matched_nodes = new_matched;
377            log::trace!(
378                "[PredicateListExpr] Predicate {:?} matched nodes {:?}",
379                predicate_expr,
380                matched_nodes
381            );
382        }
383        Ok(Value::Nodeset(matched_nodes))
384    }
385}
386
387impl FilterExpression {
388    fn evaluate<D: Dom>(
389        &self,
390        context: &EvaluationCtx<D>,
391    ) -> Result<Value<D::Node>, Error<D::JsError>> {
392        let primary_result = self.expression.evaluate(context)?;
393        let have_predicates = !self.predicates.predicates.is_empty();
394
395        match (have_predicates, &primary_result) {
396            (false, _) => {
397                log::trace!(
398                    "[FilterExpr] No predicates, returning primary result: {:?}",
399                    primary_result
400                );
401                Ok(primary_result)
402            },
403            (true, Value::Nodeset(vec)) => {
404                let result_filtered_by_predicates = self.predicates.evaluate(context, vec.clone());
405                log::trace!(
406                    "[FilterExpr] Result filtered by predicates: {:?}",
407                    result_filtered_by_predicates
408                );
409                result_filtered_by_predicates
410            },
411            // You can't use filtering expressions `[]` on other than node-sets
412            (true, _) => Err(Error::NotANodeset),
413        }
414    }
415}
416
417impl Literal {
418    fn evaluate<D: Dom>(&self) -> Value<D::Node> {
419        match self {
420            Literal::Integer(integer) => Value::Number(*integer as f64),
421            Literal::Decimal(decimal) => Value::Number(*decimal),
422            Literal::String(s) => Value::String(s.into()),
423        }
424    }
425}