xpath/
eval.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use markup5ever::{QualName, local_name, ns};
6
7use crate::ast::{
8    Axis, BinaryOperator, Expression, FilterExpression, KindTest, Literal, LocationStepExpression,
9    NodeTest, PathExpression, PredicateListExpression,
10};
11use crate::context::PredicateCtx;
12use crate::{Attribute, Dom, Element, Error, EvaluationCtx, Node, ProcessingInstruction, Value};
13
14pub(crate) fn try_extract_nodeset<N: Node>(v: Value<N>) -> Result<Vec<N>, Error> {
15    match v {
16        Value::Nodeset(ns) => Ok(ns),
17        _ => Err(Error::NotANodeset),
18    }
19}
20
21impl Expression {
22    pub(crate) fn evaluate<D: Dom>(
23        &self,
24        context: &EvaluationCtx<D>,
25    ) -> Result<Value<D::Node>, Error> {
26        match self {
27            // And/Or expression are seperated because they can sometimes be evaluated
28            // without evaluating both operands.
29            Expression::Binary(left, BinaryOperator::And, right) => {
30                let left_bool = left.evaluate(context)?.convert_to_boolean();
31                let v = left_bool && right.evaluate(context)?.convert_to_boolean();
32                Ok(Value::Boolean(v))
33            },
34            Expression::Binary(left, BinaryOperator::Or, right) => {
35                let left_bool = left.evaluate(context)?.convert_to_boolean();
36                let v = left_bool || right.evaluate(context)?.convert_to_boolean();
37                Ok(Value::Boolean(v))
38            },
39            Expression::Binary(left, binary_operator, right) => {
40                let left_value = left.evaluate(context)?;
41                let right_value = right.evaluate(context)?;
42
43                let value = match binary_operator {
44                    BinaryOperator::Equal => (left_value == right_value).into(),
45                    BinaryOperator::NotEqual => (left_value != right_value).into(),
46                    BinaryOperator::LessThan => {
47                        (left_value.convert_to_number() < right_value.convert_to_number()).into()
48                    },
49                    BinaryOperator::GreaterThan => {
50                        (left_value.convert_to_number() > right_value.convert_to_number()).into()
51                    },
52                    BinaryOperator::LessThanOrEqual => {
53                        (left_value.convert_to_number() <= right_value.convert_to_number()).into()
54                    },
55                    BinaryOperator::GreaterThanOrEqual => {
56                        (left_value.convert_to_number() >= right_value.convert_to_number()).into()
57                    },
58                    BinaryOperator::Add => {
59                        (left_value.convert_to_number() + right_value.convert_to_number()).into()
60                    },
61                    BinaryOperator::Subtract => {
62                        (left_value.convert_to_number() - right_value.convert_to_number()).into()
63                    },
64                    BinaryOperator::Multiply => {
65                        (left_value.convert_to_number() * right_value.convert_to_number()).into()
66                    },
67                    BinaryOperator::Divide => {
68                        (left_value.convert_to_number() / right_value.convert_to_number()).into()
69                    },
70                    BinaryOperator::Modulo => {
71                        (left_value.convert_to_number() % right_value.convert_to_number()).into()
72                    },
73                    BinaryOperator::Union => {
74                        let as_nodes =
75                            |e: &Expression| e.evaluate(context).and_then(try_extract_nodeset);
76                        let mut left_nodes = as_nodes(left)?;
77                        let right_nodes = as_nodes(right)?;
78
79                        left_nodes.extend(right_nodes);
80                        Value::Nodeset(left_nodes)
81                    },
82                    _ => unreachable!("And/Or were handled above"),
83                };
84
85                Ok(value)
86            },
87            Expression::Negate(expr) => {
88                let value = -expr.evaluate(context)?.convert_to_number();
89                Ok(value.into())
90            },
91            Expression::Path(path_expr) => path_expr.evaluate(context),
92            Expression::LocationStep(location_step_expression) => {
93                location_step_expression.evaluate(context)
94            },
95            Expression::Filter(filter_expression) => filter_expression.evaluate(context),
96            Expression::Literal(literal) => Ok(literal.evaluate::<D>()),
97            Expression::Function(function) => function.evaluate(context),
98            Expression::ContextItem => Ok(Value::Nodeset(vec![context.context_node.clone()])),
99            Expression::Variable(_) => Err(Error::CannotUseVariables),
100        }
101    }
102}
103
104impl PathExpression {
105    fn evaluate<D: Dom>(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error> {
106        // Use root node for absolute paths, context_node otherwise
107        let mut current_nodes = if self.is_absolute {
108            vec![context.context_node.get_root_node()]
109        } else {
110            vec![context.context_node.clone()]
111        };
112
113        // If path starts with '//', add an implicit descendant-or-self::node() step
114        if self.has_implicit_descendant_or_self_step {
115            current_nodes = current_nodes
116                .iter()
117                .flat_map(|node| node.traverse_preorder())
118                .collect();
119        }
120
121        log::trace!("[PathExpr] Evaluating path expr: {:?}", self);
122
123        let have_multiple_steps = self.steps.len() > 1;
124
125        for step_expression in &self.steps {
126            let mut next_nodes = Vec::new();
127            for node in current_nodes {
128                let step_context = context.subcontext_for_node(node.clone());
129                let step_result = step_expression.evaluate(&step_context)?;
130                match (have_multiple_steps, step_result) {
131                    (_, Value::Nodeset(mut nodes)) => {
132                        // as long as we evaluate to nodesets, keep going
133                        next_nodes.append(&mut nodes);
134                    },
135                    (false, value) => {
136                        log::trace!("[PathExpr] Got single primitive value: {:?}", value);
137                        return Ok(value);
138                    },
139                    (true, value) => {
140                        log::error!(
141                            "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
142                            value,
143                            node,
144                            step_expression
145                        );
146                        return Ok(value);
147                    },
148                }
149            }
150            current_nodes = next_nodes;
151        }
152
153        log::trace!("[PathExpr] Got nodes: {:?}", current_nodes);
154
155        Ok(Value::Nodeset(current_nodes))
156    }
157}
158
159#[derive(Debug, Eq, PartialEq)]
160pub(crate) enum NameTestComparisonMode {
161    /// Namespaces must match exactly
162    XHtml,
163    /// Missing namespace information is treated as the HTML namespace
164    Html,
165}
166
167pub(crate) fn element_name_test(
168    expected_name: &QualName,
169    actual_name: QualName,
170    comparison_mode: NameTestComparisonMode,
171) -> bool {
172    if expected_name.prefix.is_none() && expected_name.local == local_name!("*") {
173        return true;
174    }
175
176    let should_compare_namespaces =
177        comparison_mode == NameTestComparisonMode::XHtml || expected_name.ns != ns!();
178    if should_compare_namespaces && expected_name.ns != actual_name.ns {
179        return false;
180    }
181
182    if expected_name.local == local_name!("*") {
183        return true;
184    }
185
186    expected_name.local == actual_name.local
187}
188
189fn apply_node_test<D: Dom>(test: &NodeTest, node: &D::Node) -> Result<bool, Error> {
190    let result = match test {
191        NodeTest::Name(expected_name) => {
192            if let Some(element) = node.as_element() {
193                let comparison_mode = if element.is_html_element_in_html_document() {
194                    NameTestComparisonMode::Html
195                } else {
196                    NameTestComparisonMode::XHtml
197                };
198                let actual_name = QualName::new(
199                    element.prefix(),
200                    element.namespace().clone(),
201                    element.local_name().clone(),
202                );
203                element_name_test(expected_name, actual_name, comparison_mode)
204            } else if let Some(attribute) = node.as_attribute() {
205                let actual_name = QualName::new(
206                    attribute.prefix(),
207                    attribute.namespace().clone(),
208                    attribute.local_name().clone(),
209                );
210                // attributes are always compared with strict namespace matching
211                let comparison_mode = NameTestComparisonMode::XHtml;
212                element_name_test(expected_name, actual_name, comparison_mode)
213            } else {
214                false
215            }
216        },
217        NodeTest::Wildcard => node.as_element().is_some(),
218        NodeTest::Kind(kind) => match kind {
219            KindTest::PI(target) => {
220                if let Some(processing_instruction) = node.as_processing_instruction() {
221                    match (target, processing_instruction.target()) {
222                        (Some(target_name), node_target_name)
223                            if target_name == &node_target_name.to_string() =>
224                        {
225                            true
226                        },
227                        (Some(_), _) => false,
228                        (None, _) => true,
229                    }
230                } else {
231                    false
232                }
233            },
234            KindTest::Comment => node.is_comment(),
235            KindTest::Text => node.is_text(),
236            KindTest::Node => true,
237        },
238    };
239    Ok(result)
240}
241
242impl LocationStepExpression {
243    fn evaluate<D: Dom>(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error> {
244        let nodes: Vec<D::Node> = match self.axis {
245            Axis::Child => context.context_node.children().collect(),
246            Axis::Descendant => context.context_node.traverse_preorder().skip(1).collect(),
247            Axis::Parent => vec![context.context_node.parent()]
248                .into_iter()
249                .flatten()
250                .collect(),
251            Axis::Ancestor => context.context_node.inclusive_ancestors().skip(1).collect(),
252            Axis::Following => context
253                .context_node
254                .following_nodes(&context.context_node)
255                .skip(1)
256                .collect(),
257            Axis::Preceding => context
258                .context_node
259                .preceding_nodes(&context.context_node)
260                .skip(1)
261                .collect(),
262            Axis::FollowingSibling => context.context_node.following_siblings().collect(),
263            Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(),
264            Axis::Attribute => {
265                if let Some(element) = context.context_node.as_element() {
266                    element
267                        .attributes()
268                        .map(|attribute| attribute.as_node())
269                        .collect()
270                } else {
271                    vec![]
272                }
273            },
274            Axis::Self_ => vec![context.context_node.clone()],
275            Axis::DescendantOrSelf => context.context_node.traverse_preorder().collect(),
276            Axis::AncestorOrSelf => context.context_node.inclusive_ancestors().collect(),
277            Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented
278        };
279
280        log::trace!("[StepExpr] Axis {:?} got nodes {:?}", self.axis, nodes);
281
282        // Filter nodes according to the step's node_test. Will error out if any NodeTest
283        // application errors out.
284        let filtered_nodes: Vec<D::Node> = nodes
285            .into_iter()
286            .map(|node| {
287                apply_node_test::<D>(&self.node_test, &node).map(|matches| matches.then_some(node))
288            })
289            .collect::<Result<Vec<_>, _>>()?
290            .into_iter()
291            .flatten()
292            .collect();
293
294        log::trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
295
296        if self.predicate_list.predicates.is_empty() {
297            log::trace!(
298                "[StepExpr] No predicates, returning nodes {:?}",
299                filtered_nodes
300            );
301            Ok(Value::Nodeset(filtered_nodes))
302        } else {
303            // Apply predicates
304            self.predicate_list.evaluate::<D>(filtered_nodes)
305        }
306    }
307}
308
309impl PredicateListExpression {
310    fn evaluate<D: Dom>(&self, mut matched_nodes: Vec<D::Node>) -> Result<Value<D::Node>, Error> {
311        for predicate_expr in &self.predicates {
312            let size = matched_nodes.len();
313            let mut new_matched = Vec::new();
314
315            for (i, node) in matched_nodes.iter().enumerate() {
316                // 1-based position, per XPath spec
317                let predicate_ctx: EvaluationCtx<D> = EvaluationCtx {
318                    context_node: node.clone(),
319                    predicate_ctx: Some(PredicateCtx { index: i + 1, size }),
320                };
321
322                let eval_result = predicate_expr.evaluate(&predicate_ctx);
323
324                let keep = match eval_result {
325                    Ok(Value::Number(number)) => (i + 1) as f64 == number,
326                    Ok(Value::Boolean(boolean)) => boolean,
327                    Ok(value) => value.convert_to_boolean(),
328                    Err(_) => false,
329                };
330
331                if keep {
332                    new_matched.push(node.clone());
333                }
334            }
335
336            matched_nodes = new_matched;
337            log::trace!(
338                "[PredicateListExpr] Predicate {:?} matched nodes {:?}",
339                predicate_expr,
340                matched_nodes
341            );
342        }
343        Ok(Value::Nodeset(matched_nodes))
344    }
345}
346
347impl FilterExpression {
348    fn evaluate<D: Dom>(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error> {
349        debug_assert!(!self.predicates.predicates.is_empty());
350
351        let Value::Nodeset(node_set) = self.expression.evaluate(context)? else {
352            // You can't use filtering expressions `[]` on other than node-sets
353            return Err(Error::NotANodeset);
354        };
355        let result_filtered_by_predicates = self.predicates.evaluate::<D>(node_set);
356        log::trace!(
357            "[FilterExpr] Result filtered by predicates: {:?}",
358            result_filtered_by_predicates
359        );
360        result_filtered_by_predicates
361    }
362}
363
364impl Literal {
365    fn evaluate<D: Dom>(&self) -> Value<D::Node> {
366        match self {
367            Literal::Integer(integer) => Value::Number(*integer as f64),
368            Literal::Decimal(decimal) => Value::Number(*decimal),
369            Literal::String(s) => Value::String(s.into()),
370        }
371    }
372}