xpath/
eval.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::iter;
6
7use markup5ever::{QualName, local_name, ns};
8
9use crate::ast::{
10    Axis, BinaryOperator, Expression, FilterExpression, KindTest, Literal, LocationStepExpression,
11    NodeTest, PathExpression, PredicateListExpression,
12};
13use crate::context::PredicateCtx;
14use crate::{
15    Attribute, Dom, Element, Error, EvaluationCtx, Node, NodeSet, ProcessingInstruction, Value,
16};
17
18pub(crate) fn try_extract_nodeset<N: Node>(v: Value<N>) -> Result<NodeSet<N>, Error> {
19    match v {
20        Value::NodeSet(node_set) => Ok(node_set),
21        _ => Err(Error::NotANodeset),
22    }
23}
24
25impl Expression {
26    pub(crate) fn evaluate<D: Dom>(
27        &self,
28        context: &EvaluationCtx<D>,
29    ) -> Result<Value<D::Node>, Error> {
30        match self {
31            // And/Or expression are seperated because they can sometimes be evaluated
32            // without evaluating both operands.
33            Expression::Binary(left, BinaryOperator::And, right) => {
34                let left_bool = left.evaluate(context)?.convert_to_boolean();
35                let v = left_bool && right.evaluate(context)?.convert_to_boolean();
36                Ok(Value::Boolean(v))
37            },
38            Expression::Binary(left, BinaryOperator::Or, right) => {
39                let left_bool = left.evaluate(context)?.convert_to_boolean();
40                let v = left_bool || right.evaluate(context)?.convert_to_boolean();
41                Ok(Value::Boolean(v))
42            },
43            Expression::Binary(left, binary_operator, right) => {
44                let left_value = left.evaluate(context)?;
45                let right_value = right.evaluate(context)?;
46
47                let value = match binary_operator {
48                    BinaryOperator::Equal => (left_value == right_value).into(),
49                    BinaryOperator::NotEqual => (left_value != right_value).into(),
50                    BinaryOperator::LessThan => {
51                        (left_value.convert_to_number() < right_value.convert_to_number()).into()
52                    },
53                    BinaryOperator::GreaterThan => {
54                        (left_value.convert_to_number() > right_value.convert_to_number()).into()
55                    },
56                    BinaryOperator::LessThanOrEqual => {
57                        (left_value.convert_to_number() <= right_value.convert_to_number()).into()
58                    },
59                    BinaryOperator::GreaterThanOrEqual => {
60                        (left_value.convert_to_number() >= right_value.convert_to_number()).into()
61                    },
62                    BinaryOperator::Add => {
63                        (left_value.convert_to_number() + right_value.convert_to_number()).into()
64                    },
65                    BinaryOperator::Subtract => {
66                        (left_value.convert_to_number() - right_value.convert_to_number()).into()
67                    },
68                    BinaryOperator::Multiply => {
69                        (left_value.convert_to_number() * right_value.convert_to_number()).into()
70                    },
71                    BinaryOperator::Divide => {
72                        (left_value.convert_to_number() / right_value.convert_to_number()).into()
73                    },
74                    BinaryOperator::Modulo => {
75                        (left_value.convert_to_number() % right_value.convert_to_number()).into()
76                    },
77                    BinaryOperator::Union => {
78                        let as_nodes =
79                            |e: &Expression| e.evaluate(context).and_then(try_extract_nodeset);
80                        let mut left_nodes = as_nodes(left)?;
81                        let right_nodes = as_nodes(right)?;
82
83                        left_nodes.extend(right_nodes);
84                        left_nodes.sort();
85                        Value::NodeSet(left_nodes)
86                    },
87                    _ => unreachable!("And/Or were handled above"),
88                };
89
90                Ok(value)
91            },
92            Expression::Negate(expr) => {
93                let value = -expr.evaluate(context)?.convert_to_number();
94                Ok(value.into())
95            },
96            Expression::Path(path_expr) => path_expr.evaluate(context),
97            Expression::LocationStep(location_step_expression) => {
98                location_step_expression.evaluate(context)
99            },
100            Expression::Filter(filter_expression) => filter_expression.evaluate(context),
101            Expression::Literal(literal) => Ok(literal.evaluate::<D>()),
102            Expression::Function(function) => function.evaluate(context),
103            Expression::ContextItem => {
104                let mut result = NodeSet::default();
105                result.push(context.context_node.clone());
106                Ok(Value::NodeSet(result))
107            },
108            Expression::Variable(_) => Err(Error::CannotUseVariables),
109        }
110    }
111}
112
113impl PathExpression {
114    fn evaluate<D: Dom>(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error> {
115        // Use root node for absolute paths, context_node otherwise
116        let starting_node = if self.is_absolute {
117            context.context_node.get_root_node()
118        } else {
119            context.context_node.clone()
120        };
121
122        // If path starts with '//', add an implicit descendant-or-self::node() step
123        let mut current_nodes = NodeSet::default();
124        if self.has_implicit_descendant_or_self_step {
125            for node in starting_node.traverse_preorder() {
126                current_nodes.push(node);
127            }
128        } else {
129            current_nodes.push(starting_node);
130        }
131        current_nodes.assume_sorted();
132
133        let have_multiple_steps = self.steps.len() > 1;
134
135        for step_expression in &self.steps {
136            let mut next_nodes = NodeSet::default();
137            for node in current_nodes {
138                let step_context = context.subcontext_for_node(node.clone());
139                let step_result = step_expression.evaluate(&step_context)?;
140                match (have_multiple_steps, step_result) {
141                    (_, Value::NodeSet(nodes)) => {
142                        // as long as we evaluate to nodesets, keep going
143                        next_nodes.extend(nodes);
144                    },
145                    (false, value) => {
146                        return Ok(value);
147                    },
148                    (true, value) => {
149                        log::debug!(
150                            "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
151                            value,
152                            node,
153                            step_expression
154                        );
155                        return Ok(value);
156                    },
157                }
158            }
159            current_nodes = next_nodes;
160        }
161
162        Ok(Value::NodeSet(current_nodes))
163    }
164}
165
166#[derive(Debug, Eq, PartialEq)]
167pub(crate) enum NameTestComparisonMode {
168    /// Namespaces must match exactly
169    XHtml,
170    /// Missing namespace information is treated as the HTML namespace
171    Html,
172}
173
174pub(crate) fn element_name_test(
175    expected_name: &QualName,
176    actual_name: QualName,
177    comparison_mode: NameTestComparisonMode,
178) -> bool {
179    if expected_name.prefix.is_none() && expected_name.local == local_name!("*") {
180        return true;
181    }
182
183    let should_compare_namespaces =
184        comparison_mode == NameTestComparisonMode::XHtml || expected_name.ns != ns!();
185    if should_compare_namespaces && expected_name.ns != actual_name.ns {
186        return false;
187    }
188
189    if expected_name.local == local_name!("*") {
190        return true;
191    }
192
193    expected_name.local == actual_name.local
194}
195
196fn apply_node_test<D: Dom>(test: &NodeTest, node: &D::Node) -> Result<bool, Error> {
197    let result = match test {
198        NodeTest::Name(expected_name) => {
199            if let Some(element) = node.as_element() {
200                let comparison_mode = if element.is_html_element_in_html_document() {
201                    NameTestComparisonMode::Html
202                } else {
203                    NameTestComparisonMode::XHtml
204                };
205                let actual_name = QualName::new(
206                    element.prefix(),
207                    element.namespace().clone(),
208                    element.local_name().clone(),
209                );
210                element_name_test(expected_name, actual_name, comparison_mode)
211            } else if let Some(attribute) = node.as_attribute() {
212                let actual_name = QualName::new(
213                    attribute.prefix(),
214                    attribute.namespace().clone(),
215                    attribute.local_name().clone(),
216                );
217                // attributes are always compared with strict namespace matching
218                let comparison_mode = NameTestComparisonMode::XHtml;
219                element_name_test(expected_name, actual_name, comparison_mode)
220            } else {
221                false
222            }
223        },
224        NodeTest::Wildcard => node.as_element().is_some(),
225        NodeTest::Kind(kind) => match kind {
226            KindTest::PI(target) => {
227                if let Some(processing_instruction) = node.as_processing_instruction() {
228                    match (target, processing_instruction.target()) {
229                        (Some(target_name), node_target_name)
230                            if target_name == &node_target_name.to_string() =>
231                        {
232                            true
233                        },
234                        (Some(_), _) => false,
235                        (None, _) => true,
236                    }
237                } else {
238                    false
239                }
240            },
241            KindTest::Comment => node.is_comment(),
242            KindTest::Text => node.is_text(),
243            KindTest::Node => true,
244        },
245    };
246    Ok(result)
247}
248
249impl LocationStepExpression {
250    fn evaluate<D: Dom>(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error> {
251        let nodes: NodeSet<D::Node> = match self.axis {
252            Axis::Child => context.context_node.children().collect(),
253            Axis::Descendant => context.context_node.traverse_preorder().skip(1).collect(),
254            Axis::Parent => vec![context.context_node.parent()]
255                .into_iter()
256                .flatten()
257                .collect(),
258            Axis::Ancestor => context.context_node.inclusive_ancestors().skip(1).collect(),
259            Axis::Following => context.context_node.following_nodes().skip(1).collect(),
260            Axis::Preceding => context.context_node.preceding_nodes().skip(1).collect(),
261            Axis::FollowingSibling => context.context_node.following_siblings().collect(),
262            Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(),
263            Axis::Attribute => {
264                if let Some(element) = context.context_node.as_element() {
265                    element
266                        .attributes()
267                        .map(|attribute| attribute.as_node())
268                        .collect()
269                } else {
270                    Default::default()
271                }
272            },
273            Axis::Self_ => iter::once(context.context_node.clone()).collect(),
274            Axis::DescendantOrSelf => context.context_node.traverse_preorder().collect(),
275            Axis::AncestorOrSelf => context.context_node.inclusive_ancestors().collect(),
276            Axis::Namespace => Default::default(), // Namespace axis is not commonly implemented
277        };
278
279        // Filter nodes according to the step's node_test. Will error out if any NodeTest
280        // application errors out.
281        // FIXME: Invent something like try_retain and use it here
282        let filtered_nodes: NodeSet<D::Node> = nodes
283            .into_iter()
284            .filter_map(|node| match apply_node_test::<D>(&self.node_test, &node) {
285                Ok(false) => None,
286                Ok(true) => Some(Ok(node)),
287                Err(error) => Some(Err(error)),
288            })
289            .collect::<Result<NodeSet<_>, _>>()?;
290
291        let mut filtered_nodes = if self.predicate_list.predicates.is_empty() {
292            filtered_nodes
293        } else {
294            // Apply predicates
295            self.predicate_list.apply::<D>(filtered_nodes)
296        };
297
298        // Enforce tree order between nodes in the list
299        if matches!(
300            self.axis,
301            Axis::Child |
302                Axis::Descendant |
303                Axis::Parent |
304                Axis::Following |
305                Axis::FollowingSibling |
306                Axis::Attribute |
307                Axis::Self_ |
308                Axis::DescendantOrSelf
309        ) {
310            // The elements on these axis values are already in tree order
311            filtered_nodes.assume_sorted();
312        } else {
313            // The elements on these axis values are in inverse tree order
314            filtered_nodes.reverse();
315            filtered_nodes.assume_sorted();
316        };
317
318        Ok(Value::NodeSet(filtered_nodes))
319    }
320}
321
322impl PredicateListExpression {
323    fn apply<D: Dom>(&self, mut matched_nodes: NodeSet<D::Node>) -> NodeSet<D::Node> {
324        for predicate_expr in &self.predicates {
325            let size = matched_nodes.len();
326
327            // 1-based position, per XPath spec
328            let mut position = 1;
329            matched_nodes.retain(|node| {
330                let predicate_ctx: EvaluationCtx<D> = EvaluationCtx {
331                    context_node: node.clone(),
332                    predicate_ctx: Some(PredicateCtx {
333                        index: position,
334                        size,
335                    }),
336                };
337                let eval_result = predicate_expr.evaluate(&predicate_ctx);
338
339                let keep = match eval_result {
340                    Ok(Value::Number(number)) => position as f64 == number,
341                    Ok(Value::Boolean(boolean)) => boolean,
342                    Ok(value) => value.convert_to_boolean(),
343                    Err(_) => false,
344                };
345                position += 1;
346
347                keep
348            });
349        }
350
351        matched_nodes
352    }
353}
354
355impl FilterExpression {
356    fn evaluate<D: Dom>(&self, context: &EvaluationCtx<D>) -> Result<Value<D::Node>, Error> {
357        debug_assert!(!self.predicates.predicates.is_empty());
358
359        let Value::NodeSet(node_set) = self.expression.evaluate(context)? else {
360            // You can't use filtering expressions `[]` on other than node-sets
361            return Err(Error::NotANodeset);
362        };
363        Ok(Value::NodeSet(self.predicates.apply::<D>(node_set)))
364    }
365}
366
367impl Literal {
368    fn evaluate<D: Dom>(&self) -> Value<D::Node> {
369        match self {
370            Literal::Integer(integer) => Value::Number(*integer as f64),
371            Literal::Decimal(decimal) => Value::Number(*decimal),
372            Literal::String(s) => Value::String(s.into()),
373        }
374    }
375}