Skip to main content

servo_xpath/
functions.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use crate::ast::CoreFunction;
6use crate::context::EvaluationCtx;
7use crate::eval::try_extract_nodeset;
8use crate::value::{NodeSet, parse_number_from_string};
9use crate::{Document, Dom, Element, Error, Node, Value};
10
11/// Returns e.g. "rect" for `<svg:rect>`
12fn local_name<N: Node>(node: &N) -> Option<String> {
13    node.as_element()
14        .map(|element| element.local_name().to_string())
15}
16
17/// Returns e.g. "svg:rect" for `<svg:rect>`
18fn name<N: Node>(node: &N) -> Option<String> {
19    node.as_element().map(|element| {
20        if let Some(prefix) = element.prefix().as_ref() {
21            format!("{}:{}", prefix, element.local_name())
22        } else {
23            element.local_name().to_string()
24        }
25    })
26}
27
28/// Returns e.g. the SVG namespace URI for `<svg:rect>`
29fn namespace_uri<N: Node>(node: &N) -> Option<String> {
30    node.as_element()
31        .map(|element| element.namespace().to_string())
32}
33
/// Returns the part of `s1` that precedes the first occurrence of `s2`.
///
/// If `s2` does not occur in `s1`, the result is the empty string (not `s1`).
/// An empty `s2` matches at the very start, so the result is empty as well.
fn substring_before(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map(|(head, _)| head.to_string())
        .unwrap_or_default()
}
41
/// Returns the part of `s1` that follows the first occurrence of `s2`.
///
/// If `s2` does not occur in `s1`, the result is the empty string (not `s1`).
/// An empty `s2` matches at the very start, so the result is all of `s1`.
fn substring_after(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map(|(_, tail)| tail.to_string())
        .unwrap_or_default()
}
49
/// Extracts up to `length` code points of `source`, beginning at the zero-based
/// code point offset `start`.
///
/// * A negative `start` is treated as `0`.
/// * A negative `length` is treated as `0`; `None` means "up to the end of `source`".
///
/// Offsets and lengths count code points rather than bytes or graphemes (the
/// specification leaves the unit open), so the result never splits a character.
///
/// NOTE(review): clamping a negative `start` does not shorten the selected window,
/// whereas the specification's example `substring("12345", 0, 3)` yields "12".
/// This helper on its own therefore does not implement that part of the spec.
///
/// <https://www.w3.org/TR/xpath-10/#function-substring>
fn substring(source: &str, start: isize, length: Option<isize>) -> String {
    let skipped = if start > 0 { start as usize } else { 0 };
    let taken = match length {
        Some(count) if count > 0 => count as usize,
        Some(_) => 0,
        None => usize::MAX,
    };

    let mut selected = String::new();
    selected.extend(source.chars().skip(skipped).take(taken));
    selected
}
62
/// Strips leading and trailing whitespace from `input` and collapses every inner run
/// of whitespace into a single space.
///
/// "Whitespace" here is exactly the set allowed by XML's `S` production: space, tab,
/// carriage return and line feed. Other characters that Rust considers ASCII
/// whitespace (notably form feed, U+000C) are ordinary characters for XPath and are
/// preserved.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-normalize-space>
pub(crate) fn normalize_space(input: &str) -> String {
    let mut result = String::with_capacity(input.len());

    // Splitting on every whitespace character yields empty segments for leading,
    // trailing and repeated whitespace; dropping those leaves just the words, so no
    // separate trimming step is needed.
    let words = input
        .split([' ', '\x09', '\x0D', '\x0A'])
        .filter(|segment| !segment.is_empty());

    for word in words {
        if !result.is_empty() {
            result.push(' ');
        }
        result.push_str(word);
    }

    result
}
82
/// Decides whether the language of the context node (`context_lang`) is the same as,
/// or a sublanguage of, `target_lang`.
///
/// The comparison ignores ASCII case. A sublanguage is `target_lang` followed by a
/// hyphen-separated suffix, e.g. "en-US" matches "en" but "eng" and "fr-en" do not.
/// A context node without any language (`None`) never matches.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context_lower = context_lang.to_ascii_lowercase();
    let target_lower = target_lang.to_ascii_lowercase();

    // Inspect what follows the prefix via `strip_prefix` rather than indexing characters
    // with the prefix's byte length, which would pick the wrong character as soon as the
    // tag contains a multi-byte code point.
    match context_lower.strip_prefix(target_lower.as_str()) {
        // Either an exact match, or the suffix must start with a hyphen to avoid
        // matching e.g. "england" when the target is "en".
        Some(suffix) => suffix.is_empty() || suffix.starts_with('-'),
        None => false,
    }
}
107
/// Rewrites `input` character by character according to the `from` -> `to` mapping.
///
/// * A character that does not occur in `from` is copied unchanged.
/// * A character found at index `i` of `from` (first occurrence wins) is replaced by
///   the character at index `i` of `to`.
/// * If `to` has no character at that index, the character is dropped.
///
/// Indices count code points, not bytes.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-translate>
fn translate(input: &str, from: &str, to: &str) -> String {
    input
        .chars()
        .filter_map(|character| {
            match from.chars().position(|candidate| candidate == character) {
                // Not mentioned in `from`: keep as is.
                None => Some(character),
                // Mentioned in `from`: substitute, or drop when `to` is too short.
                Some(index) => to.chars().nth(index),
            }
        })
        .collect()
}
126
127impl CoreFunction {
128    pub(crate) fn evaluate<D: Dom>(
129        &self,
130        cx: &mut D::Context,
131        context: &EvaluationCtx<D>,
132    ) -> Result<Value<D::Node>, Error> {
133        match self {
134            CoreFunction::Last => {
135                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
136                    msg: "[CoreFunction] last() is only usable as a predicate".to_string(),
137                })?;
138                Ok(Value::Number(predicate_ctx.size as f64))
139            },
140            CoreFunction::Position => {
141                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
142                    msg: "[CoreFunction] position() is only usable as a predicate".to_string(),
143                })?;
144                Ok(Value::Number(predicate_ctx.index as f64))
145            },
146            CoreFunction::Count(expr) => {
147                let nodes = expr.evaluate(cx, context).and_then(try_extract_nodeset)?;
148                Ok(Value::Number(nodes.len() as f64))
149            },
150            CoreFunction::String(expr_opt) => match expr_opt {
151                Some(expr) => Ok(Value::String(
152                    expr.evaluate(cx, context)?.convert_to_string(),
153                )),
154                None => Ok(Value::String(context.context_node.text_content())),
155            },
156            CoreFunction::Concat(exprs) => {
157                let strings: Result<Vec<_>, _> = exprs
158                    .iter()
159                    .map(|e| Ok(e.evaluate(cx, context)?.convert_to_string()))
160                    .collect();
161                Ok(Value::String(strings?.join("")))
162            },
163            CoreFunction::Id(expr) => {
164                let argument = expr.evaluate(cx, context)?;
165                let document = context.context_node.owner_document();
166                let mut result = NodeSet::default();
167
168                // https://www.w3.org/TR/1999/REC-xpath-19991116/#function-id
169                // > When the argument to id is of type node-set, then the result is the union of the result
170                // > of applying id to the string-value of each of the nodes in the argument node-set.
171                let mut extend_result_with_matching_nodes = |cx: &mut D::Context, input: &str| {
172                    for id in normalize_space(input).split(' ') {
173                        result.extend(
174                            document
175                                .get_elements_with_id(cx, id)
176                                .map(|element| element.as_node()),
177                        );
178                    }
179                };
180
181                if let Value::NodeSet(node_set) = argument {
182                    for node in node_set.iter() {
183                        extend_result_with_matching_nodes(cx, &node.text_content())
184                    }
185                } else {
186                    extend_result_with_matching_nodes(cx, &argument.convert_to_string())
187                }
188
189                result.sort();
190                Ok(Value::NodeSet(result))
191            },
192            CoreFunction::LocalName(expr_opt) => {
193                let node = match expr_opt {
194                    Some(expr) => expr
195                        .evaluate(cx, context)
196                        .and_then(try_extract_nodeset)?
197                        .first(),
198                    None => Some(context.context_node.clone()),
199                };
200                let name = node.and_then(|n| local_name(&n)).unwrap_or_default();
201                Ok(Value::String(name))
202            },
203            CoreFunction::NamespaceUri(expr_opt) => {
204                let node = match expr_opt {
205                    Some(expr) => expr
206                        .evaluate(cx, context)
207                        .and_then(try_extract_nodeset)?
208                        .first(),
209                    None => Some(context.context_node.clone()),
210                };
211                let ns = node.and_then(|n| namespace_uri(&n)).unwrap_or_default();
212                Ok(Value::String(ns))
213            },
214            CoreFunction::Name(expr_opt) => {
215                let node = match expr_opt {
216                    Some(expr) => expr
217                        .evaluate(cx, context)
218                        .and_then(try_extract_nodeset)?
219                        .first(),
220                    None => Some(context.context_node.clone()),
221                };
222                let name = node.and_then(|n| name(&n)).unwrap_or_default();
223                Ok(Value::String(name))
224            },
225            CoreFunction::StartsWith(str1, str2) => {
226                let s1 = str1.evaluate(cx, context)?.convert_to_string();
227                let s2 = str2.evaluate(cx, context)?.convert_to_string();
228                Ok(Value::Boolean(s1.starts_with(&s2)))
229            },
230            CoreFunction::Contains(str1, str2) => {
231                let s1 = str1.evaluate(cx, context)?.convert_to_string();
232                let s2 = str2.evaluate(cx, context)?.convert_to_string();
233                Ok(Value::Boolean(s1.contains(&s2)))
234            },
235            CoreFunction::SubstringBefore(str1, str2) => {
236                let s1 = str1.evaluate(cx, context)?.convert_to_string();
237                let s2 = str2.evaluate(cx, context)?.convert_to_string();
238                Ok(Value::String(substring_before(&s1, &s2)))
239            },
240            CoreFunction::SubstringAfter(str1, str2) => {
241                let s1 = str1.evaluate(cx, context)?.convert_to_string();
242                let s2 = str2.evaluate(cx, context)?.convert_to_string();
243                Ok(Value::String(substring_after(&s1, &s2)))
244            },
245            CoreFunction::Substring(source_expression, start, length) => {
246                let source = source_expression.evaluate(cx, context)?.convert_to_string();
247                let start_idx =
248                    start.evaluate(cx, context)?.convert_to_number().round() as isize - 1;
249                let result = if let Some(length_expression) = length {
250                    let length = length_expression
251                        .evaluate(cx, context)?
252                        .convert_to_number()
253                        .round() as isize;
254                    substring(&source, start_idx, Some(length))
255                } else {
256                    substring(&source, start_idx, None)
257                };
258                Ok(Value::String(result))
259            },
260            CoreFunction::StringLength(expr_opt) => {
261                let string = match expr_opt {
262                    Some(expr) => expr.evaluate(cx, context)?.convert_to_string(),
263                    None => context.context_node.text_content(),
264                };
265                Ok(Value::Number(string.chars().count() as f64))
266            },
267            CoreFunction::NormalizeSpace(expr_opt) => {
268                let string = match expr_opt {
269                    Some(expr) => expr.evaluate(cx, context)?.convert_to_string(),
270                    None => context.context_node.text_content(),
271                };
272
273                Ok(Value::String(normalize_space(&string)))
274            },
275            CoreFunction::Translate(str1, str2, str3) => {
276                let string = str1.evaluate(cx, context)?.convert_to_string();
277                let from = str2.evaluate(cx, context)?.convert_to_string();
278                let to = str3.evaluate(cx, context)?.convert_to_string();
279                Ok(Value::String(translate(&string, &from, &to)))
280            },
281            CoreFunction::Number(expr_opt) => {
282                let val = match expr_opt {
283                    Some(expr) => expr.evaluate(cx, context)?,
284                    None => Value::String(context.context_node.text_content()),
285                };
286                Ok(Value::Number(val.convert_to_number()))
287            },
288            CoreFunction::Sum(expr) => {
289                let nodes = expr.evaluate(cx, context).and_then(try_extract_nodeset)?;
290                let sum = nodes
291                    .iter()
292                    .map(|node| parse_number_from_string(&node.text_content()))
293                    .sum();
294                Ok(Value::Number(sum))
295            },
296            CoreFunction::Floor(expr) => {
297                let num = expr.evaluate(cx, context)?.convert_to_number();
298                Ok(Value::Number(num.floor()))
299            },
300            CoreFunction::Ceiling(expr) => {
301                let num = expr.evaluate(cx, context)?.convert_to_number();
302                Ok(Value::Number(num.ceil()))
303            },
304            CoreFunction::Round(expr) => {
305                let num = expr.evaluate(cx, context)?.convert_to_number();
306                Ok(Value::Number(num.round()))
307            },
308            CoreFunction::Boolean(expr) => Ok(Value::Boolean(
309                expr.evaluate(cx, context)?.convert_to_boolean(),
310            )),
311            CoreFunction::Not(expr) => Ok(Value::Boolean(
312                !expr.evaluate(cx, context)?.convert_to_boolean(),
313            )),
314            CoreFunction::True => Ok(Value::Boolean(true)),
315            CoreFunction::False => Ok(Value::Boolean(false)),
316            CoreFunction::Lang(expr) => {
317                let context_lang = context.context_node.language();
318                let lang = expr.evaluate(cx, context)?.convert_to_string();
319                Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
320            },
321        }
322    }
323}
/// Unit tests for the string helpers backing the XPath core function library.
#[cfg(test)]
mod tests {
    use super::{lang_matches, substring, substring_after, substring_before};
    use crate::functions::{normalize_space, translate};

    #[test]
    fn test_substring_before() {
        assert_eq!(substring_before("hello world", "world"), "hello ");
        assert_eq!(substring_before("prefix:name", ":"), "prefix");
        assert_eq!(substring_before("no-separator", "xyz"), "");
        assert_eq!(substring_before("", "anything"), "");
        assert_eq!(substring_before("multiple:colons:here", ":"), "multiple");
        assert_eq!(substring_before("start-match-test", "start"), "");
    }

    #[test]
    fn test_substring_after() {
        assert_eq!(substring_after("hello world", "hello "), "world");
        assert_eq!(substring_after("prefix:name", ":"), "name");
        assert_eq!(substring_after("no-separator", "xyz"), "");
        assert_eq!(substring_after("", "anything"), "");
        assert_eq!(substring_after("multiple:colons:here", ":"), "colons:here");
        assert_eq!(substring_after("test-end-match", "match"), "");
    }

    #[test]
    fn test_substring() {
        assert_eq!(substring("hello world", 0, Some(5)), "hello");
        assert_eq!(substring("hello world", 6, Some(5)), "world");
        assert_eq!(substring("hello", 1, Some(3)), "ell");
        assert_eq!(substring("hello", -5, Some(2)), "he");
        assert_eq!(substring("hello", 0, None), "hello");
        assert_eq!(substring("hello", 2, Some(10)), "llo");
        assert_eq!(substring("hello", 5, Some(1)), "");
        assert_eq!(substring("", 0, Some(5)), "");
        assert_eq!(substring("hello", 0, Some(0)), "");
        assert_eq!(substring("hello", 0, Some(-5)), "");
    }

    #[test]
    fn test_substring_with_out_of_bounds_index() {
        assert_eq!(substring("Servo", 42, None), "");
    }

    #[test]
    fn test_substring_with_multi_byte_characters() {
        // Offsets count code points, so skipping one removes exactly one emoji.
        assert_eq!(substring("🦞🦞🦞", 1, None), "🦞🦞");
    }

    #[test]
    fn test_lang_matches() {
        assert!(lang_matches(Some("en"), "en"));
        assert!(lang_matches(Some("EN"), "en"));
        assert!(lang_matches(Some("en"), "EN"));
        assert!(lang_matches(Some("en-US"), "en"));
        assert!(lang_matches(Some("en-GB"), "en"));

        assert!(!lang_matches(Some("eng"), "en"));
        assert!(!lang_matches(Some("fr"), "en"));
        assert!(!lang_matches(Some("fr-en"), "en"));
        assert!(!lang_matches(None, "en"));
    }

    #[test]
    fn test_normalize_space() {
        assert_eq!(normalize_space(" "), "");
        assert_eq!(normalize_space("\n\t\r "), "");
        assert_eq!(normalize_space("no-space"), "no-space");
        assert_eq!(normalize_space("one space"), "one space");
        assert_eq!(normalize_space("more    whitespace"), "more whitespace");
        assert_eq!(
            normalize_space("  \t leading  and trailing\n"),
            "leading and trailing"
        );
    }

    #[test]
    fn test_translate() {
        assert_eq!(translate("", "", ""), "");
        assert_eq!(translate("", "abc", ""), "");
        assert_eq!(translate("abcd", "abc", ""), "d");
        assert_eq!(translate("abcd", "abc", "cba"), "cbad");
        assert_eq!(translate("abc", "", "abc"), "abc");
    }

    #[test]
    fn test_translate_with_multi_byte_characters() {
        // Each emoji is a single code point and must be mapped as one unit.
        assert_eq!(translate("a🦞b😁c🦞d", "😁c", "🤨🤖"), "a🦞b🤨🤖🦞d");
    }
}
413}