xpath/
functions.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use crate::ast::CoreFunction;
6use crate::context::EvaluationCtx;
7use crate::eval::try_extract_nodeset;
8use crate::value::{NodeSet, parse_number_from_string};
9use crate::{Document, Dom, Element, Error, Node, Value};
10
11/// Returns e.g. "rect" for `<svg:rect>`
12fn local_name<N: Node>(node: &N) -> Option<String> {
13    node.as_element()
14        .map(|element| element.local_name().to_string())
15}
16
17/// Returns e.g. "svg:rect" for `<svg:rect>`
18fn name<N: Node>(node: &N) -> Option<String> {
19    node.as_element().map(|element| {
20        if let Some(prefix) = element.prefix().as_ref() {
21            format!("{}:{}", prefix, element.local_name())
22        } else {
23            element.local_name().to_string()
24        }
25    })
26}
27
28/// Returns e.g. the SVG namespace URI for `<svg:rect>`
29fn namespace_uri<N: Node>(node: &N) -> Option<String> {
30    node.as_element()
31        .map(|element| element.namespace().to_string())
32}
33
/// <https://www.w3.org/TR/xpath-10/#function-substring-before>
///
/// Returns the part of `s1` that precedes the first occurrence of `s2`.
/// If `s1` does not contain `s2`, the result is the empty string (not `s1`).
/// An empty `s2` matches at the very start, so the result is empty as well.
fn substring_before(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map_or_else(String::new, |(head, _)| head.to_string())
}
41
/// <https://www.w3.org/TR/xpath-10/#function-substring-after>
///
/// Returns the part of `s1` that follows the first occurrence of `s2`.
/// If `s1` does not contain `s2`, the result is the empty string (not `s1`).
/// An empty `s2` matches at the very start, so the result is all of `s1`.
fn substring_after(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map_or_else(String::new, |(_, tail)| tail.to_string())
}
49
/// <https://www.w3.org/TR/xpath-10/#function-substring>
///
/// Takes up to `length` code points of `source`, beginning at the zero-based
/// code point offset `start`. A negative `start` is treated as `0`, a negative
/// `length` as `0`, and a missing `length` means "everything up to the end".
fn substring(source: &str, start: isize, length: Option<isize>) -> String {
    let skipped = if start > 0 { start as usize } else { 0 };
    let taken = length.map_or(usize::MAX, |requested| {
        if requested > 0 { requested as usize } else { 0 }
    });

    // The specification leaves open whether "length" counts bytes, code points
    // or graphemes. Code points are used here; counting bytes (as Firefox does)
    // would allow slicing in the middle of a character.
    source.chars().skip(skipped).take(taken).collect()
}
62
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-normalize-space>
///
/// Strips leading and trailing whitespace and collapses every interior run of
/// whitespace into a single space. Only space, tab, carriage return and line
/// feed count as whitespace.
pub(crate) fn normalize_space(s: &str) -> String {
    let is_xml_whitespace = |c: char| matches!(c, '\x20' | '\x09' | '\x0D' | '\x0A');

    let mut normalized = String::with_capacity(s.len());
    // Empty pieces come from leading, trailing or consecutive whitespace
    for word in s.split(is_xml_whitespace).filter(|word| !word.is_empty()) {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }

    normalized
}
89
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
///
/// Returns `true` if `context_lang` equals `target_lang`, or if it is
/// `target_lang` followed by a `-` separated suffix (so "en-US" matches "en",
/// but "eng" does not). The comparison ignores ASCII case. A missing context
/// language never matches.
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context_lower = context_lang.to_ascii_lowercase();
    let target_lower = target_lang.to_ascii_lowercase();

    // Inspect the remainder after the prefix rather than indexing characters by
    // the target's byte length, which is wrong as soon as the target contains
    // multi-byte characters.
    match context_lower.strip_prefix(target_lower.as_str()) {
        // Either an exact match, or the prefix must be followed by a hyphen so
        // that e.g. "england" does not match "en"
        Some(suffix) => suffix.is_empty() || suffix.starts_with('-'),
        None => false,
    }
}
114
115impl CoreFunction {
116    pub(crate) fn evaluate<D: Dom>(
117        &self,
118        context: &EvaluationCtx<D>,
119    ) -> Result<Value<D::Node>, Error> {
120        match self {
121            CoreFunction::Last => {
122                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
123                    msg: "[CoreFunction] last() is only usable as a predicate".to_string(),
124                })?;
125                Ok(Value::Number(predicate_ctx.size as f64))
126            },
127            CoreFunction::Position => {
128                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
129                    msg: "[CoreFunction] position() is only usable as a predicate".to_string(),
130                })?;
131                Ok(Value::Number(predicate_ctx.index as f64))
132            },
133            CoreFunction::Count(expr) => {
134                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
135                Ok(Value::Number(nodes.len() as f64))
136            },
137            CoreFunction::String(expr_opt) => match expr_opt {
138                Some(expr) => Ok(Value::String(expr.evaluate(context)?.convert_to_string())),
139                None => Ok(Value::String(context.context_node.text_content())),
140            },
141            CoreFunction::Concat(exprs) => {
142                let strings: Result<Vec<_>, _> = exprs
143                    .iter()
144                    .map(|e| Ok(e.evaluate(context)?.convert_to_string()))
145                    .collect();
146                Ok(Value::String(strings?.join("")))
147            },
148            CoreFunction::Id(expr) => {
149                let args_str = expr.evaluate(context)?.convert_to_string();
150                let args_normalized = normalize_space(&args_str);
151                let args = args_normalized.split(' ');
152
153                let document = context.context_node.owner_document();
154                let mut result = NodeSet::default();
155                for arg in args {
156                    for element in document.get_elements_with_id(arg) {
157                        result.push(element.as_node());
158                    }
159                }
160                result.assume_sorted();
161
162                Ok(Value::NodeSet(result))
163            },
164            CoreFunction::LocalName(expr_opt) => {
165                let node = match expr_opt {
166                    Some(expr) => expr
167                        .evaluate(context)
168                        .and_then(try_extract_nodeset)?
169                        .first(),
170                    None => Some(context.context_node.clone()),
171                };
172                let name = node.and_then(|n| local_name(&n)).unwrap_or_default();
173                Ok(Value::String(name.to_string()))
174            },
175            CoreFunction::NamespaceUri(expr_opt) => {
176                let node = match expr_opt {
177                    Some(expr) => expr
178                        .evaluate(context)
179                        .and_then(try_extract_nodeset)?
180                        .first(),
181                    None => Some(context.context_node.clone()),
182                };
183                let ns = node.and_then(|n| namespace_uri(&n)).unwrap_or_default();
184                Ok(Value::String(ns.to_string()))
185            },
186            CoreFunction::Name(expr_opt) => {
187                let node = match expr_opt {
188                    Some(expr) => expr
189                        .evaluate(context)
190                        .and_then(try_extract_nodeset)?
191                        .first(),
192                    None => Some(context.context_node.clone()),
193                };
194                let name = node.and_then(|n| name(&n)).unwrap_or_default();
195                Ok(Value::String(name))
196            },
197            CoreFunction::StartsWith(str1, str2) => {
198                let s1 = str1.evaluate(context)?.convert_to_string();
199                let s2 = str2.evaluate(context)?.convert_to_string();
200                Ok(Value::Boolean(s1.starts_with(&s2)))
201            },
202            CoreFunction::Contains(str1, str2) => {
203                let s1 = str1.evaluate(context)?.convert_to_string();
204                let s2 = str2.evaluate(context)?.convert_to_string();
205                Ok(Value::Boolean(s1.contains(&s2)))
206            },
207            CoreFunction::SubstringBefore(str1, str2) => {
208                let s1 = str1.evaluate(context)?.convert_to_string();
209                let s2 = str2.evaluate(context)?.convert_to_string();
210                Ok(Value::String(substring_before(&s1, &s2)))
211            },
212            CoreFunction::SubstringAfter(str1, str2) => {
213                let s1 = str1.evaluate(context)?.convert_to_string();
214                let s2 = str2.evaluate(context)?.convert_to_string();
215                Ok(Value::String(substring_after(&s1, &s2)))
216            },
217            CoreFunction::Substring(source_expression, start, length) => {
218                let source = source_expression.evaluate(context)?.convert_to_string();
219                let start_idx = start.evaluate(context)?.convert_to_number().round() as isize - 1;
220                let result = if let Some(length_expression) = length {
221                    let length = length_expression
222                        .evaluate(context)?
223                        .convert_to_number()
224                        .round() as isize;
225                    substring(&source, start_idx, Some(length))
226                } else {
227                    substring(&source, start_idx, None)
228                };
229                Ok(Value::String(result))
230            },
231            CoreFunction::StringLength(expr_opt) => {
232                let string = match expr_opt {
233                    Some(expr) => expr.evaluate(context)?.convert_to_string(),
234                    None => context.context_node.text_content(),
235                };
236                Ok(Value::Number(string.chars().count() as f64))
237            },
238            CoreFunction::NormalizeSpace(expr_opt) => {
239                let string = match expr_opt {
240                    Some(expr) => expr.evaluate(context)?.convert_to_string(),
241                    None => context.context_node.text_content(),
242                };
243
244                Ok(Value::String(normalize_space(&string)))
245            },
246            CoreFunction::Translate(str1, str2, str3) => {
247                let string = str1.evaluate(context)?.convert_to_string();
248                let from = str2.evaluate(context)?.convert_to_string();
249                let to = str3.evaluate(context)?.convert_to_string();
250                let result = string
251                    .chars()
252                    .map(|c| match from.find(c) {
253                        Some(i) if i < to.chars().count() => to.chars().nth(i).unwrap(),
254                        _ => c,
255                    })
256                    .collect();
257                Ok(Value::String(result))
258            },
259            CoreFunction::Number(expr_opt) => {
260                let val = match expr_opt {
261                    Some(expr) => expr.evaluate(context)?,
262                    None => Value::String(context.context_node.text_content()),
263                };
264                Ok(Value::Number(val.convert_to_number()))
265            },
266            CoreFunction::Sum(expr) => {
267                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
268                let sum = nodes
269                    .iter()
270                    .map(|node| parse_number_from_string(&node.text_content()))
271                    .sum();
272                Ok(Value::Number(sum))
273            },
274            CoreFunction::Floor(expr) => {
275                let num = expr.evaluate(context)?.convert_to_number();
276                Ok(Value::Number(num.floor()))
277            },
278            CoreFunction::Ceiling(expr) => {
279                let num = expr.evaluate(context)?.convert_to_number();
280                Ok(Value::Number(num.ceil()))
281            },
282            CoreFunction::Round(expr) => {
283                let num = expr.evaluate(context)?.convert_to_number();
284                Ok(Value::Number(num.round()))
285            },
286            CoreFunction::Boolean(expr) => {
287                Ok(Value::Boolean(expr.evaluate(context)?.convert_to_boolean()))
288            },
289            CoreFunction::Not(expr) => Ok(Value::Boolean(
290                !expr.evaluate(context)?.convert_to_boolean(),
291            )),
292            CoreFunction::True => Ok(Value::Boolean(true)),
293            CoreFunction::False => Ok(Value::Boolean(false)),
294            CoreFunction::Lang(expr) => {
295                let context_lang = context.context_node.language();
296                let lang = expr.evaluate(context)?.convert_to_string();
297                Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
298            },
299        }
300    }
301}
#[cfg(test)]
mod tests {
    use super::{lang_matches, substring, substring_after, substring_before};

    /// Each case is `(s1, s2, expected)`.
    #[test]
    fn test_substring_before() {
        let cases = [
            ("hello world", "world", "hello "),
            ("prefix:name", ":", "prefix"),
            ("no-separator", "xyz", ""),
            ("", "anything", ""),
            ("multiple:colons:here", ":", "multiple"),
            ("start-match-test", "start", ""),
        ];

        for (s1, s2, expected) in cases {
            assert_eq!(substring_before(s1, s2), expected);
        }
    }

    /// Each case is `(s1, s2, expected)`.
    #[test]
    fn test_substring_after() {
        let cases = [
            ("hello world", "hello ", "world"),
            ("prefix:name", ":", "name"),
            ("no-separator", "xyz", ""),
            ("", "anything", ""),
            ("multiple:colons:here", ":", "colons:here"),
            ("test-end-match", "match", ""),
        ];

        for (s1, s2, expected) in cases {
            assert_eq!(substring_after(s1, s2), expected);
        }
    }

    /// Each case is `(source, start, length, expected)` with a zero-based start.
    #[test]
    fn test_substring() {
        let cases = [
            ("hello world", 0, Some(5), "hello"),
            ("hello world", 6, Some(5), "world"),
            ("hello", 1, Some(3), "ell"),
            ("hello", -5, Some(2), "he"),
            ("hello", 0, None, "hello"),
            ("hello", 2, Some(10), "llo"),
            ("hello", 5, Some(1), ""),
            ("", 0, Some(5), ""),
            ("hello", 0, Some(0), ""),
            ("hello", 0, Some(-5), ""),
        ];

        for (source, start, length, expected) in cases {
            assert_eq!(substring(source, start, length), expected);
        }
    }

    #[test]
    fn test_substring_with_out_of_bounds_index() {
        let result = substring("Servo", 42, None);
        assert_eq!(result, "");
    }

    #[test]
    fn test_substring_with_multi_byte_characters() {
        // Offsets count code points, not bytes
        let result = substring("🦞🦞🦞", 1, None);
        assert_eq!(result, "🦞🦞");
    }

    /// Each case is `(context language, target language)`.
    #[test]
    fn test_lang_matches() {
        let matching = [
            (Some("en"), "en"),
            (Some("EN"), "en"),
            (Some("en"), "EN"),
            (Some("en-US"), "en"),
            (Some("en-GB"), "en"),
        ];
        for (context_lang, target_lang) in matching {
            assert!(lang_matches(context_lang, target_lang));
        }

        let not_matching = [
            (Some("eng"), "en"),
            (Some("fr"), "en"),
            (Some("fr-en"), "en"),
            (None, "en"),
        ];
        for (context_lang, target_lang) in not_matching {
            assert!(!lang_matches(context_lang, target_lang));
        }
    }
}