xpath/
eval_function.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use crate::ast::CoreFunction;
6use crate::context::EvaluationCtx;
7use crate::eval::try_extract_nodeset;
8use crate::value::parse_number_from_string;
9use crate::{Document, Dom, Element, Error, Node, Value};
10
11/// Returns e.g. "rect" for `<svg:rect>`
12fn local_name<N: Node>(node: &N) -> Option<String> {
13    node.as_element()
14        .map(|element| element.local_name().to_string())
15}
16
17/// Returns e.g. "svg:rect" for `<svg:rect>`
18fn name<N: Node>(node: &N) -> Option<String> {
19    node.as_element().map(|element| {
20        if let Some(prefix) = element.prefix().as_ref() {
21            format!("{}:{}", prefix, element.local_name())
22        } else {
23            element.local_name().to_string()
24        }
25    })
26}
27
28/// Returns e.g. the SVG namespace URI for `<svg:rect>`
29fn namespace_uri<N: Node>(node: &N) -> Option<String> {
30    node.as_element()
31        .map(|element| element.namespace().to_string())
32}
33
/// Returns the part of `s1` that precedes the first occurrence of `s2`.
///
/// If `s2` does not occur in `s1`, the result is the empty string.
fn substring_before(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map(|(head, _)| head.to_owned())
        .unwrap_or_default()
}
41
/// Returns the part of `s1` that follows the first occurrence of `s2`.
///
/// If `s2` does not occur in `s1`, the result is the empty string.
fn substring_after(s1: &str, s2: &str) -> String {
    s1.split_once(s2)
        .map_or_else(String::new, |(_, tail)| tail.to_owned())
}
49
/// Returns up to `len` characters of `s`, starting at the zero-based character
/// index `start_idx`.
///
/// * A negative `start_idx` is treated as `0`; `len` is *not* shortened to
///   compensate for the clamped part.
/// * A negative `len` selects nothing, `None` selects everything up to the end
///   of the string.
/// * A `start_idx` at or beyond the end of the string yields the empty string.
///
/// Offsets are counted in `char`s rather than bytes, so the function never
/// splits a multi-byte character and never panics on out-of-range arguments.
fn substring(s: &str, start_idx: isize, len: Option<isize>) -> String {
    // Both casts are lossless: the values are clamped to be non-negative first.
    let skip = start_idx.max(0) as usize;
    let rest = s.chars().skip(skip);

    match len {
        Some(len) => rest.take(len.max(0) as usize).collect(),
        None => rest.collect(),
    }
}
57
/// Strips leading and trailing whitespace and collapses every interior run of
/// whitespace into a single space.
///
/// Only the four characters U+0020, U+0009, U+000D and U+000A count as whitespace.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-normalize-space>
pub(crate) fn normalize_space(s: &str) -> String {
    let is_whitespace = |c: char| matches!(c, '\x20' | '\x09' | '\x0D' | '\x0A');

    let mut normalized = String::with_capacity(s.len());
    // Splitting on every whitespace character produces empty pieces for leading,
    // trailing and consecutive whitespace; dropping those leaves just the words.
    let words = s.split(is_whitespace).filter(|word| !word.is_empty());
    for word in words {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }

    normalized
}
84
/// Decides whether the language of the context node (`context_lang`) matches
/// `target_lang`, ignoring ASCII case.
///
/// The languages match when they are equal, or when `context_lang` consists of
/// `target_lang` followed by a `-` and an arbitrary suffix (so "en-US" matches
/// "en", but "eng" does not). A missing `context_lang` never matches.
///
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#function-lang>
fn lang_matches(context_lang: Option<&str>, target_lang: &str) -> bool {
    let Some(context_lang) = context_lang else {
        return false;
    };

    let context_lower = context_lang.to_ascii_lowercase();
    let target_lower = target_lang.to_ascii_lowercase();

    // Inspect what follows the prefix directly instead of indexing characters
    // by a byte length, which is wrong as soon as a tag contains non-ASCII text.
    match context_lower.strip_prefix(target_lower.as_str()) {
        // Exact match, or the target followed by a subtag separator.
        Some(rest) => rest.is_empty() || rest.starts_with('-'),
        None => false,
    }
}
109
110impl CoreFunction {
111    pub(crate) fn evaluate<D: Dom>(
112        &self,
113        context: &EvaluationCtx<D>,
114    ) -> Result<Value<D::Node>, Error<D::JsError>> {
115        match self {
116            CoreFunction::Last => {
117                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
118                    msg: "[CoreFunction] last() is only usable as a predicate".to_string(),
119                })?;
120                Ok(Value::Number(predicate_ctx.size as f64))
121            },
122            CoreFunction::Position => {
123                let predicate_ctx = context.predicate_ctx.ok_or_else(|| Error::Internal {
124                    msg: "[CoreFunction] position() is only usable as a predicate".to_string(),
125                })?;
126                Ok(Value::Number(predicate_ctx.index as f64))
127            },
128            CoreFunction::Count(expr) => {
129                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
130                Ok(Value::Number(nodes.len() as f64))
131            },
132            CoreFunction::String(expr_opt) => match expr_opt {
133                Some(expr) => Ok(Value::String(expr.evaluate(context)?.string())),
134                None => Ok(Value::String(context.context_node.text_content())),
135            },
136            CoreFunction::Concat(exprs) => {
137                let strings: Result<Vec<_>, _> = exprs
138                    .iter()
139                    .map(|e| Ok(e.evaluate(context)?.string()))
140                    .collect();
141                Ok(Value::String(strings?.join("")))
142            },
143            CoreFunction::Id(expr) => {
144                let args_str = expr.evaluate(context)?.string();
145                let args_normalized = normalize_space(&args_str);
146                let args = args_normalized.split(' ');
147
148                let document = context.context_node.owner_document();
149                let mut result = Vec::new();
150                for arg in args {
151                    for element in document.get_elements_with_id(arg) {
152                        result.push(element.as_node());
153                    }
154                }
155                Ok(Value::Nodeset(result))
156            },
157            CoreFunction::LocalName(expr_opt) => {
158                let node = match expr_opt {
159                    Some(expr) => expr
160                        .evaluate(context)
161                        .and_then(try_extract_nodeset)?
162                        .first()
163                        .cloned(),
164                    None => Some(context.context_node.clone()),
165                };
166                let name = node.and_then(|n| local_name(&n)).unwrap_or_default();
167                Ok(Value::String(name.to_string()))
168            },
169            CoreFunction::NamespaceUri(expr_opt) => {
170                let node = match expr_opt {
171                    Some(expr) => expr
172                        .evaluate(context)
173                        .and_then(try_extract_nodeset)?
174                        .first()
175                        .cloned(),
176                    None => Some(context.context_node.clone()),
177                };
178                let ns = node.and_then(|n| namespace_uri(&n)).unwrap_or_default();
179                Ok(Value::String(ns.to_string()))
180            },
181            CoreFunction::Name(expr_opt) => {
182                let node = match expr_opt {
183                    Some(expr) => expr
184                        .evaluate(context)
185                        .and_then(try_extract_nodeset)?
186                        .first()
187                        .cloned(),
188                    None => Some(context.context_node.clone()),
189                };
190                let name = node.and_then(|n| name(&n)).unwrap_or_default();
191                Ok(Value::String(name))
192            },
193            CoreFunction::StartsWith(str1, str2) => {
194                let s1 = str1.evaluate(context)?.string();
195                let s2 = str2.evaluate(context)?.string();
196                Ok(Value::Boolean(s1.starts_with(&s2)))
197            },
198            CoreFunction::Contains(str1, str2) => {
199                let s1 = str1.evaluate(context)?.string();
200                let s2 = str2.evaluate(context)?.string();
201                Ok(Value::Boolean(s1.contains(&s2)))
202            },
203            CoreFunction::SubstringBefore(str1, str2) => {
204                let s1 = str1.evaluate(context)?.string();
205                let s2 = str2.evaluate(context)?.string();
206                Ok(Value::String(substring_before(&s1, &s2)))
207            },
208            CoreFunction::SubstringAfter(str1, str2) => {
209                let s1 = str1.evaluate(context)?.string();
210                let s2 = str2.evaluate(context)?.string();
211                Ok(Value::String(substring_after(&s1, &s2)))
212            },
213            CoreFunction::Substring(str1, start, length_opt) => {
214                let s = str1.evaluate(context)?.string();
215                let start_idx = start.evaluate(context)?.number().round() as isize - 1;
216                let len = match length_opt {
217                    Some(len_expr) => Some(len_expr.evaluate(context)?.number().round() as isize),
218                    None => None,
219                };
220                Ok(Value::String(substring(&s, start_idx, len)))
221            },
222            CoreFunction::StringLength(expr_opt) => {
223                let s = match expr_opt {
224                    Some(expr) => expr.evaluate(context)?.string(),
225                    None => context.context_node.text_content(),
226                };
227                Ok(Value::Number(s.chars().count() as f64))
228            },
229            CoreFunction::NormalizeSpace(expr_opt) => {
230                let s = match expr_opt {
231                    Some(expr) => expr.evaluate(context)?.string(),
232                    None => context.context_node.text_content(),
233                };
234
235                Ok(Value::String(normalize_space(&s)))
236            },
237            CoreFunction::Translate(str1, str2, str3) => {
238                let s = str1.evaluate(context)?.string();
239                let from = str2.evaluate(context)?.string();
240                let to = str3.evaluate(context)?.string();
241                let result = s
242                    .chars()
243                    .map(|c| match from.find(c) {
244                        Some(i) if i < to.chars().count() => to.chars().nth(i).unwrap(),
245                        _ => c,
246                    })
247                    .collect();
248                Ok(Value::String(result))
249            },
250            CoreFunction::Number(expr_opt) => {
251                let val = match expr_opt {
252                    Some(expr) => expr.evaluate(context)?,
253                    None => Value::String(context.context_node.text_content()),
254                };
255                Ok(Value::Number(val.number()))
256            },
257            CoreFunction::Sum(expr) => {
258                let nodes = expr.evaluate(context).and_then(try_extract_nodeset)?;
259                let sum = nodes
260                    .iter()
261                    .map(|node| parse_number_from_string(&node.text_content()))
262                    .sum();
263                Ok(Value::Number(sum))
264            },
265            CoreFunction::Floor(expr) => {
266                let num = expr.evaluate(context)?.number();
267                Ok(Value::Number(num.floor()))
268            },
269            CoreFunction::Ceiling(expr) => {
270                let num = expr.evaluate(context)?.number();
271                Ok(Value::Number(num.ceil()))
272            },
273            CoreFunction::Round(expr) => {
274                let num = expr.evaluate(context)?.number();
275                Ok(Value::Number(num.round()))
276            },
277            CoreFunction::Boolean(expr) => Ok(Value::Boolean(expr.evaluate(context)?.boolean())),
278            CoreFunction::Not(expr) => Ok(Value::Boolean(!expr.evaluate(context)?.boolean())),
279            CoreFunction::True => Ok(Value::Boolean(true)),
280            CoreFunction::False => Ok(Value::Boolean(false)),
281            CoreFunction::Lang(expr) => {
282                let context_lang = context.context_node.language();
283                let lang = expr.evaluate(context)?.string();
284                Ok(Value::Boolean(lang_matches(context_lang.as_deref(), &lang)))
285            },
286        }
287    }
288}
#[cfg(test)]
mod tests {
    use super::{lang_matches, substring, substring_after, substring_before};

    /// `substring_before` returns the text in front of the first match, or
    /// nothing when there is no match.
    #[test]
    fn test_substring_before() {
        let cases = [
            ("hello world", "world", "hello "),
            ("prefix:name", ":", "prefix"),
            ("no-separator", "xyz", ""),
            ("", "anything", ""),
            ("multiple:colons:here", ":", "multiple"),
            ("start-match-test", "start", ""),
        ];

        for &(haystack, needle, expected) in cases.iter() {
            assert_eq!(
                substring_before(haystack, needle),
                expected,
                "substring_before({:?}, {:?})",
                haystack,
                needle
            );
        }
    }

    /// `substring_after` returns the text behind the first match, or nothing
    /// when there is no match.
    #[test]
    fn test_substring_after() {
        let cases = [
            ("hello world", "hello ", "world"),
            ("prefix:name", ":", "name"),
            ("no-separator", "xyz", ""),
            ("", "anything", ""),
            ("multiple:colons:here", ":", "colons:here"),
            ("test-end-match", "match", ""),
        ];

        for &(haystack, needle, expected) in cases.iter() {
            assert_eq!(
                substring_after(haystack, needle),
                expected,
                "substring_after({:?}, {:?})",
                haystack,
                needle
            );
        }
    }

    /// `substring` clamps out-of-range starts and lengths instead of failing.
    #[test]
    fn test_substring() {
        let cases = [
            ("hello world", 0, Some(5), "hello"),
            ("hello world", 6, Some(5), "world"),
            ("hello", 1, Some(3), "ell"),
            ("hello", -5, Some(2), "he"),
            ("hello", 0, None, "hello"),
            ("hello", 2, Some(10), "llo"),
            ("hello", 5, Some(1), ""),
            ("", 0, Some(5), ""),
            ("hello", 0, Some(0), ""),
            ("hello", 0, Some(-5), ""),
        ];

        for &(input, start, len, expected) in cases.iter() {
            assert_eq!(
                substring(input, start, len),
                expected,
                "substring({:?}, {}, {:?})",
                input,
                start,
                len
            );
        }
    }

    /// `lang_matches` compares case-insensitively and accepts `-` suffixes only.
    #[test]
    fn test_lang_matches() {
        let matching = [
            (Some("en"), "en"),
            (Some("EN"), "en"),
            (Some("en"), "EN"),
            (Some("en-US"), "en"),
            (Some("en-GB"), "en"),
        ];
        for &(context_lang, target_lang) in matching.iter() {
            assert!(
                lang_matches(context_lang, target_lang),
                "expected {:?} to match {:?}",
                context_lang,
                target_lang
            );
        }

        let not_matching = [
            (Some("eng"), "en"),
            (Some("fr"), "en"),
            (Some("fr-en"), "en"),
            (None, "en"),
        ];
        for &(context_lang, target_lang) in not_matching.iter() {
            assert!(
                !lang_matches(context_lang, target_lang),
                "expected {:?} not to match {:?}",
                context_lang,
                target_lang
            );
        }
    }
}