Skip to main content

usvg/parser/svgtree/
text.rs

1// Copyright 2021 the Resvg Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4#![allow(clippy::comparison_chain)]
5
6use roxmltree::Error;
7
8use super::{AId, Document, EId, NodeId, NodeKind, SvgNode};
9
/// Namespace URI of XLink, used to look up the namespaced `href` attribute on `tref` elements.
const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
11
12pub(crate) fn parse_svg_text_element<'input>(
13    parent: roxmltree::Node<'_, 'input>,
14    parent_id: NodeId,
15    style_sheet: &simplecss::StyleSheet,
16    doc: &mut Document<'input>,
17) -> Result<(), Error> {
18    debug_assert_eq!(parent.tag_name().name(), "text");
19
20    let space = if doc.get(parent_id).has_attribute(AId::Space) {
21        get_xmlspace(doc, parent_id, XmlSpace::Default)
22    } else {
23        if let Some(node) = doc
24            .get(parent_id)
25            .ancestors()
26            .find(|n| n.has_attribute(AId::Space))
27        {
28            get_xmlspace(doc, node.id, XmlSpace::Default)
29        } else {
30            XmlSpace::Default
31        }
32    };
33
34    parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?;
35
36    trim_text_nodes(parent_id, space, doc);
37    Ok(())
38}
39
40fn parse_svg_text_element_impl<'input>(
41    parent: roxmltree::Node<'_, 'input>,
42    parent_id: NodeId,
43    style_sheet: &simplecss::StyleSheet,
44    space: XmlSpace,
45    doc: &mut Document<'input>,
46) -> Result<(), Error> {
47    for node in parent.children() {
48        if node.is_text() {
49            let text = trim_text(node.text().unwrap(), space);
50            doc.append(parent_id, NodeKind::Text(text));
51            continue;
52        }
53
54        let mut tag_name = match super::parse::parse_tag_name(node) {
55            Some(v) => v,
56            None => continue,
57        };
58
59        if tag_name == EId::A {
60            // Treat links as simple text.
61            tag_name = EId::Tspan;
62        }
63
64        if !matches!(tag_name, EId::Tspan | EId::Tref | EId::TextPath) {
65            continue;
66        }
67
68        // `textPath` must be a direct `text` child.
69        if tag_name == EId::TextPath && parent.tag_name().name() != "text" {
70            continue;
71        }
72
73        // We are converting `tref` into `tspan` to simplify later use.
74        let mut is_tref = false;
75        if tag_name == EId::Tref {
76            tag_name = EId::Tspan;
77            is_tref = true;
78        }
79
80        let node_id =
81            super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, false, doc)?;
82        let space = get_xmlspace(doc, node_id, space);
83
84        if is_tref {
85            let link_value = node
86                .attribute((XLINK_NS, "href"))
87                .or_else(|| node.attribute("href"));
88
89            if let Some(href) = link_value {
90                if let Some(text) = resolve_tref_text(node.document(), href) {
91                    let text = trim_text(&text, space);
92                    doc.append(node_id, NodeKind::Text(text));
93                }
94            }
95        } else {
96            parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?;
97        }
98    }
99
100    Ok(())
101}
102
103fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> {
104    let id = svgtypes::IRI::from_str(href).ok()?.0;
105
106    // Find linked element in the original tree.
107    let node = xml.descendants().find(|n| n.attribute("id") == Some(id))?;
108
109    // `tref` should be linked to an SVG element.
110    super::parse::parse_tag_name(node)?;
111
112    // 'All character data within the referenced element, including character data enclosed
113    // within additional markup, will be rendered.'
114    //
115    // So we don't care about attributes and everything. Just collecting text nodes data.
116    //
117    // Note: we have to filter nodes by `is_text()` first since `text()` will look up
118    // for text nodes in element children therefore we will get duplicates.
119    let text: String = node
120        .descendants()
121        .filter(|n| n.is_text())
122        .filter_map(|n| n.text())
123        .collect();
124    if text.is_empty() { None } else { Some(text) }
125}
126
/// Whitespace handling mode selected by the `xml:space` attribute.
#[derive(Debug, Clone, Copy, PartialEq)]
enum XmlSpace {
    /// Consecutive spaces are collapsed and edge spaces are trimmed.
    Default,
    /// Spaces are kept as they are.
    Preserve,
}
132
133fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace {
134    match doc.get(node_id).attribute(AId::Space) {
135        Some("preserve") => XmlSpace::Preserve,
136        Some(_) => XmlSpace::Default,
137        _ => default,
138    }
139}
140
/// Removal of a single edge space from a string.
///
/// Both methods expect the corresponding edge character to be a space; this is checked
/// in debug builds only.
trait StrTrim {
    /// Removes the first character, which must be a space.
    fn remove_first_space(&mut self);
    /// Removes the last character, which must be a space.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert_eq!(self.chars().next(), Some(' '));
        // A space is exactly one byte long in UTF-8.
        self.drain(..1);
    }

    fn remove_last_space(&mut self) {
        debug_assert_eq!(self.chars().next_back(), Some(' '));
        let _ = self.pop();
    }
}
157
158/// Prepares text nodes according to the spec: https://www.w3.org/TR/SVG11/text.html#WhiteSpace
159///
160/// This function handles:
161/// - 'xml:space' processing
162/// - tabs and newlines removing/replacing
163/// - spaces trimming
164fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
165    let mut nodes = Vec::new(); // TODO: allocate only once
166    collect_text_nodes(doc.get(text_elem_id), 0, &mut nodes);
167
168    // `trim` method has already collapsed all spaces into a single one,
169    // so we have to check only for one leading or trailing space.
170
171    if nodes.len() == 1 {
172        // Process element with a single text node child.
173
174        let node_id = nodes[0].0;
175
176        if xmlspace == XmlSpace::Default {
177            if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
178                match text.len() {
179                    0 => {} // An empty string. Do nothing.
180                    1 => {
181                        // If string has only one character and it's a space - clear this string.
182                        if text.as_bytes()[0] == b' ' {
183                            text.clear();
184                        }
185                    }
186                    _ => {
187                        // 'text' has at least 2 bytes, so indexing is safe.
188                        let c1 = text.as_bytes()[0];
189                        let c2 = text.as_bytes()[text.len() - 1];
190
191                        if c1 == b' ' {
192                            text.remove_first_space();
193                        }
194
195                        if c2 == b' ' {
196                            text.remove_last_space();
197                        }
198                    }
199                }
200            }
201        } else {
202            // Do nothing when xml:space=preserve.
203        }
204    } else if nodes.len() > 1 {
205        // Process element with many text node children.
206
207        // We manage all text nodes as a single text node
208        // and trying to remove duplicated spaces across nodes.
209        //
210        // For example    '<text>Text <tspan> text </tspan> text</text>'
211        // is the same is '<text>Text <tspan>text</tspan> text</text>'
212
213        let mut i = 0;
214        let len = nodes.len() - 1;
215        let mut last_non_empty: Option<NodeId> = None;
216        while i < len {
217            // Process pairs.
218            let (mut node1_id, depth1) = nodes[i];
219            let (node2_id, depth2) = nodes[i + 1];
220
221            if doc.get(node1_id).text().is_empty() {
222                if let Some(n) = last_non_empty {
223                    node1_id = n;
224                }
225            }
226
227            // Parent of the text node is always an element node and always exist,
228            // so unwrap is safe.
229            let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
230            let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);
231
232            // >text<..>text<
233            //  1  2    3  4
234            let (c1, c2, c3, c4) = {
235                let text1 = doc.get(node1_id).text();
236                let text2 = doc.get(node2_id).text();
237
238                let bytes1 = text1.as_bytes();
239                let bytes2 = text2.as_bytes();
240
241                let c1 = bytes1.first().cloned();
242                let c2 = bytes1.last().cloned();
243                let c3 = bytes2.first().cloned();
244                let c4 = bytes2.last().cloned();
245
246                (c1, c2, c3, c4)
247            };
248
249            // NOTE: xml:space processing is mostly an undefined behavior,
250            // because everyone do it differently.
251            // We're mimicking the Chrome behavior.
252
253            // Remove space from the second text node if both nodes has bound spaces.
254            // From: '<text>Text <tspan> text</tspan></text>'
255            // To:   '<text>Text <tspan>text</tspan></text>'
256            //
257            // See text-tspan-02-b.svg for details.
258            if depth1 < depth2 {
259                if c3 == Some(b' ') {
260                    if xmlspace2 == XmlSpace::Default {
261                        if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
262                            text.remove_first_space();
263                        }
264                    }
265                }
266            } else {
267                if c2 == Some(b' ') && c2 == c3 {
268                    if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
269                        if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
270                            text.remove_last_space();
271                        }
272                    } else {
273                        if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
274                            if let NodeKind::Text(ref mut text) =
275                                doc.nodes[node2_id.get_usize()].kind
276                            {
277                                text.remove_first_space();
278                            }
279                        }
280                    }
281                }
282            }
283
284            let is_first = i == 0;
285            let is_last = i == len - 1;
286
287            if is_first
288                && c1 == Some(b' ')
289                && xmlspace1 == XmlSpace::Default
290                && !doc.get(node1_id).text().is_empty()
291            {
292                // Remove a leading space from a first text node.
293                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
294                    text.remove_first_space();
295                }
296            } else if is_last
297                && c4 == Some(b' ')
298                && !doc.get(node2_id).text().is_empty()
299                && xmlspace2 == XmlSpace::Default
300            {
301                // Remove a trailing space from a last text node.
302                // Also check that 'text2' is not empty already.
303                if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
304                    text.remove_last_space();
305                }
306            }
307
308            if is_last
309                && c2 == Some(b' ')
310                && !doc.get(node1_id).text().is_empty()
311                && doc.get(node2_id).text().is_empty()
312                && doc.get(node1_id).text().ends_with(' ')
313            {
314                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
315                    text.remove_last_space();
316                }
317            }
318
319            if !doc.get(node1_id).text().trim().is_empty() {
320                last_non_empty = Some(node1_id);
321            }
322
323            i += 1;
324        }
325    }
326
327    // TODO: find a way to remove all empty text nodes
328}
329
330fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) {
331    for child in parent.children() {
332        if child.is_text() {
333            nodes.push((child.id, depth));
334        } else if child.is_element() {
335            collect_text_nodes(child, depth + 1, nodes);
336        }
337    }
338}
339
340fn trim_text(text: &str, space: XmlSpace) -> String {
341    let mut s = String::with_capacity(text.len());
342
343    let mut prev = '0';
344    for c in text.chars() {
345        // \r, \n and \t should be converted into spaces.
346        let c = match c {
347            '\r' | '\n' | '\t' => ' ',
348            _ => c,
349        };
350
351        // Skip continuous spaces.
352        if space == XmlSpace::Default && c == ' ' && c == prev {
353            continue;
354        }
355
356        prev = c;
357
358        s.push(c);
359    }
360
361    s
362}