usvg/parser/svgtree/
text.rs

1// Copyright 2021 the Resvg Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4#![allow(clippy::comparison_chain)]
5
6use roxmltree::Error;
7
8use super::{AId, Document, EId, NodeId, NodeKind, SvgNode};
9
10const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
11
12pub(crate) fn parse_svg_text_element<'input>(
13    parent: roxmltree::Node<'_, 'input>,
14    parent_id: NodeId,
15    style_sheet: &simplecss::StyleSheet,
16    doc: &mut Document<'input>,
17) -> Result<(), Error> {
18    debug_assert_eq!(parent.tag_name().name(), "text");
19
20    let space = if doc.get(parent_id).has_attribute(AId::Space) {
21        get_xmlspace(doc, parent_id, XmlSpace::Default)
22    } else {
23        if let Some(node) = doc
24            .get(parent_id)
25            .ancestors()
26            .find(|n| n.has_attribute(AId::Space))
27        {
28            get_xmlspace(doc, node.id, XmlSpace::Default)
29        } else {
30            XmlSpace::Default
31        }
32    };
33
34    parse_svg_text_element_impl(parent, parent_id, style_sheet, space, doc)?;
35
36    trim_text_nodes(parent_id, space, doc);
37    Ok(())
38}
39
40fn parse_svg_text_element_impl<'input>(
41    parent: roxmltree::Node<'_, 'input>,
42    parent_id: NodeId,
43    style_sheet: &simplecss::StyleSheet,
44    space: XmlSpace,
45    doc: &mut Document<'input>,
46) -> Result<(), Error> {
47    for node in parent.children() {
48        if node.is_text() {
49            let text = trim_text(node.text().unwrap(), space);
50            doc.append(parent_id, NodeKind::Text(text));
51            continue;
52        }
53
54        let mut tag_name = match super::parse::parse_tag_name(node) {
55            Some(v) => v,
56            None => continue,
57        };
58
59        if tag_name == EId::A {
60            // Treat links as simple text.
61            tag_name = EId::Tspan;
62        }
63
64        if !matches!(tag_name, EId::Tspan | EId::Tref | EId::TextPath) {
65            continue;
66        }
67
68        // `textPath` must be a direct `text` child.
69        if tag_name == EId::TextPath && parent.tag_name().name() != "text" {
70            continue;
71        }
72
73        // We are converting `tref` into `tspan` to simplify later use.
74        let mut is_tref = false;
75        if tag_name == EId::Tref {
76            tag_name = EId::Tspan;
77            is_tref = true;
78        }
79
80        let node_id =
81            super::parse::parse_svg_element(node, parent_id, tag_name, style_sheet, false, doc)?;
82        let space = get_xmlspace(doc, node_id, space);
83
84        if is_tref {
85            let link_value = node
86                .attribute((XLINK_NS, "href"))
87                .or_else(|| node.attribute("href"));
88
89            if let Some(href) = link_value {
90                if let Some(text) = resolve_tref_text(node.document(), href) {
91                    let text = trim_text(&text, space);
92                    doc.append(node_id, NodeKind::Text(text));
93                }
94            }
95        } else {
96            parse_svg_text_element_impl(node, node_id, style_sheet, space, doc)?;
97        }
98    }
99
100    Ok(())
101}
102
103fn resolve_tref_text(xml: &roxmltree::Document, href: &str) -> Option<String> {
104    let id = svgtypes::IRI::from_str(href).ok()?.0;
105
106    // Find linked element in the original tree.
107    let node = xml.descendants().find(|n| n.attribute("id") == Some(id))?;
108
109    // `tref` should be linked to an SVG element.
110    super::parse::parse_tag_name(node)?;
111
112    // 'All character data within the referenced element, including character data enclosed
113    // within additional markup, will be rendered.'
114    //
115    // So we don't care about attributes and everything. Just collecting text nodes data.
116    //
117    // Note: we have to filter nodes by `is_text()` first since `text()` will look up
118    // for text nodes in element children therefore we will get duplicates.
119    let text: String = node
120        .descendants()
121        .filter(|n| n.is_text())
122        .filter_map(|n| n.text())
123        .collect();
124    if text.is_empty() {
125        None
126    } else {
127        Some(text)
128    }
129}
130
/// Whitespace handling mode selected by the `xml:space` attribute.
#[derive(Debug, Clone, Copy, PartialEq)]
enum XmlSpace {
    /// Consecutive spaces are collapsed and boundary spaces may be removed.
    Default,
    /// Selected by the value `preserve`; spaces are kept.
    Preserve,
}
136
137fn get_xmlspace(doc: &Document, node_id: NodeId, default: XmlSpace) -> XmlSpace {
138    match doc.get(node_id).attribute(AId::Space) {
139        Some("preserve") => XmlSpace::Preserve,
140        Some(_) => XmlSpace::Default,
141        _ => default,
142    }
143}
144
/// In-place removal of a single boundary space.
///
/// Callers are expected to have checked that the boundary character is a
/// space; debug builds assert it. When the precondition does not hold in a
/// build without debug assertions, both methods leave the string untouched.
trait StrTrim {
    /// Removes the leading space.
    fn remove_first_space(&mut self);
    /// Removes the trailing space.
    fn remove_last_space(&mut self);
}

impl StrTrim for String {
    fn remove_first_space(&mut self) {
        debug_assert!(
            self.starts_with(' '),
            "expected a leading space in {:?}",
            self
        );

        // Guarded so that a release build can neither panic on an empty string
        // or a multi-byte first character nor drop anything but a space.
        if self.starts_with(' ') {
            self.remove(0);
        }
    }

    fn remove_last_space(&mut self) {
        debug_assert!(
            self.ends_with(' '),
            "expected a trailing space in {:?}",
            self
        );

        // Guarded so that a release build never pops a non-space character.
        if self.ends_with(' ') {
            self.pop();
        }
    }
}
161
162/// Prepares text nodes according to the spec: https://www.w3.org/TR/SVG11/text.html#WhiteSpace
163///
164/// This function handles:
165/// - 'xml:space' processing
166/// - tabs and newlines removing/replacing
167/// - spaces trimming
168fn trim_text_nodes(text_elem_id: NodeId, xmlspace: XmlSpace, doc: &mut Document) {
169    let mut nodes = Vec::new(); // TODO: allocate only once
170    collect_text_nodes(doc.get(text_elem_id), 0, &mut nodes);
171
172    // `trim` method has already collapsed all spaces into a single one,
173    // so we have to check only for one leading or trailing space.
174
175    if nodes.len() == 1 {
176        // Process element with a single text node child.
177
178        let node_id = nodes[0].0;
179
180        if xmlspace == XmlSpace::Default {
181            if let NodeKind::Text(ref mut text) = doc.nodes[node_id.get_usize()].kind {
182                match text.len() {
183                    0 => {} // An empty string. Do nothing.
184                    1 => {
185                        // If string has only one character and it's a space - clear this string.
186                        if text.as_bytes()[0] == b' ' {
187                            text.clear();
188                        }
189                    }
190                    _ => {
191                        // 'text' has at least 2 bytes, so indexing is safe.
192                        let c1 = text.as_bytes()[0];
193                        let c2 = text.as_bytes()[text.len() - 1];
194
195                        if c1 == b' ' {
196                            text.remove_first_space();
197                        }
198
199                        if c2 == b' ' {
200                            text.remove_last_space();
201                        }
202                    }
203                }
204            }
205        } else {
206            // Do nothing when xml:space=preserve.
207        }
208    } else if nodes.len() > 1 {
209        // Process element with many text node children.
210
211        // We manage all text nodes as a single text node
212        // and trying to remove duplicated spaces across nodes.
213        //
214        // For example    '<text>Text <tspan> text </tspan> text</text>'
215        // is the same is '<text>Text <tspan>text</tspan> text</text>'
216
217        let mut i = 0;
218        let len = nodes.len() - 1;
219        let mut last_non_empty: Option<NodeId> = None;
220        while i < len {
221            // Process pairs.
222            let (mut node1_id, depth1) = nodes[i];
223            let (node2_id, depth2) = nodes[i + 1];
224
225            if doc.get(node1_id).text().is_empty() {
226                if let Some(n) = last_non_empty {
227                    node1_id = n;
228                }
229            }
230
231            // Parent of the text node is always an element node and always exist,
232            // so unwrap is safe.
233            let xmlspace1 = get_xmlspace(doc, doc.get(node1_id).parent().unwrap().id, xmlspace);
234            let xmlspace2 = get_xmlspace(doc, doc.get(node2_id).parent().unwrap().id, xmlspace);
235
236            // >text<..>text<
237            //  1  2    3  4
238            let (c1, c2, c3, c4) = {
239                let text1 = doc.get(node1_id).text();
240                let text2 = doc.get(node2_id).text();
241
242                let bytes1 = text1.as_bytes();
243                let bytes2 = text2.as_bytes();
244
245                let c1 = bytes1.first().cloned();
246                let c2 = bytes1.last().cloned();
247                let c3 = bytes2.first().cloned();
248                let c4 = bytes2.last().cloned();
249
250                (c1, c2, c3, c4)
251            };
252
253            // NOTE: xml:space processing is mostly an undefined behavior,
254            // because everyone do it differently.
255            // We're mimicking the Chrome behavior.
256
257            // Remove space from the second text node if both nodes has bound spaces.
258            // From: '<text>Text <tspan> text</tspan></text>'
259            // To:   '<text>Text <tspan>text</tspan></text>'
260            //
261            // See text-tspan-02-b.svg for details.
262            if depth1 < depth2 {
263                if c3 == Some(b' ') {
264                    if xmlspace2 == XmlSpace::Default {
265                        if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
266                            text.remove_first_space();
267                        }
268                    }
269                }
270            } else {
271                if c2 == Some(b' ') && c2 == c3 {
272                    if xmlspace1 == XmlSpace::Default && xmlspace2 == XmlSpace::Default {
273                        if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
274                            text.remove_last_space();
275                        }
276                    } else {
277                        if xmlspace1 == XmlSpace::Preserve && xmlspace2 == XmlSpace::Default {
278                            if let NodeKind::Text(ref mut text) =
279                                doc.nodes[node2_id.get_usize()].kind
280                            {
281                                text.remove_first_space();
282                            }
283                        }
284                    }
285                }
286            }
287
288            let is_first = i == 0;
289            let is_last = i == len - 1;
290
291            if is_first
292                && c1 == Some(b' ')
293                && xmlspace1 == XmlSpace::Default
294                && !doc.get(node1_id).text().is_empty()
295            {
296                // Remove a leading space from a first text node.
297                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
298                    text.remove_first_space();
299                }
300            } else if is_last
301                && c4 == Some(b' ')
302                && !doc.get(node2_id).text().is_empty()
303                && xmlspace2 == XmlSpace::Default
304            {
305                // Remove a trailing space from a last text node.
306                // Also check that 'text2' is not empty already.
307                if let NodeKind::Text(ref mut text) = doc.nodes[node2_id.get_usize()].kind {
308                    text.remove_last_space();
309                }
310            }
311
312            if is_last
313                && c2 == Some(b' ')
314                && !doc.get(node1_id).text().is_empty()
315                && doc.get(node2_id).text().is_empty()
316                && doc.get(node1_id).text().ends_with(' ')
317            {
318                if let NodeKind::Text(ref mut text) = doc.nodes[node1_id.get_usize()].kind {
319                    text.remove_last_space();
320                }
321            }
322
323            if !doc.get(node1_id).text().trim().is_empty() {
324                last_non_empty = Some(node1_id);
325            }
326
327            i += 1;
328        }
329    }
330
331    // TODO: find a way to remove all empty text nodes
332}
333
334fn collect_text_nodes(parent: SvgNode, depth: usize, nodes: &mut Vec<(NodeId, usize)>) {
335    for child in parent.children() {
336        if child.is_text() {
337            nodes.push((child.id, depth));
338        } else if child.is_element() {
339            collect_text_nodes(child, depth + 1, nodes);
340        }
341    }
342}
343
344fn trim_text(text: &str, space: XmlSpace) -> String {
345    let mut s = String::with_capacity(text.len());
346
347    let mut prev = '0';
348    for c in text.chars() {
349        // \r, \n and \t should be converted into spaces.
350        let c = match c {
351            '\r' | '\n' | '\t' => ' ',
352            _ => c,
353        };
354
355        // Skip continuous spaces.
356        if space == XmlSpace::Default && c == ' ' && c == prev {
357            continue;
358        }
359
360        prev = c;
361
362        s.push(c);
363    }
364
365    s
366}