1use std::collections::HashMap;
5
6use roxmltree::Error;
7use simplecss::Declaration;
8use svgtypes::FontShorthand;
9
10use super::{AId, Attribute, Document, EId, NodeData, NodeId, NodeKind, ShortRange};
11
12const SVG_NS: &str = "http://www.w3.org/2000/svg";
13const XLINK_NS: &str = "http://www.w3.org/1999/xlink";
14const XML_NAMESPACE_NS: &str = "http://www.w3.org/XML/1998/namespace";
15
16impl<'input> Document<'input> {
17 pub fn parse_tree(
19 xml: &roxmltree::Document<'input>,
20 injected_stylesheet: Option<&'input str>,
21 ) -> Result<Document<'input>, Error> {
22 parse(xml, injected_stylesheet)
23 }
24
25 pub(crate) fn append(&mut self, parent_id: NodeId, kind: NodeKind) -> NodeId {
26 let new_child_id = NodeId::from(self.nodes.len());
27 self.nodes.push(NodeData {
28 parent: Some(parent_id),
29 next_sibling: None,
30 children: None,
31 kind,
32 });
33
34 let last_child_id = self.nodes[parent_id.get_usize()].children.map(|(_, id)| id);
35
36 if let Some(id) = last_child_id {
37 self.nodes[id.get_usize()].next_sibling = Some(new_child_id);
38 }
39
40 self.nodes[parent_id.get_usize()].children = Some(
41 if let Some((first_child_id, _)) = self.nodes[parent_id.get_usize()].children {
42 (first_child_id, new_child_id)
43 } else {
44 (new_child_id, new_child_id)
45 },
46 );
47
48 new_child_id
49 }
50
51 fn append_attribute(
52 &mut self,
53 name: AId,
54 value: roxmltree::StringStorage<'input>,
55 important: bool,
56 ) {
57 self.attrs.push(Attribute {
58 name,
59 value,
60 important,
61 });
62 }
63}
64
65fn parse<'input>(
66 xml: &roxmltree::Document<'input>,
67 injected_stylesheet: Option<&'input str>,
68) -> Result<Document<'input>, Error> {
69 let mut doc = Document {
70 nodes: Vec::new(),
71 attrs: Vec::new(),
72 links: HashMap::new(),
73 };
74
75 let mut id_map = HashMap::new();
77 for node in xml.descendants() {
78 if let Some(id) = node.attribute("id") {
79 if !id_map.contains_key(id) {
80 id_map.insert(id, node);
81 }
82 }
83 }
84
85 doc.nodes.push(NodeData {
87 parent: None,
88 next_sibling: None,
89 children: None,
90 kind: NodeKind::Root,
91 });
92
93 let style_sheet = resolve_css(xml, injected_stylesheet);
94
95 parse_xml_node_children(
96 xml.root(),
97 xml.root(),
98 doc.root().id,
99 &style_sheet,
100 false,
101 0,
102 &mut doc,
103 &id_map,
104 )?;
105
106 match doc.root().first_element_child() {
108 Some(child) => {
109 if child.tag_name() != Some(EId::Svg) {
110 return Err(roxmltree::Error::NoRootNode);
111 }
112 }
113 None => return Err(roxmltree::Error::NoRootNode),
114 }
115
116 let mut links = HashMap::new();
118 for node in doc.descendants() {
119 if let Some(id) = node.attribute::<&str>(AId::Id) {
120 links.insert(id.to_string(), node.id);
121 }
122 }
123 doc.links = links;
124
125 fix_recursive_patterns(&mut doc);
126 fix_recursive_links(EId::ClipPath, AId::ClipPath, &mut doc);
127 fix_recursive_links(EId::Mask, AId::Mask, &mut doc);
128 fix_recursive_links(EId::Filter, AId::Filter, &mut doc);
129 fix_recursive_fe_image(&mut doc);
130
131 Ok(doc)
132}
133
134pub(crate) fn parse_tag_name(node: roxmltree::Node) -> Option<EId> {
135 if !node.is_element() {
136 return None;
137 }
138
139 if !matches!(node.tag_name().namespace(), None | Some(SVG_NS)) {
140 return None;
141 }
142
143 EId::from_str(node.tag_name().name())
144}
145
146fn parse_xml_node_children<'input>(
147 parent: roxmltree::Node<'_, 'input>,
148 origin: roxmltree::Node,
149 parent_id: NodeId,
150 style_sheet: &simplecss::StyleSheet,
151 ignore_ids: bool,
152 depth: u32,
153 doc: &mut Document<'input>,
154 id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
155) -> Result<(), Error> {
156 for node in parent.children() {
157 parse_xml_node(
158 node,
159 origin,
160 parent_id,
161 style_sheet,
162 ignore_ids,
163 depth,
164 doc,
165 id_map,
166 )?;
167 }
168
169 Ok(())
170}
171
172fn parse_xml_node<'input>(
173 node: roxmltree::Node<'_, 'input>,
174 origin: roxmltree::Node,
175 parent_id: NodeId,
176 style_sheet: &simplecss::StyleSheet,
177 ignore_ids: bool,
178 depth: u32,
179 doc: &mut Document<'input>,
180 id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
181) -> Result<(), Error> {
182 if depth > 1024 {
183 return Err(Error::NodesLimitReached);
184 }
185
186 let mut tag_name = match parse_tag_name(node) {
187 Some(id) => id,
188 None => return Ok(()),
189 };
190
191 if tag_name == EId::Style {
192 return Ok(());
193 }
194
195 if tag_name == EId::A {
198 tag_name = EId::G;
199 }
200
201 let node_id = parse_svg_element(node, parent_id, tag_name, style_sheet, ignore_ids, doc)?;
202 if tag_name == EId::Text {
203 super::text::parse_svg_text_element(node, node_id, style_sheet, doc)?;
204 } else if tag_name == EId::Use {
205 parse_svg_use_element(node, origin, node_id, style_sheet, depth + 1, doc, id_map)?;
206 } else {
207 parse_xml_node_children(
208 node,
209 origin,
210 node_id,
211 style_sheet,
212 ignore_ids,
213 depth + 1,
214 doc,
215 id_map,
216 )?;
217 }
218
219 Ok(())
220}
221
222pub(crate) fn parse_svg_element<'input>(
223 xml_node: roxmltree::Node<'_, 'input>,
224 parent_id: NodeId,
225 tag_name: EId,
226 style_sheet: &simplecss::StyleSheet,
227 ignore_ids: bool,
228 doc: &mut Document<'input>,
229) -> Result<NodeId, Error> {
230 let attrs_start_idx = doc.attrs.len();
231
232 for attr in xml_node.attributes() {
234 match attr.namespace() {
235 None | Some(SVG_NS) | Some(XLINK_NS) | Some(XML_NAMESPACE_NS) => {}
236 _ => continue,
237 }
238
239 let aid = match AId::from_str(attr.name()) {
240 Some(v) => v,
241 None => continue,
242 };
243
244 if ignore_ids && aid == AId::Id {
247 continue;
248 }
249
250 if matches!(aid, AId::MixBlendMode | AId::Isolation | AId::FontKerning) {
252 continue;
253 } else if aid == AId::ImageRendering
254 && matches!(
255 attr.value(),
256 "smooth" | "high-quality" | "crisp-edges" | "pixelated"
257 )
258 {
259 continue;
260 }
261
262 append_attribute(
263 parent_id,
264 tag_name,
265 aid,
266 attr.value_storage().clone(),
267 false,
268 doc,
269 );
270 }
271
272 let mut insert_attribute = |aid, value: &str, important: bool| {
273 let idx = doc.attrs[attrs_start_idx..]
275 .iter_mut()
276 .position(|a| a.name == aid);
277
278 let added = append_attribute(
280 parent_id,
281 tag_name,
282 aid,
283 roxmltree::StringStorage::new_owned(value),
284 important,
285 doc,
286 );
287
288 if added {
290 if let Some(idx) = idx {
291 let last_idx = doc.attrs.len() - 1;
292 let existing_idx = attrs_start_idx + idx;
293
294 let has_precedence = !doc.attrs[existing_idx].important;
310
311 if has_precedence {
312 doc.attrs.swap(existing_idx, last_idx);
313 }
314
315 doc.attrs.pop();
317 }
318 }
319 };
320
321 let mut write_declaration = |declaration: &Declaration| {
322 let imp = declaration.important;
324 let val = declaration.value;
325
326 if declaration.name == "marker" {
327 insert_attribute(AId::MarkerStart, val, imp);
328 insert_attribute(AId::MarkerMid, val, imp);
329 insert_attribute(AId::MarkerEnd, val, imp);
330 } else if declaration.name == "font" {
331 if let Ok(shorthand) = FontShorthand::from_str(val) {
332 insert_attribute(AId::FontStyle, "normal", imp);
334 insert_attribute(AId::FontVariant, "normal", imp);
335 insert_attribute(AId::FontWeight, "normal", imp);
336 insert_attribute(AId::FontStretch, "normal", imp);
337 insert_attribute(AId::LineHeight, "normal", imp);
338 insert_attribute(AId::FontSizeAdjust, "none", imp);
339 insert_attribute(AId::FontKerning, "auto", imp);
340 insert_attribute(AId::FontVariantCaps, "normal", imp);
341 insert_attribute(AId::FontVariantLigatures, "normal", imp);
342 insert_attribute(AId::FontVariantNumeric, "normal", imp);
343 insert_attribute(AId::FontVariantEastAsian, "normal", imp);
344 insert_attribute(AId::FontVariantPosition, "normal", imp);
345
346 shorthand
348 .font_stretch
349 .map(|s| insert_attribute(AId::FontStretch, s, imp));
350 shorthand
351 .font_weight
352 .map(|s| insert_attribute(AId::FontWeight, s, imp));
353 shorthand
354 .font_variant
355 .map(|s| insert_attribute(AId::FontVariant, s, imp));
356 shorthand
357 .font_style
358 .map(|s| insert_attribute(AId::FontStyle, s, imp));
359 insert_attribute(AId::FontSize, shorthand.font_size, imp);
360 insert_attribute(AId::FontFamily, shorthand.font_family, imp);
361 } else {
362 log::warn!(
363 "Failed to parse {} value: '{}'",
364 AId::Font,
365 declaration.value
366 );
367 }
368 } else if let Some(aid) = AId::from_str(declaration.name) {
369 if aid.is_presentation() {
371 insert_attribute(aid, val, imp);
372 }
373 }
374 };
375
376 for rule in &style_sheet.rules {
378 if rule.selector.matches(&XmlNode(xml_node)) {
379 for declaration in &rule.declarations {
380 write_declaration(declaration);
381 }
382 }
383 }
384
385 if let Some(value) = xml_node.attribute("style") {
387 for declaration in simplecss::DeclarationTokenizer::from(value) {
388 write_declaration(&declaration);
389 }
390 }
391
392 if doc.nodes.len() > 1_000_000 {
393 return Err(Error::NodesLimitReached);
394 }
395
396 let node_id = doc.append(
397 parent_id,
398 NodeKind::Element {
399 tag_name,
400 attributes: ShortRange::new(attrs_start_idx as u32, doc.attrs.len() as u32),
401 },
402 );
403
404 Ok(node_id)
405}
406
407fn append_attribute<'input>(
408 parent_id: NodeId,
409 tag_name: EId,
410 aid: AId,
411 value: roxmltree::StringStorage<'input>,
412 important: bool,
413 doc: &mut Document<'input>,
414) -> bool {
415 match aid {
416 AId::Style |
418 AId::Class => return false,
420 _ => {}
421 }
422
423 if tag_name == EId::Tspan && aid == AId::Href {
426 return false;
427 }
428
429 if aid.allows_inherit_value() && &*value == "inherit" {
430 return resolve_inherit(parent_id, aid, doc);
431 }
432
433 doc.append_attribute(aid, value, important);
434 true
435}
436
437fn resolve_inherit(parent_id: NodeId, aid: AId, doc: &mut Document) -> bool {
438 if aid.is_inheritable() {
439 let node_id = doc
441 .get(parent_id)
442 .ancestors()
443 .find(|n| n.has_attribute(aid))
444 .map(|n| n.id);
445 if let Some(node_id) = node_id {
446 if let Some(attr) = doc
447 .get(node_id)
448 .attributes()
449 .iter()
450 .find(|a| a.name == aid)
451 .cloned()
452 {
453 doc.attrs.push(Attribute {
454 name: aid,
455 value: attr.value,
456 important: attr.important,
457 });
458
459 return true;
460 }
461 }
462 } else {
463 if let Some(attr) = doc
465 .get(parent_id)
466 .attributes()
467 .iter()
468 .find(|a| a.name == aid)
469 .cloned()
470 {
471 doc.attrs.push(Attribute {
472 name: aid,
473 value: attr.value,
474 important: attr.important,
475 });
476
477 return true;
478 }
479 }
480
481 let value = match aid {
483 AId::ImageRendering | AId::ShapeRendering | AId::TextRendering => "auto",
484
485 AId::ClipPath
486 | AId::Filter
487 | AId::MarkerEnd
488 | AId::MarkerMid
489 | AId::MarkerStart
490 | AId::Mask
491 | AId::Stroke
492 | AId::StrokeDasharray
493 | AId::TextDecoration => "none",
494
495 AId::FontStretch
496 | AId::FontStyle
497 | AId::FontVariant
498 | AId::FontWeight
499 | AId::LetterSpacing
500 | AId::WordSpacing => "normal",
501
502 AId::Fill | AId::FloodColor | AId::StopColor => "black",
503
504 AId::FillOpacity
505 | AId::FloodOpacity
506 | AId::Opacity
507 | AId::StopOpacity
508 | AId::StrokeOpacity => "1",
509
510 AId::ClipRule | AId::FillRule => "nonzero",
511
512 AId::BaselineShift => "baseline",
513 AId::ColorInterpolationFilters => "linearRGB",
514 AId::Direction => "ltr",
515 AId::Display => "inline",
516 AId::FontSize => "medium",
517 AId::Overflow => "visible",
518 AId::StrokeDashoffset => "0",
519 AId::StrokeLinecap => "butt",
520 AId::StrokeLinejoin => "miter",
521 AId::StrokeMiterlimit => "4",
522 AId::StrokeWidth => "1",
523 AId::TextAnchor => "start",
524 AId::Visibility => "visible",
525 AId::WritingMode => "lr-tb",
526 _ => return false,
527 };
528
529 doc.append_attribute(aid, roxmltree::StringStorage::Borrowed(value), false);
530 true
531}
532
533fn resolve_href<'a, 'input: 'a>(
534 node: roxmltree::Node<'a, 'input>,
535 id_map: &HashMap<&str, roxmltree::Node<'a, 'input>>,
536) -> Option<roxmltree::Node<'a, 'input>> {
537 let link_value = node
538 .attribute((XLINK_NS, "href"))
539 .or_else(|| node.attribute("href"))?;
540
541 let link_id = svgtypes::IRI::from_str(link_value).ok()?.0;
542
543 id_map.get(link_id).copied()
544}
545
546fn parse_svg_use_element<'input>(
547 node: roxmltree::Node<'_, 'input>,
548 origin: roxmltree::Node,
549 parent_id: NodeId,
550 style_sheet: &simplecss::StyleSheet,
551 depth: u32,
552 doc: &mut Document<'input>,
553 id_map: &HashMap<&str, roxmltree::Node<'_, 'input>>,
554) -> Result<(), Error> {
555 let link = match resolve_href(node, id_map) {
556 Some(v) => v,
557 None => return Ok(()),
558 };
559
560 if link == node || link == origin {
561 log::warn!(
562 "Recursive 'use' detected. '{}' will be skipped.",
563 node.attribute((SVG_NS, "id")).unwrap_or_default()
564 );
565 return Ok(());
566 }
567
568 if parse_tag_name(link).is_none() {
570 return Ok(());
571 }
572
573 let mut is_recursive = false;
591 for link_child in link
592 .descendants()
593 .skip(1)
594 .filter(|n| n.has_tag_name((SVG_NS, "use")))
595 {
596 if let Some(link2) = resolve_href(link_child, id_map) {
597 if link2 == node || link2 == link {
598 is_recursive = true;
599 break;
600 }
601 }
602 }
603
604 if is_recursive {
605 log::warn!(
606 "Recursive 'use' detected. '{}' will be skipped.",
607 node.attribute((SVG_NS, "id")).unwrap_or_default()
608 );
609 return Ok(());
610 }
611
612 parse_xml_node(
613 link,
614 node,
615 parent_id,
616 style_sheet,
617 true,
618 depth + 1,
619 doc,
620 id_map,
621 )
622}
623
624fn resolve_css<'a>(
625 xml: &'a roxmltree::Document<'a>,
626 style_sheet: Option<&'a str>,
627) -> simplecss::StyleSheet<'a> {
628 let mut sheet = simplecss::StyleSheet::new();
629
630 if let Some(style_sheet) = style_sheet {
633 sheet.parse_more(style_sheet);
634 }
635
636 for node in xml.descendants().filter(|n| n.has_tag_name("style")) {
637 match node.attribute("type") {
638 Some("text/css") => {}
639 Some(_) => continue,
640 None => {}
641 }
642
643 let text = match node.text() {
644 Some(v) => v,
645 None => continue,
646 };
647
648 sheet.parse_more(text);
649 }
650
651 sheet
652}
653
654struct XmlNode<'a, 'input: 'a>(roxmltree::Node<'a, 'input>);
655
656impl simplecss::Element for XmlNode<'_, '_> {
657 fn parent_element(&self) -> Option<Self> {
658 self.0.parent_element().map(XmlNode)
659 }
660
661 fn prev_sibling_element(&self) -> Option<Self> {
662 self.0.prev_sibling_element().map(XmlNode)
663 }
664
665 fn has_local_name(&self, local_name: &str) -> bool {
666 self.0.tag_name().name() == local_name
667 }
668
669 fn attribute_matches(&self, local_name: &str, operator: simplecss::AttributeOperator) -> bool {
670 match self.0.attribute(local_name) {
671 Some(value) => operator.matches(value),
672 None => false,
673 }
674 }
675
676 fn pseudo_class_matches(&self, class: simplecss::PseudoClass) -> bool {
677 match class {
678 simplecss::PseudoClass::FirstChild => self.prev_sibling_element().is_none(),
679 _ => false, }
682 }
683}
684
685fn fix_recursive_patterns(doc: &mut Document) {
686 while let Some(node_id) = find_recursive_pattern(AId::Fill, doc) {
687 let idx = doc.get(node_id).attribute_id(AId::Fill).unwrap();
688 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
689 }
690
691 while let Some(node_id) = find_recursive_pattern(AId::Stroke, doc) {
692 let idx = doc.get(node_id).attribute_id(AId::Stroke).unwrap();
693 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
694 }
695}
696
697fn find_recursive_pattern(aid: AId, doc: &mut Document) -> Option<NodeId> {
698 for pattern_node in doc
699 .root()
700 .descendants()
701 .filter(|n| n.tag_name() == Some(EId::Pattern))
702 {
703 for node in pattern_node.descendants() {
704 let value = match node.attribute(aid) {
705 Some(v) => v,
706 None => continue,
707 };
708
709 if let Ok(svgtypes::Paint::FuncIRI(link_id, _)) = svgtypes::Paint::from_str(value) {
710 if link_id == pattern_node.element_id() {
711 return Some(node.id);
715 } else {
716 if let Some(linked_node) = doc.element_by_id(link_id) {
718 for node2 in linked_node.descendants() {
719 let value2 = match node2.attribute(aid) {
720 Some(v) => v,
721 None => continue,
722 };
723
724 if let Ok(svgtypes::Paint::FuncIRI(link_id2, _)) =
725 svgtypes::Paint::from_str(value2)
726 {
727 if link_id2 == pattern_node.element_id() {
728 return Some(node2.id);
729 }
730 }
731 }
732 }
733 }
734 }
735 }
736 }
737
738 None
739}
740
741fn fix_recursive_links(eid: EId, aid: AId, doc: &mut Document) {
742 while let Some(node_id) = find_recursive_link(eid, aid, doc) {
743 let idx = doc.get(node_id).attribute_id(aid).unwrap();
744 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
745 }
746}
747
748fn find_recursive_link(eid: EId, aid: AId, doc: &Document) -> Option<NodeId> {
749 for node in doc
750 .root()
751 .descendants()
752 .filter(|n| n.tag_name() == Some(eid))
753 {
754 for child in node.descendants() {
755 if let Some(link) = child.node_attribute(aid) {
756 if link == node {
757 return Some(child.id);
761 } else {
762 for node2 in link.descendants() {
764 if let Some(link2) = node2.node_attribute(aid) {
765 if link2 == node {
766 return Some(node2.id);
767 }
768 }
769 }
770 }
771 }
772 }
773 }
774
775 None
776}
777
778fn fix_recursive_fe_image(doc: &mut Document) {
787 let mut ids = Vec::new();
788 for fe_node in doc
789 .root()
790 .descendants()
791 .filter(|n| n.tag_name() == Some(EId::FeImage))
792 {
793 if let Some(link) = fe_node.node_attribute(AId::Href) {
794 if let Some(filter_uri) = link.attribute::<&str>(AId::Filter) {
795 let filter_id = fe_node.parent().unwrap().element_id();
796 for func in svgtypes::FilterValueListParser::from(filter_uri).flatten() {
797 if let svgtypes::FilterValue::Url(url) = func {
798 if url == filter_id {
799 ids.push(link.id);
800 }
801 }
802 }
803 }
804 }
805 }
806
807 for id in ids {
808 let idx = doc.get(id).attribute_id(AId::Filter).unwrap();
809 doc.attrs[idx].value = roxmltree::StringStorage::Borrowed("none");
810 }
811}