1mod types;
11
12use log::{debug, warn};
13use markup5ever::{local_name, namespace_prefix, ns};
14use std::borrow::Cow;
15use std::borrow::Cow::Borrowed;
16use std::cell::{Cell, Ref, RefCell};
17use std::collections::btree_map::Iter;
18use std::collections::{BTreeMap, HashSet, VecDeque};
19use std::fmt::{Debug, Error, Formatter};
20use std::mem;
21
22pub use self::interface::{ElemName, NodeOrText, Tracer, TreeSink};
23use self::types::*;
24use crate::interface::{self, create_element, AppendNode, Attribute, QualName};
25use crate::interface::{AppendText, ExpandedName};
26use crate::tokenizer::{self, EndTag, ProcessResult, StartTag, Tag, TokenSink};
27use crate::tokenizer::{Doctype, EmptyTag, Pi, ShortTag};
28use crate::{LocalName, Namespace, Prefix};
29
30use crate::tendril::{StrTendril, Tendril};
31
/// Namespace URI that the `xmlns:xml` declaration is required to carry.
static XML_URI: &str = "http://www.w3.org/XML/1998/namespace";
/// Namespace URI that documents are never allowed to declare themselves.
static XMLNS_URI: &str = "http://www.w3.org/2000/xmlns/";

/// Outcome of recording a namespace declaration: `Ok(())` on success, or the
/// parse-error message to report on failure.
type InsResult = Result<(), Cow<'static, str>>;
36
37#[derive(Debug)]
38struct NamespaceMapStack(Vec<NamespaceMap>);
39
40impl NamespaceMapStack {
41 fn new() -> NamespaceMapStack {
42 NamespaceMapStack(vec![NamespaceMap::default()])
43 }
44
45 fn push(&mut self, map: NamespaceMap) {
46 self.0.push(map);
47 }
48
49 fn pop(&mut self) {
50 self.0.pop();
51 }
52}
53
/// One namespace scope: the prefix-to-namespace bindings declared together.
///
/// A `None` key is the binding for names without a prefix; a `None` value
/// means the prefix is bound to no namespace.
pub(crate) struct NamespaceMap {
    scope: BTreeMap<Option<Prefix>, Option<Namespace>>,
}
64
65impl Debug for NamespaceMap {
66 fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
67 write!(f, "\nNamespaceMap[")?;
68 for (key, value) in &self.scope {
69 writeln!(f, " {key:?} : {value:?}")?;
70 }
71 write!(f, "]")
72 }
73}
74
75impl NamespaceMap {
76 pub(crate) fn empty() -> NamespaceMap {
78 NamespaceMap {
79 scope: BTreeMap::new(),
80 }
81 }
82
83 fn default() -> NamespaceMap {
84 NamespaceMap {
85 scope: {
86 let mut map = BTreeMap::new();
87 map.insert(None, None);
88 map.insert(Some(namespace_prefix!("xml")), Some(ns!(xml)));
89 map.insert(Some(namespace_prefix!("xmlns")), Some(ns!(xmlns)));
90 map
91 },
92 }
93 }
94
95 pub(crate) fn get(&self, prefix: &Option<Prefix>) -> Option<&Option<Namespace>> {
96 self.scope.get(prefix)
97 }
98
99 pub(crate) fn get_scope_iter(&self) -> Iter<'_, Option<Prefix>, Option<Namespace>> {
100 self.scope.iter()
101 }
102
103 pub(crate) fn insert(&mut self, name: &QualName) {
104 let prefix = name.prefix.as_ref().cloned();
105 let namespace = Some(Namespace::from(&*name.ns));
106 self.scope.insert(prefix, namespace);
107 }
108
109 fn insert_ns(&mut self, attr: &Attribute) -> InsResult {
110 if &*attr.value == XMLNS_URI {
111 return Err(Borrowed("Can't declare XMLNS URI"));
112 };
113
114 let opt_uri = if attr.value.is_empty() {
115 None
116 } else {
117 Some(Namespace::from(&*attr.value))
118 };
119
120 let result = match (&attr.name.prefix, &*attr.name.local) {
121 (&Some(namespace_prefix!("xmlns")), "xml") => {
122 if &*attr.value != XML_URI {
123 Err(Borrowed("XML namespace can't be redeclared"))
124 } else {
125 Ok(())
126 }
127 },
128
129 (&Some(namespace_prefix!("xmlns")), "xmlns") => {
130 Err(Borrowed("XMLNS namespaces can't be changed"))
131 },
132
133 (&Some(namespace_prefix!("xmlns")), _) | (&None, "xmlns") => {
134 let ns_prefix = if &*attr.name.local == "xmlns" {
139 None
140
141 } else {
145 Some(Prefix::from(&*attr.name.local))
146 };
147
148 if opt_uri.is_some() && self.scope.contains_key(&ns_prefix) {
149 Err(Borrowed("Namespace already defined"))
150 } else {
151 self.scope.insert(ns_prefix, opt_uri);
152 Ok(())
153 }
154 },
155
156 (_, _) => Err(Borrowed("Invalid namespace declaration.")),
157 };
158 result
159 }
160}
161
/// Options for the XML tree builder. The struct currently has no fields.
#[derive(Copy, Clone, Default)]
pub struct XmlTreeBuilderOpts {}
165
/// XML tree builder: receives tokens and turns them into calls on a sink.
pub struct XmlTreeBuilder<Handle, Sink> {
    /// Options supplied at construction; stored but not read by the code in this file.
    _opts: XmlTreeBuilderOpts,

    /// Consumer that receives tree modifications and parse errors.
    pub sink: Sink,

    /// Handle of the document node, obtained from `sink.get_document()`.
    doc_handle: Handle,

    /// Stack of open elements; the last entry is the current node.
    open_elems: RefCell<Vec<Handle>>,

    /// Initialised to `None`; in this file it is only visited by `trace_handles`.
    curr_elem: RefCell<Option<Handle>>,

    /// Namespace scopes that have been pushed for open elements, innermost last.
    namespace_stack: RefCell<NamespaceMapStack>,

    /// Namespace declarations gathered for the tag currently being processed.
    current_namespace: RefCell<NamespaceMap>,

    /// Current phase of tree construction.
    phase: Cell<XmlPhase>,
}
192impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
193where
194 Handle: Clone,
195 Sink: TreeSink<Handle = Handle>,
196{
197 pub fn new(sink: Sink, opts: XmlTreeBuilderOpts) -> XmlTreeBuilder<Handle, Sink> {
201 let doc_handle = sink.get_document();
202 XmlTreeBuilder {
203 _opts: opts,
204 sink,
205 doc_handle,
206 open_elems: RefCell::new(vec![]),
207 curr_elem: RefCell::new(None),
208 namespace_stack: RefCell::new(NamespaceMapStack::new()),
209 current_namespace: RefCell::new(NamespaceMap::empty()),
210 phase: Cell::new(XmlPhase::Start),
211 }
212 }
213
214 pub fn trace_handles(&self, tracer: &dyn Tracer<Handle = Handle>) {
217 tracer.trace_handle(&self.doc_handle);
218 for e in self.open_elems.borrow().iter() {
219 tracer.trace_handle(e);
220 }
221 if let Some(h) = self.curr_elem.borrow().as_ref() {
222 tracer.trace_handle(h);
223 }
224 }
225
226 #[cfg(not(for_c))]
228 #[allow(dead_code)]
229 fn dump_state(&self, label: String) {
230 debug!("dump_state on {label}");
231 debug!(" open_elems:");
232 for node in self.open_elems.borrow().iter() {
233 debug!(" {:?}", self.sink.elem_name(node));
234 }
235 debug!("");
236 }
237
238 #[cfg(for_c)]
239 fn debug_step(&self, _mode: XmlPhase, _token: &Token) {}
240
241 #[cfg(not(for_c))]
242 fn debug_step(&self, mode: XmlPhase, token: &Token) {
243 debug!(
244 "processing {:?} in insertion mode {:?}",
245 format!("{:?}", token),
246 mode
247 );
248 }
249
250 fn declare_ns(&self, attr: &mut Attribute) {
251 if let Err(msg) = self.current_namespace.borrow_mut().insert_ns(attr) {
252 self.sink.parse_error(msg);
253 } else {
254 attr.name.ns = ns!(xmlns);
255 }
256 }
257
258 fn find_uri(&self, prefix: &Option<Prefix>) -> Result<Option<Namespace>, Cow<'static, str>> {
259 let mut uri = Err(Borrowed("No appropriate namespace found"));
260
261 let current_namespace = self.current_namespace.borrow();
262 for ns in self
263 .namespace_stack
264 .borrow()
265 .0
266 .iter()
267 .chain(Some(&*current_namespace))
268 .rev()
269 {
270 if let Some(el) = ns.get(prefix) {
271 uri = Ok(el.clone());
272 break;
273 }
274 }
275 uri
276 }
277
278 fn bind_qname(&self, name: &mut QualName) {
279 match self.find_uri(&name.prefix) {
280 Ok(uri) => {
281 let ns_uri = match uri {
282 Some(e) => e,
283 None => ns!(),
284 };
285 name.ns = ns_uri;
286 },
287 Err(msg) => {
288 self.sink.parse_error(msg);
289 },
290 }
291 }
292
293 fn bind_attr_qname(
298 &self,
299 present_attrs: &mut HashSet<(Namespace, LocalName)>,
300 name: &mut QualName,
301 ) -> bool {
302 let mut not_duplicate = true;
304
305 if name.prefix.is_some() {
306 self.bind_qname(name);
307 not_duplicate = Self::check_duplicate_attr(present_attrs, name);
308 }
309 not_duplicate
310 }
311
312 fn check_duplicate_attr(
313 present_attrs: &mut HashSet<(Namespace, LocalName)>,
314 name: &QualName,
315 ) -> bool {
316 let pair = (name.ns.clone(), name.local.clone());
317
318 if present_attrs.contains(&pair) {
319 return false;
320 }
321 present_attrs.insert(pair);
322 true
323 }
324
325 fn process_namespaces(&self, tag: &mut Tag) {
326 let mut present_attrs: HashSet<(Namespace, LocalName)> = Default::default();
328
329 let mut new_attr = vec![];
330 for attr in tag.attrs.iter_mut().filter(|attr| {
332 attr.name.prefix == Some(namespace_prefix!("xmlns"))
333 || attr.name.local == local_name!("xmlns")
334 }) {
335 self.declare_ns(attr);
336 }
337
338 for attr in tag.attrs.iter_mut().filter(|attr| {
340 attr.name.prefix != Some(namespace_prefix!("xmlns"))
341 && attr.name.local != local_name!("xmlns")
342 }) {
343 if self.bind_attr_qname(&mut present_attrs, &mut attr.name) {
344 new_attr.push(attr.clone());
345 }
346 }
347 tag.attrs = new_attr;
348
349 self.bind_qname(&mut tag.name);
351
352 let x = mem::replace(
354 &mut *self.current_namespace.borrow_mut(),
355 NamespaceMap::empty(),
356 );
357
358 if tag.kind == StartTag || (tag.kind == EmptyTag && tag.name.local == local_name!("script"))
362 {
363 self.namespace_stack.borrow_mut().push(x);
364 }
365 }
366
367 fn process_to_completion(
368 &self,
369 mut token: Token,
370 ) -> ProcessResult<<Self as TokenSink>::Handle> {
371 let mut more_tokens = VecDeque::new();
374
375 loop {
376 let phase = self.phase.get();
377
378 #[allow(clippy::unused_unit)]
379 match self.step(phase, token) {
380 XmlProcessResult::Done => {
381 let Some(popped_token) = more_tokens.pop_front() else {
382 return ProcessResult::Continue;
383 };
384 token = popped_token;
385 },
386 XmlProcessResult::Reprocess(m, t) => {
387 self.phase.set(m);
388 token = t;
389 },
390 XmlProcessResult::Script(node) => {
391 assert!(more_tokens.is_empty());
392 return ProcessResult::Script(node);
393 },
394 }
395 }
396 }
397}
398
399impl<Handle, Sink> TokenSink for XmlTreeBuilder<Handle, Sink>
400where
401 Handle: Clone,
402 Sink: TreeSink<Handle = Handle>,
403{
404 type Handle = Handle;
405
406 fn process_token(&self, token: tokenizer::Token) -> ProcessResult<Self::Handle> {
407 let token = match token {
409 tokenizer::Token::ParseError(e) => {
410 self.sink.parse_error(e);
411 return ProcessResult::Done;
412 },
413
414 tokenizer::Token::Doctype(d) => Token::Doctype(d),
415 tokenizer::Token::ProcessingInstruction(instruction) => Token::Pi(instruction),
416 tokenizer::Token::Tag(x) => Token::Tag(x),
417 tokenizer::Token::Comment(x) => Token::Comment(x),
418 tokenizer::Token::NullCharacter => Token::NullCharacter,
419 tokenizer::Token::EndOfFile => Token::Eof,
420 tokenizer::Token::Characters(x) => Token::Characters(x),
421 };
422
423 self.process_to_completion(token)
424 }
425
426 fn end(&self) {
427 for node in self.open_elems.borrow_mut().drain(..).rev() {
428 self.sink.pop(&node);
429 }
430 }
431}
432
/// Returns the last element of `open_elems`, i.e. the current node.
///
/// # Panics
///
/// Panics with "no current element" when `open_elems` is empty.
fn current_node<Handle>(open_elems: &[Handle]) -> &Handle {
    match open_elems.split_last() {
        Some((innermost, _)) => innermost,
        None => panic!("no current element"),
    }
}
436
437impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
438where
439 Handle: Clone,
440 Sink: TreeSink<Handle = Handle>,
441{
442 fn current_node(&self) -> Ref<'_, Handle> {
443 Ref::map(self.open_elems.borrow(), |elems| {
444 elems.last().expect("no current element")
445 })
446 }
447
448 fn insert_appropriately(&self, child: NodeOrText<Handle>) {
449 let open_elems = self.open_elems.borrow();
450 let target = current_node(&open_elems);
451 self.sink.append(target, child);
452 }
453
454 fn insert_tag(&self, tag: Tag) -> XmlProcessResult<Handle> {
455 let child = create_element(&self.sink, tag.name, tag.attrs);
456 self.insert_appropriately(AppendNode(child.clone()));
457 self.add_to_open_elems(child)
458 }
459
460 fn append_tag(&self, tag: Tag) -> XmlProcessResult<Handle> {
461 let child = create_element(&self.sink, tag.name, tag.attrs);
462 self.insert_appropriately(AppendNode(child.clone()));
463 self.sink.pop(&child);
464 XmlProcessResult::Done
465 }
466
467 fn append_tag_to_doc(&self, tag: Tag) -> Handle {
468 let child = create_element(&self.sink, tag.name, tag.attrs);
469
470 self.sink
471 .append(&self.doc_handle, AppendNode(child.clone()));
472 child
473 }
474
475 fn add_to_open_elems(&self, el: Handle) -> XmlProcessResult<Handle> {
476 self.open_elems.borrow_mut().push(el);
477
478 XmlProcessResult::Done
479 }
480
481 fn append_comment_to_doc(&self, text: StrTendril) -> XmlProcessResult<Handle> {
482 let comment = self.sink.create_comment(text);
483 self.sink.append(&self.doc_handle, AppendNode(comment));
484 XmlProcessResult::Done
485 }
486
487 fn append_comment_to_tag(&self, text: StrTendril) -> XmlProcessResult<Handle> {
488 let open_elems = self.open_elems.borrow();
489 let target = current_node(&open_elems);
490 let comment = self.sink.create_comment(text);
491 self.sink.append(target, AppendNode(comment));
492 XmlProcessResult::Done
493 }
494
495 fn append_doctype_to_doc(&self, doctype: Doctype) -> XmlProcessResult<Handle> {
496 fn get_tendril(opt: Option<StrTendril>) -> StrTendril {
497 match opt {
498 Some(expr) => expr,
499 None => Tendril::new(),
500 }
501 }
502 self.sink.append_doctype_to_document(
503 get_tendril(doctype.name),
504 get_tendril(doctype.public_id),
505 get_tendril(doctype.system_id),
506 );
507 XmlProcessResult::Done
508 }
509
510 fn append_pi_to_doc(&self, pi: Pi) -> XmlProcessResult<Handle> {
511 let pi = self.sink.create_pi(pi.target, pi.data);
512 self.sink.append(&self.doc_handle, AppendNode(pi));
513 XmlProcessResult::Done
514 }
515
516 fn append_pi_to_tag(&self, pi: Pi) -> XmlProcessResult<Handle> {
517 let open_elems = self.open_elems.borrow();
518 let target = current_node(&open_elems);
519 let pi = self.sink.create_pi(pi.target, pi.data);
520 self.sink.append(target, AppendNode(pi));
521 XmlProcessResult::Done
522 }
523
524 fn append_text(&self, chars: StrTendril) -> XmlProcessResult<Handle> {
525 self.insert_appropriately(AppendText(chars));
526 XmlProcessResult::Done
527 }
528
529 fn tag_in_open_elems(&self, tag: &Tag) -> bool {
530 self.open_elems
531 .borrow()
532 .iter()
533 .any(|a| self.sink.elem_name(a).expanded() == tag.name.expanded())
534 }
535
536 fn pop_until<P>(&self, pred: P)
538 where
539 P: Fn(ExpandedName) -> bool,
540 {
541 loop {
542 if self.current_node_in(&pred) {
543 break;
544 }
545 self.pop();
546 }
547 }
548
549 fn current_node_in<TagSet>(&self, set: TagSet) -> bool
550 where
551 TagSet: Fn(ExpandedName) -> bool,
552 {
553 set(self.sink.elem_name(&self.current_node()).expanded())
555 }
556
557 fn close_tag(&self, tag: Tag) -> XmlProcessResult<Handle> {
558 debug!(
559 "Close tag: current_node.name {:?} \n Current tag {:?}",
560 self.sink.elem_name(&self.current_node()),
561 &tag.name
562 );
563
564 if *self.sink.elem_name(&self.current_node()).local_name() != tag.name.local {
565 self.sink
566 .parse_error(Borrowed("Current node doesn't match tag"));
567 }
568
569 let is_closed = self.tag_in_open_elems(&tag);
570
571 if is_closed {
572 self.pop_until(|p| p == tag.name.expanded());
573 self.pop();
574 }
575
576 XmlProcessResult::Done
577 }
578
579 fn no_open_elems(&self) -> bool {
580 self.open_elems.borrow().is_empty()
581 }
582
583 fn pop(&self) -> Handle {
584 self.namespace_stack.borrow_mut().pop();
585 let node = self
586 .open_elems
587 .borrow_mut()
588 .pop()
589 .expect("no current element");
590 self.sink.pop(&node);
591 node
592 }
593
594 fn stop_parsing(&self) -> XmlProcessResult<Handle> {
595 warn!("stop_parsing for XML5 not implemented, full speed ahead!");
596 XmlProcessResult::Done
597 }
598}
599
600fn any_not_whitespace(x: &StrTendril) -> bool {
601 !x.bytes()
602 .all(|b| matches!(b, b'\t' | b'\r' | b'\n' | b'\x0C' | b' '))
603}
604
605impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
606where
607 Handle: Clone,
608 Sink: TreeSink<Handle = Handle>,
609{
610 fn step(&self, mode: XmlPhase, token: Token) -> XmlProcessResult<<Self as TokenSink>::Handle> {
611 self.debug_step(mode, &token);
612
613 match mode {
614 XmlPhase::Start => match token {
615 Token::Tag(Tag {
616 kind: StartTag,
617 name,
618 attrs,
619 }) => {
620 let tag = {
621 let mut tag = Tag {
622 kind: StartTag,
623 name,
624 attrs,
625 };
626 self.process_namespaces(&mut tag);
627 tag
628 };
629 self.phase.set(XmlPhase::Main);
630 let handle = self.append_tag_to_doc(tag);
631 self.add_to_open_elems(handle)
632 },
633 Token::Tag(Tag {
634 kind: EmptyTag,
635 name,
636 attrs,
637 }) => {
638 let tag = {
639 let mut tag = Tag {
640 kind: EmptyTag,
641 name,
642 attrs,
643 };
644 self.process_namespaces(&mut tag);
645 tag
646 };
647 self.phase.set(XmlPhase::End);
648 let handle = self.append_tag_to_doc(tag);
649 self.sink.pop(&handle);
650 XmlProcessResult::Done
651 },
652 Token::Comment(comment) => self.append_comment_to_doc(comment),
653 Token::Pi(pi) => self.append_pi_to_doc(pi),
654 Token::Characters(ref chars) if !any_not_whitespace(chars) => {
655 XmlProcessResult::Done
656 },
657 Token::Eof => {
658 self.sink
659 .parse_error(Borrowed("Unexpected EOF in start phase"));
660 XmlProcessResult::Reprocess(XmlPhase::End, Token::Eof)
661 },
662 Token::Doctype(d) => {
663 self.append_doctype_to_doc(d);
664 XmlProcessResult::Done
665 },
666 _ => {
667 self.sink
668 .parse_error(Borrowed("Unexpected element in start phase"));
669 XmlProcessResult::Done
670 },
671 },
672 XmlPhase::Main => match token {
673 Token::Characters(chs) => self.append_text(chs),
674 Token::Tag(Tag {
675 kind: StartTag,
676 name,
677 attrs,
678 }) => {
679 let tag = {
680 let mut tag = Tag {
681 kind: StartTag,
682 name,
683 attrs,
684 };
685 self.process_namespaces(&mut tag);
686 tag
687 };
688 self.insert_tag(tag)
689 },
690 Token::Tag(Tag {
691 kind: EmptyTag,
692 name,
693 attrs,
694 }) => {
695 let tag = {
696 let mut tag = Tag {
697 kind: EmptyTag,
698 name,
699 attrs,
700 };
701 self.process_namespaces(&mut tag);
702 tag
703 };
704 if tag.name.local == local_name!("script") {
705 self.insert_tag(tag.clone());
706 let script = current_node(&self.open_elems.borrow()).clone();
707 self.close_tag(tag);
708 XmlProcessResult::Script(script)
709 } else {
710 self.append_tag(tag)
711 }
712 },
713 Token::Tag(Tag {
714 kind: EndTag,
715 name,
716 attrs,
717 }) => {
718 let tag = {
719 let mut tag = Tag {
720 kind: EndTag,
721 name,
722 attrs,
723 };
724 self.process_namespaces(&mut tag);
725 tag
726 };
727 if tag.name.local == local_name!("script") {
728 let script = current_node(&self.open_elems.borrow()).clone();
729 self.close_tag(tag);
730 if self.no_open_elems() {
731 self.phase.set(XmlPhase::End);
732 }
733 return XmlProcessResult::Script(script);
734 }
735 let retval = self.close_tag(tag);
736 if self.no_open_elems() {
737 self.phase.set(XmlPhase::End);
738 }
739 retval
740 },
741 Token::Tag(Tag { kind: ShortTag, .. }) => {
742 self.pop();
743 if self.no_open_elems() {
744 self.phase.set(XmlPhase::End);
745 }
746 XmlProcessResult::Done
747 },
748 Token::Comment(comment) => self.append_comment_to_tag(comment),
749 Token::Pi(pi) => self.append_pi_to_tag(pi),
750 Token::Eof | Token::NullCharacter => {
751 XmlProcessResult::Reprocess(XmlPhase::End, Token::Eof)
752 },
753 Token::Doctype(_) => {
754 self.sink
755 .parse_error(Borrowed("Unexpected element in main phase"));
756 XmlProcessResult::Done
757 },
758 },
759 XmlPhase::End => match token {
760 Token::Comment(comment) => self.append_comment_to_doc(comment),
761 Token::Pi(pi) => self.append_pi_to_doc(pi),
762 Token::Characters(ref chars) if !any_not_whitespace(chars) => {
763 XmlProcessResult::Done
764 },
765 Token::Eof => self.stop_parsing(),
766 _ => {
767 self.sink
768 .parse_error(Borrowed("Unexpected element in end phase"));
769 XmlProcessResult::Done
770 },
771 },
772 }
773 }
774}