1mod types;
11
12use log::{debug, warn};
13use markup5ever::{local_name, namespace_prefix, ns};
14use std::borrow::Cow;
15use std::borrow::Cow::Borrowed;
16use std::cell::{Cell, Ref, RefCell};
17use std::collections::btree_map::Iter;
18use std::collections::{BTreeMap, HashSet, VecDeque};
19use std::fmt::{Debug, Error, Formatter};
20use std::mem;
21
22pub use self::interface::{ElemName, NodeOrText, Tracer, TreeSink};
23use self::types::*;
24use crate::interface::{self, create_element, AppendNode, Attribute, QualName};
25use crate::interface::{AppendText, ExpandedName};
26use crate::macros::unwrap_or_return;
27use crate::tokenizer::{self, EndTag, ProcessResult, StartTag, Tag, TokenSink};
28use crate::tokenizer::{Doctype, EmptyTag, Pi, ShortTag};
29use crate::{LocalName, Namespace, Prefix};
30
31use crate::tendril::{StrTendril, Tendril};
32
/// Namespace URI that an `xmlns:xml` declaration is required to carry.
static XML_URI: &str = "http://www.w3.org/XML/1998/namespace";
/// Namespace URI that no declaration is allowed to use as its value.
static XMLNS_URI: &str = "http://www.w3.org/2000/xmlns/";

/// Outcome of registering a namespace declaration: `Ok(())` when accepted,
/// otherwise a message explaining why it was rejected.
type InsResult = Result<(), Cow<'static, str>>;
37
38#[derive(Debug)]
39struct NamespaceMapStack(Vec<NamespaceMap>);
40
41impl NamespaceMapStack {
42 fn new() -> NamespaceMapStack {
43 NamespaceMapStack(vec![NamespaceMap::default()])
44 }
45
46 fn push(&mut self, map: NamespaceMap) {
47 self.0.push(map);
48 }
49
50 #[doc(hidden)]
51 pub fn pop(&mut self) {
52 self.0.pop();
53 }
54}
55
/// A single namespace scope: bindings from an optional prefix to an optional namespace.
#[doc(hidden)]
pub struct NamespaceMap {
    // Ordered map of the bindings; an un-prefixed `xmlns` declaration is stored under `None`.
    scope: BTreeMap<Option<Prefix>, Option<Namespace>>,
}
67
68impl Debug for NamespaceMap {
69 fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
70 write!(f, "\nNamespaceMap[")?;
71 for (key, value) in &self.scope {
72 writeln!(f, " {key:?} : {value:?}")?;
73 }
74 write!(f, "]")
75 }
76}
77
78impl NamespaceMap {
79 #[doc(hidden)]
81 pub fn empty() -> NamespaceMap {
82 NamespaceMap {
83 scope: BTreeMap::new(),
84 }
85 }
86
87 fn default() -> NamespaceMap {
88 NamespaceMap {
89 scope: {
90 let mut map = BTreeMap::new();
91 map.insert(None, None);
92 map.insert(Some(namespace_prefix!("xml")), Some(ns!(xml)));
93 map.insert(Some(namespace_prefix!("xmlns")), Some(ns!(xmlns)));
94 map
95 },
96 }
97 }
98
99 #[doc(hidden)]
100 pub fn get(&self, prefix: &Option<Prefix>) -> Option<&Option<Namespace>> {
101 self.scope.get(prefix)
102 }
103
104 #[doc(hidden)]
105 pub fn get_scope_iter(&self) -> Iter<'_, Option<Prefix>, Option<Namespace>> {
106 self.scope.iter()
107 }
108
109 #[doc(hidden)]
110 pub fn insert(&mut self, name: &QualName) {
111 let prefix = name.prefix.as_ref().cloned();
112 let namespace = Some(Namespace::from(&*name.ns));
113 self.scope.insert(prefix, namespace);
114 }
115
116 fn insert_ns(&mut self, attr: &Attribute) -> InsResult {
117 if &*attr.value == XMLNS_URI {
118 return Err(Borrowed("Can't declare XMLNS URI"));
119 };
120
121 let opt_uri = if attr.value.is_empty() {
122 None
123 } else {
124 Some(Namespace::from(&*attr.value))
125 };
126
127 let result = match (&attr.name.prefix, &*attr.name.local) {
128 (&Some(namespace_prefix!("xmlns")), "xml") => {
129 if &*attr.value != XML_URI {
130 Err(Borrowed("XML namespace can't be redeclared"))
131 } else {
132 Ok(())
133 }
134 },
135
136 (&Some(namespace_prefix!("xmlns")), "xmlns") => {
137 Err(Borrowed("XMLNS namespaces can't be changed"))
138 },
139
140 (&Some(namespace_prefix!("xmlns")), _) | (&None, "xmlns") => {
141 let ns_prefix = if &*attr.name.local == "xmlns" {
146 None
147
148 } else {
152 Some(Prefix::from(&*attr.name.local))
153 };
154
155 if opt_uri.is_some() && self.scope.contains_key(&ns_prefix) {
156 Err(Borrowed("Namespace already defined"))
157 } else {
158 self.scope.insert(ns_prefix, opt_uri);
159 Ok(())
160 }
161 },
162
163 (_, _) => Err(Borrowed("Invalid namespace declaration.")),
164 };
165 result
166 }
167}
168
/// Options accepted by `XmlTreeBuilder::new`; the struct currently has no fields.
#[derive(Copy, Clone, Default)]
pub struct XmlTreeBuilderOpts {}
172
/// Builds an XML tree by turning tokens into calls on a `TreeSink`.
pub struct XmlTreeBuilder<Handle, Sink> {
    /// Options given to `new`; stored but not read by the code in this file section.
    _opts: XmlTreeBuilderOpts,

    /// The sink that receives every tree modification and parse error.
    pub sink: Sink,

    /// Handle of the document node, obtained from the sink in `new`.
    doc_handle: Handle,

    /// Stack of currently open elements; the last entry is the current node.
    open_elems: RefCell<Vec<Handle>>,

    /// Starts out as `None`; within this file section it is only read by `trace_handles`.
    curr_elem: RefCell<Option<Handle>>,

    /// Namespace scopes consulted when resolving a prefix.
    namespace_stack: RefCell<NamespaceMapStack>,

    /// Namespace declarations collected for the tag currently being processed.
    current_namespace: RefCell<NamespaceMap>,

    /// Phase the builder is in; starts as `XmlPhase::Start`.
    phase: Cell<XmlPhase>,
}
199impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
200where
201 Handle: Clone,
202 Sink: TreeSink<Handle = Handle>,
203{
204 pub fn new(sink: Sink, opts: XmlTreeBuilderOpts) -> XmlTreeBuilder<Handle, Sink> {
208 let doc_handle = sink.get_document();
209 XmlTreeBuilder {
210 _opts: opts,
211 sink,
212 doc_handle,
213 open_elems: RefCell::new(vec![]),
214 curr_elem: RefCell::new(None),
215 namespace_stack: RefCell::new(NamespaceMapStack::new()),
216 current_namespace: RefCell::new(NamespaceMap::empty()),
217 phase: Cell::new(XmlPhase::Start),
218 }
219 }
220
221 pub fn trace_handles(&self, tracer: &dyn Tracer<Handle = Handle>) {
224 tracer.trace_handle(&self.doc_handle);
225 for e in self.open_elems.borrow().iter() {
226 tracer.trace_handle(e);
227 }
228 if let Some(h) = self.curr_elem.borrow().as_ref() {
229 tracer.trace_handle(h);
230 }
231 }
232
233 #[cfg(not(for_c))]
235 #[allow(dead_code)]
236 fn dump_state(&self, label: String) {
237 debug!("dump_state on {label}");
238 debug!(" open_elems:");
239 for node in self.open_elems.borrow().iter() {
240 debug!(" {:?}", self.sink.elem_name(node));
241 }
242 debug!("");
243 }
244
245 #[cfg(for_c)]
246 fn debug_step(&self, _mode: XmlPhase, _token: &Token) {}
247
248 #[cfg(not(for_c))]
249 fn debug_step(&self, mode: XmlPhase, token: &Token) {
250 debug!(
251 "processing {:?} in insertion mode {:?}",
252 format!("{:?}", token),
253 mode
254 );
255 }
256
257 fn declare_ns(&self, attr: &mut Attribute) {
258 if let Err(msg) = self.current_namespace.borrow_mut().insert_ns(attr) {
259 self.sink.parse_error(msg);
260 } else {
261 attr.name.ns = ns!(xmlns);
262 }
263 }
264
265 fn find_uri(&self, prefix: &Option<Prefix>) -> Result<Option<Namespace>, Cow<'static, str>> {
266 let mut uri = Err(Borrowed("No appropriate namespace found"));
267
268 let current_namespace = self.current_namespace.borrow();
269 for ns in self
270 .namespace_stack
271 .borrow()
272 .0
273 .iter()
274 .chain(Some(&*current_namespace))
275 .rev()
276 {
277 if let Some(el) = ns.get(prefix) {
278 uri = Ok(el.clone());
279 break;
280 }
281 }
282 uri
283 }
284
285 fn bind_qname(&self, name: &mut QualName) {
286 match self.find_uri(&name.prefix) {
287 Ok(uri) => {
288 let ns_uri = match uri {
289 Some(e) => e,
290 None => ns!(),
291 };
292 name.ns = ns_uri;
293 },
294 Err(msg) => {
295 self.sink.parse_error(msg);
296 },
297 }
298 }
299
300 fn bind_attr_qname(
305 &self,
306 present_attrs: &mut HashSet<(Namespace, LocalName)>,
307 name: &mut QualName,
308 ) -> bool {
309 let mut not_duplicate = true;
311
312 if name.prefix.is_some() {
313 self.bind_qname(name);
314 not_duplicate = Self::check_duplicate_attr(present_attrs, name);
315 }
316 not_duplicate
317 }
318
319 fn check_duplicate_attr(
320 present_attrs: &mut HashSet<(Namespace, LocalName)>,
321 name: &QualName,
322 ) -> bool {
323 let pair = (name.ns.clone(), name.local.clone());
324
325 if present_attrs.contains(&pair) {
326 return false;
327 }
328 present_attrs.insert(pair);
329 true
330 }
331
332 fn process_namespaces(&self, tag: &mut Tag) {
333 let mut present_attrs: HashSet<(Namespace, LocalName)> = Default::default();
335
336 let mut new_attr = vec![];
337 for attr in tag.attrs.iter_mut().filter(|attr| {
339 attr.name.prefix == Some(namespace_prefix!("xmlns"))
340 || attr.name.local == local_name!("xmlns")
341 }) {
342 self.declare_ns(attr);
343 }
344
345 for attr in tag.attrs.iter_mut().filter(|attr| {
347 attr.name.prefix != Some(namespace_prefix!("xmlns"))
348 && attr.name.local != local_name!("xmlns")
349 }) {
350 if self.bind_attr_qname(&mut present_attrs, &mut attr.name) {
351 new_attr.push(attr.clone());
352 }
353 }
354 tag.attrs = new_attr;
355
356 self.bind_qname(&mut tag.name);
358
359 let x = mem::replace(
361 &mut *self.current_namespace.borrow_mut(),
362 NamespaceMap::empty(),
363 );
364
365 if tag.kind == StartTag || (tag.kind == EmptyTag && tag.name.local == local_name!("script"))
369 {
370 self.namespace_stack.borrow_mut().push(x);
371 }
372 }
373
374 fn process_to_completion(
375 &self,
376 mut token: Token,
377 ) -> ProcessResult<<Self as TokenSink>::Handle> {
378 let mut more_tokens = VecDeque::new();
381
382 loop {
383 let phase = self.phase.get();
384
385 #[allow(clippy::unused_unit)]
386 match self.step(phase, token) {
387 XmlProcessResult::Done => {
388 token = unwrap_or_return!(more_tokens.pop_front(), ProcessResult::Continue);
389 },
390 XmlProcessResult::Reprocess(m, t) => {
391 self.phase.set(m);
392 token = t;
393 },
394 XmlProcessResult::Script(node) => {
395 assert!(more_tokens.is_empty());
396 return ProcessResult::Script(node);
397 },
398 }
399 }
400 }
401}
402
403impl<Handle, Sink> TokenSink for XmlTreeBuilder<Handle, Sink>
404where
405 Handle: Clone,
406 Sink: TreeSink<Handle = Handle>,
407{
408 type Handle = Handle;
409
410 fn process_token(&self, token: tokenizer::Token) -> ProcessResult<Self::Handle> {
411 let token = match token {
413 tokenizer::Token::ParseError(e) => {
414 self.sink.parse_error(e);
415 return ProcessResult::Done;
416 },
417
418 tokenizer::Token::Doctype(d) => Token::Doctype(d),
419 tokenizer::Token::ProcessingInstruction(instruction) => Token::Pi(instruction),
420 tokenizer::Token::Tag(x) => Token::Tag(x),
421 tokenizer::Token::Comment(x) => Token::Comment(x),
422 tokenizer::Token::NullCharacter => Token::NullCharacter,
423 tokenizer::Token::EndOfFile => Token::Eof,
424 tokenizer::Token::Characters(x) => Token::Characters(x),
425 };
426
427 self.process_to_completion(token)
428 }
429
430 fn end(&self) {
431 for node in self.open_elems.borrow_mut().drain(..).rev() {
432 self.sink.pop(&node);
433 }
434 }
435}
436
/// Returns the last entry of `open_elems`.
///
/// # Panics
///
/// Panics with "no current element" when `open_elems` is empty.
fn current_node<Handle>(open_elems: &[Handle]) -> &Handle {
    match open_elems.split_last() {
        Some((innermost, _)) => innermost,
        None => panic!("no current element"),
    }
}
440
441#[doc(hidden)]
442impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
443where
444 Handle: Clone,
445 Sink: TreeSink<Handle = Handle>,
446{
447 fn current_node(&self) -> Ref<'_, Handle> {
448 Ref::map(self.open_elems.borrow(), |elems| {
449 elems.last().expect("no current element")
450 })
451 }
452
453 fn insert_appropriately(&self, child: NodeOrText<Handle>) {
454 let open_elems = self.open_elems.borrow();
455 let target = current_node(&open_elems);
456 self.sink.append(target, child);
457 }
458
459 fn insert_tag(&self, tag: Tag) -> XmlProcessResult<Handle> {
460 let child = create_element(&self.sink, tag.name, tag.attrs);
461 self.insert_appropriately(AppendNode(child.clone()));
462 self.add_to_open_elems(child)
463 }
464
465 fn append_tag(&self, tag: Tag) -> XmlProcessResult<Handle> {
466 let child = create_element(&self.sink, tag.name, tag.attrs);
467 self.insert_appropriately(AppendNode(child.clone()));
468 self.sink.pop(&child);
469 XmlProcessResult::Done
470 }
471
472 fn append_tag_to_doc(&self, tag: Tag) -> Handle {
473 let child = create_element(&self.sink, tag.name, tag.attrs);
474
475 self.sink
476 .append(&self.doc_handle, AppendNode(child.clone()));
477 child
478 }
479
480 fn add_to_open_elems(&self, el: Handle) -> XmlProcessResult<Handle> {
481 self.open_elems.borrow_mut().push(el);
482
483 XmlProcessResult::Done
484 }
485
486 fn append_comment_to_doc(&self, text: StrTendril) -> XmlProcessResult<Handle> {
487 let comment = self.sink.create_comment(text);
488 self.sink.append(&self.doc_handle, AppendNode(comment));
489 XmlProcessResult::Done
490 }
491
492 fn append_comment_to_tag(&self, text: StrTendril) -> XmlProcessResult<Handle> {
493 let open_elems = self.open_elems.borrow();
494 let target = current_node(&open_elems);
495 let comment = self.sink.create_comment(text);
496 self.sink.append(target, AppendNode(comment));
497 XmlProcessResult::Done
498 }
499
500 fn append_doctype_to_doc(&self, doctype: Doctype) -> XmlProcessResult<Handle> {
501 fn get_tendril(opt: Option<StrTendril>) -> StrTendril {
502 match opt {
503 Some(expr) => expr,
504 None => Tendril::new(),
505 }
506 }
507 self.sink.append_doctype_to_document(
508 get_tendril(doctype.name),
509 get_tendril(doctype.public_id),
510 get_tendril(doctype.system_id),
511 );
512 XmlProcessResult::Done
513 }
514
515 fn append_pi_to_doc(&self, pi: Pi) -> XmlProcessResult<Handle> {
516 let pi = self.sink.create_pi(pi.target, pi.data);
517 self.sink.append(&self.doc_handle, AppendNode(pi));
518 XmlProcessResult::Done
519 }
520
521 fn append_pi_to_tag(&self, pi: Pi) -> XmlProcessResult<Handle> {
522 let open_elems = self.open_elems.borrow();
523 let target = current_node(&open_elems);
524 let pi = self.sink.create_pi(pi.target, pi.data);
525 self.sink.append(target, AppendNode(pi));
526 XmlProcessResult::Done
527 }
528
529 fn append_text(&self, chars: StrTendril) -> XmlProcessResult<Handle> {
530 self.insert_appropriately(AppendText(chars));
531 XmlProcessResult::Done
532 }
533
534 fn tag_in_open_elems(&self, tag: &Tag) -> bool {
535 self.open_elems
536 .borrow()
537 .iter()
538 .any(|a| self.sink.elem_name(a).expanded() == tag.name.expanded())
539 }
540
541 fn pop_until<P>(&self, pred: P)
543 where
544 P: Fn(ExpandedName) -> bool,
545 {
546 loop {
547 if self.current_node_in(&pred) {
548 break;
549 }
550 self.pop();
551 }
552 }
553
554 fn current_node_in<TagSet>(&self, set: TagSet) -> bool
555 where
556 TagSet: Fn(ExpandedName) -> bool,
557 {
558 set(self.sink.elem_name(&self.current_node()).expanded())
560 }
561
562 fn close_tag(&self, tag: Tag) -> XmlProcessResult<Handle> {
563 debug!(
564 "Close tag: current_node.name {:?} \n Current tag {:?}",
565 self.sink.elem_name(&self.current_node()),
566 &tag.name
567 );
568
569 if *self.sink.elem_name(&self.current_node()).local_name() != tag.name.local {
570 self.sink
571 .parse_error(Borrowed("Current node doesn't match tag"));
572 }
573
574 let is_closed = self.tag_in_open_elems(&tag);
575
576 if is_closed {
577 self.pop_until(|p| p == tag.name.expanded());
578 self.pop();
579 }
580
581 XmlProcessResult::Done
582 }
583
584 fn no_open_elems(&self) -> bool {
585 self.open_elems.borrow().is_empty()
586 }
587
588 fn pop(&self) -> Handle {
589 self.namespace_stack.borrow_mut().pop();
590 let node = self
591 .open_elems
592 .borrow_mut()
593 .pop()
594 .expect("no current element");
595 self.sink.pop(&node);
596 node
597 }
598
599 fn stop_parsing(&self) -> XmlProcessResult<Handle> {
600 warn!("stop_parsing for XML5 not implemented, full speed ahead!");
601 XmlProcessResult::Done
602 }
603}
604
605fn any_not_whitespace(x: &StrTendril) -> bool {
606 !x.bytes()
607 .all(|b| matches!(b, b'\t' | b'\r' | b'\n' | b'\x0C' | b' '))
608}
609
610#[doc(hidden)]
611impl<Handle, Sink> XmlTreeBuilder<Handle, Sink>
612where
613 Handle: Clone,
614 Sink: TreeSink<Handle = Handle>,
615{
616 fn step(&self, mode: XmlPhase, token: Token) -> XmlProcessResult<<Self as TokenSink>::Handle> {
617 self.debug_step(mode, &token);
618
619 match mode {
620 XmlPhase::Start => match token {
621 Token::Tag(Tag {
622 kind: StartTag,
623 name,
624 attrs,
625 }) => {
626 let tag = {
627 let mut tag = Tag {
628 kind: StartTag,
629 name,
630 attrs,
631 };
632 self.process_namespaces(&mut tag);
633 tag
634 };
635 self.phase.set(XmlPhase::Main);
636 let handle = self.append_tag_to_doc(tag);
637 self.add_to_open_elems(handle)
638 },
639 Token::Tag(Tag {
640 kind: EmptyTag,
641 name,
642 attrs,
643 }) => {
644 let tag = {
645 let mut tag = Tag {
646 kind: EmptyTag,
647 name,
648 attrs,
649 };
650 self.process_namespaces(&mut tag);
651 tag
652 };
653 self.phase.set(XmlPhase::End);
654 let handle = self.append_tag_to_doc(tag);
655 self.sink.pop(&handle);
656 XmlProcessResult::Done
657 },
658 Token::Comment(comment) => self.append_comment_to_doc(comment),
659 Token::Pi(pi) => self.append_pi_to_doc(pi),
660 Token::Characters(ref chars) if !any_not_whitespace(chars) => {
661 XmlProcessResult::Done
662 },
663 Token::Eof => {
664 self.sink
665 .parse_error(Borrowed("Unexpected EOF in start phase"));
666 XmlProcessResult::Reprocess(XmlPhase::End, Token::Eof)
667 },
668 Token::Doctype(d) => {
669 self.append_doctype_to_doc(d);
670 XmlProcessResult::Done
671 },
672 _ => {
673 self.sink
674 .parse_error(Borrowed("Unexpected element in start phase"));
675 XmlProcessResult::Done
676 },
677 },
678 XmlPhase::Main => match token {
679 Token::Characters(chs) => self.append_text(chs),
680 Token::Tag(Tag {
681 kind: StartTag,
682 name,
683 attrs,
684 }) => {
685 let tag = {
686 let mut tag = Tag {
687 kind: StartTag,
688 name,
689 attrs,
690 };
691 self.process_namespaces(&mut tag);
692 tag
693 };
694 self.insert_tag(tag)
695 },
696 Token::Tag(Tag {
697 kind: EmptyTag,
698 name,
699 attrs,
700 }) => {
701 let tag = {
702 let mut tag = Tag {
703 kind: EmptyTag,
704 name,
705 attrs,
706 };
707 self.process_namespaces(&mut tag);
708 tag
709 };
710 if tag.name.local == local_name!("script") {
711 self.insert_tag(tag.clone());
712 let script = current_node(&self.open_elems.borrow()).clone();
713 self.close_tag(tag);
714 XmlProcessResult::Script(script)
715 } else {
716 self.append_tag(tag)
717 }
718 },
719 Token::Tag(Tag {
720 kind: EndTag,
721 name,
722 attrs,
723 }) => {
724 let tag = {
725 let mut tag = Tag {
726 kind: EndTag,
727 name,
728 attrs,
729 };
730 self.process_namespaces(&mut tag);
731 tag
732 };
733 if tag.name.local == local_name!("script") {
734 let script = current_node(&self.open_elems.borrow()).clone();
735 self.close_tag(tag);
736 if self.no_open_elems() {
737 self.phase.set(XmlPhase::End);
738 }
739 return XmlProcessResult::Script(script);
740 }
741 let retval = self.close_tag(tag);
742 if self.no_open_elems() {
743 self.phase.set(XmlPhase::End);
744 }
745 retval
746 },
747 Token::Tag(Tag { kind: ShortTag, .. }) => {
748 self.pop();
749 if self.no_open_elems() {
750 self.phase.set(XmlPhase::End);
751 }
752 XmlProcessResult::Done
753 },
754 Token::Comment(comment) => self.append_comment_to_tag(comment),
755 Token::Pi(pi) => self.append_pi_to_tag(pi),
756 Token::Eof | Token::NullCharacter => {
757 XmlProcessResult::Reprocess(XmlPhase::End, Token::Eof)
758 },
759 Token::Doctype(_) => {
760 self.sink
761 .parse_error(Borrowed("Unexpected element in main phase"));
762 XmlProcessResult::Done
763 },
764 },
765 XmlPhase::End => match token {
766 Token::Comment(comment) => self.append_comment_to_doc(comment),
767 Token::Pi(pi) => self.append_pi_to_doc(pi),
768 Token::Characters(ref chars) if !any_not_whitespace(chars) => {
769 XmlProcessResult::Done
770 },
771 Token::Eof => self.stop_parsing(),
772 _ => {
773 self.sink
774 .parse_error(Borrowed("Unexpected element in end phase"));
775 XmlProcessResult::Done
776 },
777 },
778 }
779 }
780}