1use crate::tokenizer::{XmlTokenizer, XmlTokenizerOpts};
11use crate::tree_builder::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts};
12
13use std::borrow::Cow;
14
15use crate::tendril;
16use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
17use crate::tendril::StrTendril;
18use markup5ever::buffer_queue::BufferQueue;
19
20#[derive(Clone, Default)]
22pub struct XmlParseOpts {
23 pub tokenizer: XmlTokenizerOpts,
25 pub tree_builder: XmlTreeBuilderOpts,
27}
28
29pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink>
38where
39 Sink: TreeSink,
40{
41 let tb = XmlTreeBuilder::new(sink, opts.tree_builder);
42 let tok = XmlTokenizer::new(tb, opts.tokenizer);
43 XmlParser {
44 tokenizer: tok,
45 input_buffer: BufferQueue::default(),
46 }
47}
48
49pub struct XmlParser<Sink>
52where
53 Sink: TreeSink,
54{
55 pub tokenizer: XmlTokenizer<XmlTreeBuilder<Sink::Handle, Sink>>,
57 pub input_buffer: BufferQueue,
59}
60
61impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink> {
62 type Output = Sink::Output;
63
64 fn process(&mut self, t: StrTendril) {
65 self.input_buffer.push_back(t);
66 let _ = self.tokenizer.feed(&self.input_buffer);
68 }
69
70 fn error(&mut self, desc: Cow<'static, str>) {
72 self.tokenizer.sink.sink.parse_error(desc)
73 }
74
75 fn finish(self) -> Self::Output {
76 self.tokenizer.end();
77 self.tokenizer.sink.sink.finish()
78 }
79}
80
81impl<Sink: TreeSink> XmlParser<Sink> {
82 #[allow(clippy::wrong_self_convention)]
87 pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
88 Utf8LossyDecoder::new(self)
89 }
90}