1use crate::tokenizer::{XmlTokenizer, XmlTokenizerOpts};
11use crate::tree_builder::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts};
12
13use std::borrow::Cow;
14
15use crate::tendril;
16use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
17use crate::tendril::StrTendril;
18use markup5ever::buffer_queue::BufferQueue;
19use markup5ever::TokenizerResult;
20
21#[derive(Clone, Default)]
23pub struct XmlParseOpts {
24 pub tokenizer: XmlTokenizerOpts,
26 pub tree_builder: XmlTreeBuilderOpts,
28}
29
30pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink>
39where
40 Sink: TreeSink,
41{
42 let tb = XmlTreeBuilder::new(sink, opts.tree_builder);
43 let tok = XmlTokenizer::new(tb, opts.tokenizer);
44 XmlParser {
45 tokenizer: tok,
46 input_buffer: BufferQueue::default(),
47 }
48}
49
50pub struct XmlParser<Sink>
53where
54 Sink: TreeSink,
55{
56 pub tokenizer: XmlTokenizer<XmlTreeBuilder<Sink::Handle, Sink>>,
58 pub input_buffer: BufferQueue,
60}
61
62impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink> {
63 type Output = Sink::Output;
64
65 fn process(&mut self, t: StrTendril) {
66 self.input_buffer.push_back(t);
67 while let TokenizerResult::Script(_) = self.tokenizer.feed(&self.input_buffer) {}
69 }
70
71 fn error(&mut self, desc: Cow<'static, str>) {
73 self.tokenizer.sink.sink.parse_error(desc)
74 }
75
76 fn finish(self) -> Self::Output {
77 self.tokenizer.end();
78 self.tokenizer.sink.sink.finish()
79 }
80}
81
82impl<Sink: TreeSink> XmlParser<Sink> {
83 #[allow(clippy::wrong_self_convention)]
88 pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
89 Utf8LossyDecoder::new(self)
90 }
91}