1use crate::buffer_queue::BufferQueue;
13use crate::tokenizer::{Tokenizer, TokenizerOpts};
14use crate::tree_builder::{create_element, TreeBuilder, TreeBuilderOpts, TreeSink};
15use crate::{Attribute, QualName};
16use markup5ever::TokenizerResult;
17use std::borrow::Cow;
18
19use crate::tendril;
20use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
21use crate::tendril::StrTendril;
22
23#[derive(Clone, Default)]
25pub struct ParseOpts {
26 pub tokenizer: TokenizerOpts,
28
29 pub tree_builder: TreeBuilderOpts,
31}
32
33pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink>
41where
42 Sink: TreeSink,
43{
44 let tb = TreeBuilder::new(sink, opts.tree_builder);
45 let tok = Tokenizer::new(tb, opts.tokenizer);
46 Parser {
47 tokenizer: tok,
48 input_buffer: BufferQueue::default(),
49 }
50}
51
52pub fn parse_fragment<Sink>(
60 sink: Sink,
61 opts: ParseOpts,
62 context_name: QualName,
63 context_attrs: Vec<Attribute>,
64 context_element_allows_scripting: bool,
65) -> Parser<Sink>
66where
67 Sink: TreeSink,
68{
69 let context_elem = create_element(&sink, context_name, context_attrs);
70 parse_fragment_for_element(
71 sink,
72 opts,
73 context_elem,
74 context_element_allows_scripting,
75 None,
76 )
77}
78
79pub fn parse_fragment_for_element<Sink>(
82 sink: Sink,
83 opts: ParseOpts,
84 context_element: Sink::Handle,
85 context_element_allows_scripting: bool,
86 form_element: Option<Sink::Handle>,
87) -> Parser<Sink>
88where
89 Sink: TreeSink,
90{
91 let tree_builder =
92 TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder);
93 let tokenizer_options = TokenizerOpts {
94 initial_state: Some(
95 tree_builder.tokenizer_state_for_context_elem(context_element_allows_scripting),
96 ),
97 ..opts.tokenizer
98 };
99 let tokenizer = Tokenizer::new(tree_builder, tokenizer_options);
100 Parser {
101 tokenizer,
102 input_buffer: BufferQueue::default(),
103 }
104}
105
106pub struct Parser<Sink>
109where
110 Sink: TreeSink,
111{
112 pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>,
113 pub input_buffer: BufferQueue,
114}
115
116impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> {
117 fn process(&mut self, t: StrTendril) {
118 self.input_buffer.push_back(t);
119 self.loop_until_done();
120 }
121
122 fn error(&mut self, desc: Cow<'static, str>) {
124 self.tokenizer.sink.sink.parse_error(desc)
125 }
126
127 type Output = Sink::Output;
128
129 fn finish(self) -> Self::Output {
130 self.loop_until_done();
131
132 assert!(
133 self.input_buffer.is_empty(),
134 "parser finished with remaining input"
135 );
136 self.tokenizer.end();
137 self.tokenizer.sink.sink.finish()
138 }
139}
140
141impl<Sink: TreeSink> Parser<Sink> {
142 #[allow(clippy::wrong_self_convention)]
147 pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
148 Utf8LossyDecoder::new(self)
149 }
150
151 fn loop_until_done(&self) {
152 loop {
154 if matches!(
155 self.tokenizer.feed(&self.input_buffer),
156 TokenizerResult::Done
157 ) {
158 break;
159 }
160 }
161 }
162}