1#![cfg_attr(crown, expect(crown::unrooted_must_root))]
6
7use std::cell::Cell;
8use std::io;
9
10use html5ever::buffer_queue::BufferQueue;
11use html5ever::serialize::TraversalScope::IncludeNode;
12use html5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
13use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
14use html5ever::tree_builder::{QuirksMode as HTML5EverQuirksMode, TreeBuilder, TreeBuilderOpts};
15use html5ever::{QualName, local_name, ns};
16use markup5ever::TokenizerResult;
17use script_bindings::trace::CustomTraceable;
18use servo_url::ServoUrl;
19use style::attr::AttrValue;
20use style::context::QuirksMode as StyleContextQuirksMode;
21use xml5ever::LocalName;
22
23use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
24use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::ShadowRootMode;
25use crate::dom::bindings::codegen::GenericBindings::ShadowRootBinding::ShadowRoot_Binding::ShadowRootMethods;
26use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::characterdata::CharacterData;
29use crate::dom::document::Document;
30use crate::dom::documentfragment::DocumentFragment;
31use crate::dom::documenttype::DocumentType;
32use crate::dom::element::Element;
33use crate::dom::html::htmlscriptelement::HTMLScriptElement;
34use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
35use crate::dom::node::Node;
36use crate::dom::processinginstruction::ProcessingInstruction;
37use crate::dom::servoparser::{ParsingAlgorithm, Sink};
38use crate::dom::shadowroot::ShadowRoot;
39use crate::script_runtime::CanGc;
40
/// HTML tokenizer used by the parser: an html5ever tokenizer that feeds an
/// html5ever tree builder whose node handles are `Dom<Node>` and whose sink is
/// the parser's `Sink`.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Tokenizer {
    /// The wrapped html5ever tokenizer. Excluded from malloc size measurement
    /// because the type is defined in html5ever.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    inner: HtmlTokenizer<TreeBuilder<Dom<Node>, Sink>>,
}
47
48impl Tokenizer {
49 pub(crate) fn new(
50 document: &Document,
51 url: ServoUrl,
52 fragment_context: Option<super::FragmentContext>,
53 parsing_algorithm: ParsingAlgorithm,
54 ) -> Self {
55 let custom_element_reaction_stack = document.custom_element_reaction_stack();
56 let sink = Sink {
57 base_url: url,
58 document: Dom::from_ref(document),
59 current_line: Cell::new(1),
60 script: Default::default(),
61 parsing_algorithm,
62 custom_element_reaction_stack,
63 };
64
65 let quirks_mode = match document.quirks_mode() {
66 StyleContextQuirksMode::Quirks => HTML5EverQuirksMode::Quirks,
67 StyleContextQuirksMode::LimitedQuirks => HTML5EverQuirksMode::LimitedQuirks,
68 StyleContextQuirksMode::NoQuirks => HTML5EverQuirksMode::NoQuirks,
69 };
70
71 let options = TreeBuilderOpts {
72 scripting_enabled: document.scripting_enabled(),
73 iframe_srcdoc: document.url().as_str() == "about:srcdoc",
74 quirks_mode,
75 ..Default::default()
76 };
77
78 let inner = if let Some(fragment_context) = fragment_context {
79 let tree_builder = TreeBuilder::new_for_fragment(
80 sink,
81 Dom::from_ref(fragment_context.context_elem),
82 fragment_context.form_elem.map(Dom::from_ref),
83 options,
84 );
85
86 let tokenizer_options = TokenizerOpts {
87 initial_state: Some(tree_builder.tokenizer_state_for_context_elem(
88 fragment_context.context_element_allows_scripting,
89 )),
90 ..Default::default()
91 };
92
93 HtmlTokenizer::new(tree_builder, tokenizer_options)
94 } else {
95 HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
96 };
97
98 Tokenizer { inner }
99 }
100
101 pub(crate) fn feed(&self, input: &BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
102 match self.inner.feed(input) {
103 TokenizerResult::Done => TokenizerResult::Done,
104 TokenizerResult::Script(script) => {
105 TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()))
106 },
107 TokenizerResult::EncodingIndicator(encoding) => {
108 TokenizerResult::EncodingIndicator(encoding)
109 },
110 }
111 }
112
113 pub(crate) fn end(&self) {
114 self.inner.end();
115 }
116
117 pub(crate) fn url(&self) -> &ServoUrl {
118 &self.inner.sink.sink.base_url
119 }
120
121 pub(crate) fn set_plaintext_state(&self) {
122 self.inner.set_plaintext_state();
123 }
124
125 pub(crate) fn get_current_line(&self) -> u32 {
126 self.inner.sink.sink.current_line.get() as u32
127 }
128}
129
130fn start_element<S: Serializer>(element: &Element, serializer: &mut S) -> io::Result<()> {
132 let name = QualName::new(
133 None,
134 element.namespace().clone(),
135 element.local_name().clone(),
136 );
137
138 let mut attributes = vec![];
139
140 if !element.has_attribute(&LocalName::from("is")) {
143 if let Some(is_value) = element.get_is() {
144 let qualified_name = QualName::new(None, ns!(), LocalName::from("is"));
145
146 attributes.push((qualified_name, AttrValue::String(is_value.to_string())));
147 }
148 }
149
150 attributes.extend(element.attrs().iter().map(|attr| {
152 let qname = QualName::new(None, attr.namespace().clone(), attr.local_name().clone());
153 let value = attr.value().clone();
154 (qname, value)
155 }));
156
157 let attr_refs = attributes.iter().map(|(qname, value)| {
158 let ar: AttrRef = (qname, &**value);
159 ar
160 });
161 serializer.start_elem(name, attr_refs)?;
162 Ok(())
163}
164
/// One step of a serialization run, produced by `SerializationIterator` and
/// consumed by `serialize_html_fragment`.
enum SerializationCommand {
    /// Emit the start tag of this element.
    OpenElement(DomRoot<Element>),
    /// Emit an end tag with this qualified name.
    CloseElement(QualName),
    /// Emit a node that is not an element (doctype, text, comment or
    /// processing instruction).
    SerializeNonelement(DomRoot<Node>),
    /// Emit the opening `<template>` tag that represents this shadow root.
    SerializeShadowRoot(DomRoot<ShadowRoot>),
}
171
/// Iterator that turns a DOM subtree into a sequence of `SerializationCommand`s.
struct SerializationIterator {
    /// Pending commands; the next command to yield is the last entry.
    stack: Vec<SerializationCommand>,

    /// When true, a shadow root whose `Serializable()` is true is serialized.
    serialize_shadow_roots: bool,

    /// Shadow roots that are serialized even when the condition above does not
    /// hold. User-agent-widget shadow roots are skipped either way.
    shadow_roots: Vec<DomRoot<ShadowRoot>>,
}
181
/// Three-way choice between no children, an iterator of type `C`, or an
/// iterator of type `S`.
// NOTE(review): this enum is not referenced anywhere in the visible part of
// this file — confirm whether it is still needed.
enum SerializationChildrenIterator<C, S> {
    /// Nothing to iterate.
    None,
    /// Holds a value of type `C` (presumably a children iterator — verify).
    Children(C),
    /// Holds a value of type `S` (presumably shadow-tree contents — verify).
    ShadowContents(S),
}
187
188impl SerializationIterator {
189 fn new(
190 node: &Node,
191 skip_first: bool,
192 serialize_shadow_roots: bool,
193 shadow_roots: Vec<DomRoot<ShadowRoot>>,
194 can_gc: CanGc,
195 ) -> SerializationIterator {
196 let mut ret = SerializationIterator {
197 stack: vec![],
198 serialize_shadow_roots,
199 shadow_roots,
200 };
201 if skip_first || node.is::<DocumentFragment>() || node.is::<Document>() {
202 ret.handle_node_contents(node, can_gc);
203 } else {
204 ret.push_node(node);
205 }
206 ret
207 }
208
209 fn handle_node_contents(&mut self, node: &Node, can_gc: CanGc) {
210 if node.downcast::<Element>().is_some_and(Element::is_void) {
211 return;
212 }
213
214 if let Some(template_element) = node.downcast::<HTMLTemplateElement>() {
215 for child in template_element
216 .Content(can_gc)
217 .upcast::<Node>()
218 .rev_children()
219 {
220 self.push_node(&child);
221 }
222 } else {
223 for child in node.rev_children() {
224 self.push_node(&child);
225 }
226 }
227
228 if let Some(shadow_root) = node.downcast::<Element>().and_then(Element::shadow_root) {
229 let should_be_serialized = (self.serialize_shadow_roots && shadow_root.Serializable()) ||
230 self.shadow_roots.contains(&shadow_root);
231 if !shadow_root.is_user_agent_widget() && should_be_serialized {
232 self.stack
233 .push(SerializationCommand::SerializeShadowRoot(shadow_root));
234 }
235 }
236 }
237
238 fn push_node(&mut self, node: &Node) {
239 let Some(element) = node.downcast::<Element>() else {
240 self.stack.push(SerializationCommand::SerializeNonelement(
241 DomRoot::from_ref(node),
242 ));
243 return;
244 };
245
246 self.stack
247 .push(SerializationCommand::OpenElement(DomRoot::from_ref(
248 element,
249 )));
250 }
251}
252
253impl Iterator for SerializationIterator {
254 type Item = SerializationCommand;
255
256 fn next(&mut self) -> Option<SerializationCommand> {
257 let res = self.stack.pop()?;
258
259 match &res {
260 SerializationCommand::OpenElement(element) => {
261 let name = QualName::new(
262 None,
263 element.namespace().clone(),
264 element.local_name().clone(),
265 );
266 self.stack.push(SerializationCommand::CloseElement(name));
267 self.handle_node_contents(element.upcast(), CanGc::note());
268 },
269 SerializationCommand::SerializeShadowRoot(shadow_root) => {
270 self.stack
271 .push(SerializationCommand::CloseElement(QualName::new(
272 None,
273 ns!(),
274 local_name!("template"),
275 )));
276 self.handle_node_contents(shadow_root.upcast(), CanGc::note());
277 },
278 _ => {},
279 }
280
281 Some(res)
282 }
283}
284
285pub(crate) fn serialize_html_fragment<S: Serializer>(
287 node: &Node,
288 serializer: &mut S,
289 traversal_scope: TraversalScope,
290 serialize_shadow_roots: bool,
291 shadow_roots: Vec<DomRoot<ShadowRoot>>,
292 can_gc: CanGc,
293) -> io::Result<()> {
294 let iter = SerializationIterator::new(
295 node,
296 traversal_scope != IncludeNode,
297 serialize_shadow_roots,
298 shadow_roots,
299 can_gc,
300 );
301
302 for cmd in iter {
303 match cmd {
304 SerializationCommand::OpenElement(n) => {
305 start_element(&n, serializer)?;
306 },
307 SerializationCommand::CloseElement(name) => {
308 serializer.end_elem(name)?;
309 },
310 SerializationCommand::SerializeNonelement(n) => match n.type_id() {
311 NodeTypeId::DocumentType => {
312 let doctype = n.downcast::<DocumentType>().unwrap();
313 serializer.write_doctype(&doctype.name().str())?;
314 },
315
316 NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) => {
317 let cdata = n.downcast::<CharacterData>().unwrap();
318 serializer.write_text(&cdata.data())?;
319 },
320
321 NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
322 let cdata = n.downcast::<CharacterData>().unwrap();
323 serializer.write_comment(&cdata.data())?;
324 },
325
326 NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
327 let pi = n.downcast::<ProcessingInstruction>().unwrap();
328 let data = pi.upcast::<CharacterData>().data();
329 serializer.write_processing_instruction(&pi.target().str(), &data)?;
330 },
331
332 NodeTypeId::DocumentFragment(_) | NodeTypeId::Attr => {},
333
334 NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"),
335 NodeTypeId::Element(_) => panic!("Element shouldn't appear here"),
336 },
337 SerializationCommand::SerializeShadowRoot(shadow_root) => {
338 let mut attributes = vec![];
342 let mut push_attribute = |name, value| {
343 let qualified_name = QualName::new(None, ns!(), LocalName::from(name));
344 attributes.push((qualified_name, value))
345 };
346
347 let mode = if shadow_root.Mode() == ShadowRootMode::Open {
348 "open"
349 } else {
350 "closed"
351 };
352 push_attribute("shadowrootmode", mode);
353
354 if shadow_root.DelegatesFocus() {
355 push_attribute("shadowrootdelegatesfocus", "");
356 }
357
358 if shadow_root.Serializable() {
359 push_attribute("shadowrootserializable", "");
360 }
361
362 if shadow_root.Clonable() {
363 push_attribute("shadowrootclonable", "");
364 }
365
366 let name = QualName::new(None, ns!(), local_name!("template"));
367 serializer.start_elem(name, attributes.iter().map(|(a, b)| (a, *b)))?;
368 },
369 }
370 }
371
372 Ok(())
373}
374
/// Adapter that lets a borrowed DOM `Node` be serialized through html5ever's
/// `Serialize` trait.
pub(crate) struct HtmlSerialize<'a> {
    /// The node to serialize.
    node: &'a Node,
}
378
379impl<'a> HtmlSerialize<'a> {
380 pub(crate) fn new(node: &'a Node) -> HtmlSerialize<'a> {
381 HtmlSerialize { node }
382 }
383}
384
385impl Serialize for HtmlSerialize<'_> {
386 fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
387 where
388 S: Serializer,
389 {
390 serialize_html_fragment(
391 self.node,
392 serializer,
393 traversal_scope,
394 false,
395 vec![],
396 CanGc::note(),
397 )
398 }
399}