1#![cfg_attr(crown, allow(crown::unrooted_must_root))]
6
7use std::cell::Cell;
8use std::io;
9
10use html5ever::buffer_queue::BufferQueue;
11use html5ever::serialize::TraversalScope::IncludeNode;
12use html5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
13use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
14use html5ever::tree_builder::{QuirksMode as HTML5EverQuirksMode, TreeBuilder, TreeBuilderOpts};
15use html5ever::{QualName, local_name, ns};
16use markup5ever::TokenizerResult;
17use script_bindings::trace::CustomTraceable;
18use servo_url::ServoUrl;
19use style::attr::AttrValue;
20use style::context::QuirksMode as StyleContextQuirksMode;
21use xml5ever::LocalName;
22
23use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
24use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::ShadowRootMode;
25use crate::dom::bindings::codegen::GenericBindings::ShadowRootBinding::ShadowRoot_Binding::ShadowRootMethods;
26use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::characterdata::CharacterData;
29use crate::dom::document::Document;
30use crate::dom::documentfragment::DocumentFragment;
31use crate::dom::documenttype::DocumentType;
32use crate::dom::element::Element;
33use crate::dom::html::htmlscriptelement::HTMLScriptElement;
34use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
35use crate::dom::node::Node;
36use crate::dom::processinginstruction::ProcessingInstruction;
37use crate::dom::servoparser::{ParsingAlgorithm, Sink};
38use crate::dom::shadowroot::ShadowRoot;
39use crate::script_runtime::CanGc;
40
/// HTML tokenizer used by the parser: a thin wrapper around html5ever's tokenizer
/// whose tree builder works on `Dom<Node>` handles and reports into a [`Sink`].
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Tokenizer {
    /// The wrapped html5ever tokenizer. It is skipped when measuring heap usage
    /// because the type is defined in html5ever.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    inner: HtmlTokenizer<TreeBuilder<Dom<Node>, Sink>>,
}
47
48impl Tokenizer {
49 pub(crate) fn new(
50 document: &Document,
51 url: ServoUrl,
52 fragment_context: Option<super::FragmentContext>,
53 parsing_algorithm: ParsingAlgorithm,
54 ) -> Self {
55 let custom_element_reaction_stack = document.custom_element_reaction_stack();
56 let sink = Sink {
57 base_url: url,
58 document: Dom::from_ref(document),
59 current_line: Cell::new(1),
60 script: Default::default(),
61 parsing_algorithm,
62 custom_element_reaction_stack,
63 };
64
65 let quirks_mode = match document.quirks_mode() {
66 StyleContextQuirksMode::Quirks => HTML5EverQuirksMode::Quirks,
67 StyleContextQuirksMode::LimitedQuirks => HTML5EverQuirksMode::LimitedQuirks,
68 StyleContextQuirksMode::NoQuirks => HTML5EverQuirksMode::NoQuirks,
69 };
70
71 let options = TreeBuilderOpts {
72 scripting_enabled: document.scripting_enabled(),
73 iframe_srcdoc: document.url().as_str() == "about:srcdoc",
74 quirks_mode,
75 ..Default::default()
76 };
77
78 let inner = if let Some(fragment_context) = fragment_context {
79 let tree_builder = TreeBuilder::new_for_fragment(
80 sink,
81 Dom::from_ref(fragment_context.context_elem),
82 fragment_context.form_elem.map(Dom::from_ref),
83 options,
84 );
85
86 let tokenizer_options = TokenizerOpts {
87 initial_state: Some(tree_builder.tokenizer_state_for_context_elem(
88 fragment_context.context_element_allows_scripting,
89 )),
90 ..Default::default()
91 };
92
93 HtmlTokenizer::new(tree_builder, tokenizer_options)
94 } else {
95 HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
96 };
97
98 Tokenizer { inner }
99 }
100
101 pub(crate) fn feed(&self, input: &BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
102 match self.inner.feed(input) {
103 TokenizerResult::Done => TokenizerResult::Done,
104 TokenizerResult::Script(script) => {
105 TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()))
106 },
107 }
108 }
109
110 pub(crate) fn end(&self) {
111 self.inner.end();
112 }
113
114 pub(crate) fn url(&self) -> &ServoUrl {
115 &self.inner.sink.sink.base_url
116 }
117
118 pub(crate) fn set_plaintext_state(&self) {
119 self.inner.set_plaintext_state();
120 }
121
122 pub(crate) fn get_current_line(&self) -> u32 {
123 self.inner.sink.sink.current_line.get() as u32
124 }
125}
126
127fn start_element<S: Serializer>(element: &Element, serializer: &mut S) -> io::Result<()> {
129 let name = QualName::new(
130 None,
131 element.namespace().clone(),
132 element.local_name().clone(),
133 );
134
135 let mut attributes = vec![];
136
137 if !element.has_attribute(&LocalName::from("is")) {
140 if let Some(is_value) = element.get_is() {
141 let qualified_name = QualName::new(None, ns!(), LocalName::from("is"));
142
143 attributes.push((qualified_name, AttrValue::String(is_value.to_string())));
144 }
145 }
146
147 attributes.extend(element.attrs().iter().map(|attr| {
149 let qname = QualName::new(None, attr.namespace().clone(), attr.local_name().clone());
150 let value = attr.value().clone();
151 (qname, value)
152 }));
153
154 let attr_refs = attributes.iter().map(|(qname, value)| {
155 let ar: AttrRef = (qname, &**value);
156 ar
157 });
158 serializer.start_elem(name, attr_refs)?;
159 Ok(())
160}
161
/// One step of the traversal produced by `SerializationIterator` and consumed by
/// `serialize_html_fragment`.
enum SerializationCommand {
    /// Emit the start tag of this element.
    OpenElement(DomRoot<Element>),
    /// Emit an end tag with this name.
    CloseElement(QualName),
    /// Emit a node that is not an element (doctype, text, comment, processing
    /// instruction, ...).
    SerializeNonelement(DomRoot<Node>),
    /// Emit the opening `<template>` tag describing this shadow root.
    SerializeShadowRoot(DomRoot<ShadowRoot>),
}
168
/// Iterator yielding the `SerializationCommand`s for a subtree, driven by an explicit
/// stack instead of recursion.
struct SerializationIterator {
    /// Pending commands; the next command to yield is popped from the end.
    stack: Vec<SerializationCommand>,

    /// When true, shadow roots whose `Serializable()` flag is set are serialized.
    serialize_shadow_roots: bool,

    /// Shadow roots that are serialized regardless of `serialize_shadow_roots`.
    /// User-agent widget shadow roots are skipped even when listed here.
    shadow_roots: Vec<DomRoot<ShadowRoot>>,
}
178
/// Three-way choice between no iterator, an iterator of type `C`, and an iterator of
/// type `S`.
///
/// NOTE(review): no use of this enum is visible in this file. Going by the variant
/// names, `C` presumably walks ordinary children and `S` shadow contents — confirm,
/// or remove the type if it is dead.
enum SerializationChildrenIterator<C, S> {
    None,
    Children(C),
    ShadowContents(S),
}
184
185impl SerializationIterator {
186 fn new(
187 node: &Node,
188 skip_first: bool,
189 serialize_shadow_roots: bool,
190 shadow_roots: Vec<DomRoot<ShadowRoot>>,
191 can_gc: CanGc,
192 ) -> SerializationIterator {
193 let mut ret = SerializationIterator {
194 stack: vec![],
195 serialize_shadow_roots,
196 shadow_roots,
197 };
198 if skip_first || node.is::<DocumentFragment>() || node.is::<Document>() {
199 ret.handle_node_contents(node, can_gc);
200 } else {
201 ret.push_node(node);
202 }
203 ret
204 }
205
206 fn handle_node_contents(&mut self, node: &Node, can_gc: CanGc) {
207 if node.downcast::<Element>().is_some_and(Element::is_void) {
208 return;
209 }
210
211 if let Some(template_element) = node.downcast::<HTMLTemplateElement>() {
212 for child in template_element
213 .Content(can_gc)
214 .upcast::<Node>()
215 .rev_children()
216 {
217 self.push_node(&child);
218 }
219 } else {
220 for child in node.rev_children() {
221 self.push_node(&child);
222 }
223 }
224
225 if let Some(shadow_root) = node.downcast::<Element>().and_then(Element::shadow_root) {
226 let should_be_serialized = (self.serialize_shadow_roots && shadow_root.Serializable()) ||
227 self.shadow_roots.contains(&shadow_root);
228 if !shadow_root.is_user_agent_widget() && should_be_serialized {
229 self.stack
230 .push(SerializationCommand::SerializeShadowRoot(shadow_root));
231 }
232 }
233 }
234
235 fn push_node(&mut self, node: &Node) {
236 let Some(element) = node.downcast::<Element>() else {
237 self.stack.push(SerializationCommand::SerializeNonelement(
238 DomRoot::from_ref(node),
239 ));
240 return;
241 };
242
243 self.stack
244 .push(SerializationCommand::OpenElement(DomRoot::from_ref(
245 element,
246 )));
247 }
248}
249
250impl Iterator for SerializationIterator {
251 type Item = SerializationCommand;
252
253 fn next(&mut self) -> Option<SerializationCommand> {
254 let res = self.stack.pop()?;
255
256 match &res {
257 SerializationCommand::OpenElement(element) => {
258 let name = QualName::new(
259 None,
260 element.namespace().clone(),
261 element.local_name().clone(),
262 );
263 self.stack.push(SerializationCommand::CloseElement(name));
264 self.handle_node_contents(element.upcast(), CanGc::note());
265 },
266 SerializationCommand::SerializeShadowRoot(shadow_root) => {
267 self.stack
268 .push(SerializationCommand::CloseElement(QualName::new(
269 None,
270 ns!(),
271 local_name!("template"),
272 )));
273 self.handle_node_contents(shadow_root.upcast(), CanGc::note());
274 },
275 _ => {},
276 }
277
278 Some(res)
279 }
280}
281
282pub(crate) fn serialize_html_fragment<S: Serializer>(
284 node: &Node,
285 serializer: &mut S,
286 traversal_scope: TraversalScope,
287 serialize_shadow_roots: bool,
288 shadow_roots: Vec<DomRoot<ShadowRoot>>,
289 can_gc: CanGc,
290) -> io::Result<()> {
291 let iter = SerializationIterator::new(
292 node,
293 traversal_scope != IncludeNode,
294 serialize_shadow_roots,
295 shadow_roots,
296 can_gc,
297 );
298
299 for cmd in iter {
300 match cmd {
301 SerializationCommand::OpenElement(n) => {
302 start_element(&n, serializer)?;
303 },
304 SerializationCommand::CloseElement(name) => {
305 serializer.end_elem(name)?;
306 },
307 SerializationCommand::SerializeNonelement(n) => match n.type_id() {
308 NodeTypeId::DocumentType => {
309 let doctype = n.downcast::<DocumentType>().unwrap();
310 serializer.write_doctype(&doctype.name().str())?;
311 },
312
313 NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) => {
314 let cdata = n.downcast::<CharacterData>().unwrap();
315 serializer.write_text(&cdata.data())?;
316 },
317
318 NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
319 let cdata = n.downcast::<CharacterData>().unwrap();
320 serializer.write_comment(&cdata.data())?;
321 },
322
323 NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
324 let pi = n.downcast::<ProcessingInstruction>().unwrap();
325 let data = pi.upcast::<CharacterData>().data();
326 serializer.write_processing_instruction(&pi.target().str(), &data)?;
327 },
328
329 NodeTypeId::DocumentFragment(_) | NodeTypeId::Attr => {},
330
331 NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"),
332 NodeTypeId::Element(_) => panic!("Element shouldn't appear here"),
333 },
334 SerializationCommand::SerializeShadowRoot(shadow_root) => {
335 let mut attributes = vec![];
339 let mut push_attribute = |name, value| {
340 let qualified_name = QualName::new(None, ns!(), LocalName::from(name));
341 attributes.push((qualified_name, value))
342 };
343
344 let mode = if shadow_root.Mode() == ShadowRootMode::Open {
345 "open"
346 } else {
347 "closed"
348 };
349 push_attribute("shadowrootmode", mode);
350
351 if shadow_root.DelegatesFocus() {
352 push_attribute("shadowrootdelegatesfocus", "");
353 }
354
355 if shadow_root.Serializable() {
356 push_attribute("shadowrootserializable", "");
357 }
358
359 if shadow_root.Clonable() {
360 push_attribute("shadowrootclonable", "");
361 }
362
363 let name = QualName::new(None, ns!(), local_name!("template"));
364 serializer.start_elem(name, attributes.iter().map(|(a, b)| (a, *b)))?;
365 },
366 }
367 }
368
369 Ok(())
370}
371
/// Adapter that lets a borrowed `Node` be passed to html5ever's `Serialize`-based
/// serialization API.
pub(crate) struct HtmlSerialize<'a> {
    /// The node to serialize.
    node: &'a Node,
}
375
376impl<'a> HtmlSerialize<'a> {
377 pub(crate) fn new(node: &'a Node) -> HtmlSerialize<'a> {
378 HtmlSerialize { node }
379 }
380}
381
382impl Serialize for HtmlSerialize<'_> {
383 fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
384 where
385 S: Serializer,
386 {
387 serialize_html_fragment(
388 self.node,
389 serializer,
390 traversal_scope,
391 false,
392 vec![],
393 CanGc::note(),
394 )
395 }
396}