1#![cfg_attr(crown, expect(crown::unrooted_must_root))]
6
7use std::cell::Cell;
8use std::io;
9
10use html5ever::buffer_queue::BufferQueue;
11use html5ever::serialize::TraversalScope::IncludeNode;
12use html5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
13use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
14use html5ever::tree_builder::{QuirksMode as HTML5EverQuirksMode, TreeBuilder, TreeBuilderOpts};
15use html5ever::{QualName, local_name, ns};
16use markup5ever::TokenizerResult;
17use script_bindings::script_runtime::temp_cx;
18use script_bindings::trace::CustomTraceable;
19use servo_url::ServoUrl;
20use style::attr::AttrValue;
21use style::context::QuirksMode as StyleContextQuirksMode;
22use xml5ever::LocalName;
23
24use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
25use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::ShadowRootMode;
26use crate::dom::bindings::codegen::GenericBindings::ShadowRootBinding::ShadowRoot_Binding::ShadowRootMethods;
27use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
28use crate::dom::bindings::root::{Dom, DomRoot};
29use crate::dom::characterdata::CharacterData;
30use crate::dom::document::Document;
31use crate::dom::documentfragment::DocumentFragment;
32use crate::dom::documenttype::DocumentType;
33use crate::dom::element::Element;
34use crate::dom::html::htmlscriptelement::HTMLScriptElement;
35use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
36use crate::dom::node::Node;
37use crate::dom::processinginstruction::ProcessingInstruction;
38use crate::dom::servoparser::{ParsingAlgorithm, Sink};
39use crate::dom::shadowroot::ShadowRoot;
40
41#[derive(JSTraceable, MallocSizeOf)]
42#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
43pub(crate) struct Tokenizer {
44 #[ignore_malloc_size_of = "Defined in html5ever"]
45 inner: HtmlTokenizer<TreeBuilder<Dom<Node>, Sink>>,
46}
47
48impl Tokenizer {
49 pub(crate) fn new(
50 document: &Document,
51 url: ServoUrl,
52 fragment_context: Option<super::FragmentContext>,
53 parsing_algorithm: ParsingAlgorithm,
54 ) -> Self {
55 let custom_element_reaction_stack = document.custom_element_reaction_stack();
56 let sink = Sink {
57 base_url: url,
58 document: Dom::from_ref(document),
59 current_line: Cell::new(1),
60 script: Default::default(),
61 parsing_algorithm,
62 custom_element_reaction_stack,
63 };
64
65 let quirks_mode = match document.quirks_mode() {
66 StyleContextQuirksMode::Quirks => HTML5EverQuirksMode::Quirks,
67 StyleContextQuirksMode::LimitedQuirks => HTML5EverQuirksMode::LimitedQuirks,
68 StyleContextQuirksMode::NoQuirks => HTML5EverQuirksMode::NoQuirks,
69 };
70
71 let options = TreeBuilderOpts {
72 scripting_enabled: document.scripting_enabled(),
73 iframe_srcdoc: document.url().as_str() == "about:srcdoc",
74 quirks_mode,
75 ..Default::default()
76 };
77
78 let inner = if let Some(fragment_context) = fragment_context {
79 let tree_builder = TreeBuilder::new_for_fragment(
80 sink,
81 Dom::from_ref(fragment_context.context_elem),
82 fragment_context.form_elem.map(Dom::from_ref),
83 options,
84 );
85
86 let tokenizer_options = TokenizerOpts {
87 initial_state: Some(tree_builder.tokenizer_state_for_context_elem(
88 fragment_context.context_element_allows_scripting,
89 )),
90 ..Default::default()
91 };
92
93 HtmlTokenizer::new(tree_builder, tokenizer_options)
94 } else {
95 HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
96 };
97
98 Tokenizer { inner }
99 }
100
101 pub(crate) fn feed(&self, input: &BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
102 match self.inner.feed(input) {
103 TokenizerResult::Done => TokenizerResult::Done,
104 TokenizerResult::Script(script) => {
105 TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()))
106 },
107 TokenizerResult::EncodingIndicator(encoding) => {
108 TokenizerResult::EncodingIndicator(encoding)
109 },
110 }
111 }
112
113 pub(crate) fn end(&self) {
114 self.inner.end();
115 }
116
117 pub(crate) fn url(&self) -> &ServoUrl {
118 &self.inner.sink.sink.base_url
119 }
120
121 pub(crate) fn set_plaintext_state(&self) {
122 self.inner.set_plaintext_state();
123 }
124
125 pub(crate) fn get_current_line(&self) -> u32 {
126 self.inner.sink.sink.current_line.get() as u32
127 }
128}
129
130fn start_element<S: Serializer>(element: &Element, serializer: &mut S) -> io::Result<()> {
132 let name = QualName::new(
133 None,
134 element.namespace().clone(),
135 element.local_name().clone(),
136 );
137
138 let mut attributes = vec![];
139
140 if !element.has_attribute(&LocalName::from("is")) {
143 if let Some(is_value) = element.get_is() {
144 let qualified_name = QualName::new(None, ns!(), LocalName::from("is"));
145
146 attributes.push((qualified_name, AttrValue::String(is_value.to_string())));
147 }
148 }
149
150 attributes.extend(element.attrs().iter().map(|attr| {
152 let qname = QualName::new(None, attr.namespace().clone(), attr.local_name().clone());
153 let value = attr.value().clone();
154 (qname, value)
155 }));
156
157 let attr_refs = attributes.iter().map(|(qname, value)| {
158 let ar: AttrRef = (qname, &**value);
159 ar
160 });
161 serializer.start_elem(name, attr_refs)?;
162 Ok(())
163}
164
165enum SerializationCommand {
166 OpenElement(DomRoot<Element>),
167 CloseElement(QualName),
168 SerializeNonelement(DomRoot<Node>),
169 SerializeShadowRoot(DomRoot<ShadowRoot>),
170}
171
172struct SerializationIterator {
173 stack: Vec<SerializationCommand>,
174
175 serialize_shadow_roots: bool,
177
178 shadow_roots: Vec<DomRoot<ShadowRoot>>,
180}
181
/// Choice between no children, an iterator of type `C`, or an iterator of
/// type `S`.
///
/// NOTE(review): nothing in the visible part of this file uses this type;
/// confirm whether it is still needed.
enum SerializationChildrenIterator<C, S> {
    /// Nothing to iterate.
    None,
    /// Iterate using `C` (presumably an element's regular children).
    Children(C),
    /// Iterate using `S` (presumably the contents of a shadow tree).
    ShadowContents(S),
}
187
188impl SerializationIterator {
189 fn new(
190 cx: &mut js::context::JSContext,
191 node: &Node,
192 skip_first: bool,
193 serialize_shadow_roots: bool,
194 shadow_roots: Vec<DomRoot<ShadowRoot>>,
195 ) -> SerializationIterator {
196 let mut ret = SerializationIterator {
197 stack: vec![],
198 serialize_shadow_roots,
199 shadow_roots,
200 };
201 if skip_first || node.is::<DocumentFragment>() || node.is::<Document>() {
202 ret.handle_node_contents(cx, node);
203 } else {
204 ret.push_node(node);
205 }
206 ret
207 }
208
209 fn handle_node_contents(&mut self, cx: &mut js::context::JSContext, node: &Node) {
210 if node.downcast::<Element>().is_some_and(Element::is_void) {
211 return;
212 }
213
214 if let Some(template_element) = node.downcast::<HTMLTemplateElement>() {
215 for child in template_element.Content(cx).upcast::<Node>().rev_children() {
216 self.push_node(&child);
217 }
218 } else {
219 for child in node.rev_children() {
220 self.push_node(&child);
221 }
222 }
223
224 if let Some(shadow_root) = node.downcast::<Element>().and_then(Element::shadow_root) {
225 let should_be_serialized = (self.serialize_shadow_roots && shadow_root.Serializable()) ||
226 self.shadow_roots.contains(&shadow_root);
227 if !shadow_root.is_user_agent_widget() && should_be_serialized {
228 self.stack
229 .push(SerializationCommand::SerializeShadowRoot(shadow_root));
230 }
231 }
232 }
233
234 fn push_node(&mut self, node: &Node) {
235 let Some(element) = node.downcast::<Element>() else {
236 self.stack.push(SerializationCommand::SerializeNonelement(
237 DomRoot::from_ref(node),
238 ));
239 return;
240 };
241
242 self.stack
243 .push(SerializationCommand::OpenElement(DomRoot::from_ref(
244 element,
245 )));
246 }
247}
248
249impl Iterator for SerializationIterator {
250 type Item = SerializationCommand;
251
252 #[expect(unsafe_code)]
253 fn next(&mut self) -> Option<SerializationCommand> {
254 let mut cx = unsafe { temp_cx() };
256 let cx = &mut cx;
257 let res = self.stack.pop()?;
258
259 match &res {
260 SerializationCommand::OpenElement(element) => {
261 let name = QualName::new(
262 None,
263 element.namespace().clone(),
264 element.local_name().clone(),
265 );
266 self.stack.push(SerializationCommand::CloseElement(name));
267 self.handle_node_contents(cx, element.upcast());
268 },
269 SerializationCommand::SerializeShadowRoot(shadow_root) => {
270 self.stack
271 .push(SerializationCommand::CloseElement(QualName::new(
272 None,
273 ns!(),
274 local_name!("template"),
275 )));
276 self.handle_node_contents(cx, shadow_root.upcast());
277 },
278 _ => {},
279 }
280
281 Some(res)
282 }
283}
284
285pub(crate) fn serialize_html_fragment<S: Serializer>(
287 cx: &mut js::context::JSContext,
288 node: &Node,
289 serializer: &mut S,
290 traversal_scope: TraversalScope,
291 serialize_shadow_roots: bool,
292 shadow_roots: Vec<DomRoot<ShadowRoot>>,
293) -> io::Result<()> {
294 let iter = SerializationIterator::new(
295 cx,
296 node,
297 traversal_scope != IncludeNode,
298 serialize_shadow_roots,
299 shadow_roots,
300 );
301
302 for cmd in iter {
303 match cmd {
304 SerializationCommand::OpenElement(n) => {
305 start_element(&n, serializer)?;
306 },
307 SerializationCommand::CloseElement(name) => {
308 serializer.end_elem(name)?;
309 },
310 SerializationCommand::SerializeNonelement(n) => match n.type_id() {
311 NodeTypeId::DocumentType => {
312 let doctype = n.downcast::<DocumentType>().unwrap();
313 serializer.write_doctype(&doctype.name().str())?;
314 },
315
316 NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) => {
317 let cdata = n.downcast::<CharacterData>().unwrap();
318 serializer.write_text(&cdata.data())?;
319 },
320
321 NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
322 let cdata = n.downcast::<CharacterData>().unwrap();
323 serializer.write_comment(&cdata.data())?;
324 },
325
326 NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
327 let pi = n.downcast::<ProcessingInstruction>().unwrap();
328 let data = pi.upcast::<CharacterData>().data();
329 serializer.write_processing_instruction(&pi.target().str(), &data)?;
330 },
331
332 NodeTypeId::DocumentFragment(_) | NodeTypeId::Attr => {},
333
334 NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"),
335 NodeTypeId::Element(_) => panic!("Element shouldn't appear here"),
336 },
337 SerializationCommand::SerializeShadowRoot(shadow_root) => {
338 let mut attributes = vec![];
342 let mut push_attribute = |name, value| {
343 let qualified_name = QualName::new(None, ns!(), LocalName::from(name));
344 attributes.push((qualified_name, value))
345 };
346
347 let mode = if shadow_root.Mode() == ShadowRootMode::Open {
348 "open"
349 } else {
350 "closed"
351 };
352 push_attribute("shadowrootmode", mode);
353
354 if shadow_root.DelegatesFocus() {
355 push_attribute("shadowrootdelegatesfocus", "");
356 }
357
358 if shadow_root.Serializable() {
359 push_attribute("shadowrootserializable", "");
360 }
361
362 if shadow_root.Clonable() {
363 push_attribute("shadowrootclonable", "");
364 }
365
366 let name = QualName::new(None, ns!(), local_name!("template"));
367 serializer.start_elem(name, attributes.iter().map(|(a, b)| (a, *b)))?;
368 },
369 }
370 }
371
372 Ok(())
373}
374
375pub(crate) struct HtmlSerialize<'a> {
376 node: &'a Node,
377}
378
379impl<'a> HtmlSerialize<'a> {
380 pub(crate) fn new(node: &'a Node) -> HtmlSerialize<'a> {
381 HtmlSerialize { node }
382 }
383}
384
385impl Serialize for HtmlSerialize<'_> {
386 #[expect(unsafe_code)]
387 fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
388 where
389 S: Serializer,
390 {
391 let mut cx = unsafe { temp_cx() };
393 let cx = &mut cx;
394 serialize_html_fragment(cx, self.node, serializer, traversal_scope, false, vec![])
395 }
396}