script/dom/servoparser/
html.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![cfg_attr(crown, allow(crown::unrooted_must_root))]
6
7use std::cell::Cell;
8use std::io;
9
10use html5ever::buffer_queue::BufferQueue;
11use html5ever::serialize::TraversalScope::IncludeNode;
12use html5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
13use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
14use html5ever::tree_builder::{QuirksMode as HTML5EverQuirksMode, TreeBuilder, TreeBuilderOpts};
15use html5ever::{QualName, local_name, ns};
16use markup5ever::TokenizerResult;
17use script_bindings::trace::CustomTraceable;
18use servo_url::ServoUrl;
19use style::attr::AttrValue;
20use style::context::QuirksMode as StyleContextQuirksMode;
21use xml5ever::LocalName;
22
23use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
24use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::ShadowRootMode;
25use crate::dom::bindings::codegen::GenericBindings::ShadowRootBinding::ShadowRoot_Binding::ShadowRootMethods;
26use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::characterdata::CharacterData;
29use crate::dom::document::Document;
30use crate::dom::documentfragment::DocumentFragment;
31use crate::dom::documenttype::DocumentType;
32use crate::dom::element::Element;
33use crate::dom::html::htmlscriptelement::HTMLScriptElement;
34use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
35use crate::dom::node::Node;
36use crate::dom::processinginstruction::ProcessingInstruction;
37use crate::dom::servoparser::{ParsingAlgorithm, Sink};
38use crate::dom::shadowroot::ShadowRoot;
39use crate::script_runtime::CanGc;
40
/// HTML tokenizer used by the Servo parser.
///
/// Wraps an html5ever tokenizer whose token sink is an html5ever tree builder; that
/// tree builder in turn writes into a [`Sink`] using `Dom<Node>` handles.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Tokenizer {
    /// The wrapped html5ever tokenizer. Excluded from malloc size accounting because
    /// the type is defined in html5ever.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    inner: HtmlTokenizer<TreeBuilder<Dom<Node>, Sink>>,
}
47
48impl Tokenizer {
49    pub(crate) fn new(
50        document: &Document,
51        url: ServoUrl,
52        fragment_context: Option<super::FragmentContext>,
53        parsing_algorithm: ParsingAlgorithm,
54    ) -> Self {
55        let custom_element_reaction_stack = document.custom_element_reaction_stack();
56        let sink = Sink {
57            base_url: url,
58            document: Dom::from_ref(document),
59            current_line: Cell::new(1),
60            script: Default::default(),
61            parsing_algorithm,
62            custom_element_reaction_stack,
63        };
64
65        let quirks_mode = match document.quirks_mode() {
66            StyleContextQuirksMode::Quirks => HTML5EverQuirksMode::Quirks,
67            StyleContextQuirksMode::LimitedQuirks => HTML5EverQuirksMode::LimitedQuirks,
68            StyleContextQuirksMode::NoQuirks => HTML5EverQuirksMode::NoQuirks,
69        };
70
71        let options = TreeBuilderOpts {
72            scripting_enabled: document.scripting_enabled(),
73            iframe_srcdoc: document.url().as_str() == "about:srcdoc",
74            quirks_mode,
75            ..Default::default()
76        };
77
78        let inner = if let Some(fragment_context) = fragment_context {
79            let tree_builder = TreeBuilder::new_for_fragment(
80                sink,
81                Dom::from_ref(fragment_context.context_elem),
82                fragment_context.form_elem.map(Dom::from_ref),
83                options,
84            );
85
86            let tokenizer_options = TokenizerOpts {
87                initial_state: Some(tree_builder.tokenizer_state_for_context_elem(
88                    fragment_context.context_element_allows_scripting,
89                )),
90                ..Default::default()
91            };
92
93            HtmlTokenizer::new(tree_builder, tokenizer_options)
94        } else {
95            HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
96        };
97
98        Tokenizer { inner }
99    }
100
101    pub(crate) fn feed(&self, input: &BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
102        match self.inner.feed(input) {
103            TokenizerResult::Done => TokenizerResult::Done,
104            TokenizerResult::Script(script) => {
105                TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()))
106            },
107        }
108    }
109
110    pub(crate) fn end(&self) {
111        self.inner.end();
112    }
113
114    pub(crate) fn url(&self) -> &ServoUrl {
115        &self.inner.sink.sink.base_url
116    }
117
118    pub(crate) fn set_plaintext_state(&self) {
119        self.inner.set_plaintext_state();
120    }
121
122    pub(crate) fn get_current_line(&self) -> u32 {
123        self.inner.sink.sink.current_line.get() as u32
124    }
125}
126
127/// <https://html.spec.whatwg.org/multipage/#html-fragment-serialisation-algorithm>
128fn start_element<S: Serializer>(element: &Element, serializer: &mut S) -> io::Result<()> {
129    let name = QualName::new(
130        None,
131        element.namespace().clone(),
132        element.local_name().clone(),
133    );
134
135    let mut attributes = vec![];
136
137    // The "is" value of an element is treated as if it was an attribute and it is serialized before all
138    // other attributes. If the element already has an "is" attribute then the "is" value is ignored.
139    if !element.has_attribute(&LocalName::from("is")) {
140        if let Some(is_value) = element.get_is() {
141            let qualified_name = QualName::new(None, ns!(), LocalName::from("is"));
142
143            attributes.push((qualified_name, AttrValue::String(is_value.to_string())));
144        }
145    }
146
147    // Collect all the "normal" attributes
148    attributes.extend(element.attrs().iter().map(|attr| {
149        let qname = QualName::new(None, attr.namespace().clone(), attr.local_name().clone());
150        let value = attr.value().clone();
151        (qname, value)
152    }));
153
154    let attr_refs = attributes.iter().map(|(qname, value)| {
155        let ar: AttrRef = (qname, &**value);
156        ar
157    });
158    serializer.start_elem(name, attr_refs)?;
159    Ok(())
160}
161
/// One step of the fragment serialization, as produced by [`SerializationIterator`]
/// and consumed by [`serialize_html_fragment`].
enum SerializationCommand {
    /// Write the start tag of the given element.
    OpenElement(DomRoot<Element>),
    /// Write the end tag for an element with the given name.
    CloseElement(QualName),
    /// Serialize a node that is not an element (doctype, text, comment, ...).
    SerializeNonelement(DomRoot<Node>),
    /// Write the start tag of the `template` element that represents the given
    /// shadow root.
    SerializeShadowRoot(DomRoot<ShadowRoot>),
}
168
/// Iterator yielding the [`SerializationCommand`]s needed to serialize a subtree.
struct SerializationIterator {
    /// Pending commands; the next command to yield is the one on top of the stack.
    stack: Vec<SerializationCommand>,

    /// Whether or not shadow roots should be serialized
    serialize_shadow_roots: bool,

    /// List of shadow root objects that should be serialized
    shadow_roots: Vec<DomRoot<ShadowRoot>>,
}
178
/// Either no iterator, an iterator of type `C`, or an iterator of type `S`.
///
/// NOTE(review): nothing in the visible part of this file references this type;
/// presumably `C` iterates over regular children and `S` over shadow root contents —
/// confirm whether it is still needed.
enum SerializationChildrenIterator<C, S> {
    None,
    Children(C),
    ShadowContents(S),
}
184
185impl SerializationIterator {
186    fn new(
187        node: &Node,
188        skip_first: bool,
189        serialize_shadow_roots: bool,
190        shadow_roots: Vec<DomRoot<ShadowRoot>>,
191        can_gc: CanGc,
192    ) -> SerializationIterator {
193        let mut ret = SerializationIterator {
194            stack: vec![],
195            serialize_shadow_roots,
196            shadow_roots,
197        };
198        if skip_first || node.is::<DocumentFragment>() || node.is::<Document>() {
199            ret.handle_node_contents(node, can_gc);
200        } else {
201            ret.push_node(node);
202        }
203        ret
204    }
205
206    fn handle_node_contents(&mut self, node: &Node, can_gc: CanGc) {
207        if node.downcast::<Element>().is_some_and(Element::is_void) {
208            return;
209        }
210
211        if let Some(template_element) = node.downcast::<HTMLTemplateElement>() {
212            for child in template_element
213                .Content(can_gc)
214                .upcast::<Node>()
215                .rev_children()
216            {
217                self.push_node(&child);
218            }
219        } else {
220            for child in node.rev_children() {
221                self.push_node(&child);
222            }
223        }
224
225        if let Some(shadow_root) = node.downcast::<Element>().and_then(Element::shadow_root) {
226            let should_be_serialized = (self.serialize_shadow_roots && shadow_root.Serializable()) ||
227                self.shadow_roots.contains(&shadow_root);
228            if !shadow_root.is_user_agent_widget() && should_be_serialized {
229                self.stack
230                    .push(SerializationCommand::SerializeShadowRoot(shadow_root));
231            }
232        }
233    }
234
235    fn push_node(&mut self, node: &Node) {
236        let Some(element) = node.downcast::<Element>() else {
237            self.stack.push(SerializationCommand::SerializeNonelement(
238                DomRoot::from_ref(node),
239            ));
240            return;
241        };
242
243        self.stack
244            .push(SerializationCommand::OpenElement(DomRoot::from_ref(
245                element,
246            )));
247    }
248}
249
250impl Iterator for SerializationIterator {
251    type Item = SerializationCommand;
252
253    fn next(&mut self) -> Option<SerializationCommand> {
254        let res = self.stack.pop()?;
255
256        match &res {
257            SerializationCommand::OpenElement(element) => {
258                let name = QualName::new(
259                    None,
260                    element.namespace().clone(),
261                    element.local_name().clone(),
262                );
263                self.stack.push(SerializationCommand::CloseElement(name));
264                self.handle_node_contents(element.upcast(), CanGc::note());
265            },
266            SerializationCommand::SerializeShadowRoot(shadow_root) => {
267                self.stack
268                    .push(SerializationCommand::CloseElement(QualName::new(
269                        None,
270                        ns!(),
271                        local_name!("template"),
272                    )));
273                self.handle_node_contents(shadow_root.upcast(), CanGc::note());
274            },
275            _ => {},
276        }
277
278        Some(res)
279    }
280}
281
282/// <https://html.spec.whatwg.org/multipage/#html-fragment-serialisation-algorithm>
283pub(crate) fn serialize_html_fragment<S: Serializer>(
284    node: &Node,
285    serializer: &mut S,
286    traversal_scope: TraversalScope,
287    serialize_shadow_roots: bool,
288    shadow_roots: Vec<DomRoot<ShadowRoot>>,
289    can_gc: CanGc,
290) -> io::Result<()> {
291    let iter = SerializationIterator::new(
292        node,
293        traversal_scope != IncludeNode,
294        serialize_shadow_roots,
295        shadow_roots,
296        can_gc,
297    );
298
299    for cmd in iter {
300        match cmd {
301            SerializationCommand::OpenElement(n) => {
302                start_element(&n, serializer)?;
303            },
304            SerializationCommand::CloseElement(name) => {
305                serializer.end_elem(name)?;
306            },
307            SerializationCommand::SerializeNonelement(n) => match n.type_id() {
308                NodeTypeId::DocumentType => {
309                    let doctype = n.downcast::<DocumentType>().unwrap();
310                    serializer.write_doctype(&doctype.name().str())?;
311                },
312
313                NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) => {
314                    let cdata = n.downcast::<CharacterData>().unwrap();
315                    serializer.write_text(&cdata.data())?;
316                },
317
318                NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
319                    let cdata = n.downcast::<CharacterData>().unwrap();
320                    serializer.write_comment(&cdata.data())?;
321                },
322
323                NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
324                    let pi = n.downcast::<ProcessingInstruction>().unwrap();
325                    let data = pi.upcast::<CharacterData>().data();
326                    serializer.write_processing_instruction(&pi.target().str(), &data)?;
327                },
328
329                NodeTypeId::DocumentFragment(_) | NodeTypeId::Attr => {},
330
331                NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"),
332                NodeTypeId::Element(_) => panic!("Element shouldn't appear here"),
333            },
334            SerializationCommand::SerializeShadowRoot(shadow_root) => {
335                // Shadow roots are serialized as template elements with a fixed set of
336                // attributes. Because these template elements don't actually exist in the DOM
337                // we have to make up a vector of attributes ourselves.
338                let mut attributes = vec![];
339                let mut push_attribute = |name, value| {
340                    let qualified_name = QualName::new(None, ns!(), LocalName::from(name));
341                    attributes.push((qualified_name, value))
342                };
343
344                let mode = if shadow_root.Mode() == ShadowRootMode::Open {
345                    "open"
346                } else {
347                    "closed"
348                };
349                push_attribute("shadowrootmode", mode);
350
351                if shadow_root.DelegatesFocus() {
352                    push_attribute("shadowrootdelegatesfocus", "");
353                }
354
355                if shadow_root.Serializable() {
356                    push_attribute("shadowrootserializable", "");
357                }
358
359                if shadow_root.Clonable() {
360                    push_attribute("shadowrootclonable", "");
361                }
362
363                let name = QualName::new(None, ns!(), local_name!("template"));
364                serializer.start_elem(name, attributes.iter().map(|(a, b)| (a, *b)))?;
365            },
366        }
367    }
368
369    Ok(())
370}
371
/// Borrowed wrapper around a [`Node`] that implements html5ever's `Serialize` trait.
pub(crate) struct HtmlSerialize<'a> {
    /// The node handed to `serialize_html_fragment` when serializing.
    node: &'a Node,
}
375
376impl<'a> HtmlSerialize<'a> {
377    pub(crate) fn new(node: &'a Node) -> HtmlSerialize<'a> {
378        HtmlSerialize { node }
379    }
380}
381
382impl Serialize for HtmlSerialize<'_> {
383    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
384    where
385        S: Serializer,
386    {
387        serialize_html_fragment(
388            self.node,
389            serializer,
390            traversal_scope,
391            false,
392            vec![],
393            CanGc::note(),
394        )
395    }
396}