script/dom/servoparser/
html.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![cfg_attr(crown, allow(crown::unrooted_must_root))]
6
7use std::cell::Cell;
8use std::io;
9
10use html5ever::buffer_queue::BufferQueue;
11use html5ever::serialize::TraversalScope::IncludeNode;
12use html5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
13use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
14use html5ever::tree_builder::{QuirksMode as HTML5EverQuirksMode, TreeBuilder, TreeBuilderOpts};
15use html5ever::{QualName, local_name, ns};
16use markup5ever::TokenizerResult;
17use script_bindings::trace::CustomTraceable;
18use servo_url::ServoUrl;
19use style::attr::AttrValue;
20use style::context::QuirksMode as StyleContextQuirksMode;
21use xml5ever::LocalName;
22
23use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
24use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::ShadowRootMode;
25use crate::dom::bindings::codegen::GenericBindings::ShadowRootBinding::ShadowRoot_Binding::ShadowRootMethods;
26use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::characterdata::CharacterData;
29use crate::dom::document::Document;
30use crate::dom::documentfragment::DocumentFragment;
31use crate::dom::documenttype::DocumentType;
32use crate::dom::element::Element;
33use crate::dom::html::htmlscriptelement::HTMLScriptElement;
34use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
35use crate::dom::node::Node;
36use crate::dom::processinginstruction::ProcessingInstruction;
37use crate::dom::servoparser::{ParsingAlgorithm, Sink};
38use crate::dom::shadowroot::ShadowRoot;
39use crate::script_runtime::CanGc;
40
41#[derive(JSTraceable, MallocSizeOf)]
42#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
43pub(crate) struct Tokenizer {
44    #[ignore_malloc_size_of = "Defined in html5ever"]
45    inner: HtmlTokenizer<TreeBuilder<Dom<Node>, Sink>>,
46}
47
48impl Tokenizer {
49    pub(crate) fn new(
50        document: &Document,
51        url: ServoUrl,
52        fragment_context: Option<super::FragmentContext>,
53        parsing_algorithm: ParsingAlgorithm,
54    ) -> Self {
55        let custom_element_reaction_stack = document.custom_element_reaction_stack();
56        let sink = Sink {
57            base_url: url,
58            document: Dom::from_ref(document),
59            current_line: Cell::new(1),
60            script: Default::default(),
61            parsing_algorithm,
62            custom_element_reaction_stack,
63        };
64
65        let quirks_mode = match document.quirks_mode() {
66            StyleContextQuirksMode::Quirks => HTML5EverQuirksMode::Quirks,
67            StyleContextQuirksMode::LimitedQuirks => HTML5EverQuirksMode::LimitedQuirks,
68            StyleContextQuirksMode::NoQuirks => HTML5EverQuirksMode::NoQuirks,
69        };
70
71        let options = TreeBuilderOpts {
72            scripting_enabled: document.scripting_enabled(),
73            iframe_srcdoc: document.url().as_str() == "about:srcdoc",
74            quirks_mode,
75            ..Default::default()
76        };
77
78        let inner = if let Some(fragment_context) = fragment_context {
79            let tree_builder = TreeBuilder::new_for_fragment(
80                sink,
81                Dom::from_ref(fragment_context.context_elem),
82                fragment_context.form_elem.map(Dom::from_ref),
83                options,
84            );
85
86            let tokenizer_options = TokenizerOpts {
87                initial_state: Some(tree_builder.tokenizer_state_for_context_elem(
88                    fragment_context.context_element_allows_scripting,
89                )),
90                ..Default::default()
91            };
92
93            HtmlTokenizer::new(tree_builder, tokenizer_options)
94        } else {
95            HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
96        };
97
98        Tokenizer { inner }
99    }
100
101    pub(crate) fn feed(&self, input: &BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
102        match self.inner.feed(input) {
103            TokenizerResult::Done => TokenizerResult::Done,
104            TokenizerResult::Script(script) => {
105                TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()))
106            },
107        }
108    }
109
110    pub(crate) fn end(&self) {
111        self.inner.end();
112    }
113
114    pub(crate) fn url(&self) -> &ServoUrl {
115        &self.inner.sink.sink.base_url
116    }
117
118    pub(crate) fn set_plaintext_state(&self) {
119        self.inner.set_plaintext_state();
120    }
121}
122
123/// <https://html.spec.whatwg.org/multipage/#html-fragment-serialisation-algorithm>
124fn start_element<S: Serializer>(element: &Element, serializer: &mut S) -> io::Result<()> {
125    let name = QualName::new(
126        None,
127        element.namespace().clone(),
128        element.local_name().clone(),
129    );
130
131    let mut attributes = vec![];
132
133    // The "is" value of an element is treated as if it was an attribute and it is serialized before all
134    // other attributes. If the element already has an "is" attribute then the "is" value is ignored.
135    if !element.has_attribute(&LocalName::from("is")) {
136        if let Some(is_value) = element.get_is() {
137            let qualified_name = QualName::new(None, ns!(), LocalName::from("is"));
138
139            attributes.push((qualified_name, AttrValue::String(is_value.to_string())));
140        }
141    }
142
143    // Collect all the "normal" attributes
144    attributes.extend(element.attrs().iter().map(|attr| {
145        let qname = QualName::new(None, attr.namespace().clone(), attr.local_name().clone());
146        let value = attr.value().clone();
147        (qname, value)
148    }));
149
150    let attr_refs = attributes.iter().map(|(qname, value)| {
151        let ar: AttrRef = (qname, &**value);
152        ar
153    });
154    serializer.start_elem(name, attr_refs)?;
155    Ok(())
156}
157
158enum SerializationCommand {
159    OpenElement(DomRoot<Element>),
160    CloseElement(QualName),
161    SerializeNonelement(DomRoot<Node>),
162    SerializeShadowRoot(DomRoot<ShadowRoot>),
163}
164
165struct SerializationIterator {
166    stack: Vec<SerializationCommand>,
167
168    /// Whether or not shadow roots should be serialized
169    serialize_shadow_roots: bool,
170
171    /// List of shadow root objects that should be serialized
172    shadow_roots: Vec<DomRoot<ShadowRoot>>,
173}
174
/// Either no iterator at all, an iterator of type `C`, or an iterator of type `S`.
///
/// NOTE(review): nothing in the visible part of this file constructs or matches
/// on this type; it looks like a leftover. Confirm before removing it.
enum SerializationChildrenIterator<C, S> {
    /// Nothing to iterate.
    None,
    /// Holds a `C`; presumably an iterator over regular children (unconfirmed).
    Children(C),
    /// Holds an `S`; presumably an iterator over shadow contents (unconfirmed).
    ShadowContents(S),
}
180
181impl SerializationIterator {
182    fn new(
183        node: &Node,
184        skip_first: bool,
185        serialize_shadow_roots: bool,
186        shadow_roots: Vec<DomRoot<ShadowRoot>>,
187        can_gc: CanGc,
188    ) -> SerializationIterator {
189        let mut ret = SerializationIterator {
190            stack: vec![],
191            serialize_shadow_roots,
192            shadow_roots,
193        };
194        if skip_first || node.is::<DocumentFragment>() || node.is::<Document>() {
195            ret.handle_node_contents(node, can_gc);
196        } else {
197            ret.push_node(node);
198        }
199        ret
200    }
201
202    fn handle_node_contents(&mut self, node: &Node, can_gc: CanGc) {
203        if node.downcast::<Element>().is_some_and(Element::is_void) {
204            return;
205        }
206
207        if let Some(template_element) = node.downcast::<HTMLTemplateElement>() {
208            for child in template_element
209                .Content(can_gc)
210                .upcast::<Node>()
211                .rev_children()
212            {
213                self.push_node(&child);
214            }
215        } else {
216            for child in node.rev_children() {
217                self.push_node(&child);
218            }
219        }
220
221        if let Some(shadow_root) = node.downcast::<Element>().and_then(Element::shadow_root) {
222            let should_be_serialized = (self.serialize_shadow_roots && shadow_root.Serializable()) ||
223                self.shadow_roots.contains(&shadow_root);
224            if !shadow_root.is_user_agent_widget() && should_be_serialized {
225                self.stack
226                    .push(SerializationCommand::SerializeShadowRoot(shadow_root));
227            }
228        }
229    }
230
231    fn push_node(&mut self, node: &Node) {
232        let Some(element) = node.downcast::<Element>() else {
233            self.stack.push(SerializationCommand::SerializeNonelement(
234                DomRoot::from_ref(node),
235            ));
236            return;
237        };
238
239        self.stack
240            .push(SerializationCommand::OpenElement(DomRoot::from_ref(
241                element,
242            )));
243    }
244}
245
246impl Iterator for SerializationIterator {
247    type Item = SerializationCommand;
248
249    fn next(&mut self) -> Option<SerializationCommand> {
250        let res = self.stack.pop()?;
251
252        match &res {
253            SerializationCommand::OpenElement(element) => {
254                let name = QualName::new(
255                    None,
256                    element.namespace().clone(),
257                    element.local_name().clone(),
258                );
259                self.stack.push(SerializationCommand::CloseElement(name));
260                self.handle_node_contents(element.upcast(), CanGc::note());
261            },
262            SerializationCommand::SerializeShadowRoot(shadow_root) => {
263                self.stack
264                    .push(SerializationCommand::CloseElement(QualName::new(
265                        None,
266                        ns!(),
267                        local_name!("template"),
268                    )));
269                self.handle_node_contents(shadow_root.upcast(), CanGc::note());
270            },
271            _ => {},
272        }
273
274        Some(res)
275    }
276}
277
278/// <https://html.spec.whatwg.org/multipage/#html-fragment-serialisation-algorithm>
279pub(crate) fn serialize_html_fragment<S: Serializer>(
280    node: &Node,
281    serializer: &mut S,
282    traversal_scope: TraversalScope,
283    serialize_shadow_roots: bool,
284    shadow_roots: Vec<DomRoot<ShadowRoot>>,
285    can_gc: CanGc,
286) -> io::Result<()> {
287    let iter = SerializationIterator::new(
288        node,
289        traversal_scope != IncludeNode,
290        serialize_shadow_roots,
291        shadow_roots,
292        can_gc,
293    );
294
295    for cmd in iter {
296        match cmd {
297            SerializationCommand::OpenElement(n) => {
298                start_element(&n, serializer)?;
299            },
300            SerializationCommand::CloseElement(name) => {
301                serializer.end_elem(name)?;
302            },
303            SerializationCommand::SerializeNonelement(n) => match n.type_id() {
304                NodeTypeId::DocumentType => {
305                    let doctype = n.downcast::<DocumentType>().unwrap();
306                    serializer.write_doctype(doctype.name())?;
307                },
308
309                NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) => {
310                    let cdata = n.downcast::<CharacterData>().unwrap();
311                    serializer.write_text(&cdata.data())?;
312                },
313
314                NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
315                    let cdata = n.downcast::<CharacterData>().unwrap();
316                    serializer.write_comment(&cdata.data())?;
317                },
318
319                NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
320                    let pi = n.downcast::<ProcessingInstruction>().unwrap();
321                    let data = pi.upcast::<CharacterData>().data();
322                    serializer.write_processing_instruction(pi.target(), &data)?;
323                },
324
325                NodeTypeId::DocumentFragment(_) | NodeTypeId::Attr => {},
326
327                NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"),
328                NodeTypeId::Element(_) => panic!("Element shouldn't appear here"),
329            },
330            SerializationCommand::SerializeShadowRoot(shadow_root) => {
331                // Shadow roots are serialized as template elements with a fixed set of
332                // attributes. Because these template elements don't actually exist in the DOM
333                // we have to make up a vector of attributes ourselves.
334                let mut attributes = vec![];
335                let mut push_attribute = |name, value| {
336                    let qualified_name = QualName::new(None, ns!(), LocalName::from(name));
337                    attributes.push((qualified_name, value))
338                };
339
340                let mode = if shadow_root.Mode() == ShadowRootMode::Open {
341                    "open"
342                } else {
343                    "closed"
344                };
345                push_attribute("shadowrootmode", mode);
346
347                if shadow_root.DelegatesFocus() {
348                    push_attribute("shadowrootdelegatesfocus", "");
349                }
350
351                if shadow_root.Serializable() {
352                    push_attribute("shadowrootserializable", "");
353                }
354
355                if shadow_root.Clonable() {
356                    push_attribute("shadowrootclonable", "");
357                }
358
359                let name = QualName::new(None, ns!(), local_name!("template"));
360                serializer.start_elem(name, attributes.iter().map(|(a, b)| (a, *b)))?;
361            },
362        }
363    }
364
365    Ok(())
366}
367
368// TODO: This trait confuses the concepts of XML serialization and HTML serialization and
369// the impl should go away eventually
370impl Serialize for &Node {
371    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
372    where
373        S: Serializer,
374    {
375        serialize_html_fragment(
376            self,
377            serializer,
378            traversal_scope,
379            false,
380            vec![],
381            CanGc::note(),
382        )
383    }
384}