script/dom/servoparser/
html.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![cfg_attr(crown, expect(crown::unrooted_must_root))]
6
7use std::cell::Cell;
8use std::io;
9
10use html5ever::buffer_queue::BufferQueue;
11use html5ever::serialize::TraversalScope::IncludeNode;
12use html5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
13use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
14use html5ever::tree_builder::{QuirksMode as HTML5EverQuirksMode, TreeBuilder, TreeBuilderOpts};
15use html5ever::{QualName, local_name, ns};
16use markup5ever::TokenizerResult;
17use script_bindings::trace::CustomTraceable;
18use servo_url::ServoUrl;
19use style::attr::AttrValue;
20use style::context::QuirksMode as StyleContextQuirksMode;
21use xml5ever::LocalName;
22
23use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
24use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::ShadowRootMode;
25use crate::dom::bindings::codegen::GenericBindings::ShadowRootBinding::ShadowRoot_Binding::ShadowRootMethods;
26use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
27use crate::dom::bindings::root::{Dom, DomRoot};
28use crate::dom::characterdata::CharacterData;
29use crate::dom::document::Document;
30use crate::dom::documentfragment::DocumentFragment;
31use crate::dom::documenttype::DocumentType;
32use crate::dom::element::Element;
33use crate::dom::html::htmlscriptelement::HTMLScriptElement;
34use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
35use crate::dom::node::Node;
36use crate::dom::processinginstruction::ProcessingInstruction;
37use crate::dom::servoparser::{ParsingAlgorithm, Sink};
38use crate::dom::shadowroot::ShadowRoot;
39use crate::script_runtime::CanGc;
40
/// HTML tokenizer used by the Servo parser.
///
/// This is a thin wrapper around html5ever's tokenizer, which is parameterised over a
/// tree builder that works on `Dom<Node>` handles and writes into a `Sink`.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Tokenizer {
    /// The wrapped html5ever tokenizer. It is excluded from malloc size measurement
    /// because the type is defined in html5ever.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    inner: HtmlTokenizer<TreeBuilder<Dom<Node>, Sink>>,
}
47
48impl Tokenizer {
49    pub(crate) fn new(
50        document: &Document,
51        url: ServoUrl,
52        fragment_context: Option<super::FragmentContext>,
53        parsing_algorithm: ParsingAlgorithm,
54    ) -> Self {
55        let custom_element_reaction_stack = document.custom_element_reaction_stack();
56        let sink = Sink {
57            base_url: url,
58            document: Dom::from_ref(document),
59            current_line: Cell::new(1),
60            script: Default::default(),
61            parsing_algorithm,
62            custom_element_reaction_stack,
63        };
64
65        let quirks_mode = match document.quirks_mode() {
66            StyleContextQuirksMode::Quirks => HTML5EverQuirksMode::Quirks,
67            StyleContextQuirksMode::LimitedQuirks => HTML5EverQuirksMode::LimitedQuirks,
68            StyleContextQuirksMode::NoQuirks => HTML5EverQuirksMode::NoQuirks,
69        };
70
71        let options = TreeBuilderOpts {
72            scripting_enabled: document.scripting_enabled(),
73            iframe_srcdoc: document.url().as_str() == "about:srcdoc",
74            quirks_mode,
75            ..Default::default()
76        };
77
78        let inner = if let Some(fragment_context) = fragment_context {
79            let tree_builder = TreeBuilder::new_for_fragment(
80                sink,
81                Dom::from_ref(fragment_context.context_elem),
82                fragment_context.form_elem.map(Dom::from_ref),
83                options,
84            );
85
86            let tokenizer_options = TokenizerOpts {
87                initial_state: Some(tree_builder.tokenizer_state_for_context_elem(
88                    fragment_context.context_element_allows_scripting,
89                )),
90                ..Default::default()
91            };
92
93            HtmlTokenizer::new(tree_builder, tokenizer_options)
94        } else {
95            HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
96        };
97
98        Tokenizer { inner }
99    }
100
101    pub(crate) fn feed(&self, input: &BufferQueue) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
102        match self.inner.feed(input) {
103            TokenizerResult::Done => TokenizerResult::Done,
104            TokenizerResult::Script(script) => {
105                TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()))
106            },
107            TokenizerResult::EncodingIndicator(encoding) => {
108                TokenizerResult::EncodingIndicator(encoding)
109            },
110        }
111    }
112
113    pub(crate) fn end(&self) {
114        self.inner.end();
115    }
116
117    pub(crate) fn url(&self) -> &ServoUrl {
118        &self.inner.sink.sink.base_url
119    }
120
121    pub(crate) fn set_plaintext_state(&self) {
122        self.inner.set_plaintext_state();
123    }
124
125    pub(crate) fn get_current_line(&self) -> u32 {
126        self.inner.sink.sink.current_line.get() as u32
127    }
128}
129
130/// <https://html.spec.whatwg.org/multipage/#html-fragment-serialisation-algorithm>
131fn start_element<S: Serializer>(element: &Element, serializer: &mut S) -> io::Result<()> {
132    let name = QualName::new(
133        None,
134        element.namespace().clone(),
135        element.local_name().clone(),
136    );
137
138    let mut attributes = vec![];
139
140    // The "is" value of an element is treated as if it was an attribute and it is serialized before all
141    // other attributes. If the element already has an "is" attribute then the "is" value is ignored.
142    if !element.has_attribute(&LocalName::from("is")) {
143        if let Some(is_value) = element.get_is() {
144            let qualified_name = QualName::new(None, ns!(), LocalName::from("is"));
145
146            attributes.push((qualified_name, AttrValue::String(is_value.to_string())));
147        }
148    }
149
150    // Collect all the "normal" attributes
151    attributes.extend(element.attrs().iter().map(|attr| {
152        let qname = QualName::new(None, attr.namespace().clone(), attr.local_name().clone());
153        let value = attr.value().clone();
154        (qname, value)
155    }));
156
157    let attr_refs = attributes.iter().map(|(qname, value)| {
158        let ar: AttrRef = (qname, &**value);
159        ar
160    });
161    serializer.start_elem(name, attr_refs)?;
162    Ok(())
163}
164
/// A single step of the serialization walk yielded by `SerializationIterator` and
/// executed by `serialize_html_fragment`.
enum SerializationCommand {
    /// Write the start tag of the given element.
    OpenElement(DomRoot<Element>),
    /// Write an end tag with the given name.
    CloseElement(QualName),
    /// Write a node that is not an element (doctype, text, comment or processing
    /// instruction; document fragments and attributes produce no output).
    SerializeNonelement(DomRoot<Node>),
    /// Write the opening `template` tag that represents the given shadow root.
    SerializeShadowRoot(DomRoot<ShadowRoot>),
}
171
/// Iterator that walks a DOM subtree and yields the `SerializationCommand`s needed to
/// serialize it.
struct SerializationIterator {
    /// Pending commands. The next command to be yielded is the last element.
    stack: Vec<SerializationCommand>,

    /// Whether or not shadow roots should be serialized
    serialize_shadow_roots: bool,

    /// List of shadow root objects that should be serialized
    shadow_roots: Vec<DomRoot<ShadowRoot>>,
}
181
/// Three-way choice between no iterator, an iterator of type `C` and an iterator of
/// type `S`; judging by the variant names, presumably regular children versus shadow
/// tree contents.
///
/// NOTE(review): nothing in the visible part of this file references this type —
/// confirm whether it is still needed.
enum SerializationChildrenIterator<C, S> {
    None,
    Children(C),
    ShadowContents(S),
}
187
188impl SerializationIterator {
189    fn new(
190        node: &Node,
191        skip_first: bool,
192        serialize_shadow_roots: bool,
193        shadow_roots: Vec<DomRoot<ShadowRoot>>,
194        can_gc: CanGc,
195    ) -> SerializationIterator {
196        let mut ret = SerializationIterator {
197            stack: vec![],
198            serialize_shadow_roots,
199            shadow_roots,
200        };
201        if skip_first || node.is::<DocumentFragment>() || node.is::<Document>() {
202            ret.handle_node_contents(node, can_gc);
203        } else {
204            ret.push_node(node);
205        }
206        ret
207    }
208
209    fn handle_node_contents(&mut self, node: &Node, can_gc: CanGc) {
210        if node.downcast::<Element>().is_some_and(Element::is_void) {
211            return;
212        }
213
214        if let Some(template_element) = node.downcast::<HTMLTemplateElement>() {
215            for child in template_element
216                .Content(can_gc)
217                .upcast::<Node>()
218                .rev_children()
219            {
220                self.push_node(&child);
221            }
222        } else {
223            for child in node.rev_children() {
224                self.push_node(&child);
225            }
226        }
227
228        if let Some(shadow_root) = node.downcast::<Element>().and_then(Element::shadow_root) {
229            let should_be_serialized = (self.serialize_shadow_roots && shadow_root.Serializable()) ||
230                self.shadow_roots.contains(&shadow_root);
231            if !shadow_root.is_user_agent_widget() && should_be_serialized {
232                self.stack
233                    .push(SerializationCommand::SerializeShadowRoot(shadow_root));
234            }
235        }
236    }
237
238    fn push_node(&mut self, node: &Node) {
239        let Some(element) = node.downcast::<Element>() else {
240            self.stack.push(SerializationCommand::SerializeNonelement(
241                DomRoot::from_ref(node),
242            ));
243            return;
244        };
245
246        self.stack
247            .push(SerializationCommand::OpenElement(DomRoot::from_ref(
248                element,
249            )));
250    }
251}
252
253impl Iterator for SerializationIterator {
254    type Item = SerializationCommand;
255
256    fn next(&mut self) -> Option<SerializationCommand> {
257        let res = self.stack.pop()?;
258
259        match &res {
260            SerializationCommand::OpenElement(element) => {
261                let name = QualName::new(
262                    None,
263                    element.namespace().clone(),
264                    element.local_name().clone(),
265                );
266                self.stack.push(SerializationCommand::CloseElement(name));
267                self.handle_node_contents(element.upcast(), CanGc::note());
268            },
269            SerializationCommand::SerializeShadowRoot(shadow_root) => {
270                self.stack
271                    .push(SerializationCommand::CloseElement(QualName::new(
272                        None,
273                        ns!(),
274                        local_name!("template"),
275                    )));
276                self.handle_node_contents(shadow_root.upcast(), CanGc::note());
277            },
278            _ => {},
279        }
280
281        Some(res)
282    }
283}
284
285/// <https://html.spec.whatwg.org/multipage/#html-fragment-serialisation-algorithm>
286pub(crate) fn serialize_html_fragment<S: Serializer>(
287    node: &Node,
288    serializer: &mut S,
289    traversal_scope: TraversalScope,
290    serialize_shadow_roots: bool,
291    shadow_roots: Vec<DomRoot<ShadowRoot>>,
292    can_gc: CanGc,
293) -> io::Result<()> {
294    let iter = SerializationIterator::new(
295        node,
296        traversal_scope != IncludeNode,
297        serialize_shadow_roots,
298        shadow_roots,
299        can_gc,
300    );
301
302    for cmd in iter {
303        match cmd {
304            SerializationCommand::OpenElement(n) => {
305                start_element(&n, serializer)?;
306            },
307            SerializationCommand::CloseElement(name) => {
308                serializer.end_elem(name)?;
309            },
310            SerializationCommand::SerializeNonelement(n) => match n.type_id() {
311                NodeTypeId::DocumentType => {
312                    let doctype = n.downcast::<DocumentType>().unwrap();
313                    serializer.write_doctype(&doctype.name().str())?;
314                },
315
316                NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) => {
317                    let cdata = n.downcast::<CharacterData>().unwrap();
318                    serializer.write_text(&cdata.data())?;
319                },
320
321                NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => {
322                    let cdata = n.downcast::<CharacterData>().unwrap();
323                    serializer.write_comment(&cdata.data())?;
324                },
325
326                NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => {
327                    let pi = n.downcast::<ProcessingInstruction>().unwrap();
328                    let data = pi.upcast::<CharacterData>().data();
329                    serializer.write_processing_instruction(&pi.target().str(), &data)?;
330                },
331
332                NodeTypeId::DocumentFragment(_) | NodeTypeId::Attr => {},
333
334                NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"),
335                NodeTypeId::Element(_) => panic!("Element shouldn't appear here"),
336            },
337            SerializationCommand::SerializeShadowRoot(shadow_root) => {
338                // Shadow roots are serialized as template elements with a fixed set of
339                // attributes. Because these template elements don't actually exist in the DOM
340                // we have to make up a vector of attributes ourselves.
341                let mut attributes = vec![];
342                let mut push_attribute = |name, value| {
343                    let qualified_name = QualName::new(None, ns!(), LocalName::from(name));
344                    attributes.push((qualified_name, value))
345                };
346
347                let mode = if shadow_root.Mode() == ShadowRootMode::Open {
348                    "open"
349                } else {
350                    "closed"
351                };
352                push_attribute("shadowrootmode", mode);
353
354                if shadow_root.DelegatesFocus() {
355                    push_attribute("shadowrootdelegatesfocus", "");
356                }
357
358                if shadow_root.Serializable() {
359                    push_attribute("shadowrootserializable", "");
360                }
361
362                if shadow_root.Clonable() {
363                    push_attribute("shadowrootclonable", "");
364                }
365
366                let name = QualName::new(None, ns!(), local_name!("template"));
367                serializer.start_elem(name, attributes.iter().map(|(a, b)| (a, *b)))?;
368            },
369        }
370    }
371
372    Ok(())
373}
374
/// Adapter that lets a DOM `Node` be serialized through html5ever's `Serialize` trait.
pub(crate) struct HtmlSerialize<'a> {
    /// The node to serialize.
    node: &'a Node,
}
378
379impl<'a> HtmlSerialize<'a> {
380    pub(crate) fn new(node: &'a Node) -> HtmlSerialize<'a> {
381        HtmlSerialize { node }
382    }
383}
384
385impl Serialize for HtmlSerialize<'_> {
386    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
387    where
388        S: Serializer,
389    {
390        serialize_html_fragment(
391            self.node,
392            serializer,
393            traversal_scope,
394            false,
395            vec![],
396            CanGc::note(),
397        )
398    }
399}