script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::Cell;
7use std::rc::Rc;
8
9use base::cross_process_instant::CrossProcessInstant;
10use base::id::PipelineId;
11use base64::Engine as _;
12use base64::engine::general_purpose;
13use devtools_traits::ScriptToDevtoolsControlMsg;
14use dom_struct::dom_struct;
15use embedder_traits::resources::{self, Resource};
16use encoding_rs::Encoding;
17use html5ever::buffer_queue::BufferQueue;
18use html5ever::tendril::fmt::UTF8;
19use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
20use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
21use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
22use hyper_serde::Serde;
23use markup5ever::TokenizerResult;
24use mime::{self, Mime};
25use net_traits::policy_container::PolicyContainer;
26use net_traits::request::RequestId;
27use net_traits::{
28    FetchMetadata, FetchResponseListener, Metadata, NetworkError, ResourceFetchTiming,
29    ResourceTimingType,
30};
31use profile_traits::time::{
32    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
33};
34use profile_traits::time_profile;
35use script_traits::DocumentActivity;
36use servo_config::pref;
37use servo_url::ServoUrl;
38use style::context::QuirksMode as ServoQuirksMode;
39use tendril::stream::LossyDecoder;
40
41use crate::document_loader::{DocumentLoader, LoadType};
42use crate::dom::bindings::cell::DomRefCell;
43use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
44    DocumentMethods, DocumentReadyState,
45};
46use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
47use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
48use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
49use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
50    ShadowRootMode, SlotAssignmentMode,
51};
52use crate::dom::bindings::inheritance::Castable;
53use crate::dom::bindings::refcounted::Trusted;
54use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object};
55use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
56use crate::dom::bindings::settings_stack::is_execution_stack_empty;
57use crate::dom::bindings::str::{DOMString, USVString};
58use crate::dom::characterdata::CharacterData;
59use crate::dom::comment::Comment;
60use crate::dom::csp::{CspReporting, GlobalCspReporting, Violation, parse_csp_list_from_metadata};
61use crate::dom::customelementregistry::CustomElementReactionStack;
62use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
63use crate::dom::documentfragment::DocumentFragment;
64use crate::dom::documenttype::DocumentType;
65use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
66use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
67use crate::dom::html::htmlimageelement::HTMLImageElement;
68use crate::dom::html::htmlinputelement::HTMLInputElement;
69use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
70use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
71use crate::dom::node::{Node, ShadowIncluding};
72use crate::dom::performanceentry::PerformanceEntry;
73use crate::dom::performancenavigationtiming::PerformanceNavigationTiming;
74use crate::dom::processinginstruction::ProcessingInstruction;
75use crate::dom::reportingendpoint::ReportingEndpoint;
76use crate::dom::shadowroot::IsUserAgentWidget;
77use crate::dom::text::Text;
78use crate::dom::virtualmethods::vtable_for;
79use crate::network_listener::PreInvoke;
80use crate::realms::enter_realm;
81use crate::script_runtime::{CanGc, IntroductionType};
82use crate::script_thread::ScriptThread;
83
84mod async_html;
85mod html;
86mod prefetch;
87mod xml;
88
89pub(crate) use html::serialize_html_fragment;
90
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    /// The reflector of this DOM object.
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The BOM sniffing state.
    ///
    /// `None` means we've found the BOM, we've found there isn't one, or
    /// we're not parsing from a byte stream. `Some` contains the BOM bytes
    /// found so far.
    bom_sniff: DomRefCell<Option<Vec<u8>>>,
    /// The decoder used for the network input.
    ///
    /// It is taken (leaving `None`) once the last chunk has been received and
    /// the decoder has been flushed.
    network_decoder: DomRefCell<Option<NetworkDecoder>>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether the last chunk of input has been received, i.e. whether no
    /// further input is expected from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// Input queued for the prefetch tokenizer; it receives a copy of each
    /// non-empty chunk when the document has a browsing context.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
147
/// A single attribute, stored as a qualified name together with its value.
pub(crate) struct ElementAttribute {
    /// The qualified name of the attribute.
    name: QualName,
    /// The value of the attribute.
    value: DOMString,
}
152
/// Tells the HTML tokenizer which parsing algorithm it is being created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Used when parsing a whole document (see `ServoParser::parse_html_document`).
    Normal,
    /// Used when parsing an HTML fragment (see `ServoParser::parse_html_fragment`).
    Fragment,
}
158
159impl ElementAttribute {
160    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
161        ElementAttribute { name, value }
162    }
163}
164
165impl ServoParser {
    /// Returns the result of [`ServoParser::can_write`]: `true` when this
    /// parser is script-created or its script nesting level is above zero.
    ///
    /// NOTE(review): the name reads like the negation of
    /// [`ServoParser::is_active`], but the two methods test different
    /// conditions — confirm the intended meaning against the callers.
    pub(crate) fn parser_is_not_active(&self) -> bool {
        self.can_write()
    }
169
170    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
171    pub(crate) fn parse_html_document(
172        document: &Document,
173        input: Option<DOMString>,
174        url: ServoUrl,
175        can_gc: CanGc,
176    ) {
177        // Step 1. Set document's type to "html".
178        //
179        // Set by callers of this function and asserted here
180        assert!(document.is_html_document());
181        // Step 2. Create an HTML parser parser, associated with document.
182        let parser = if pref!(dom_servoparser_async_html_tokenizer_enabled) {
183            ServoParser::new(
184                document,
185                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)),
186                ParserKind::Normal,
187                can_gc,
188            )
189        } else {
190            ServoParser::new(
191                document,
192                Tokenizer::Html(self::html::Tokenizer::new(
193                    document,
194                    url,
195                    None,
196                    ParsingAlgorithm::Normal,
197                )),
198                ParserKind::Normal,
199                can_gc,
200            )
201        };
202        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
203        // Step 4. Start parser and let it run until it has consumed all the
204        // characters just inserted into the input stream.
205        //
206        // Set as the document's current parser and initialize with `input`, if given.
207        if let Some(input) = input {
208            parser.parse_complete_string_chunk(String::from(input), can_gc);
209        } else {
210            parser.document.set_current_parser(Some(&parser));
211        }
212    }
213
214    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
215    pub(crate) fn parse_html_fragment(
216        context: &Element,
217        input: DOMString,
218        allow_declarative_shadow_roots: bool,
219        can_gc: CanGc,
220    ) -> impl Iterator<Item = DomRoot<Node>> + use<'_> {
221        let context_node = context.upcast::<Node>();
222        let context_document = context_node.owner_doc();
223        let window = context_document.window();
224        let url = context_document.url();
225
226        // Step 1. Let document be a Document node whose type is "html".
227        let loader = DocumentLoader::new_with_threads(
228            context_document.loader().resource_threads().clone(),
229            Some(url.clone()),
230        );
231        let document = Document::new(
232            window,
233            HasBrowsingContext::No,
234            Some(url.clone()),
235            context_document.origin().clone(),
236            IsHTMLDocument::HTMLDocument,
237            None,
238            None,
239            DocumentActivity::Inactive,
240            DocumentSource::FromParser,
241            loader,
242            None,
243            None,
244            Default::default(),
245            false,
246            allow_declarative_shadow_roots,
247            Some(context_document.insecure_requests_policy()),
248            context_document.has_trustworthy_ancestor_or_current_origin(),
249            context_document.custom_element_reaction_stack(),
250            can_gc,
251        );
252
253        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
254        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
255        // mode to "limited-quirks".
256        document.set_quirks_mode(context_document.quirks_mode());
257
258        // NOTE: The following steps happened as part of Step 1.
259        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
260        // allow declarative shadow roots to true.
261        // Step 5. Create a new HTML parser, and associate it with document.
262
263        // Step 11.
264        let form = context_node
265            .inclusive_ancestors(ShadowIncluding::No)
266            .find(|element| element.is::<HTMLFormElement>());
267
268        let fragment_context = FragmentContext {
269            context_elem: context_node,
270            form_elem: form.as_deref(),
271            context_element_allows_scripting: context_document.scripting_enabled(),
272        };
273
274        let parser = ServoParser::new(
275            &document,
276            Tokenizer::Html(self::html::Tokenizer::new(
277                &document,
278                url,
279                Some(fragment_context),
280                ParsingAlgorithm::Fragment,
281            )),
282            ParserKind::Normal,
283            can_gc,
284        );
285        parser.parse_complete_string_chunk(String::from(input), can_gc);
286
287        // Step 14.
288        let root_element = document.GetDocumentElement().expect("no document element");
289        FragmentParsingResult {
290            inner: root_element.upcast::<Node>().children(),
291        }
292    }
293
294    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
295        let parser = ServoParser::new(
296            document,
297            Tokenizer::Html(self::html::Tokenizer::new(
298                document,
299                url,
300                None,
301                ParsingAlgorithm::Normal,
302            )),
303            ParserKind::ScriptCreated,
304            CanGc::note(),
305        );
306        *parser.bom_sniff.borrow_mut() = None;
307        document.set_current_parser(Some(&parser));
308    }
309
310    pub(crate) fn parse_xml_document(
311        document: &Document,
312        input: Option<DOMString>,
313        url: ServoUrl,
314        can_gc: CanGc,
315    ) {
316        let parser = ServoParser::new(
317            document,
318            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
319            ParserKind::Normal,
320            can_gc,
321        );
322
323        // Set as the document's current parser and initialize with `input`, if given.
324        if let Some(input) = input {
325            parser.parse_complete_string_chunk(String::from(input), can_gc);
326        } else {
327            parser.document.set_current_parser(Some(&parser));
328        }
329    }
330
    /// Returns the current
    /// [script nesting level](https://html.spec.whatwg.org/multipage/#script-nesting-level)
    /// of this parser.
    pub(crate) fn script_nesting_level(&self) -> usize {
        self.script_nesting_level.get()
    }
334
    /// Whether this is a
    /// [script-created parser](https://html.spec.whatwg.org/multipage/#script-created-parser).
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
338
339    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
340    /// tag whose tag name is "script"' of
341    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
342    ///
343    /// This first moves everything from the script input to the beginning of
344    /// the network input, effectively resetting the insertion point to just
345    /// before the next character to be consumed.
346    ///
347    ///
348    /// ```text
349    ///     | ... script input ... network input ...
350    ///     ^
351    ///     insertion point
352    /// ```
353    pub(crate) fn resume_with_pending_parsing_blocking_script(
354        &self,
355        script: &HTMLScriptElement,
356        result: ScriptResult,
357        can_gc: CanGc,
358    ) {
359        assert!(self.suspended.get());
360        self.suspended.set(false);
361
362        self.script_input.swap_with(&self.network_input);
363        while let Some(chunk) = self.script_input.pop_front() {
364            self.network_input.push_back(chunk);
365        }
366
367        let script_nesting_level = self.script_nesting_level.get();
368        assert_eq!(script_nesting_level, 0);
369
370        self.script_nesting_level.set(script_nesting_level + 1);
371        script.execute(result, can_gc);
372        self.script_nesting_level.set(script_nesting_level);
373
374        if !self.suspended.get() && !self.aborted.get() {
375            self.parse_sync(can_gc);
376        }
377    }
378
379    pub(crate) fn can_write(&self) -> bool {
380        self.script_created_parser || self.script_nesting_level.get() > 0
381    }
382
383    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
384    pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) {
385        assert!(self.can_write());
386
387        if self.document.has_pending_parsing_blocking_script() {
388            // There is already a pending parsing blocking script so the
389            // parser is suspended, we just append everything to the
390            // script input and abort these steps.
391            self.script_input.push_back(String::from(text).into());
392            return;
393        }
394
395        // There is no pending parsing blocking script, so all previous calls
396        // to document.write() should have seen their entire input tokenized
397        // and process, with nothing pushed to the parser script input.
398        assert!(self.script_input.is_empty());
399
400        let input = BufferQueue::default();
401        input.push_back(String::from(text).into());
402
403        let profiler_chan = self
404            .document
405            .window()
406            .as_global_scope()
407            .time_profiler_chan()
408            .clone();
409        let profiler_metadata = TimerMetadata {
410            url: self.document.url().as_str().into(),
411            iframe: TimerMetadataFrameType::RootWindow,
412            incremental: TimerMetadataReflowType::FirstReflow,
413        };
414        self.tokenize(
415            |tokenizer| {
416                tokenizer.feed(
417                    &input,
418                    can_gc,
419                    profiler_chan.clone(),
420                    profiler_metadata.clone(),
421                )
422            },
423            can_gc,
424        );
425
426        if self.suspended.get() {
427            // Parser got suspended, insert remaining input at end of
428            // script input, following anything written by scripts executed
429            // reentrantly during this call.
430            while let Some(chunk) = input.pop_front() {
431                self.script_input.push_back(chunk);
432            }
433            return;
434        }
435
436        assert!(input.is_empty());
437    }
438
439    // Steps 4-6 of https://html.spec.whatwg.org/multipage/#dom-document-close
440    pub(crate) fn close(&self, can_gc: CanGc) {
441        assert!(self.script_created_parser);
442
443        // Step 4.
444        self.last_chunk_received.set(true);
445
446        if self.suspended.get() {
447            // Step 5.
448            return;
449        }
450
451        // Step 6.
452        self.parse_sync(can_gc);
453    }
454
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    ///
    /// Discards all pending input, ends the tokenizer and detaches this parser
    /// from its document, moving the document's ready state through
    /// `Interactive` to `Complete`. Must not be called on a parser that was
    /// already aborted.
    pub(crate) fn abort(&self, can_gc: CanGc) {
        assert!(!self.aborted.get());
        self.aborted.set(true);

        // Step 1.
        // Throw away any input that has not been tokenized yet.
        self.script_input.replace_with(BufferQueue::default());
        self.network_input.replace_with(BufferQueue::default());

        // Step 2.
        self.document
            .set_ready_state(DocumentReadyState::Interactive, can_gc);

        // Step 3.
        self.tokenizer.end(can_gc);
        self.document.set_current_parser(None);

        // Step 4.
        self.document
            .set_ready_state(DocumentReadyState::Complete, can_gc);
    }
476
477    // https://html.spec.whatwg.org/multipage/#active-parser
478    pub(crate) fn is_active(&self) -> bool {
479        self.script_nesting_level() > 0 && !self.aborted.get()
480    }
481
482    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
483    fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self {
484        // Store the whole input for the devtools Sources panel, if the devtools server is running
485        // and we are parsing for a document load (not just things like innerHTML).
486        // TODO: check if a devtools client is actually connected and/or wants the sources?
487        let content_for_devtools = (document.global().devtools_chan().is_some() &&
488            document.has_browsing_context())
489        .then_some(DomRefCell::new(String::new()));
490
491        ServoParser {
492            reflector: Reflector::new(),
493            document: Dom::from_ref(document),
494            bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))),
495            network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
496            network_input: BufferQueue::default(),
497            script_input: BufferQueue::default(),
498            tokenizer,
499            last_chunk_received: Cell::new(false),
500            suspended: Default::default(),
501            script_nesting_level: Default::default(),
502            aborted: Default::default(),
503            script_created_parser: kind == ParserKind::ScriptCreated,
504            prefetch_tokenizer: prefetch::Tokenizer::new(document),
505            prefetch_input: BufferQueue::default(),
506            content_for_devtools,
507        }
508    }
509
510    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
511    fn new(
512        document: &Document,
513        tokenizer: Tokenizer,
514        kind: ParserKind,
515        can_gc: CanGc,
516    ) -> DomRoot<Self> {
517        reflect_dom_object(
518            Box::new(ServoParser::new_inherited(document, tokenizer, kind)),
519            document.window(),
520            can_gc,
521        )
522    }
523
524    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
525        if let Some(mut content_for_devtools) = self
526            .content_for_devtools
527            .as_ref()
528            .map(|content| content.borrow_mut())
529        {
530            // TODO: append these chunks more efficiently
531            content_for_devtools.push_str(chunk.as_ref());
532        }
533
534        if chunk.is_empty() {
535            return;
536        }
537        // Per https://github.com/whatwg/html/issues/1495
538        // stylesheets should not be loaded for documents
539        // without browsing contexts.
540        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
541        // suggests that no content should be preloaded in such a case.
542        // We're conservative, and only prefetch for documents
543        // with browsing contexts.
544        if self.document.browsing_context().is_some() {
545            // Push the chunk into the prefetch input stream,
546            // which is tokenized eagerly, to scan for resources
547            // to prefetch. If the user script uses `document.write()`
548            // to overwrite the network input, this prefetching may
549            // have been wasted, but in most cases it won't.
550            self.prefetch_input.push_back(chunk.clone());
551            self.prefetch_tokenizer.feed(&self.prefetch_input);
552        }
553        // Push the chunk into the network input stream,
554        // which is tokenized lazily.
555        self.network_input.push_back(chunk);
556    }
557
558    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
559        // BOM sniff. This is needed because NetworkDecoder will switch the
560        // encoding based on the BOM, but it won't change
561        // `self.document.encoding` in the process.
562        {
563            let mut bom_sniff = self.bom_sniff.borrow_mut();
564            if let Some(partial_bom) = bom_sniff.as_mut() {
565                if partial_bom.len() + chunk.len() >= 3 {
566                    partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied());
567                    if let Some((encoding, _)) = Encoding::for_bom(partial_bom) {
568                        self.document.set_encoding(encoding);
569                    }
570                    drop(bom_sniff);
571                    *self.bom_sniff.borrow_mut() = None;
572                } else {
573                    partial_bom.extend(chunk.iter().copied());
574                }
575            }
576        }
577
578        // For byte input, we convert it to text using the network decoder.
579        let chunk = self
580            .network_decoder
581            .borrow_mut()
582            .as_mut()
583            .unwrap()
584            .decode(chunk);
585        self.push_tendril_input_chunk(chunk);
586    }
587
588    fn push_string_input_chunk(&self, chunk: String) {
589        // If the input is a string, we don't have a BOM.
590        if self.bom_sniff.borrow().is_some() {
591            *self.bom_sniff.borrow_mut() = None;
592        }
593
594        // The input has already been decoded as a string, so doesn't need
595        // to be decoded by the network decoder again.
596        let chunk = StrTendril::from(chunk);
597        self.push_tendril_input_chunk(chunk);
598    }
599
    /// Feeds the pending network input to the tokenizer and, if the last chunk
    /// has been received and the parser did not get suspended, finishes
    /// parsing.
    ///
    /// Must only be called while the script input is empty.
    fn parse_sync(&self, can_gc: CanGc) {
        assert!(self.script_input.is_empty());

        // This parser will continue to parse while there is either pending input or
        // the parser remains unsuspended.

        if self.last_chunk_received.get() {
            // No more input will arrive: take the decoder out (leaving `None`,
            // which `finish` asserts) and queue whatever text it still yields.
            if let Some(decoder) = self.network_decoder.borrow_mut().take() {
                let chunk = decoder.finish();
                if !chunk.is_empty() {
                    self.network_input.push_back(chunk);
                }
            }
        }

        let profiler_chan = self
            .document
            .window()
            .as_global_scope()
            .time_profiler_chan()
            .clone();
        let profiler_metadata = TimerMetadata {
            url: self.document.url().as_str().into(),
            iframe: TimerMetadataFrameType::RootWindow,
            incremental: TimerMetadataReflowType::FirstReflow,
        };
        self.tokenize(
            |tokenizer| {
                tokenizer.feed(
                    &self.network_input,
                    can_gc,
                    profiler_chan.clone(),
                    profiler_metadata.clone(),
                )
            },
            can_gc,
        );

        // A script encountered while tokenizing may have suspended the parser;
        // in that case the remaining input stays queued.
        if self.suspended.get() {
            return;
        }

        assert!(self.network_input.is_empty());

        if self.last_chunk_received.get() {
            self.finish(can_gc);
        }
    }
648
649    fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) {
650        self.document.set_current_parser(Some(self));
651        self.push_string_input_chunk(input);
652        self.last_chunk_received.set(true);
653        if !self.suspended.get() {
654            self.parse_sync(can_gc);
655        }
656    }
657
658    fn parse_bytes_chunk(&self, input: Vec<u8>, can_gc: CanGc) {
659        self.document.set_current_parser(Some(self));
660        self.push_bytes_input_chunk(input);
661        if !self.suspended.get() {
662            self.parse_sync(can_gc);
663        }
664    }
665
    /// Repeatedly runs `feed` on this parser's tokenizer, preparing every
    /// script element the tokenizer hands back.
    ///
    /// Returns when the tokenizer reports `Done`, when preparing a script
    /// leaves the document with a pending parsing-blocking script (the parser
    /// is then marked suspended), or when the parser was aborted meanwhile.
    /// The parser must be neither suspended nor aborted at the start of each
    /// iteration.
    fn tokenize<F>(&self, feed: F, can_gc: CanGc)
    where
        F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
    {
        loop {
            assert!(!self.suspended.get());
            assert!(!self.aborted.get());

            self.document.window().reflow_if_reflow_timer_expired();
            let script = match feed(&self.tokenizer) {
                TokenizerResult::Done => return,
                TokenizerResult::Script(script) => script,
            };

            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
            // branch "An end tag whose tag name is "script"
            // The spec says to perform the microtask checkpoint before
            // setting the insertion mode back from Text, but this is not
            // possible with the way servo and html5ever currently
            // relate to each other, and hopefully it is not observable.
            if is_execution_stack_empty() {
                self.document
                    .window()
                    .as_global_scope()
                    .perform_a_microtask_checkpoint(can_gc);
            }

            let script_nesting_level = self.script_nesting_level.get();

            // Prepare the script one nesting level deeper, restoring the
            // previous level afterwards.
            self.script_nesting_level.set(script_nesting_level + 1);
            script.set_initial_script_text();
            // A script found while another script was already running under
            // this parser gets the injected-script introduction type.
            let introduction_type_override =
                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
            script.prepare(introduction_type_override, can_gc);
            self.script_nesting_level.set(script_nesting_level);

            if self.document.has_pending_parsing_blocking_script() {
                self.suspended.set(true);
                return;
            }
            if self.aborted.get() {
                return;
            }
        }
    }
711
712    // https://html.spec.whatwg.org/multipage/#the-end
713    fn finish(&self, can_gc: CanGc) {
714        assert!(!self.suspended.get());
715        assert!(self.last_chunk_received.get());
716        assert!(self.script_input.is_empty());
717        assert!(self.network_input.is_empty());
718        assert!(self.network_decoder.borrow().is_none());
719
720        // Step 1.
721        self.document
722            .set_ready_state(DocumentReadyState::Interactive, can_gc);
723
724        // Step 2.
725        self.tokenizer.end(can_gc);
726        self.document.set_current_parser(None);
727
728        // Steps 3-12 are in another castle, namely finish_load.
729        let url = self.tokenizer.url().clone();
730        self.document.finish_load(LoadType::PageSource(url), can_gc);
731
732        // Send the source contents to devtools, if needed.
733        if let Some(content_for_devtools) = self
734            .content_for_devtools
735            .as_ref()
736            .map(|content| content.take())
737        {
738            let global = self.document.global();
739            let chan = global.devtools_chan().expect("Guaranteed by new");
740            let pipeline_id = self.document.global().pipeline_id();
741            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
742                pipeline_id,
743                content_for_devtools,
744            ));
745        }
746    }
747}
748
/// Iterator returned by `ServoParser::parse_html_fragment`.
///
/// It wraps an iterator over nodes; its `Iterator` implementation calls
/// `remove_self` on each node before yielding it.
struct FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    /// The underlying node iterator.
    inner: I,
}
755
756impl<I> Iterator for FragmentParsingResult<I>
757where
758    I: Iterator<Item = DomRoot<Node>>,
759{
760    type Item = DomRoot<Node>;
761
762    fn next(&mut self) -> Option<DomRoot<Node>> {
763        let next = self.inner.next()?;
764        next.remove_self(CanGc::note());
765        Some(next)
766    }
767
768    fn size_hint(&self) -> (usize, Option<usize>) {
769        self.inner.size_hint()
770    }
771}
772
/// How a parser came into existence; this decides the value of
/// `ServoParser::script_created_parser`.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    /// A parser that was not created by script.
    Normal,
    /// A <https://html.spec.whatwg.org/multipage/#script-created-parser>.
    ScriptCreated,
}
778
/// The tokenizer implementations a `ServoParser` can drive.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// The HTML tokenizer from the `html` submodule.
    Html(self::html::Tokenizer),
    /// The HTML tokenizer from the `async_html` submodule, selected by the
    /// `dom_servoparser_async_html_tokenizer_enabled` preference.
    AsyncHtml(self::async_html::Tokenizer),
    /// The XML tokenizer from the `xml` submodule.
    Xml(self::xml::Tokenizer),
}
786
787impl Tokenizer {
788    fn feed(
789        &self,
790        input: &BufferQueue,
791        can_gc: CanGc,
792        profiler_chan: ProfilerChan,
793        profiler_metadata: TimerMetadata,
794    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
795        match *self {
796            Tokenizer::Html(ref tokenizer) => time_profile!(
797                ProfilerCategory::ScriptParseHTML,
798                Some(profiler_metadata),
799                profiler_chan,
800                || tokenizer.feed(input),
801            ),
802            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
803                ProfilerCategory::ScriptParseHTML,
804                Some(profiler_metadata),
805                profiler_chan,
806                || tokenizer.feed(input, can_gc),
807            ),
808            Tokenizer::Xml(ref tokenizer) => time_profile!(
809                ProfilerCategory::ScriptParseXML,
810                Some(profiler_metadata),
811                profiler_chan,
812                || tokenizer.feed(input),
813            ),
814        }
815    }
816
817    fn end(&self, can_gc: CanGc) {
818        match *self {
819            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
820            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc),
821            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
822        }
823    }
824
825    fn url(&self) -> &ServoUrl {
826        match *self {
827            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
828            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
829            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
830        }
831    }
832
833    fn set_plaintext_state(&self) {
834        match *self {
835            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
836            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
837            Tokenizer::Xml(_) => unimplemented!(),
838        }
839    }
840}
841
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
///
/// It implements `FetchResponseListener`, so it receives the response headers,
/// body chunks and end-of-file notification for the document fetch.
pub(crate) struct ParserContext {
    /// The parser that initiated the request. `None` until `process_response`
    /// obtains a parser from `ScriptThread::page_headers_available`.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document, i.e. one whose markup is generated
    /// locally in `process_response` (image wrapper, error pages, unknown content
    /// type warning). When set, incoming response chunks are ignored.
    is_synthesized_document: bool,
    /// The pipeline associated with this document.
    id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Timing data for this resource.
    resource_timing: ResourceFetchTiming,
    /// Index returned by `queue_entry` when `submit_resource_timing` pushed the
    /// navigation timing entry; used in `process_response_eof` to update that entry.
    pushed_entry_index: Option<usize>,
}
858
859impl ParserContext {
860    pub(crate) fn new(id: PipelineId, url: ServoUrl) -> ParserContext {
861        ParserContext {
862            parser: None,
863            is_synthesized_document: false,
864            id,
865            url,
866            resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation),
867            pushed_entry_index: None,
868        }
869    }
870
871    pub(crate) fn append_parent_to_csp_list(&self, policy_container: Option<&PolicyContainer>) {
872        let Some(policy_container) = policy_container else {
873            return;
874        };
875        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
876            return;
877        };
878        let new_csp_list = parser
879            .document
880            .get_csp_list()
881            .concatenate(policy_container.csp_list.clone());
882        parser.document.set_csp_list(new_csp_list);
883    }
884}
885
886impl FetchResponseListener for ParserContext {
887    fn process_request_body(&mut self, _: RequestId) {}
888
889    fn process_request_eof(&mut self, _: RequestId) {}
890
891    fn process_response(&mut self, _: RequestId, meta_result: Result<FetchMetadata, NetworkError>) {
892        let (metadata, error) = match meta_result {
893            Ok(meta) => (
894                Some(match meta {
895                    FetchMetadata::Unfiltered(m) => m,
896                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
897                }),
898                None,
899            ),
900            Err(error) => (
901                // Check variant without moving
902                match &error {
903                    NetworkError::SslValidation(..) |
904                    NetworkError::Internal(..) |
905                    NetworkError::Crash(..) => {
906                        let mut meta = Metadata::default(self.url.clone());
907                        let mime: Option<Mime> = "text/html".parse().ok();
908                        meta.set_content_type(mime.as_ref());
909                        Some(meta)
910                    },
911                    _ => None,
912                },
913                Some(error),
914            ),
915        };
916        let content_type: Option<Mime> = metadata
917            .clone()
918            .and_then(|meta| meta.content_type)
919            .map(Serde::into_inner)
920            .map(Into::into);
921
922        let (csp_list, endpoints_list) = match metadata.as_ref() {
923            None => (None, None),
924            Some(m) => (
925                parse_csp_list_from_metadata(&m.headers),
926                ReportingEndpoint::parse_reporting_endpoints_header(&self.url.clone(), &m.headers),
927            ),
928        };
929
930        let parser = match ScriptThread::page_headers_available(&self.id, metadata, CanGc::note()) {
931            Some(parser) => parser,
932            None => return,
933        };
934        if parser.aborted.get() {
935            return;
936        }
937
938        let _realm = enter_realm(&*parser.document);
939
940        if let Some(endpoints) = endpoints_list {
941            parser.document.window().set_endpoints_list(endpoints);
942        }
943        self.parser = Some(Trusted::new(&*parser));
944        self.submit_resource_timing();
945
946        let content_type = match content_type {
947            Some(ref content_type) => content_type,
948            None => {
949                // No content-type header.
950                // Merge with #4212 when fixed.
951                return;
952            },
953        };
954
955        match (
956            content_type.type_(),
957            content_type.subtype(),
958            content_type.suffix(),
959        ) {
960            (mime::IMAGE, _, _) => {
961                self.is_synthesized_document = true;
962                let page = "<html><body></body></html>".into();
963                parser.push_string_input_chunk(page);
964                parser.parse_sync(CanGc::note());
965
966                let doc = &parser.document;
967                let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
968                let img = HTMLImageElement::new(
969                    local_name!("img"),
970                    None,
971                    doc,
972                    None,
973                    ElementCreator::ParserCreated(1),
974                    CanGc::note(),
975                );
976                img.SetSrc(USVString(self.url.to_string()));
977                doc_body
978                    .AppendChild(&DomRoot::upcast::<Node>(img), CanGc::note())
979                    .expect("Appending failed");
980            },
981            (mime::TEXT, mime::PLAIN, _) => {
982                // https://html.spec.whatwg.org/multipage/#read-text
983                let page = "<pre>\n".into();
984                parser.push_string_input_chunk(page);
985                parser.parse_sync(CanGc::note());
986                parser.tokenizer.set_plaintext_state();
987            },
988            (mime::TEXT, mime::HTML, _) => match error {
989                Some(NetworkError::SslValidation(reason, bytes)) => {
990                    self.is_synthesized_document = true;
991                    let page = resources::read_string(Resource::BadCertHTML);
992                    let page = page.replace("${reason}", &reason);
993                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
994                    let page = page.replace("${bytes}", encoded_bytes.as_str());
995                    let page =
996                        page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string());
997                    parser.push_string_input_chunk(page);
998                    parser.parse_sync(CanGc::note());
999                },
1000                Some(NetworkError::Internal(reason)) => {
1001                    self.is_synthesized_document = true;
1002                    let page = resources::read_string(Resource::NetErrorHTML);
1003                    let page = page.replace("${reason}", &reason);
1004                    parser.push_string_input_chunk(page);
1005                    parser.parse_sync(CanGc::note());
1006                },
1007                Some(NetworkError::Crash(details)) => {
1008                    self.is_synthesized_document = true;
1009                    let page = resources::read_string(Resource::CrashHTML);
1010                    let page = page.replace("${details}", &details);
1011                    parser.push_string_input_chunk(page);
1012                    parser.parse_sync(CanGc::note());
1013                },
1014                Some(_) => {},
1015                None => parser.document.set_csp_list(csp_list),
1016            },
1017            (mime::TEXT, mime::XML, _) |
1018            (mime::APPLICATION, mime::XML, _) |
1019            (mime::APPLICATION, mime::JSON, _) => parser.document.set_csp_list(csp_list),
1020            (mime::APPLICATION, subtype, Some(mime::XML)) if subtype == "xhtml" => {
1021                parser.document.set_csp_list(csp_list)
1022            },
1023            (mime_type, subtype, _) => {
1024                // Show warning page for unknown mime types.
1025                let page = format!(
1026                    "<html><body><p>Unknown content type ({}/{}).</p></body></html>",
1027                    mime_type.as_str(),
1028                    subtype.as_str()
1029                );
1030                self.is_synthesized_document = true;
1031                parser.push_string_input_chunk(page);
1032                parser.parse_sync(CanGc::note());
1033            },
1034        }
1035    }
1036
1037    fn process_response_chunk(&mut self, _: RequestId, payload: Vec<u8>) {
1038        if self.is_synthesized_document {
1039            return;
1040        }
1041        let parser = match self.parser.as_ref() {
1042            Some(parser) => parser.root(),
1043            None => return,
1044        };
1045        if parser.aborted.get() {
1046            return;
1047        }
1048        let _realm = enter_realm(&*parser);
1049        parser.parse_bytes_chunk(payload, CanGc::note());
1050    }
1051
1052    // This method is called via script_thread::handle_fetch_eof, so we must call
1053    // submit_resource_timing in this function
1054    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1055    fn process_response_eof(
1056        &mut self,
1057        _: RequestId,
1058        status: Result<ResourceFetchTiming, NetworkError>,
1059    ) {
1060        let parser = match self.parser.as_ref() {
1061            Some(parser) => parser.root(),
1062            None => return,
1063        };
1064        if parser.aborted.get() {
1065            return;
1066        }
1067
1068        let _realm = enter_realm(&*parser);
1069
1070        match status {
1071            // are we throwing this away or can we use it?
1072            Ok(_) => (),
1073            // TODO(Savago): we should send a notification to callers #5463.
1074            Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err),
1075        }
1076
1077        parser
1078            .document
1079            .set_redirect_count(self.resource_timing.redirect_count);
1080
1081        parser.last_chunk_received.set(true);
1082        if !parser.suspended.get() {
1083            parser.parse_sync(CanGc::note());
1084        }
1085
1086        // TODO: Only update if this is the current document resource.
1087        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
1088        if let Some(pushed_index) = self.pushed_entry_index {
1089            let document = &parser.document;
1090            let performance_entry = PerformanceNavigationTiming::new(
1091                &document.global(),
1092                CrossProcessInstant::now(),
1093                document,
1094                CanGc::note(),
1095            );
1096            document
1097                .global()
1098                .performance()
1099                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1100        }
1101    }
1102
1103    fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming {
1104        &mut self.resource_timing
1105    }
1106
1107    fn resource_timing(&self) -> &ResourceFetchTiming {
1108        &self.resource_timing
1109    }
1110
1111    // store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1112    fn submit_resource_timing(&mut self) {
1113        let parser = match self.parser.as_ref() {
1114            Some(parser) => parser.root(),
1115            None => return,
1116        };
1117        if parser.aborted.get() {
1118            return;
1119        }
1120
1121        let document = &parser.document;
1122
1123        // TODO: Pass a proper fetch start time here.
1124        let performance_entry = PerformanceNavigationTiming::new(
1125            &document.global(),
1126            CrossProcessInstant::now(),
1127            document,
1128            CanGc::note(),
1129        );
1130        self.pushed_entry_index = document.global().performance().queue_entry(
1131            performance_entry.upcast::<PerformanceEntry>(),
1132            CanGc::note(),
1133        );
1134    }
1135
1136    fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec<Violation>) {
1137        let parser = match self.parser.as_ref() {
1138            Some(parser) => parser.root(),
1139            None => return,
1140        };
1141        let document = &parser.document;
1142        let global = &document.global();
1143        // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved
1144        global.report_csp_violations(violations, None, None);
1145    }
1146}
1147
// `ParserContext` overrides nothing here, so it relies entirely on whatever
// default behaviour the `PreInvoke` trait provides.
impl PreInvoke for ParserContext {}
1149
/// Borrowed inputs describing the context of a fragment parse.
///
/// NOTE(review): the consumers of this struct are outside this chunk; the field
/// descriptions below are inferred from the field names and should be confirmed.
pub(crate) struct FragmentContext<'a> {
    /// The context element node of the fragment parse.
    pub(crate) context_elem: &'a Node,
    /// An associated form element node, if any.
    pub(crate) form_elem: Option<&'a Node>,
    /// Whether the context element allows scripting.
    pub(crate) context_element_allows_scripting: bool,
}
1155
1156#[cfg_attr(crown, allow(crown::unrooted_must_root))]
1157fn insert(
1158    parent: &Node,
1159    reference_child: Option<&Node>,
1160    child: NodeOrText<Dom<Node>>,
1161    parsing_algorithm: ParsingAlgorithm,
1162    custom_element_reaction_stack: &CustomElementReactionStack,
1163    can_gc: CanGc,
1164) {
1165    match child {
1166        NodeOrText::AppendNode(n) => {
1167            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1168            // applies if this is an element; if not, it may be
1169            // https://html.spec.whatwg.org/multipage/#insert-a-comment
1170            let element_in_non_fragment =
1171                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
1172            if element_in_non_fragment {
1173                custom_element_reaction_stack.push_new_element_queue();
1174            }
1175            parent.InsertBefore(&n, reference_child, can_gc).unwrap();
1176            if element_in_non_fragment {
1177                custom_element_reaction_stack.pop_current_element_queue(can_gc);
1178            }
1179        },
1180        NodeOrText::AppendText(t) => {
1181            // https://html.spec.whatwg.org/multipage/#insert-a-character
1182            let text = reference_child
1183                .and_then(Node::GetPreviousSibling)
1184                .or_else(|| parent.GetLastChild())
1185                .and_then(DomRoot::downcast::<Text>);
1186
1187            if let Some(text) = text {
1188                text.upcast::<CharacterData>().append_data(&t);
1189            } else {
1190                let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc);
1191                parent
1192                    .InsertBefore(text.upcast(), reference_child, can_gc)
1193                    .unwrap();
1194            }
1195        },
1196    }
1197}
1198
/// The `TreeSink` implementation that turns tree-builder operations into DOM
/// mutations on `document`.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// NOTE(review): not read anywhere in this chunk; presumably the base URL of
    /// the document being parsed — confirm against the constructor.
    #[no_trace]
    base_url: ServoUrl,
    /// The document that created nodes belong to and that is returned by
    /// `get_document`.
    document: Dom<Document>,
    /// Line number last reported through `set_current_line`; passed to
    /// `ElementCreator::ParserCreated` when elements are created.
    current_line: Cell<u64>,
    /// NOTE(review): not used in this chunk; presumably a pending script element
    /// handed back to the parser — confirm.
    script: MutNullableDom<HTMLScriptElement>,
    /// The parsing algorithm in effect; `create_element` overrides it with
    /// `ParsingAlgorithm::Fragment` for elements flagged as templates.
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack passed to element creation and insertion.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
1211
1212impl Sink {
1213    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1214        let x = x.downcast::<Element>().expect("Element node expected");
1215        let y = y.downcast::<Element>().expect("Element node expected");
1216
1217        x.is_in_same_home_subtree(y)
1218    }
1219
1220    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1221        node.GetParentNode().is_some()
1222    }
1223}
1224
1225impl TreeSink for Sink {
1226    type Output = Self;
1227    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1228    fn finish(self) -> Self {
1229        self
1230    }
1231
1232    type Handle = Dom<Node>;
1233    type ElemName<'a>
1234        = ExpandedName<'a>
1235    where
1236        Self: 'a;
1237
1238    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1239    fn get_document(&self) -> Dom<Node> {
1240        Dom::from_ref(self.document.upcast())
1241    }
1242
1243    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1244    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1245        let template = target
1246            .downcast::<HTMLTemplateElement>()
1247            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1248        Dom::from_ref(template.Content(CanGc::note()).upcast())
1249    }
1250
1251    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1252        x == y
1253    }
1254
1255    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1256        let elem = target
1257            .downcast::<Element>()
1258            .expect("tried to get name of non-Element in HTML parsing");
1259        ExpandedName {
1260            ns: elem.namespace(),
1261            local: elem.local_name(),
1262        }
1263    }
1264
1265    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1266    fn create_element(
1267        &self,
1268        name: QualName,
1269        attrs: Vec<Attribute>,
1270        flags: ElementFlags,
1271    ) -> Dom<Node> {
1272        let attrs = attrs
1273            .into_iter()
1274            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1275            .collect();
1276        let parsing_algorithm = if flags.template {
1277            ParsingAlgorithm::Fragment
1278        } else {
1279            self.parsing_algorithm
1280        };
1281        let element = create_element_for_token(
1282            name,
1283            attrs,
1284            &self.document,
1285            ElementCreator::ParserCreated(self.current_line.get()),
1286            parsing_algorithm,
1287            &self.custom_element_reaction_stack,
1288            CanGc::note(),
1289        );
1290        Dom::from_ref(element.upcast())
1291    }
1292
1293    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1294    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1295        let comment = Comment::new(
1296            DOMString::from(String::from(text)),
1297            &self.document,
1298            None,
1299            CanGc::note(),
1300        );
1301        Dom::from_ref(comment.upcast())
1302    }
1303
1304    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1305    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1306        let doc = &*self.document;
1307        let pi = ProcessingInstruction::new(
1308            DOMString::from(String::from(target)),
1309            DOMString::from(String::from(data)),
1310            doc,
1311            CanGc::note(),
1312        );
1313        Dom::from_ref(pi.upcast())
1314    }
1315
1316    fn associate_with_form(
1317        &self,
1318        target: &Dom<Node>,
1319        form: &Dom<Node>,
1320        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1321    ) {
1322        let (element, prev_element) = nodes;
1323        let tree_node = prev_element.map_or(element, |prev| {
1324            if self.has_parent_node(element) {
1325                element
1326            } else {
1327                prev
1328            }
1329        });
1330        if !self.same_tree(tree_node, form) {
1331            return;
1332        }
1333
1334        let node = target;
1335        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1336            .expect("Owner must be a form element");
1337
1338        let elem = node.downcast::<Element>();
1339        let control = elem.and_then(|e| e.as_maybe_form_control());
1340
1341        if let Some(control) = control {
1342            control.set_form_owner_from_parser(&form, CanGc::note());
1343        }
1344    }
1345
1346    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1347    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
1348        let parent = sibling
1349            .GetParentNode()
1350            .expect("append_before_sibling called on node without parent");
1351
1352        insert(
1353            &parent,
1354            Some(sibling),
1355            new_node,
1356            self.parsing_algorithm,
1357            &self.custom_element_reaction_stack,
1358            CanGc::note(),
1359        );
1360    }
1361
1362    fn parse_error(&self, msg: Cow<'static, str>) {
1363        debug!("Parse error: {}", msg);
1364    }
1365
1366    fn set_quirks_mode(&self, mode: QuirksMode) {
1367        let mode = match mode {
1368            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1369            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1370            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1371        };
1372        self.document.set_quirks_mode(mode);
1373    }
1374
1375    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1376    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
1377        insert(
1378            parent,
1379            None,
1380            child,
1381            self.parsing_algorithm,
1382            &self.custom_element_reaction_stack,
1383            CanGc::note(),
1384        );
1385    }
1386
1387    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1388    fn append_based_on_parent_node(
1389        &self,
1390        elem: &Dom<Node>,
1391        prev_elem: &Dom<Node>,
1392        child: NodeOrText<Dom<Node>>,
1393    ) {
1394        if self.has_parent_node(elem) {
1395            self.append_before_sibling(elem, child);
1396        } else {
1397            self.append(prev_elem, child);
1398        }
1399    }
1400
1401    fn append_doctype_to_document(
1402        &self,
1403        name: StrTendril,
1404        public_id: StrTendril,
1405        system_id: StrTendril,
1406    ) {
1407        let doc = &*self.document;
1408        let doctype = DocumentType::new(
1409            DOMString::from(String::from(name)),
1410            Some(DOMString::from(String::from(public_id))),
1411            Some(DOMString::from(String::from(system_id))),
1412            doc,
1413            CanGc::note(),
1414        );
1415        doc.upcast::<Node>()
1416            .AppendChild(doctype.upcast(), CanGc::note())
1417            .expect("Appending failed");
1418    }
1419
1420    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1421        let elem = target
1422            .downcast::<Element>()
1423            .expect("tried to set attrs on non-Element in HTML parsing");
1424        for attr in attrs {
1425            elem.set_attribute_from_parser(
1426                attr.name,
1427                DOMString::from(String::from(attr.value)),
1428                None,
1429                CanGc::note(),
1430            );
1431        }
1432    }
1433
1434    fn remove_from_parent(&self, target: &Dom<Node>) {
1435        if let Some(ref parent) = target.GetParentNode() {
1436            parent.RemoveChild(target, CanGc::note()).unwrap();
1437        }
1438    }
1439
1440    fn mark_script_already_started(&self, node: &Dom<Node>) {
1441        let script = node.downcast::<HTMLScriptElement>();
1442        if let Some(script) = script {
1443            script.set_already_started(true)
1444        }
1445    }
1446
1447    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1448        while let Some(ref child) = node.GetFirstChild() {
1449            new_parent.AppendChild(child, CanGc::note()).unwrap();
1450        }
1451    }
1452
1453    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1454    /// Specifically, the `<annotation-xml>` cases.
1455    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1456        let elem = handle.downcast::<Element>().unwrap();
1457        elem.get_attribute(&ns!(), &local_name!("encoding"))
1458            .is_some_and(|attr| {
1459                attr.value().eq_ignore_ascii_case("text/html") ||
1460                    attr.value().eq_ignore_ascii_case("application/xhtml+xml")
1461            })
1462    }
1463
1464    fn set_current_line(&self, line_number: u64) {
1465        self.current_line.set(line_number);
1466    }
1467
1468    fn pop(&self, node: &Dom<Node>) {
1469        let node = DomRoot::from_ref(&**node);
1470        vtable_for(&node).pop();
1471    }
1472
1473    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
1474        intended_parent.owner_doc().allow_declarative_shadow_roots()
1475    }
1476
1477    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
1478    /// A start tag whose tag name is "template"
1479    /// Attach shadow path
1480    fn attach_declarative_shadow(
1481        &self,
1482        host: &Dom<Node>,
1483        template: &Dom<Node>,
1484        attributes: &[Attribute],
1485    ) -> bool {
1486        attach_declarative_shadow_inner(host, template, attributes)
1487    }
1488}
1489
1490/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
1491fn create_element_for_token(
1492    name: QualName,
1493    attrs: Vec<ElementAttribute>,
1494    document: &Document,
1495    creator: ElementCreator,
1496    parsing_algorithm: ParsingAlgorithm,
1497    custom_element_reaction_stack: &CustomElementReactionStack,
1498    can_gc: CanGc,
1499) -> DomRoot<Element> {
1500    // Step 3.
1501    let is = attrs
1502        .iter()
1503        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
1504        .map(|attr| LocalName::from(&*attr.value));
1505
1506    // Step 4.
1507    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());
1508
1509    // Step 5.
1510    let will_execute_script =
1511        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;
1512
1513    // Step 6.
1514    if will_execute_script {
1515        // Step 6.1.
1516        document.increment_throw_on_dynamic_markup_insertion_counter();
1517        // Step 6.2
1518        if is_execution_stack_empty() {
1519            document
1520                .window()
1521                .as_global_scope()
1522                .perform_a_microtask_checkpoint(can_gc);
1523        }
1524        // Step 6.3
1525        custom_element_reaction_stack.push_new_element_queue()
1526    }
1527
1528    // Step 7.
1529    let creation_mode = if will_execute_script {
1530        CustomElementCreationMode::Synchronous
1531    } else {
1532        CustomElementCreationMode::Asynchronous
1533    };
1534
1535    let element = Element::create(name, is, document, creator, creation_mode, None, can_gc);
1536
1537    // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3
1538    // says to invoke sanitization "when an input element is first created";
1539    // however, since sanitization requires content attributes to function,
1540    // it can't mean that literally.
1541    // Indeed, to make sanitization work correctly, we need to _not_ sanitize
1542    // until after all content attributes have been added
1543
1544    let maybe_input = element.downcast::<HTMLInputElement>();
1545    if let Some(input) = maybe_input {
1546        input.disable_sanitization();
1547    }
1548
1549    // Step 8
1550    for attr in attrs {
1551        element.set_attribute_from_parser(attr.name, attr.value, None, can_gc);
1552    }
1553
1554    // _now_ we can sanitize (and we sanitize now even if the "value"
1555    // attribute isn't present!)
1556    if let Some(input) = maybe_input {
1557        input.enable_sanitization();
1558    }
1559
1560    // Step 9.
1561    if will_execute_script {
1562        // Steps 9.1 - 9.2.
1563        custom_element_reaction_stack.pop_current_element_queue(can_gc);
1564        // Step 9.3.
1565        document.decrement_throw_on_dynamic_markup_insertion_counter();
1566    }
1567
1568    // TODO: Step 10.
1569    // TODO: Step 11.
1570
1571    // Step 12 is handled in `associate_with_form`.
1572
1573    // Step 13.
1574    element
1575}
1576
/// Incremental decoder for bytes received from the network: wraps a
/// `LossyDecoder` whose sink (`NetworkSink`) collects the decoded text.
#[derive(JSTraceable, MallocSizeOf)]
struct NetworkDecoder {
    /// The underlying decoder, constructed for a specific `encoding_rs` encoding
    /// in `NetworkDecoder::new`.
    #[ignore_malloc_size_of = "Defined in tendril"]
    #[custom_trace]
    decoder: LossyDecoder<NetworkSink>,
}
1583
1584impl NetworkDecoder {
1585    fn new(encoding: &'static Encoding) -> Self {
1586        Self {
1587            decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
1588        }
1589    }
1590
1591    fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
1592        self.decoder.process(ByteTendril::from(&*chunk));
1593        std::mem::take(&mut self.decoder.inner_sink_mut().output)
1594    }
1595
1596    fn finish(self) -> StrTendril {
1597        self.decoder.finish()
1598    }
1599}
1600
/// Sink that accumulates decoded UTF-8 text for `NetworkDecoder`.
#[derive(Default, JSTraceable)]
struct NetworkSink {
    /// Text decoded so far; `NetworkDecoder::decode` takes it (resetting it to
    /// empty) after each chunk.
    #[no_trace]
    output: StrTendril,
}
1606
1607impl TendrilSink<UTF8> for NetworkSink {
1608    type Output = StrTendril;
1609
1610    fn process(&mut self, t: StrTendril) {
1611        if self.output.is_empty() {
1612            self.output = t;
1613        } else {
1614            self.output.push_tendril(&t);
1615        }
1616    }
1617
1618    fn error(&mut self, _desc: Cow<'static, str>) {}
1619
1620    fn finish(self) -> Self::Output {
1621        self.output
1622    }
1623}
1624
1625fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool {
1626    let host_element = host.downcast::<Element>().unwrap();
1627
1628    if host_element.shadow_root().is_some() {
1629        return false;
1630    }
1631
1632    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
1633
1634    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
1635    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
1636    // Step 5. Let delegatesfocus be true if template start tag
1637    // has a shadowrootdelegatesfocus attribute; otherwise false.
1638    // Step 6. Let serializable be true if template start tag
1639    // has a shadowrootserializable attribute; otherwise false.
1640    let mut shadow_root_mode = ShadowRootMode::Open;
1641    let mut clonable = false;
1642    let mut delegatesfocus = false;
1643    let mut serializable = false;
1644
1645    let attributes: Vec<ElementAttribute> = attributes
1646        .iter()
1647        .map(|attr| {
1648            ElementAttribute::new(
1649                attr.name.clone(),
1650                DOMString::from(String::from(attr.value.clone())),
1651            )
1652        })
1653        .collect();
1654
1655    attributes
1656        .iter()
1657        .for_each(|attr: &ElementAttribute| match attr.name.local {
1658            local_name!("shadowrootmode") => {
1659                if attr.value.str().eq_ignore_ascii_case("open") {
1660                    shadow_root_mode = ShadowRootMode::Open;
1661                } else if attr.value.str().eq_ignore_ascii_case("closed") {
1662                    shadow_root_mode = ShadowRootMode::Closed;
1663                } else {
1664                    unreachable!("shadowrootmode value is not open nor closed");
1665                }
1666            },
1667            local_name!("shadowrootclonable") => {
1668                clonable = true;
1669            },
1670            local_name!("shadowrootdelegatesfocus") => {
1671                delegatesfocus = true;
1672            },
1673            local_name!("shadowrootserializable") => {
1674                serializable = true;
1675            },
1676            _ => {},
1677        });
1678
1679    // Step 8.1. Attach a shadow root with declarative shadow host element,
1680    // mode, clonable, serializable, delegatesFocus, and "named".
1681    match host_element.attach_shadow(
1682        IsUserAgentWidget::No,
1683        shadow_root_mode,
1684        clonable,
1685        serializable,
1686        delegatesfocus,
1687        SlotAssignmentMode::Named,
1688        CanGc::note(),
1689    ) {
1690        Ok(shadow_root) => {
1691            // Step 8.3. Set shadow's declarative to true.
1692            shadow_root.set_declarative(true);
1693
1694            // Set 8.4. Set template's template contents property to shadow.
1695            let shadow = shadow_root.upcast::<DocumentFragment>();
1696            template_element.set_contents(Some(shadow));
1697
1698            // Step 8.5. Set shadow’s available to element internals to true.
1699            shadow_root.set_available_to_element_internals(true);
1700
1701            true
1702        },
1703        Err(_) => false,
1704    }
1705}