script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::Cell;
7use std::rc::Rc;
8
9use base::cross_process_instant::CrossProcessInstant;
10use base::id::PipelineId;
11use base64::Engine as _;
12use base64::engine::general_purpose;
13use content_security_policy::sandboxing_directive::SandboxingFlagSet;
14use devtools_traits::ScriptToDevtoolsControlMsg;
15use dom_struct::dom_struct;
16use embedder_traits::resources::{self, Resource};
17use encoding_rs::Encoding;
18use html5ever::buffer_queue::BufferQueue;
19use html5ever::tendril::fmt::UTF8;
20use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
21use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
22use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
23use hyper_serde::Serde;
24use markup5ever::TokenizerResult;
25use mime::{self, Mime};
26use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
27use net_traits::policy_container::PolicyContainer;
28use net_traits::request::RequestId;
29use net_traits::{
30    FetchMetadata, FetchResponseListener, LoadContext, Metadata, NetworkError, ReferrerPolicy,
31    ResourceFetchTiming, ResourceTimingType,
32};
33use profile_traits::time::{
34    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
35};
36use profile_traits::time_profile;
37use script_traits::DocumentActivity;
38use servo_config::pref;
39use servo_url::ServoUrl;
40use style::context::QuirksMode as ServoQuirksMode;
41use tendril::stream::LossyDecoder;
42
43use crate::document_loader::{DocumentLoader, LoadType};
44use crate::dom::bindings::cell::DomRefCell;
45use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
46    DocumentMethods, DocumentReadyState,
47};
48use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
49use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
50use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
51use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
52use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
53    ShadowRootMode, SlotAssignmentMode,
54};
55use crate::dom::bindings::inheritance::Castable;
56use crate::dom::bindings::refcounted::Trusted;
57use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object};
58use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
59use crate::dom::bindings::settings_stack::is_execution_stack_empty;
60use crate::dom::bindings::str::{DOMString, USVString};
61use crate::dom::characterdata::CharacterData;
62use crate::dom::comment::Comment;
63use crate::dom::csp::{GlobalCspReporting, Violation, parse_csp_list_from_metadata};
64use crate::dom::customelementregistry::CustomElementReactionStack;
65use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
66use crate::dom::documentfragment::DocumentFragment;
67use crate::dom::documenttype::DocumentType;
68use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
69use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
70use crate::dom::html::htmlimageelement::HTMLImageElement;
71use crate::dom::html::htmlinputelement::HTMLInputElement;
72use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
73use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
74use crate::dom::node::{Node, ShadowIncluding};
75use crate::dom::performanceentry::PerformanceEntry;
76use crate::dom::performancenavigationtiming::PerformanceNavigationTiming;
77use crate::dom::processinginstruction::ProcessingInstruction;
78use crate::dom::reportingendpoint::ReportingEndpoint;
79use crate::dom::shadowroot::IsUserAgentWidget;
80use crate::dom::text::Text;
81use crate::dom::types::HTMLMediaElement;
82use crate::dom::virtualmethods::vtable_for;
83use crate::network_listener::PreInvoke;
84use crate::realms::enter_realm;
85use crate::script_runtime::{CanGc, IntroductionType};
86use crate::script_thread::ScriptThread;
87
88mod async_html;
89mod html;
90mod prefetch;
91mod xml;
92
93pub(crate) use html::serialize_html_fragment;
94
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The BOM sniffing state.
    ///
    /// `None` means we've found the BOM, we've found there isn't one, or
    /// we're not parsing from a byte stream. `Some` contains the BOM bytes
    /// found so far.
    bom_sniff: DomRefCell<Option<Vec<u8>>>,
    /// The decoder used for the network input.
    ///
    /// Becomes `None` once the decoder has been finished after the last chunk.
    network_decoder: DomRefCell<Option<NetworkDecoder>>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// Input queued for the prefetch tokenizer.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
151
/// An attribute expressed as a qualified name paired with its value.
pub(crate) struct ElementAttribute {
    /// Qualified name of the attribute.
    name: QualName,
    /// Value of the attribute.
    value: DOMString,
}
156
/// Selects which parsing algorithm an HTML tokenizer is created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Used when parsing a whole document.
    Normal,
    /// Used when parsing a fragment in the context of an element.
    Fragment,
}
162
163impl ElementAttribute {
164    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
165        ElementAttribute { name, value }
166    }
167}
168
169impl ServoParser {
170    pub(crate) fn parser_is_not_active(&self) -> bool {
171        self.can_write()
172    }
173
174    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
175    pub(crate) fn parse_html_document(
176        document: &Document,
177        input: Option<DOMString>,
178        url: ServoUrl,
179        can_gc: CanGc,
180    ) {
181        // Step 1. Set document's type to "html".
182        //
183        // Set by callers of this function and asserted here
184        assert!(document.is_html_document());
185        // Step 2. Create an HTML parser parser, associated with document.
186        let parser = if pref!(dom_servoparser_async_html_tokenizer_enabled) {
187            ServoParser::new(
188                document,
189                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)),
190                ParserKind::Normal,
191                can_gc,
192            )
193        } else {
194            ServoParser::new(
195                document,
196                Tokenizer::Html(self::html::Tokenizer::new(
197                    document,
198                    url,
199                    None,
200                    ParsingAlgorithm::Normal,
201                )),
202                ParserKind::Normal,
203                can_gc,
204            )
205        };
206        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
207        // Step 4. Start parser and let it run until it has consumed all the
208        // characters just inserted into the input stream.
209        //
210        // Set as the document's current parser and initialize with `input`, if given.
211        if let Some(input) = input {
212            parser.parse_complete_string_chunk(String::from(input), can_gc);
213        } else {
214            parser.document.set_current_parser(Some(&parser));
215        }
216    }
217
218    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
219    pub(crate) fn parse_html_fragment(
220        context: &Element,
221        input: DOMString,
222        allow_declarative_shadow_roots: bool,
223        can_gc: CanGc,
224    ) -> impl Iterator<Item = DomRoot<Node>> + use<'_> {
225        let context_node = context.upcast::<Node>();
226        let context_document = context_node.owner_doc();
227        let window = context_document.window();
228        let url = context_document.url();
229
230        // Step 1. Let document be a Document node whose type is "html".
231        let loader = DocumentLoader::new_with_threads(
232            context_document.loader().resource_threads().clone(),
233            Some(url.clone()),
234        );
235        let document = Document::new(
236            window,
237            HasBrowsingContext::No,
238            Some(url.clone()),
239            context_document.origin().clone(),
240            IsHTMLDocument::HTMLDocument,
241            None,
242            None,
243            DocumentActivity::Inactive,
244            DocumentSource::FromParser,
245            loader,
246            None,
247            None,
248            Default::default(),
249            false,
250            allow_declarative_shadow_roots,
251            Some(context_document.insecure_requests_policy()),
252            context_document.has_trustworthy_ancestor_or_current_origin(),
253            context_document.custom_element_reaction_stack(),
254            can_gc,
255        );
256
257        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
258        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
259        // mode to "limited-quirks".
260        document.set_quirks_mode(context_document.quirks_mode());
261
262        // NOTE: The following steps happened as part of Step 1.
263        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
264        // allow declarative shadow roots to true.
265        // Step 5. Create a new HTML parser, and associate it with document.
266
267        // Step 11.
268        let form = context_node
269            .inclusive_ancestors(ShadowIncluding::No)
270            .find(|element| element.is::<HTMLFormElement>());
271
272        let fragment_context = FragmentContext {
273            context_elem: context_node,
274            form_elem: form.as_deref(),
275            context_element_allows_scripting: context_document.scripting_enabled(),
276        };
277
278        let parser = ServoParser::new(
279            &document,
280            Tokenizer::Html(self::html::Tokenizer::new(
281                &document,
282                url,
283                Some(fragment_context),
284                ParsingAlgorithm::Fragment,
285            )),
286            ParserKind::Normal,
287            can_gc,
288        );
289        parser.parse_complete_string_chunk(String::from(input), can_gc);
290
291        // Step 14.
292        let root_element = document.GetDocumentElement().expect("no document element");
293        FragmentParsingResult {
294            inner: root_element.upcast::<Node>().children(),
295        }
296    }
297
298    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
299        let parser = ServoParser::new(
300            document,
301            Tokenizer::Html(self::html::Tokenizer::new(
302                document,
303                url,
304                None,
305                ParsingAlgorithm::Normal,
306            )),
307            ParserKind::ScriptCreated,
308            CanGc::note(),
309        );
310        *parser.bom_sniff.borrow_mut() = None;
311        document.set_current_parser(Some(&parser));
312    }
313
314    pub(crate) fn parse_xml_document(
315        document: &Document,
316        input: Option<DOMString>,
317        url: ServoUrl,
318        can_gc: CanGc,
319    ) {
320        let parser = ServoParser::new(
321            document,
322            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
323            ParserKind::Normal,
324            can_gc,
325        );
326
327        // Set as the document's current parser and initialize with `input`, if given.
328        if let Some(input) = input {
329            parser.parse_complete_string_chunk(String::from(input), can_gc);
330        } else {
331            parser.document.set_current_parser(Some(&parser));
332        }
333    }
334
    /// Returns the current value of the parser's script nesting level.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    pub(crate) fn script_nesting_level(&self) -> usize {
        self.script_nesting_level.get()
    }
338
    /// Returns whether this parser is a script-created parser.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
342
343    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
344    /// tag whose tag name is "script"' of
345    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
346    ///
347    /// This first moves everything from the script input to the beginning of
348    /// the network input, effectively resetting the insertion point to just
349    /// before the next character to be consumed.
350    ///
351    ///
352    /// ```text
353    ///     | ... script input ... network input ...
354    ///     ^
355    ///     insertion point
356    /// ```
357    pub(crate) fn resume_with_pending_parsing_blocking_script(
358        &self,
359        script: &HTMLScriptElement,
360        result: ScriptResult,
361        can_gc: CanGc,
362    ) {
363        assert!(self.suspended.get());
364        self.suspended.set(false);
365
366        self.script_input.swap_with(&self.network_input);
367        while let Some(chunk) = self.script_input.pop_front() {
368            self.network_input.push_back(chunk);
369        }
370
371        let script_nesting_level = self.script_nesting_level.get();
372        assert_eq!(script_nesting_level, 0);
373
374        self.script_nesting_level.set(script_nesting_level + 1);
375        script.execute(result, can_gc);
376        self.script_nesting_level.set(script_nesting_level);
377
378        if !self.suspended.get() && !self.aborted.get() {
379            self.parse_sync(can_gc);
380        }
381    }
382
383    pub(crate) fn can_write(&self) -> bool {
384        self.script_created_parser || self.script_nesting_level.get() > 0
385    }
386
387    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
388    pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) {
389        assert!(self.can_write());
390
391        if self.document.has_pending_parsing_blocking_script() {
392            // There is already a pending parsing blocking script so the
393            // parser is suspended, we just append everything to the
394            // script input and abort these steps.
395            self.script_input.push_back(String::from(text).into());
396            return;
397        }
398
399        // There is no pending parsing blocking script, so all previous calls
400        // to document.write() should have seen their entire input tokenized
401        // and process, with nothing pushed to the parser script input.
402        assert!(self.script_input.is_empty());
403
404        let input = BufferQueue::default();
405        input.push_back(String::from(text).into());
406
407        let profiler_chan = self
408            .document
409            .window()
410            .as_global_scope()
411            .time_profiler_chan()
412            .clone();
413        let profiler_metadata = TimerMetadata {
414            url: self.document.url().as_str().into(),
415            iframe: TimerMetadataFrameType::RootWindow,
416            incremental: TimerMetadataReflowType::FirstReflow,
417        };
418        self.tokenize(
419            |tokenizer| {
420                tokenizer.feed(
421                    &input,
422                    can_gc,
423                    profiler_chan.clone(),
424                    profiler_metadata.clone(),
425                )
426            },
427            can_gc,
428        );
429
430        if self.suspended.get() {
431            // Parser got suspended, insert remaining input at end of
432            // script input, following anything written by scripts executed
433            // reentrantly during this call.
434            while let Some(chunk) = input.pop_front() {
435                self.script_input.push_back(chunk);
436            }
437            return;
438        }
439
440        assert!(input.is_empty());
441    }
442
443    // Steps 4-6 of https://html.spec.whatwg.org/multipage/#dom-document-close
444    pub(crate) fn close(&self, can_gc: CanGc) {
445        assert!(self.script_created_parser);
446
447        // Step 4.
448        self.last_chunk_received.set(true);
449
450        if self.suspended.get() {
451            // Step 5.
452            return;
453        }
454
455        // Step 6.
456        self.parse_sync(can_gc);
457    }
458
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    ///
    /// Marks the parser as aborted, discards all queued input, ends the
    /// tokenizer, detaches the parser from its document and moves the
    /// document's ready state to interactive and then to complete.
    ///
    /// # Panics
    ///
    /// Panics if the parser was already aborted.
    pub(crate) fn abort(&self, can_gc: CanGc) {
        assert!(!self.aborted.get());
        self.aborted.set(true);

        // Step 1.
        // Replace both input queues with empty ones, dropping queued input.
        self.script_input.replace_with(BufferQueue::default());
        self.network_input.replace_with(BufferQueue::default());

        // Step 2.
        self.document
            .set_ready_state(DocumentReadyState::Interactive, can_gc);

        // Step 3.
        self.tokenizer.end(can_gc);
        self.document.set_current_parser(None);

        // Step 4.
        self.document
            .set_ready_state(DocumentReadyState::Complete, can_gc);
    }
480
481    // https://html.spec.whatwg.org/multipage/#active-parser
482    pub(crate) fn is_active(&self) -> bool {
483        self.script_nesting_level() > 0 && !self.aborted.get()
484    }
485
486    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
487    fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self {
488        // Store the whole input for the devtools Sources panel, if the devtools server is running
489        // and we are parsing for a document load (not just things like innerHTML).
490        // TODO: check if a devtools client is actually connected and/or wants the sources?
491        let content_for_devtools = (document.global().devtools_chan().is_some() &&
492            document.has_browsing_context())
493        .then_some(DomRefCell::new(String::new()));
494
495        ServoParser {
496            reflector: Reflector::new(),
497            document: Dom::from_ref(document),
498            bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))),
499            network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
500            network_input: BufferQueue::default(),
501            script_input: BufferQueue::default(),
502            tokenizer,
503            last_chunk_received: Cell::new(false),
504            suspended: Default::default(),
505            script_nesting_level: Default::default(),
506            aborted: Default::default(),
507            script_created_parser: kind == ParserKind::ScriptCreated,
508            prefetch_tokenizer: prefetch::Tokenizer::new(document),
509            prefetch_input: BufferQueue::default(),
510            content_for_devtools,
511        }
512    }
513
514    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
515    fn new(
516        document: &Document,
517        tokenizer: Tokenizer,
518        kind: ParserKind,
519        can_gc: CanGc,
520    ) -> DomRoot<Self> {
521        reflect_dom_object(
522            Box::new(ServoParser::new_inherited(document, tokenizer, kind)),
523            document.window(),
524            can_gc,
525        )
526    }
527
528    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
529        if let Some(mut content_for_devtools) = self
530            .content_for_devtools
531            .as_ref()
532            .map(|content| content.borrow_mut())
533        {
534            // TODO: append these chunks more efficiently
535            content_for_devtools.push_str(chunk.as_ref());
536        }
537
538        if chunk.is_empty() {
539            return;
540        }
541        // Per https://github.com/whatwg/html/issues/1495
542        // stylesheets should not be loaded for documents
543        // without browsing contexts.
544        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
545        // suggests that no content should be preloaded in such a case.
546        // We're conservative, and only prefetch for documents
547        // with browsing contexts.
548        if self.document.browsing_context().is_some() {
549            // Push the chunk into the prefetch input stream,
550            // which is tokenized eagerly, to scan for resources
551            // to prefetch. If the user script uses `document.write()`
552            // to overwrite the network input, this prefetching may
553            // have been wasted, but in most cases it won't.
554            self.prefetch_input.push_back(chunk.clone());
555            self.prefetch_tokenizer.feed(&self.prefetch_input);
556        }
557        // Push the chunk into the network input stream,
558        // which is tokenized lazily.
559        self.network_input.push_back(chunk);
560    }
561
562    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
563        // BOM sniff. This is needed because NetworkDecoder will switch the
564        // encoding based on the BOM, but it won't change
565        // `self.document.encoding` in the process.
566        {
567            let mut bom_sniff = self.bom_sniff.borrow_mut();
568            if let Some(partial_bom) = bom_sniff.as_mut() {
569                if partial_bom.len() + chunk.len() >= 3 {
570                    partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied());
571                    if let Some((encoding, _)) = Encoding::for_bom(partial_bom) {
572                        self.document.set_encoding(encoding);
573                    }
574                    drop(bom_sniff);
575                    *self.bom_sniff.borrow_mut() = None;
576                } else {
577                    partial_bom.extend(chunk.iter().copied());
578                }
579            }
580        }
581
582        // For byte input, we convert it to text using the network decoder.
583        let chunk = self
584            .network_decoder
585            .borrow_mut()
586            .as_mut()
587            .unwrap()
588            .decode(chunk);
589        self.push_tendril_input_chunk(chunk);
590    }
591
592    fn push_string_input_chunk(&self, chunk: String) {
593        // If the input is a string, we don't have a BOM.
594        if self.bom_sniff.borrow().is_some() {
595            *self.bom_sniff.borrow_mut() = None;
596        }
597
598        // The input has already been decoded as a string, so doesn't need
599        // to be decoded by the network decoder again.
600        let chunk = StrTendril::from(chunk);
601        self.push_tendril_input_chunk(chunk);
602    }
603
604    fn parse_sync(&self, can_gc: CanGc) {
605        assert!(self.script_input.is_empty());
606
607        // This parser will continue to parse while there is either pending input or
608        // the parser remains unsuspended.
609
610        if self.last_chunk_received.get() {
611            if let Some(decoder) = self.network_decoder.borrow_mut().take() {
612                let chunk = decoder.finish();
613                if !chunk.is_empty() {
614                    self.network_input.push_back(chunk);
615                }
616            }
617        }
618
619        if self.aborted.get() {
620            return;
621        }
622
623        let profiler_chan = self
624            .document
625            .window()
626            .as_global_scope()
627            .time_profiler_chan()
628            .clone();
629        let profiler_metadata = TimerMetadata {
630            url: self.document.url().as_str().into(),
631            iframe: TimerMetadataFrameType::RootWindow,
632            incremental: TimerMetadataReflowType::FirstReflow,
633        };
634        self.tokenize(
635            |tokenizer| {
636                tokenizer.feed(
637                    &self.network_input,
638                    can_gc,
639                    profiler_chan.clone(),
640                    profiler_metadata.clone(),
641                )
642            },
643            can_gc,
644        );
645
646        if self.suspended.get() {
647            return;
648        }
649
650        assert!(self.network_input.is_empty());
651
652        if self.last_chunk_received.get() {
653            self.finish(can_gc);
654        }
655    }
656
657    fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) {
658        self.document.set_current_parser(Some(self));
659        self.push_string_input_chunk(input);
660        self.last_chunk_received.set(true);
661        if !self.suspended.get() {
662            self.parse_sync(can_gc);
663        }
664    }
665
666    fn parse_bytes_chunk(&self, input: Vec<u8>, can_gc: CanGc) {
667        let _realm = enter_realm(&*self.document);
668        self.document.set_current_parser(Some(self));
669        self.push_bytes_input_chunk(input);
670        if !self.suspended.get() {
671            self.parse_sync(can_gc);
672        }
673    }
674
675    fn tokenize<F>(&self, feed: F, can_gc: CanGc)
676    where
677        F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
678    {
679        loop {
680            assert!(!self.suspended.get());
681            assert!(!self.aborted.get());
682
683            self.document.window().reflow_if_reflow_timer_expired();
684            let script = match feed(&self.tokenizer) {
685                TokenizerResult::Done => return,
686                TokenizerResult::Script(script) => script,
687            };
688
689            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
690            // branch "An end tag whose tag name is "script"
691            // The spec says to perform the microtask checkpoint before
692            // setting the insertion mode back from Text, but this is not
693            // possible with the way servo and html5ever currently
694            // relate to each other, and hopefully it is not observable.
695            if is_execution_stack_empty() {
696                self.document
697                    .window()
698                    .as_global_scope()
699                    .perform_a_microtask_checkpoint(can_gc);
700            }
701
702            let script_nesting_level = self.script_nesting_level.get();
703
704            self.script_nesting_level.set(script_nesting_level + 1);
705            script.set_initial_script_text();
706            let introduction_type_override =
707                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
708            script.prepare(introduction_type_override, can_gc);
709            self.script_nesting_level.set(script_nesting_level);
710
711            if self.document.has_pending_parsing_blocking_script() {
712                self.suspended.set(true);
713                return;
714            }
715            if self.aborted.get() {
716                return;
717            }
718        }
719    }
720
721    // https://html.spec.whatwg.org/multipage/#the-end
722    fn finish(&self, can_gc: CanGc) {
723        assert!(!self.suspended.get());
724        assert!(self.last_chunk_received.get());
725        assert!(self.script_input.is_empty());
726        assert!(self.network_input.is_empty());
727        assert!(self.network_decoder.borrow().is_none());
728
729        // Step 1.
730        self.document
731            .set_ready_state(DocumentReadyState::Interactive, can_gc);
732
733        // Step 2.
734        self.tokenizer.end(can_gc);
735        self.document.set_current_parser(None);
736
737        // Steps 3-12 are in another castle, namely finish_load.
738        let url = self.tokenizer.url().clone();
739        self.document.finish_load(LoadType::PageSource(url), can_gc);
740
741        // Send the source contents to devtools, if needed.
742        if let Some(content_for_devtools) = self
743            .content_for_devtools
744            .as_ref()
745            .map(|content| content.take())
746        {
747            let global = self.document.global();
748            let chan = global.devtools_chan().expect("Guaranteed by new");
749            let pipeline_id = self.document.global().pipeline_id();
750            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
751                pipeline_id,
752                content_for_devtools,
753            ));
754        }
755    }
756}
757
/// Iterator wrapper returned by fragment parsing; it wraps an iterator over
/// the parsed nodes.
struct FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    /// The wrapped node iterator.
    inner: I,
}
764
765impl<I> Iterator for FragmentParsingResult<I>
766where
767    I: Iterator<Item = DomRoot<Node>>,
768{
769    type Item = DomRoot<Node>;
770
771    fn next(&mut self) -> Option<DomRoot<Node>> {
772        let next = self.inner.next()?;
773        next.remove_self(CanGc::note());
774        Some(next)
775    }
776
777    fn size_hint(&self) -> (usize, Option<usize>) {
778        self.inner.size_hint()
779    }
780}
781
/// Distinguishes how a parser came into being.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    /// A parser that is not script-created.
    Normal,
    /// A script-created parser.
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    ScriptCreated,
}
787
/// The concrete tokenizer driving a parser; one variant per tokenizer
/// implementation in the `html`, `async_html` and `xml` submodules.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// Tokenizer from the `html` submodule.
    Html(self::html::Tokenizer),
    /// Tokenizer from the `async_html` submodule.
    AsyncHtml(self::async_html::Tokenizer),
    /// Tokenizer from the `xml` submodule.
    Xml(self::xml::Tokenizer),
}
795
796impl Tokenizer {
797    fn feed(
798        &self,
799        input: &BufferQueue,
800        can_gc: CanGc,
801        profiler_chan: ProfilerChan,
802        profiler_metadata: TimerMetadata,
803    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
804        match *self {
805            Tokenizer::Html(ref tokenizer) => time_profile!(
806                ProfilerCategory::ScriptParseHTML,
807                Some(profiler_metadata),
808                profiler_chan,
809                || tokenizer.feed(input),
810            ),
811            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
812                ProfilerCategory::ScriptParseHTML,
813                Some(profiler_metadata),
814                profiler_chan,
815                || tokenizer.feed(input, can_gc),
816            ),
817            Tokenizer::Xml(ref tokenizer) => time_profile!(
818                ProfilerCategory::ScriptParseXML,
819                Some(profiler_metadata),
820                profiler_chan,
821                || tokenizer.feed(input),
822            ),
823        }
824    }
825
826    fn end(&self, can_gc: CanGc) {
827        match *self {
828            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
829            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc),
830            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
831        }
832    }
833
834    fn url(&self) -> &ServoUrl {
835        match *self {
836            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
837            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
838            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
839        }
840    }
841
842    fn set_plaintext_state(&self) {
843        match *self {
844            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
845            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
846            Tokenizer::Xml(_) => unimplemented!(),
847        }
848    }
849}
850
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// This does not have all of the fields of the specification struct, but mimics
/// its intent when used in the document-loading algorithms.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// Content type of this document, if known. When `None` the type has to be
    /// sniffed from `resource_header`.
    content_type: Option<Mime>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    ///
    /// Leading bytes of the response, buffered until the document is loaded.
    resource_header: Vec<u8>,
}
864
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request. `None` until response headers
    /// have been processed.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document (media host page, error page, ...).
    /// Once set, incoming response chunks are ignored.
    is_synthesized_document: bool,
    /// Whether a document has already been loaded (relevant for checking the resource header).
    has_loaded_document: bool,
    /// The pipeline associated with this document.
    id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Timing data for this resource.
    resource_timing: ResourceFetchTiming,
    /// Index of the performance entry queued by `submit_resource_timing`, if any.
    pushed_entry_index: Option<usize>,
    /// Parameters required by the document load algorithms.
    navigation_params: NavigationParams,
}
885
886impl ParserContext {
887    pub(crate) fn new(id: PipelineId, url: ServoUrl) -> ParserContext {
888        ParserContext {
889            parser: None,
890            is_synthesized_document: false,
891            has_loaded_document: false,
892            id,
893            url,
894            resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation),
895            pushed_entry_index: None,
896            navigation_params: NavigationParams {
897                policy_container: Default::default(),
898                content_type: None,
899                final_sandboxing_flag_set: SandboxingFlagSet::empty(),
900                resource_header: vec![],
901            },
902        }
903    }
904
905    pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
906        let Some(policy_container) = policy_container else {
907            return;
908        };
909        self.navigation_params.policy_container = policy_container.clone();
910    }
911
912    /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
913    fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
914        // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's blob URL entry's environment's policy container.
915        // TODO
916        // Step 2. Let result be a new policy container.
917        // Step 7. Return result.
918        PolicyContainer {
919            // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response.
920            csp_list: parse_csp_list_from_metadata(&metadata.headers),
921            // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY]
922            referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers),
923        }
924    }
925
926    /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
927    fn initialize_document_object(&self, document: &Document) {
928        // Step 9. Let document be a new Document, with
929        document.set_policy_container(self.navigation_params.policy_container.clone());
930        document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set);
931    }
932
933    /// <https://html.spec.whatwg.org/multipage/#loading-a-document>
934    fn load_document(&mut self, can_gc: CanGc) {
935        assert!(!self.has_loaded_document);
936        self.has_loaded_document = true;
937        let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
938            return;
939        };
940        // Step 1. Let type be the computed type of navigationParams's response.
941        let content_type = &self.navigation_params.content_type;
942        let mime_type = MimeClassifier::default().classify(
943            LoadContext::Browsing,
944            NoSniffFlag::Off,
945            ApacheBugFlag::from_content_type(content_type.as_ref()),
946            content_type,
947            &self.navigation_params.resource_header,
948        );
949        // Step 2. If the user agent has been configured to process resources of the given type using
950        // some mechanism other than rendering the content in a navigable, then skip this step.
951        // Otherwise, if the type is one of the following types:
952        let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
953            let page = format!(
954                "<html><body><p>Unknown content type ({}).</p></body></html>",
955                &mime_type,
956            );
957            self.load_inline_unknown_content(parser, page);
958            return;
959        };
960        match media_type {
961            // Return the result of loading an HTML document, given navigationParams.
962            MediaType::Html => self.load_html_document(parser),
963            // Return the result of loading an XML document given navigationParams and type.
964            MediaType::Xml => self.load_xml_document(parser),
965            // Return the result of loading a text document given navigationParams and type.
966            MediaType::JavaScript | MediaType::Json | MediaType::Text | MediaType::Css => {
967                self.load_text_document(parser)
968            },
969            // Return the result of loading a media document given navigationParams and type.
970            MediaType::Image | MediaType::AudioVideo => {
971                self.load_media_document(parser, media_type, &mime_type)
972            },
973            MediaType::Font => {
974                let page = format!(
975                    "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
976                    &mime_type,
977                );
978                self.load_inline_unknown_content(parser, page);
979                return;
980            },
981        };
982
983        parser.parse_bytes_chunk(
984            std::mem::take(&mut self.navigation_params.resource_header),
985            can_gc,
986        );
987    }
988
989    /// <https://html.spec.whatwg.org/multipage/#navigate-html>
990    fn load_html_document(&self, parser: &ServoParser) {
991        // Step 1. Let document be the result of creating and initializing a
992        // Document object given "html", "text/html", and navigationParams.
993        self.initialize_document_object(&parser.document);
994    }
995
996    /// <https://html.spec.whatwg.org/multipage/#read-xml>
997    fn load_xml_document(&self, parser: &ServoParser) {
998        // When faced with displaying an XML file inline, provided navigation params navigationParams
999        // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
1000        // XML Media Types, DOM, and other relevant specifications to create and initialize a
1001        // Document object document, given "xml", type, and navigationParams, and return that Document.
1002        // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
1003        self.initialize_document_object(&parser.document);
1004    }
1005
1006    /// <https://html.spec.whatwg.org/multipage/#navigate-text>
1007    fn load_text_document(&self, parser: &ServoParser) {
1008        // Step 4. Create an HTML parser and associate it with the document.
1009        // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by
1010        // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state.
1011        // Each task that the networking task source places on the task queue while fetching runs must then
1012        // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform
1013        // the appropriate processing of the input stream.
1014        let page = "<pre>\n".into();
1015        parser.push_string_input_chunk(page);
1016        parser.parse_sync(CanGc::note());
1017        parser.tokenizer.set_plaintext_state();
1018    }
1019
1020    /// <https://html.spec.whatwg.org/multipage/#navigate-media>
1021    fn load_media_document(
1022        &mut self,
1023        parser: &ServoParser,
1024        media_type: MediaType,
1025        mime_type: &Mime,
1026    ) {
1027        // Step 8. Act as if the user agent had stopped parsing document.
1028        self.is_synthesized_document = true;
1029        // Step 3. Populate with html/head/body given document.
1030        let page = "<html><body></body></html>".into();
1031        parser.push_string_input_chunk(page);
1032        parser.parse_sync(CanGc::note());
1033
1034        let doc = &parser.document;
1035        // Step 5. Set the appropriate attribute of the element host element, as described below,
1036        // to the address of the image, video, or audio resource.
1037        let node = if media_type == MediaType::Image {
1038            let img = Element::create(
1039                QualName::new(None, ns!(html), local_name!("img")),
1040                None,
1041                doc,
1042                ElementCreator::ParserCreated(1),
1043                CustomElementCreationMode::Asynchronous,
1044                None,
1045                CanGc::note(),
1046            );
1047            let img = DomRoot::downcast::<HTMLImageElement>(img).unwrap();
1048            img.SetSrc(USVString(self.url.to_string()));
1049            DomRoot::upcast::<Node>(img)
1050        } else if mime_type.type_() == mime::AUDIO {
1051            let audio = Element::create(
1052                QualName::new(None, ns!(html), local_name!("audio")),
1053                None,
1054                doc,
1055                ElementCreator::ParserCreated(1),
1056                CustomElementCreationMode::Asynchronous,
1057                None,
1058                CanGc::note(),
1059            );
1060            let audio = DomRoot::downcast::<HTMLMediaElement>(audio).unwrap();
1061            audio.SetSrc(USVString(self.url.to_string()));
1062            DomRoot::upcast::<Node>(audio)
1063        } else {
1064            let video = Element::create(
1065                QualName::new(None, ns!(html), local_name!("video")),
1066                None,
1067                doc,
1068                ElementCreator::ParserCreated(1),
1069                CustomElementCreationMode::Asynchronous,
1070                None,
1071                CanGc::note(),
1072            );
1073            let video = DomRoot::downcast::<HTMLMediaElement>(video).unwrap();
1074            video.SetSrc(USVString(self.url.to_string()));
1075            DomRoot::upcast::<Node>(video)
1076        };
1077        // Step 4. Append an element host element for the media, as described below, to the body element.
1078        let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
1079        doc_body
1080            .AppendChild(&node, CanGc::note())
1081            .expect("Appending failed");
1082    }
1083
1084    /// <https://html.spec.whatwg.org/multipage/#read-ua-inline>
1085    fn load_inline_unknown_content(&mut self, parser: &ServoParser, page: String) {
1086        self.is_synthesized_document = true;
1087        parser.push_string_input_chunk(page);
1088        parser.parse_sync(CanGc::note());
1089    }
1090}
1091
1092impl FetchResponseListener for ParserContext {
1093    fn process_request_body(&mut self, _: RequestId) {}
1094
1095    fn process_request_eof(&mut self, _: RequestId) {}
1096
1097    fn process_response(&mut self, _: RequestId, meta_result: Result<FetchMetadata, NetworkError>) {
1098        let (metadata, error) = match meta_result {
1099            Ok(meta) => (
1100                Some(match meta {
1101                    FetchMetadata::Unfiltered(m) => m,
1102                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
1103                }),
1104                None,
1105            ),
1106            Err(error) => (
1107                // Check variant without moving
1108                match &error {
1109                    NetworkError::SslValidation(..) |
1110                    NetworkError::Internal(..) |
1111                    NetworkError::Crash(..) => {
1112                        let mut meta = Metadata::default(self.url.clone());
1113                        let mime: Option<Mime> = "text/html".parse().ok();
1114                        meta.set_content_type(mime.as_ref());
1115                        Some(meta)
1116                    },
1117                    _ => None,
1118                },
1119                Some(error),
1120            ),
1121        };
1122        let content_type: Option<Mime> = metadata
1123            .clone()
1124            .and_then(|meta| meta.content_type)
1125            .map(Serde::into_inner)
1126            .map(Into::into);
1127
1128        let (policy_container, endpoints_list) = match metadata.as_ref() {
1129            None => (PolicyContainer::default(), None),
1130            Some(metadata) => (
1131                Self::create_policy_container_from_fetch_response(metadata),
1132                ReportingEndpoint::parse_reporting_endpoints_header(
1133                    &self.url.clone(),
1134                    &metadata.headers,
1135                ),
1136            ),
1137        };
1138
1139        let parser = match ScriptThread::page_headers_available(&self.id, metadata, CanGc::note()) {
1140            Some(parser) => parser,
1141            None => return,
1142        };
1143        if parser.aborted.get() {
1144            return;
1145        }
1146
1147        let _realm = enter_realm(&*parser.document);
1148
1149        // From Step 23.8.3 of https://html.spec.whatwg.org/multipage/#navigate
1150        // Let finalSandboxFlags be the union of targetSnapshotParams's sandboxing flags and
1151        // policyContainer's CSP list's CSP-derived sandboxing flags.
1152        // TODO: implement targetSnapshotParam's sandboxing flags
1153        let final_sandboxing_flag_set = policy_container
1154            .csp_list
1155            .as_ref()
1156            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
1157            .unwrap_or(SandboxingFlagSet::empty());
1158
1159        if let Some(endpoints) = endpoints_list {
1160            parser.document.window().set_endpoints_list(endpoints);
1161        }
1162        self.parser = Some(Trusted::new(&*parser));
1163        self.navigation_params = NavigationParams {
1164            policy_container,
1165            content_type,
1166            final_sandboxing_flag_set,
1167            resource_header: vec![],
1168        };
1169        self.submit_resource_timing();
1170
1171        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
1172        //
1173        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
1174        // e.g., a native rendering of the content or an error message because the specified type is not supported,
1175        // then return the result of creating a document for inline content that doesn't have a DOM given
1176        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
1177        // and navigationParams's user involvement.
1178        if let Some(error) = error {
1179            let page = match error {
1180                NetworkError::SslValidation(reason, bytes) => {
1181                    let page = resources::read_string(Resource::BadCertHTML);
1182                    let page = page.replace("${reason}", &reason);
1183                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
1184                    let page = page.replace("${bytes}", encoded_bytes.as_str());
1185                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
1186                },
1187                NetworkError::Internal(reason) => {
1188                    let page = resources::read_string(Resource::NetErrorHTML);
1189                    page.replace("${reason}", &reason)
1190                },
1191                NetworkError::Crash(details) => {
1192                    let page = resources::read_string(Resource::CrashHTML);
1193                    page.replace("${details}", &details)
1194                },
1195                NetworkError::LoadCancelled => {
1196                    // The next load will show a page
1197                    return;
1198                },
1199            };
1200            self.load_inline_unknown_content(&parser, page);
1201        }
1202    }
1203
1204    fn process_response_chunk(&mut self, _: RequestId, payload: Vec<u8>) {
1205        if self.is_synthesized_document {
1206            return;
1207        }
1208        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
1209            return;
1210        };
1211        if parser.aborted.get() {
1212            return;
1213        }
1214        if !self.has_loaded_document {
1215            // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1216            self.navigation_params
1217                .resource_header
1218                .extend_from_slice(&payload);
1219            // the number of bytes in buffer is greater than or equal to 1445.
1220            if self.navigation_params.resource_header.len() >= 1445 {
1221                self.load_document(CanGc::note());
1222            }
1223        } else {
1224            parser.parse_bytes_chunk(payload, CanGc::note());
1225        }
1226    }
1227
1228    // This method is called via script_thread::handle_fetch_eof, so we must call
1229    // submit_resource_timing in this function
1230    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1231    fn process_response_eof(
1232        &mut self,
1233        _: RequestId,
1234        status: Result<ResourceFetchTiming, NetworkError>,
1235    ) {
1236        let parser = match self.parser.as_ref() {
1237            Some(parser) => parser.root(),
1238            None => return,
1239        };
1240        if parser.aborted.get() {
1241            return;
1242        }
1243
1244        match status {
1245            // are we throwing this away or can we use it?
1246            Ok(_) => (),
1247            // TODO(Savago): we should send a notification to callers #5463.
1248            Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err),
1249        }
1250
1251        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1252        //
1253        // the end of the resource is reached.
1254        if !self.has_loaded_document {
1255            self.load_document(CanGc::note());
1256        }
1257
1258        let _realm = enter_realm(&*parser);
1259
1260        parser
1261            .document
1262            .set_redirect_count(self.resource_timing.redirect_count);
1263
1264        parser.last_chunk_received.set(true);
1265        if !parser.suspended.get() {
1266            parser.parse_sync(CanGc::note());
1267        }
1268
1269        // TODO: Only update if this is the current document resource.
1270        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
1271        if let Some(pushed_index) = self.pushed_entry_index {
1272            let document = &parser.document;
1273            let performance_entry = PerformanceNavigationTiming::new(
1274                &document.global(),
1275                CrossProcessInstant::now(),
1276                document,
1277                CanGc::note(),
1278            );
1279            document
1280                .global()
1281                .performance()
1282                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1283        }
1284    }
1285
1286    fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming {
1287        &mut self.resource_timing
1288    }
1289
1290    fn resource_timing(&self) -> &ResourceFetchTiming {
1291        &self.resource_timing
1292    }
1293
1294    // store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1295    fn submit_resource_timing(&mut self) {
1296        let parser = match self.parser.as_ref() {
1297            Some(parser) => parser.root(),
1298            None => return,
1299        };
1300        if parser.aborted.get() {
1301            return;
1302        }
1303
1304        let document = &parser.document;
1305
1306        // TODO: Pass a proper fetch start time here.
1307        let performance_entry = PerformanceNavigationTiming::new(
1308            &document.global(),
1309            CrossProcessInstant::now(),
1310            document,
1311            CanGc::note(),
1312        );
1313        self.pushed_entry_index = document.global().performance().queue_entry(
1314            performance_entry.upcast::<PerformanceEntry>(),
1315            CanGc::note(),
1316        );
1317    }
1318
1319    fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec<Violation>) {
1320        let parser = match self.parser.as_ref() {
1321            Some(parser) => parser.root(),
1322            None => return,
1323        };
1324        let document = &parser.document;
1325        let global = &document.global();
1326        // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved
1327        global.report_csp_violations(violations, None, None);
1328    }
1329}
1330
// Empty implementation: `ParserContext` overrides nothing from `PreInvoke`.
impl PreInvoke for ParserContext {}
1332
/// Borrowed inputs describing the context in which a fragment is parsed.
pub(crate) struct FragmentContext<'a> {
    /// The context element node.
    pub(crate) context_elem: &'a Node,
    /// The form element node associated with the context, if any.
    pub(crate) form_elem: Option<&'a Node>,
    /// NOTE(review): presumably whether scripting is enabled for the context element — confirm against callers.
    pub(crate) context_element_allows_scripting: bool,
}
1338
1339#[cfg_attr(crown, allow(crown::unrooted_must_root))]
1340fn insert(
1341    parent: &Node,
1342    reference_child: Option<&Node>,
1343    child: NodeOrText<Dom<Node>>,
1344    parsing_algorithm: ParsingAlgorithm,
1345    custom_element_reaction_stack: &CustomElementReactionStack,
1346    can_gc: CanGc,
1347) {
1348    match child {
1349        NodeOrText::AppendNode(n) => {
1350            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1351            // applies if this is an element; if not, it may be
1352            // https://html.spec.whatwg.org/multipage/#insert-a-comment
1353            let element_in_non_fragment =
1354                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
1355            if element_in_non_fragment {
1356                custom_element_reaction_stack.push_new_element_queue();
1357            }
1358            parent.InsertBefore(&n, reference_child, can_gc).unwrap();
1359            if element_in_non_fragment {
1360                custom_element_reaction_stack.pop_current_element_queue(can_gc);
1361            }
1362        },
1363        NodeOrText::AppendText(t) => {
1364            // https://html.spec.whatwg.org/multipage/#insert-a-character
1365            let text = reference_child
1366                .and_then(Node::GetPreviousSibling)
1367                .or_else(|| parent.GetLastChild())
1368                .and_then(DomRoot::downcast::<Text>);
1369
1370            if let Some(text) = text {
1371                text.upcast::<CharacterData>().append_data(&t);
1372            } else {
1373                let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc);
1374                parent
1375                    .InsertBefore(text.upcast(), reference_child, can_gc)
1376                    .unwrap();
1377            }
1378        },
1379    }
1380}
1381
/// Tree sink that builds DOM nodes in `document` on behalf of the tokenizer.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// NOTE(review): presumably the base URL of the document being parsed — confirm against users.
    #[no_trace]
    base_url: ServoUrl,
    /// The document in which nodes are created.
    document: Dom<Document>,
    /// Line number recorded on parser-created elements (see `create_element`).
    current_line: Cell<u64>,
    /// NOTE(review): presumably the script element pending execution, if any — confirm against users.
    script: MutNullableDom<HTMLScriptElement>,
    /// Parsing algorithm used for created and inserted nodes.
    parsing_algorithm: ParsingAlgorithm,
    /// Reaction stack handed to element creation and insertion.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
1394
1395impl Sink {
1396    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1397        let x = x.downcast::<Element>().expect("Element node expected");
1398        let y = y.downcast::<Element>().expect("Element node expected");
1399
1400        x.is_in_same_home_subtree(y)
1401    }
1402
1403    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1404        node.GetParentNode().is_some()
1405    }
1406}
1407
1408impl TreeSink for Sink {
1409    type Output = Self;
    /// Consumes the sink and returns it unchanged as the parse output.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn finish(self) -> Self {
        self
    }
1414
1415    type Handle = Dom<Node>;
1416    type ElemName<'a>
1417        = ExpandedName<'a>
1418    where
1419        Self: 'a;
1420
1421    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1422    fn get_document(&self) -> Dom<Node> {
1423        Dom::from_ref(self.document.upcast())
1424    }
1425
1426    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1427    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1428        let template = target
1429            .downcast::<HTMLTemplateElement>()
1430            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1431        Dom::from_ref(template.Content(CanGc::note()).upcast())
1432    }
1433
    /// Returns whether the two handles compare equal.
    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
        x == y
    }
1437
1438    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1439        let elem = target
1440            .downcast::<Element>()
1441            .expect("tried to get name of non-Element in HTML parsing");
1442        ExpandedName {
1443            ns: elem.namespace(),
1444            local: elem.local_name(),
1445        }
1446    }
1447
1448    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1449    fn create_element(
1450        &self,
1451        name: QualName,
1452        attrs: Vec<Attribute>,
1453        flags: ElementFlags,
1454    ) -> Dom<Node> {
1455        let attrs = attrs
1456            .into_iter()
1457            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1458            .collect();
1459        let parsing_algorithm = if flags.template {
1460            ParsingAlgorithm::Fragment
1461        } else {
1462            self.parsing_algorithm
1463        };
1464        let element = create_element_for_token(
1465            name,
1466            attrs,
1467            &self.document,
1468            ElementCreator::ParserCreated(self.current_line.get()),
1469            parsing_algorithm,
1470            &self.custom_element_reaction_stack,
1471            CanGc::note(),
1472        );
1473        Dom::from_ref(element.upcast())
1474    }
1475
1476    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1477    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1478        let comment = Comment::new(
1479            DOMString::from(String::from(text)),
1480            &self.document,
1481            None,
1482            CanGc::note(),
1483        );
1484        Dom::from_ref(comment.upcast())
1485    }
1486
1487    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1488    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1489        let doc = &*self.document;
1490        let pi = ProcessingInstruction::new(
1491            DOMString::from(String::from(target)),
1492            DOMString::from(String::from(data)),
1493            doc,
1494            CanGc::note(),
1495        );
1496        Dom::from_ref(pi.upcast())
1497    }
1498
1499    fn associate_with_form(
1500        &self,
1501        target: &Dom<Node>,
1502        form: &Dom<Node>,
1503        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1504    ) {
1505        let (element, prev_element) = nodes;
1506        let tree_node = prev_element.map_or(element, |prev| {
1507            if self.has_parent_node(element) {
1508                element
1509            } else {
1510                prev
1511            }
1512        });
1513        if !self.same_tree(tree_node, form) {
1514            return;
1515        }
1516
1517        let node = target;
1518        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1519            .expect("Owner must be a form element");
1520
1521        let elem = node.downcast::<Element>();
1522        let control = elem.and_then(|e| e.as_maybe_form_control());
1523
1524        if let Some(control) = control {
1525            control.set_form_owner_from_parser(&form, CanGc::note());
1526        }
1527    }
1528
1529    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1530    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
1531        let parent = sibling
1532            .GetParentNode()
1533            .expect("append_before_sibling called on node without parent");
1534
1535        insert(
1536            &parent,
1537            Some(sibling),
1538            new_node,
1539            self.parsing_algorithm,
1540            &self.custom_element_reaction_stack,
1541            CanGc::note(),
1542        );
1543    }
1544
    /// Logs the parse error at debug level; parsing continues regardless.
    fn parse_error(&self, msg: Cow<'static, str>) {
        debug!("Parse error: {}", msg);
    }
1548
1549    fn set_quirks_mode(&self, mode: QuirksMode) {
1550        let mode = match mode {
1551            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1552            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1553            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1554        };
1555        self.document.set_quirks_mode(mode);
1556    }
1557
    /// Appends `child` as the last child of `parent` (no reference child is given).
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
        insert(
            parent,
            None,
            child,
            self.parsing_algorithm,
            &self.custom_element_reaction_stack,
            CanGc::note(),
        );
    }
1569
1570    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1571    fn append_based_on_parent_node(
1572        &self,
1573        elem: &Dom<Node>,
1574        prev_elem: &Dom<Node>,
1575        child: NodeOrText<Dom<Node>>,
1576    ) {
1577        if self.has_parent_node(elem) {
1578            self.append_before_sibling(elem, child);
1579        } else {
1580            self.append(prev_elem, child);
1581        }
1582    }
1583
1584    fn append_doctype_to_document(
1585        &self,
1586        name: StrTendril,
1587        public_id: StrTendril,
1588        system_id: StrTendril,
1589    ) {
1590        let doc = &*self.document;
1591        let doctype = DocumentType::new(
1592            DOMString::from(String::from(name)),
1593            Some(DOMString::from(String::from(public_id))),
1594            Some(DOMString::from(String::from(system_id))),
1595            doc,
1596            CanGc::note(),
1597        );
1598        doc.upcast::<Node>()
1599            .AppendChild(doctype.upcast(), CanGc::note())
1600            .expect("Appending failed");
1601    }
1602
1603    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1604        let elem = target
1605            .downcast::<Element>()
1606            .expect("tried to set attrs on non-Element in HTML parsing");
1607        for attr in attrs {
1608            elem.set_attribute_from_parser(
1609                attr.name,
1610                DOMString::from(String::from(attr.value)),
1611                None,
1612                CanGc::note(),
1613            );
1614        }
1615    }
1616
1617    fn remove_from_parent(&self, target: &Dom<Node>) {
1618        if let Some(ref parent) = target.GetParentNode() {
1619            parent.RemoveChild(target, CanGc::note()).unwrap();
1620        }
1621    }
1622
1623    fn mark_script_already_started(&self, node: &Dom<Node>) {
1624        let script = node.downcast::<HTMLScriptElement>();
1625        if let Some(script) = script {
1626            script.set_already_started(true)
1627        }
1628    }
1629
1630    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1631        while let Some(ref child) = node.GetFirstChild() {
1632            new_parent.AppendChild(child, CanGc::note()).unwrap();
1633        }
1634    }
1635
1636    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1637    /// Specifically, the `<annotation-xml>` cases.
1638    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1639        let elem = handle.downcast::<Element>().unwrap();
1640        elem.get_attribute(&ns!(), &local_name!("encoding"))
1641            .is_some_and(|attr| {
1642                attr.value().eq_ignore_ascii_case("text/html") ||
1643                    attr.value().eq_ignore_ascii_case("application/xhtml+xml")
1644            })
1645    }
1646
1647    fn set_current_line(&self, line_number: u64) {
1648        self.current_line.set(line_number);
1649    }
1650
1651    fn pop(&self, node: &Dom<Node>) {
1652        let node = DomRoot::from_ref(&**node);
1653        vtable_for(&node).pop();
1654    }
1655
1656    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
1657        intended_parent.owner_doc().allow_declarative_shadow_roots()
1658    }
1659
1660    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
1661    /// A start tag whose tag name is "template"
1662    /// Attach shadow path
1663    fn attach_declarative_shadow(
1664        &self,
1665        host: &Dom<Node>,
1666        template: &Dom<Node>,
1667        attributes: &[Attribute],
1668    ) -> bool {
1669        attach_declarative_shadow_inner(host, template, attributes)
1670    }
1671}
1672
1673/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
1674fn create_element_for_token(
1675    name: QualName,
1676    attrs: Vec<ElementAttribute>,
1677    document: &Document,
1678    creator: ElementCreator,
1679    parsing_algorithm: ParsingAlgorithm,
1680    custom_element_reaction_stack: &CustomElementReactionStack,
1681    can_gc: CanGc,
1682) -> DomRoot<Element> {
1683    // Step 3.
1684    let is = attrs
1685        .iter()
1686        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
1687        .map(|attr| LocalName::from(&*attr.value));
1688
1689    // Step 4.
1690    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());
1691
1692    // Step 5.
1693    let will_execute_script =
1694        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;
1695
1696    // Step 6.
1697    if will_execute_script {
1698        // Step 6.1.
1699        document.increment_throw_on_dynamic_markup_insertion_counter();
1700        // Step 6.2
1701        if is_execution_stack_empty() {
1702            document
1703                .window()
1704                .as_global_scope()
1705                .perform_a_microtask_checkpoint(can_gc);
1706        }
1707        // Step 6.3
1708        custom_element_reaction_stack.push_new_element_queue()
1709    }
1710
1711    // Step 7.
1712    let creation_mode = if will_execute_script {
1713        CustomElementCreationMode::Synchronous
1714    } else {
1715        CustomElementCreationMode::Asynchronous
1716    };
1717
1718    let element = Element::create(name, is, document, creator, creation_mode, None, can_gc);
1719
1720    // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3
1721    // says to invoke sanitization "when an input element is first created";
1722    // however, since sanitization requires content attributes to function,
1723    // it can't mean that literally.
1724    // Indeed, to make sanitization work correctly, we need to _not_ sanitize
1725    // until after all content attributes have been added
1726
1727    let maybe_input = element.downcast::<HTMLInputElement>();
1728    if let Some(input) = maybe_input {
1729        input.disable_sanitization();
1730    }
1731
1732    // Step 8
1733    for attr in attrs {
1734        element.set_attribute_from_parser(attr.name, attr.value, None, can_gc);
1735    }
1736
1737    // _now_ we can sanitize (and we sanitize now even if the "value"
1738    // attribute isn't present!)
1739    if let Some(input) = maybe_input {
1740        input.enable_sanitization();
1741    }
1742
1743    // Step 9.
1744    if will_execute_script {
1745        // Steps 9.1 - 9.2.
1746        custom_element_reaction_stack.pop_current_element_queue(can_gc);
1747        // Step 9.3.
1748        document.decrement_throw_on_dynamic_markup_insertion_counter();
1749    }
1750
1751    // TODO: Step 10.
1752    // TODO: Step 11.
1753
1754    // Step 12 is handled in `associate_with_form`.
1755
1756    // Step 13.
1757    element
1758}
1759
1760#[derive(JSTraceable, MallocSizeOf)]
1761struct NetworkDecoder {
1762    #[ignore_malloc_size_of = "Defined in tendril"]
1763    #[custom_trace]
1764    decoder: LossyDecoder<NetworkSink>,
1765}
1766
1767impl NetworkDecoder {
1768    fn new(encoding: &'static Encoding) -> Self {
1769        Self {
1770            decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
1771        }
1772    }
1773
1774    fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
1775        self.decoder.process(ByteTendril::from(&*chunk));
1776        std::mem::take(&mut self.decoder.inner_sink_mut().output)
1777    }
1778
1779    fn finish(self) -> StrTendril {
1780        self.decoder.finish()
1781    }
1782}
1783
1784#[derive(Default, JSTraceable)]
1785struct NetworkSink {
1786    #[no_trace]
1787    output: StrTendril,
1788}
1789
1790impl TendrilSink<UTF8> for NetworkSink {
1791    type Output = StrTendril;
1792
1793    fn process(&mut self, t: StrTendril) {
1794        if self.output.is_empty() {
1795            self.output = t;
1796        } else {
1797            self.output.push_tendril(&t);
1798        }
1799    }
1800
1801    fn error(&mut self, _desc: Cow<'static, str>) {}
1802
1803    fn finish(self) -> Self::Output {
1804        self.output
1805    }
1806}
1807
1808fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool {
1809    let host_element = host.downcast::<Element>().unwrap();
1810
1811    if host_element.shadow_root().is_some() {
1812        return false;
1813    }
1814
1815    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
1816
1817    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
1818    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
1819    // Step 5. Let delegatesfocus be true if template start tag
1820    // has a shadowrootdelegatesfocus attribute; otherwise false.
1821    // Step 6. Let serializable be true if template start tag
1822    // has a shadowrootserializable attribute; otherwise false.
1823    let mut shadow_root_mode = ShadowRootMode::Open;
1824    let mut clonable = false;
1825    let mut delegatesfocus = false;
1826    let mut serializable = false;
1827
1828    let attributes: Vec<ElementAttribute> = attributes
1829        .iter()
1830        .map(|attr| {
1831            ElementAttribute::new(
1832                attr.name.clone(),
1833                DOMString::from(String::from(attr.value.clone())),
1834            )
1835        })
1836        .collect();
1837
1838    attributes
1839        .iter()
1840        .for_each(|attr: &ElementAttribute| match attr.name.local {
1841            local_name!("shadowrootmode") => {
1842                if attr.value.str().eq_ignore_ascii_case("open") {
1843                    shadow_root_mode = ShadowRootMode::Open;
1844                } else if attr.value.str().eq_ignore_ascii_case("closed") {
1845                    shadow_root_mode = ShadowRootMode::Closed;
1846                } else {
1847                    unreachable!("shadowrootmode value is not open nor closed");
1848                }
1849            },
1850            local_name!("shadowrootclonable") => {
1851                clonable = true;
1852            },
1853            local_name!("shadowrootdelegatesfocus") => {
1854                delegatesfocus = true;
1855            },
1856            local_name!("shadowrootserializable") => {
1857                serializable = true;
1858            },
1859            _ => {},
1860        });
1861
1862    // Step 8.1. Attach a shadow root with declarative shadow host element,
1863    // mode, clonable, serializable, delegatesFocus, and "named".
1864    match host_element.attach_shadow(
1865        IsUserAgentWidget::No,
1866        shadow_root_mode,
1867        clonable,
1868        serializable,
1869        delegatesfocus,
1870        SlotAssignmentMode::Named,
1871        CanGc::note(),
1872    ) {
1873        Ok(shadow_root) => {
1874            // Step 8.3. Set shadow's declarative to true.
1875            shadow_root.set_declarative(true);
1876
1877            // Set 8.4. Set template's template contents property to shadow.
1878            let shadow = shadow_root.upcast::<DocumentFragment>();
1879            template_element.set_contents(Some(shadow));
1880
1881            // Step 8.5. Set shadow’s available to element internals to true.
1882            shadow_root.set_available_to_element_internals(true);
1883
1884            true
1885        },
1886        Err(_) => false,
1887    }
1888}