script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::Cell;
7use std::rc::Rc;
8
9use base::cross_process_instant::CrossProcessInstant;
10use base::id::{PipelineId, WebViewId};
11use base64::Engine as _;
12use base64::engine::general_purpose;
13use content_security_policy::sandboxing_directive::SandboxingFlagSet;
14use devtools_traits::ScriptToDevtoolsControlMsg;
15use dom_struct::dom_struct;
16use embedder_traits::resources::{self, Resource};
17use encoding_rs::Encoding;
18use html5ever::buffer_queue::BufferQueue;
19use html5ever::tendril::fmt::UTF8;
20use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
21use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
22use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
23use hyper_serde::Serde;
24use markup5ever::TokenizerResult;
25use mime::{self, Mime};
26use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
27use net_traits::policy_container::PolicyContainer;
28use net_traits::request::RequestId;
29use net_traits::{
30    FetchMetadata, LoadContext, Metadata, NetworkError, ReferrerPolicy, ResourceFetchTiming,
31};
32use profile_traits::time::{
33    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
34};
35use profile_traits::time_profile;
36use script_traits::DocumentActivity;
37use servo_config::pref;
38use servo_url::ServoUrl;
39use style::context::QuirksMode as ServoQuirksMode;
40use tendril::stream::LossyDecoder;
41
42use crate::document_loader::{DocumentLoader, LoadType};
43use crate::dom::bindings::cell::DomRefCell;
44use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
45    DocumentMethods, DocumentReadyState,
46};
47use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
48use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
49use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
50use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
51use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
52    ShadowRootMode, SlotAssignmentMode,
53};
54use crate::dom::bindings::inheritance::Castable;
55use crate::dom::bindings::refcounted::Trusted;
56use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object};
57use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
58use crate::dom::bindings::settings_stack::is_execution_stack_empty;
59use crate::dom::bindings::str::{DOMString, USVString};
60use crate::dom::characterdata::CharacterData;
61use crate::dom::comment::Comment;
62use crate::dom::csp::{GlobalCspReporting, Violation, parse_csp_list_from_metadata};
63use crate::dom::customelementregistry::CustomElementReactionStack;
64use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
65use crate::dom::documentfragment::DocumentFragment;
66use crate::dom::documenttype::DocumentType;
67use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
68use crate::dom::globalscope::GlobalScope;
69use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
70use crate::dom::html::htmlimageelement::HTMLImageElement;
71use crate::dom::html::htmlinputelement::HTMLInputElement;
72use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
73use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
74use crate::dom::node::{Node, ShadowIncluding};
75use crate::dom::performance::performanceentry::PerformanceEntry;
76use crate::dom::performance::performancenavigationtiming::PerformanceNavigationTiming;
77use crate::dom::processinginstruction::ProcessingInstruction;
78use crate::dom::processingoptions::{
79    LinkHeader, LinkProcessingPhase, extract_links_from_headers, process_link_headers,
80};
81use crate::dom::reportingendpoint::ReportingEndpoint;
82use crate::dom::shadowroot::IsUserAgentWidget;
83use crate::dom::text::Text;
84use crate::dom::types::HTMLMediaElement;
85use crate::dom::virtualmethods::vtable_for;
86use crate::network_listener::FetchResponseListener;
87use crate::realms::enter_realm;
88use crate::script_runtime::{CanGc, IntroductionType};
89use crate::script_thread::ScriptThread;
90
91mod async_html;
92pub(crate) mod html;
93mod prefetch;
94mod xml;
95
96pub(crate) use html::serialize_html_fragment;
97
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    /// The reflector of this DOM object; instances are wrapped through
    /// `reflect_dom_object` in `ServoParser::new`.
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The BOM sniffing state.
    ///
    /// `None` means we've found the BOM, we've found there isn't one, or
    /// we're not parsing from a byte stream. `Some` contains the BOM bytes
    /// found so far.
    bom_sniff: DomRefCell<Option<Vec<u8>>>,
    /// The decoder used for the network input.
    ///
    /// Becomes `None` once `parse_sync` has flushed it after the last chunk
    /// was received.
    network_decoder: DomRefCell<Option<NetworkDecoder>>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// The input queue handed to `prefetch_tokenizer`; every non-empty chunk
    /// is pushed here by `push_tendril_input_chunk` when the document has a
    /// browsing context.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    ///
    /// `Some` only when `new_inherited` found a devtools channel and the
    /// document has a browsing context.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
154
/// An attribute described by its qualified name and its value.
pub(crate) struct ElementAttribute {
    /// The qualified name of the attribute.
    name: QualName,
    /// The value of the attribute.
    value: DOMString,
}
159
/// Which parsing algorithm an HTML tokenizer is created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Passed by the whole-document entry points (`parse_html_document`,
    /// `parse_html_script_input`).
    Normal,
    /// Passed by `parse_html_fragment`.
    Fragment,
}
165
166impl ElementAttribute {
167    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
168        ElementAttribute { name, value }
169    }
170}
171
172impl ServoParser {
173    pub(crate) fn parser_is_not_active(&self) -> bool {
174        self.can_write()
175    }
176
177    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
178    pub(crate) fn parse_html_document(
179        document: &Document,
180        input: Option<DOMString>,
181        url: ServoUrl,
182        can_gc: CanGc,
183    ) {
184        // Step 1. Set document's type to "html".
185        //
186        // Set by callers of this function and asserted here
187        assert!(document.is_html_document());
188        // Step 2. Create an HTML parser parser, associated with document.
189        let parser = if pref!(dom_servoparser_async_html_tokenizer_enabled) {
190            ServoParser::new(
191                document,
192                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)),
193                ParserKind::Normal,
194                can_gc,
195            )
196        } else {
197            ServoParser::new(
198                document,
199                Tokenizer::Html(self::html::Tokenizer::new(
200                    document,
201                    url,
202                    None,
203                    ParsingAlgorithm::Normal,
204                )),
205                ParserKind::Normal,
206                can_gc,
207            )
208        };
209        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
210        // Step 4. Start parser and let it run until it has consumed all the
211        // characters just inserted into the input stream.
212        //
213        // Set as the document's current parser and initialize with `input`, if given.
214        if let Some(input) = input {
215            parser.parse_complete_string_chunk(String::from(input), can_gc);
216        } else {
217            parser.document.set_current_parser(Some(&parser));
218        }
219    }
220
    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
    ///
    /// Parses `input` as an HTML fragment in the context of `context`, using a
    /// freshly created, inactive document that has no browsing context.
    ///
    /// Returns an iterator over the children of the temporary document's
    /// document element; each node is removed from its parent as it is
    /// yielded (see `FragmentParsingResult::next`).
    ///
    /// # Panics
    ///
    /// Panics if the temporary document has no document element after parsing.
    pub(crate) fn parse_html_fragment(
        context: &Element,
        input: DOMString,
        allow_declarative_shadow_roots: bool,
        can_gc: CanGc,
    ) -> impl Iterator<Item = DomRoot<Node>> + use<'_> {
        let context_node = context.upcast::<Node>();
        let context_document = context_node.owner_doc();
        let window = context_document.window();
        let url = context_document.url();

        // Step 1. Let document be a Document node whose type is "html".
        //
        // The new document reuses the context document's resource threads,
        // URL, origin, insecure requests policy, custom element reaction
        // stack and creation sandboxing flag set.
        let loader = DocumentLoader::new_with_threads(
            context_document.loader().resource_threads().clone(),
            Some(url.clone()),
        );
        let document = Document::new(
            window,
            HasBrowsingContext::No,
            Some(url.clone()),
            context_document.origin().clone(),
            IsHTMLDocument::HTMLDocument,
            None,
            None,
            DocumentActivity::Inactive,
            DocumentSource::FromParser,
            loader,
            None,
            None,
            Default::default(),
            false,
            allow_declarative_shadow_roots,
            Some(context_document.insecure_requests_policy()),
            context_document.has_trustworthy_ancestor_or_current_origin(),
            context_document.custom_element_reaction_stack(),
            context_document.creation_sandboxing_flag_set(),
            can_gc,
        );

        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
        // mode to "limited-quirks".
        //
        // Both steps are covered by copying the context document's quirks mode.
        document.set_quirks_mode(context_document.quirks_mode());

        // NOTE: The following steps happened as part of Step 1.
        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
        // allow declarative shadow roots to true.
        // Step 5. Create a new HTML parser, and associate it with document.

        // Step 11. Find the nearest form element among the context node and
        // its ancestors (the walk uses `ShadowIncluding::No`).
        let form = context_node
            .inclusive_ancestors(ShadowIncluding::No)
            .find(|element| element.is::<HTMLFormElement>());

        let fragment_context = FragmentContext {
            context_elem: context_node,
            form_elem: form.as_deref(),
            context_element_allows_scripting: context_document.scripting_enabled(),
        };

        let parser = ServoParser::new(
            &document,
            Tokenizer::Html(self::html::Tokenizer::new(
                &document,
                url,
                Some(fragment_context),
                ParsingAlgorithm::Fragment,
            )),
            ParserKind::Normal,
            can_gc,
        );
        // The whole fragment is supplied as a single, final chunk.
        parser.parse_complete_string_chunk(String::from(input), can_gc);

        // Step 14. Hand back the children of the document element.
        let root_element = document.GetDocumentElement().expect("no document element");
        FragmentParsingResult {
            inner: root_element.upcast::<Node>().children(),
        }
    }
301
302    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
303        let parser = ServoParser::new(
304            document,
305            Tokenizer::Html(self::html::Tokenizer::new(
306                document,
307                url,
308                None,
309                ParsingAlgorithm::Normal,
310            )),
311            ParserKind::ScriptCreated,
312            CanGc::note(),
313        );
314        *parser.bom_sniff.borrow_mut() = None;
315        document.set_current_parser(Some(&parser));
316    }
317
318    pub(crate) fn parse_xml_document(
319        document: &Document,
320        input: Option<DOMString>,
321        url: ServoUrl,
322        can_gc: CanGc,
323    ) {
324        let parser = ServoParser::new(
325            document,
326            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
327            ParserKind::Normal,
328            can_gc,
329        );
330
331        // Set as the document's current parser and initialize with `input`, if given.
332        if let Some(input) = input {
333            parser.parse_complete_string_chunk(String::from(input), can_gc);
334        } else {
335            parser.document.set_current_parser(Some(&parser));
336        }
337    }
338
339    pub(crate) fn script_nesting_level(&self) -> usize {
340        self.script_nesting_level.get()
341    }
342
    /// Returns whether this parser was constructed with
    /// `ParserKind::ScriptCreated`.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
346
    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
    /// tag whose tag name is "script"' of
    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
    ///
    /// This first moves everything from the script input to the beginning of
    /// the network input, effectively resetting the insertion point to just
    /// before the next character to be consumed.
    ///
    ///
    /// ```text
    ///     | ... script input ... network input ...
    ///     ^
    ///     insertion point
    /// ```
    ///
    /// The script is then executed with the script nesting level raised to
    /// one, and parsing continues afterwards unless the script suspended or
    /// aborted the parser.
    ///
    /// # Panics
    ///
    /// Panics if the parser is not suspended, or if the script nesting level
    /// is not zero on entry.
    pub(crate) fn resume_with_pending_parsing_blocking_script(
        &self,
        script: &HTMLScriptElement,
        result: ScriptResult,
        can_gc: CanGc,
    ) {
        assert!(self.suspended.get());
        self.suspended.set(false);

        // Exchange the two queues, then append the former network input
        // (now held by `script_input`) after the former script input. This
        // drains `script_input` completely.
        self.script_input.swap_with(&self.network_input);
        while let Some(chunk) = self.script_input.pop_front() {
            self.network_input.push_back(chunk);
        }

        let script_nesting_level = self.script_nesting_level.get();
        assert_eq!(script_nesting_level, 0);

        // Raise the nesting level for the duration of the script and restore
        // it afterwards.
        self.script_nesting_level.set(script_nesting_level + 1);
        script.execute(result, can_gc);
        self.script_nesting_level.set(script_nesting_level);

        // Both flags are re-read after the script ran, since they may have
        // changed; only continue if neither is set.
        if !self.suspended.get() && !self.aborted.get() {
            self.parse_sync(can_gc);
        }
    }
386
387    pub(crate) fn can_write(&self) -> bool {
388        self.script_created_parser || self.script_nesting_level.get() > 0
389    }
390
    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
    ///
    /// Tokenizes `text` immediately, unless the document has a pending
    /// parsing-blocking script, in which case the text is only queued on the
    /// script input. Whatever is left untokenized when the parser gets
    /// suspended during this call is queued on the script input as well.
    ///
    /// # Panics
    ///
    /// Panics if `can_write()` is false, if the script input is non-empty
    /// while no parsing-blocking script is pending, or if the tokenizer
    /// finishes without suspension but leaves input behind.
    pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) {
        assert!(self.can_write());

        if self.document.has_pending_parsing_blocking_script() {
            // There is already a pending parsing blocking script so the
            // parser is suspended, we just append everything to the
            // script input and abort these steps.
            self.script_input.push_back(String::from(text).into());
            return;
        }

        // There is no pending parsing blocking script, so all previous calls
        // to document.write() should have seen their entire input tokenized
        // and processed, with nothing pushed to the parser script input.
        assert!(self.script_input.is_empty());

        // The written text goes into a queue local to this call rather than
        // into `script_input`.
        let input = BufferQueue::default();
        input.push_back(String::from(text).into());

        let profiler_chan = self
            .document
            .window()
            .as_global_scope()
            .time_profiler_chan()
            .clone();
        let profiler_metadata = TimerMetadata {
            url: self.document.url().as_str().into(),
            iframe: TimerMetadataFrameType::RootWindow,
            incremental: TimerMetadataReflowType::FirstReflow,
        };
        // The closure may be called several times by `tokenize` (once per
        // loop iteration), hence the clones.
        self.tokenize(
            |tokenizer| {
                tokenizer.feed(
                    &input,
                    can_gc,
                    profiler_chan.clone(),
                    profiler_metadata.clone(),
                )
            },
            can_gc,
        );

        if self.suspended.get() {
            // Parser got suspended, insert remaining input at end of
            // script input, following anything written by scripts executed
            // reentrantly during this call.
            while let Some(chunk) = input.pop_front() {
                self.script_input.push_back(chunk);
            }
            return;
        }

        assert!(input.is_empty());
    }
446
447    // Steps 4-6 of https://html.spec.whatwg.org/multipage/#dom-document-close
448    pub(crate) fn close(&self, can_gc: CanGc) {
449        assert!(self.script_created_parser);
450
451        // Step 4.
452        self.last_chunk_received.set(true);
453
454        if self.suspended.get() {
455            // Step 5.
456            return;
457        }
458
459        // Step 6.
460        self.parse_sync(can_gc);
461    }
462
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    ///
    /// Discards all pending input, ends the tokenizer, detaches the parser
    /// from its document and moves the document's ready state through
    /// `Interactive` to `Complete`.
    ///
    /// # Panics
    ///
    /// Panics if the parser has already been aborted.
    pub(crate) fn abort(&self, can_gc: CanGc) {
        assert!(!self.aborted.get());
        self.aborted.set(true);

        // Step 1. Throw away any pending content in both input streams.
        self.script_input.replace_with(BufferQueue::default());
        self.network_input.replace_with(BufferQueue::default());

        // Step 2. Set the ready state to "interactive".
        self.document
            .set_ready_state(DocumentReadyState::Interactive, can_gc);

        // Step 3. End the tokenizer and clear the document's current parser.
        self.tokenizer.end(can_gc);
        self.document.set_current_parser(None);

        // Step 4. Set the ready state to "complete".
        self.document
            .set_ready_state(DocumentReadyState::Complete, can_gc);
    }
484
485    // https://html.spec.whatwg.org/multipage/#active-parser
486    pub(crate) fn is_active(&self) -> bool {
487        self.script_nesting_level() > 0 && !self.aborted.get()
488    }
489
490    pub(crate) fn get_current_line(&self) -> u32 {
491        self.tokenizer.get_current_line()
492    }
493
494    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
495    fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self {
496        // Store the whole input for the devtools Sources panel, if the devtools server is running
497        // and we are parsing for a document load (not just things like innerHTML).
498        // TODO: check if a devtools client is actually connected and/or wants the sources?
499        let content_for_devtools = (document.global().devtools_chan().is_some() &&
500            document.has_browsing_context())
501        .then_some(DomRefCell::new(String::new()));
502
503        ServoParser {
504            reflector: Reflector::new(),
505            document: Dom::from_ref(document),
506            bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))),
507            network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
508            network_input: BufferQueue::default(),
509            script_input: BufferQueue::default(),
510            tokenizer,
511            last_chunk_received: Cell::new(false),
512            suspended: Default::default(),
513            script_nesting_level: Default::default(),
514            aborted: Default::default(),
515            script_created_parser: kind == ParserKind::ScriptCreated,
516            prefetch_tokenizer: prefetch::Tokenizer::new(document),
517            prefetch_input: BufferQueue::default(),
518            content_for_devtools,
519        }
520    }
521
522    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
523    fn new(
524        document: &Document,
525        tokenizer: Tokenizer,
526        kind: ParserKind,
527        can_gc: CanGc,
528    ) -> DomRoot<Self> {
529        reflect_dom_object(
530            Box::new(ServoParser::new_inherited(document, tokenizer, kind)),
531            document.window(),
532            can_gc,
533        )
534    }
535
536    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
537        if let Some(mut content_for_devtools) = self
538            .content_for_devtools
539            .as_ref()
540            .map(|content| content.borrow_mut())
541        {
542            // TODO: append these chunks more efficiently
543            content_for_devtools.push_str(chunk.as_ref());
544        }
545
546        if chunk.is_empty() {
547            return;
548        }
549        // Per https://github.com/whatwg/html/issues/1495
550        // stylesheets should not be loaded for documents
551        // without browsing contexts.
552        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
553        // suggests that no content should be preloaded in such a case.
554        // We're conservative, and only prefetch for documents
555        // with browsing contexts.
556        if self.document.browsing_context().is_some() {
557            // Push the chunk into the prefetch input stream,
558            // which is tokenized eagerly, to scan for resources
559            // to prefetch. If the user script uses `document.write()`
560            // to overwrite the network input, this prefetching may
561            // have been wasted, but in most cases it won't.
562            self.prefetch_input.push_back(chunk.clone());
563            self.prefetch_tokenizer.feed(&self.prefetch_input);
564        }
565        // Push the chunk into the network input stream,
566        // which is tokenized lazily.
567        self.network_input.push_back(chunk);
568    }
569
570    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
571        // BOM sniff. This is needed because NetworkDecoder will switch the
572        // encoding based on the BOM, but it won't change
573        // `self.document.encoding` in the process.
574        {
575            let mut bom_sniff = self.bom_sniff.borrow_mut();
576            if let Some(partial_bom) = bom_sniff.as_mut() {
577                if partial_bom.len() + chunk.len() >= 3 {
578                    partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied());
579                    if let Some((encoding, _)) = Encoding::for_bom(partial_bom) {
580                        self.document.set_encoding(encoding);
581                    }
582                    drop(bom_sniff);
583                    *self.bom_sniff.borrow_mut() = None;
584                } else {
585                    partial_bom.extend(chunk.iter().copied());
586                }
587            }
588        }
589
590        // For byte input, we convert it to text using the network decoder.
591        let chunk = self
592            .network_decoder
593            .borrow_mut()
594            .as_mut()
595            .unwrap()
596            .decode(chunk);
597        self.push_tendril_input_chunk(chunk);
598    }
599
600    fn push_string_input_chunk(&self, chunk: String) {
601        // If the input is a string, we don't have a BOM.
602        if self.bom_sniff.borrow().is_some() {
603            *self.bom_sniff.borrow_mut() = None;
604        }
605
606        // The input has already been decoded as a string, so doesn't need
607        // to be decoded by the network decoder again.
608        let chunk = StrTendril::from(chunk);
609        self.push_tendril_input_chunk(chunk);
610    }
611
    /// Feeds the network input to the tokenizer until it is drained or the
    /// parser gets suspended.
    ///
    /// If the last chunk has been received, the network decoder is flushed
    /// into the network input first, and `finish` is called once all input
    /// has been consumed. Returns without tokenizing if the parser has been
    /// aborted.
    ///
    /// # Panics
    ///
    /// Panics if the script input is not empty on entry, or if the tokenizer
    /// returns without suspension but leaves network input behind.
    fn parse_sync(&self, can_gc: CanGc) {
        assert!(self.script_input.is_empty());

        // This parser will continue to parse while there is either pending input or
        // the parser remains unsuspended.

        if self.last_chunk_received.get() {
            // Taking the decoder out of its cell means it is finished at
            // most once.
            if let Some(decoder) = self.network_decoder.borrow_mut().take() {
                let chunk = decoder.finish();
                if !chunk.is_empty() {
                    self.network_input.push_back(chunk);
                }
            }
        }

        if self.aborted.get() {
            return;
        }

        let profiler_chan = self
            .document
            .window()
            .as_global_scope()
            .time_profiler_chan()
            .clone();
        let profiler_metadata = TimerMetadata {
            url: self.document.url().as_str().into(),
            iframe: TimerMetadataFrameType::RootWindow,
            incremental: TimerMetadataReflowType::FirstReflow,
        };
        // The closure may be called several times by `tokenize` (once per
        // loop iteration), hence the clones.
        self.tokenize(
            |tokenizer| {
                tokenizer.feed(
                    &self.network_input,
                    can_gc,
                    profiler_chan.clone(),
                    profiler_metadata.clone(),
                )
            },
            can_gc,
        );

        // A suspended parser keeps its remaining input for later.
        if self.suspended.get() {
            return;
        }

        assert!(self.network_input.is_empty());

        if self.last_chunk_received.get() {
            self.finish(can_gc);
        }
    }
664
665    fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) {
666        self.document.set_current_parser(Some(self));
667        self.push_string_input_chunk(input);
668        self.last_chunk_received.set(true);
669        if !self.suspended.get() {
670            self.parse_sync(can_gc);
671        }
672    }
673
674    fn parse_bytes_chunk(&self, input: Vec<u8>, can_gc: CanGc) {
675        let _realm = enter_realm(&*self.document);
676        self.document.set_current_parser(Some(self));
677        self.push_bytes_input_chunk(input);
678        if !self.suspended.get() {
679            self.parse_sync(can_gc);
680        }
681    }
682
    /// Drives the tokenizer by calling `feed` repeatedly until it reports
    /// `TokenizerResult::Done`.
    ///
    /// Each time `feed` hands back a script element, that script is prepared
    /// with the script nesting level raised by one. The loop then stops
    /// early, marking the parser as suspended, if the document has a pending
    /// parsing-blocking script, or stops without suspending if the parser
    /// was aborted in the meantime.
    ///
    /// # Panics
    ///
    /// Panics if the parser is suspended or aborted at the start of an
    /// iteration.
    fn tokenize<F>(&self, feed: F, can_gc: CanGc)
    where
        F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
    {
        loop {
            assert!(!self.suspended.get());
            assert!(!self.aborted.get());

            self.document.window().reflow_if_reflow_timer_expired();
            let script = match feed(&self.tokenizer) {
                TokenizerResult::Done => return,
                TokenizerResult::Script(script) => script,
            };

            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
            // branch "An end tag whose tag name is "script"
            // The spec says to perform the microtask checkpoint before
            // setting the insertion mode back from Text, but this is not
            // possible with the way servo and html5ever currently
            // relate to each other, and hopefully it is not observable.
            if is_execution_stack_empty() {
                self.document
                    .window()
                    .perform_a_microtask_checkpoint(can_gc);
            }

            let script_nesting_level = self.script_nesting_level.get();

            // Raise the nesting level while the script is prepared and
            // restore it afterwards.
            self.script_nesting_level.set(script_nesting_level + 1);
            script.set_initial_script_text();
            // When this call is itself nested inside a script (previous level
            // above zero), the introduction type is overridden with
            // `INJECTED_SCRIPT`.
            let introduction_type_override =
                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
            script.prepare(introduction_type_override, can_gc);
            self.script_nesting_level.set(script_nesting_level);

            if self.document.has_pending_parsing_blocking_script() {
                self.suspended.set(true);
                return;
            }
            // Checked explicitly so the assertion at the top of the loop is
            // not reached for an aborted parser.
            if self.aborted.get() {
                return;
            }
        }
    }
727
    /// <https://html.spec.whatwg.org/multipage/#the-end>
    ///
    /// Ends the tokenizer, detaches the parser from the document, reports the
    /// page source load as finished and, if a copy of the source was kept,
    /// sends it to devtools.
    ///
    /// # Panics
    ///
    /// Panics if the parser is suspended, the last chunk has not been
    /// received, either input queue is non-empty, or the network decoder has
    /// not been flushed yet. Also panics if a source copy was kept but the
    /// global has no devtools channel.
    fn finish(&self, can_gc: CanGc) {
        assert!(!self.suspended.get());
        assert!(self.last_chunk_received.get());
        assert!(self.script_input.is_empty());
        assert!(self.network_input.is_empty());
        assert!(self.network_decoder.borrow().is_none());

        // Step 1. Set the ready state to "interactive".
        self.document
            .set_ready_state(DocumentReadyState::Interactive, can_gc);

        // Step 2. End the tokenizer and clear the document's current parser.
        self.tokenizer.end(can_gc);
        self.document.set_current_parser(None);

        // Steps 3-12 are in another castle, namely finish_load.
        let url = self.tokenizer.url().clone();
        self.document.finish_load(LoadType::PageSource(url), can_gc);

        // Send the source contents to devtools, if needed.
        if let Some(content_for_devtools) = self
            .content_for_devtools
            .as_ref()
            .map(|content| content.take())
        {
            let global = self.document.global();
            let chan = global.devtools_chan().expect("Guaranteed by new");
            let pipeline_id = self.document.global().pipeline_id();
            // The result of the send is deliberately ignored.
            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
                pipeline_id,
                content_for_devtools,
            ));
        }
    }
763}
764
/// The iterator returned by `ServoParser::parse_html_fragment`.
///
/// It wraps an iterator over nodes; its `Iterator` implementation detaches
/// each node with `remove_self` as it is yielded.
struct FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    /// The wrapped node iterator.
    inner: I,
}
771
772impl<I> Iterator for FragmentParsingResult<I>
773where
774    I: Iterator<Item = DomRoot<Node>>,
775{
776    type Item = DomRoot<Node>;
777
778    fn next(&mut self) -> Option<DomRoot<Node>> {
779        let next = self.inner.next()?;
780        next.remove_self(CanGc::note());
781        Some(next)
782    }
783
784    fn size_hint(&self) -> (usize, Option<usize>) {
785        self.inner.size_hint()
786    }
787}
788
/// How a parser came into existence; consumed by `ServoParser::new_inherited`
/// to initialise `script_created_parser`.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    /// Used by every entry point except `parse_html_script_input`.
    Normal,
    /// Used by `parse_html_script_input`; makes `is_script_created()` true.
    ScriptCreated,
}
794
/// The concrete tokenizer backing a `ServoParser`. The methods of this enum
/// dispatch to the wrapped tokenizer.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// The tokenizer from the `html` submodule.
    Html(self::html::Tokenizer),
    /// The tokenizer from the `async_html` submodule, selected by
    /// `parse_html_document` when the
    /// `dom_servoparser_async_html_tokenizer_enabled` preference is set.
    AsyncHtml(self::async_html::Tokenizer),
    /// The tokenizer from the `xml` submodule.
    Xml(self::xml::Tokenizer),
}
802
803impl Tokenizer {
804    fn feed(
805        &self,
806        input: &BufferQueue,
807        can_gc: CanGc,
808        profiler_chan: ProfilerChan,
809        profiler_metadata: TimerMetadata,
810    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
811        match *self {
812            Tokenizer::Html(ref tokenizer) => time_profile!(
813                ProfilerCategory::ScriptParseHTML,
814                Some(profiler_metadata),
815                profiler_chan,
816                || tokenizer.feed(input),
817            ),
818            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
819                ProfilerCategory::ScriptParseHTML,
820                Some(profiler_metadata),
821                profiler_chan,
822                || tokenizer.feed(input, can_gc),
823            ),
824            Tokenizer::Xml(ref tokenizer) => time_profile!(
825                ProfilerCategory::ScriptParseXML,
826                Some(profiler_metadata),
827                profiler_chan,
828                || tokenizer.feed(input),
829            ),
830        }
831    }
832
833    fn end(&self, can_gc: CanGc) {
834        match *self {
835            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
836            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc),
837            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
838        }
839    }
840
841    fn url(&self) -> &ServoUrl {
842        match *self {
843            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
844            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
845            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
846        }
847    }
848
849    fn set_plaintext_state(&self) {
850        match *self {
851            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
852            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
853            Tokenizer::Xml(_) => unimplemented!(),
854        }
855    }
856
857    fn get_current_line(&self) -> u32 {
858        match *self {
859            Tokenizer::Html(ref tokenizer) => tokenizer.get_current_line(),
860            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.get_current_line(),
861            Tokenizer::Xml(ref tokenizer) => tokenizer.get_current_line(),
862        }
863    }
864}
865
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// This does not carry all of the fields of the specification struct, but it
/// mimics the intent of that struct when used in the document-loading
/// algorithms.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// Content type of this document, if known. Otherwise it has to be sniffed.
    content_type: Option<Mime>,
    /// Link headers from the response.
    link_headers: Vec<LinkHeader>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    ///
    /// Bytes of the response body buffered so far for MIME sniffing.
    resource_header: Vec<u8>,
}
881
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request. `None` until the response
    /// headers have been processed and a parser is available.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document, i.e. one whose markup is
    /// generated locally; further response chunks are ignored once this is set.
    is_synthesized_document: bool,
    /// Whether a document has already been loaded (relevant for checking the
    /// resource header).
    has_loaded_document: bool,
    /// The [`WebViewId`] of the `WebView` associated with this document.
    webview_id: WebViewId,
    /// The [`PipelineId`] of the `Pipeline` associated with this document.
    pipeline_id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Index returned when the navigation timing entry was queued in the
    /// performance buffer, if any; used to update that entry later.
    pushed_entry_index: Option<usize>,
    /// Parameters required by the document-loading algorithms.
    navigation_params: NavigationParams,
}
902
903impl ParserContext {
904    pub(crate) fn new(
905        webview_id: WebViewId,
906        pipeline_id: PipelineId,
907        url: ServoUrl,
908        creation_sandboxing_flag_set: SandboxingFlagSet,
909    ) -> ParserContext {
910        ParserContext {
911            parser: None,
912            is_synthesized_document: false,
913            has_loaded_document: false,
914            webview_id,
915            pipeline_id,
916            url,
917            pushed_entry_index: None,
918            navigation_params: NavigationParams {
919                policy_container: Default::default(),
920                content_type: None,
921                link_headers: vec![],
922                final_sandboxing_flag_set: creation_sandboxing_flag_set,
923                resource_header: vec![],
924            },
925        }
926    }
927
928    pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
929        let Some(policy_container) = policy_container else {
930            return;
931        };
932        self.navigation_params.policy_container = policy_container.clone();
933    }
934
935    /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
936    fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
937        // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's blob URL entry's environment's policy container.
938        // TODO
939        // Step 2. Let result be a new policy container.
940        // Step 7. Return result.
941        PolicyContainer {
942            // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response.
943            csp_list: parse_csp_list_from_metadata(&metadata.headers),
944            // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY]
945            referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers),
946        }
947    }
948
949    /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
950    fn initialize_document_object(&self, document: &Document) {
951        // Step 9. Let document be a new Document, with
952        document.set_policy_container(self.navigation_params.policy_container.clone());
953        document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set);
954        // Step 17. Process link headers given document, navigationParams's response, and "pre-media".
955        process_link_headers(
956            &self.navigation_params.link_headers,
957            document,
958            LinkProcessingPhase::PreMedia,
959        );
960    }
961
962    /// Part of various load document methods
963    fn process_link_headers_in_media_phase_with_task(&mut self, document: &Document) {
964        // The first task that the networking task source places on the task queue
965        // while fetching runs must process link headers given document,
966        // navigationParams's response, and "media", after the task has been processed by the HTML parser.
967        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
968        if !link_headers.is_empty() {
969            let window = document.window();
970            let document = Trusted::new(document);
971            window
972                .upcast::<GlobalScope>()
973                .task_manager()
974                .networking_task_source()
975                .queue(task!(process_link_headers_task: move || {
976                    process_link_headers(&link_headers, &document.root(), LinkProcessingPhase::Media);
977                }));
978        }
979    }
980
981    /// <https://html.spec.whatwg.org/multipage/#loading-a-document>
982    fn load_document(&mut self, can_gc: CanGc) {
983        assert!(!self.has_loaded_document);
984        self.has_loaded_document = true;
985        let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
986            return;
987        };
988        // Step 1. Let type be the computed type of navigationParams's response.
989        let content_type = &self.navigation_params.content_type;
990        let mime_type = MimeClassifier::default().classify(
991            LoadContext::Browsing,
992            NoSniffFlag::Off,
993            ApacheBugFlag::from_content_type(content_type.as_ref()),
994            content_type,
995            &self.navigation_params.resource_header,
996        );
997        // Step 2. If the user agent has been configured to process resources of the given type using
998        // some mechanism other than rendering the content in a navigable, then skip this step.
999        // Otherwise, if the type is one of the following types:
1000        let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
1001            let page = format!(
1002                "<html><body><p>Unknown content type ({}).</p></body></html>",
1003                &mime_type,
1004            );
1005            self.load_inline_unknown_content(parser, page);
1006            return;
1007        };
1008        match media_type {
1009            // Return the result of loading an HTML document, given navigationParams.
1010            MediaType::Html => self.load_html_document(parser),
1011            // Return the result of loading an XML document given navigationParams and type.
1012            MediaType::Xml => self.load_xml_document(parser),
1013            // Return the result of loading a text document given navigationParams and type.
1014            MediaType::JavaScript | MediaType::Json | MediaType::Text | MediaType::Css => {
1015                self.load_text_document(parser)
1016            },
1017            // Return the result of loading a media document given navigationParams and type.
1018            MediaType::Image | MediaType::AudioVideo => {
1019                self.load_media_document(parser, media_type, &mime_type);
1020                return;
1021            },
1022            MediaType::Font => {
1023                let page = format!(
1024                    "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
1025                    &mime_type,
1026                );
1027                self.load_inline_unknown_content(parser, page);
1028                return;
1029            },
1030        };
1031
1032        parser.parse_bytes_chunk(
1033            std::mem::take(&mut self.navigation_params.resource_header),
1034            can_gc,
1035        );
1036    }
1037
1038    /// <https://html.spec.whatwg.org/multipage/#navigate-html>
1039    fn load_html_document(&mut self, parser: &ServoParser) {
1040        // Step 1. Let document be the result of creating and initializing a
1041        // Document object given "html", "text/html", and navigationParams.
1042        self.initialize_document_object(&parser.document);
1043        // The first task that the networking task source places on the task queue while fetching
1044        // runs must process link headers given document, navigationParams's response, and "media",
1045        // after the task has been processed by the HTML parser.
1046        self.process_link_headers_in_media_phase_with_task(&parser.document);
1047    }
1048
1049    /// <https://html.spec.whatwg.org/multipage/#read-xml>
1050    fn load_xml_document(&mut self, parser: &ServoParser) {
1051        // When faced with displaying an XML file inline, provided navigation params navigationParams
1052        // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
1053        // XML Media Types, DOM, and other relevant specifications to create and initialize a
1054        // Document object document, given "xml", type, and navigationParams, and return that Document.
1055        // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
1056        self.initialize_document_object(&parser.document);
1057        // The first task that the networking task source places on the task queue while fetching
1058        // runs must process link headers given document, navigationParams's response, and "media",
1059        // after the task has been processed by the XML parser.
1060        self.process_link_headers_in_media_phase_with_task(&parser.document);
1061    }
1062
1063    /// <https://html.spec.whatwg.org/multipage/#navigate-text>
1064    fn load_text_document(&mut self, parser: &ServoParser) {
1065        // Step 4. Create an HTML parser and associate it with the document.
1066        // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by
1067        // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state.
1068        // Each task that the networking task source places on the task queue while fetching runs must then
1069        // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform
1070        // the appropriate processing of the input stream.
1071        let page = "<pre>\n".into();
1072        parser.push_string_input_chunk(page);
1073        parser.parse_sync(CanGc::note());
1074        parser.tokenizer.set_plaintext_state();
1075        // The first task that the networking task source places on the task queue while fetching
1076        // runs must process link headers given document, navigationParams's response, and "media",
1077        // after the task has been processed by the HTML parser.
1078        self.process_link_headers_in_media_phase_with_task(&parser.document);
1079    }
1080
1081    /// <https://html.spec.whatwg.org/multipage/#navigate-media>
1082    fn load_media_document(
1083        &mut self,
1084        parser: &ServoParser,
1085        media_type: MediaType,
1086        mime_type: &Mime,
1087    ) {
1088        // Step 8. Act as if the user agent had stopped parsing document.
1089        self.is_synthesized_document = true;
1090        // Step 3. Populate with html/head/body given document.
1091        let page = "<html><body></body></html>".into();
1092        parser.push_string_input_chunk(page);
1093        parser.parse_sync(CanGc::note());
1094
1095        let doc = &parser.document;
1096        // Step 5. Set the appropriate attribute of the element host element, as described below,
1097        // to the address of the image, video, or audio resource.
1098        let node = if media_type == MediaType::Image {
1099            let img = Element::create(
1100                QualName::new(None, ns!(html), local_name!("img")),
1101                None,
1102                doc,
1103                ElementCreator::ParserCreated(1),
1104                CustomElementCreationMode::Asynchronous,
1105                None,
1106                CanGc::note(),
1107            );
1108            let img = DomRoot::downcast::<HTMLImageElement>(img).unwrap();
1109            img.SetSrc(USVString(self.url.to_string()));
1110            DomRoot::upcast::<Node>(img)
1111        } else if mime_type.type_() == mime::AUDIO {
1112            let audio = Element::create(
1113                QualName::new(None, ns!(html), local_name!("audio")),
1114                None,
1115                doc,
1116                ElementCreator::ParserCreated(1),
1117                CustomElementCreationMode::Asynchronous,
1118                None,
1119                CanGc::note(),
1120            );
1121            let audio = DomRoot::downcast::<HTMLMediaElement>(audio).unwrap();
1122            audio.SetSrc(USVString(self.url.to_string()));
1123            DomRoot::upcast::<Node>(audio)
1124        } else {
1125            let video = Element::create(
1126                QualName::new(None, ns!(html), local_name!("video")),
1127                None,
1128                doc,
1129                ElementCreator::ParserCreated(1),
1130                CustomElementCreationMode::Asynchronous,
1131                None,
1132                CanGc::note(),
1133            );
1134            let video = DomRoot::downcast::<HTMLMediaElement>(video).unwrap();
1135            video.SetSrc(USVString(self.url.to_string()));
1136            DomRoot::upcast::<Node>(video)
1137        };
1138        // Step 4. Append an element host element for the media, as described below, to the body element.
1139        let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
1140        doc_body
1141            .AppendChild(&node, CanGc::note())
1142            .expect("Appending failed");
1143        // Step 7. Process link headers given document, navigationParams's response, and "media".
1144        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1145        process_link_headers(&link_headers, doc, LinkProcessingPhase::Media);
1146    }
1147
    /// <https://html.spec.whatwg.org/multipage/#read-ua-inline>
    ///
    /// Marks the document as synthesized and synchronously parses `page`, a
    /// locally generated HTML string, as the document's content.
    fn load_inline_unknown_content(&mut self, parser: &ServoParser, page: String) {
        // Set the flag before parsing: once it is set, `process_response_chunk`
        // ignores any further network payload for this document.
        self.is_synthesized_document = true;
        parser.push_string_input_chunk(page);
        parser.parse_sync(CanGc::note());
    }
1154
1155    /// Store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1156    fn submit_resource_timing(&mut self) {
1157        let Some(parser) = self.parser.as_ref() else {
1158            return;
1159        };
1160        let parser = parser.root();
1161        if parser.aborted.get() {
1162            return;
1163        }
1164
1165        let document = &parser.document;
1166
1167        // TODO: Pass a proper fetch start time here.
1168        let performance_entry = PerformanceNavigationTiming::new(
1169            &document.global(),
1170            CrossProcessInstant::now(),
1171            document,
1172            CanGc::note(),
1173        );
1174        self.pushed_entry_index = document.global().performance().queue_entry(
1175            performance_entry.upcast::<PerformanceEntry>(),
1176            CanGc::note(),
1177        );
1178    }
1179}
1180
1181impl FetchResponseListener for ParserContext {
    // Intentionally empty: this listener takes no action on this notification.
    fn process_request_body(&mut self, _: RequestId) {}
1183
    // Intentionally empty: this listener takes no action on this notification.
    fn process_request_eof(&mut self, _: RequestId) {}
1185
1186    fn process_response(&mut self, _: RequestId, meta_result: Result<FetchMetadata, NetworkError>) {
1187        let (metadata, error) = match meta_result {
1188            Ok(meta) => (
1189                Some(match meta {
1190                    FetchMetadata::Unfiltered(m) => m,
1191                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
1192                }),
1193                None,
1194            ),
1195            Err(error) => (
1196                // Check variant without moving
1197                match &error {
1198                    NetworkError::SslValidation(..) |
1199                    NetworkError::Internal(..) |
1200                    NetworkError::Crash(..) => {
1201                        let mut meta = Metadata::default(self.url.clone());
1202                        let mime: Option<Mime> = "text/html".parse().ok();
1203                        meta.set_content_type(mime.as_ref());
1204                        Some(meta)
1205                    },
1206                    _ => None,
1207                },
1208                Some(error),
1209            ),
1210        };
1211        let content_type: Option<Mime> = metadata
1212            .clone()
1213            .and_then(|meta| meta.content_type)
1214            .map(Serde::into_inner)
1215            .map(Into::into);
1216
1217        let (policy_container, endpoints_list, link_headers) = match metadata.as_ref() {
1218            None => (PolicyContainer::default(), None, vec![]),
1219            Some(metadata) => (
1220                Self::create_policy_container_from_fetch_response(metadata),
1221                ReportingEndpoint::parse_reporting_endpoints_header(
1222                    &self.url.clone(),
1223                    &metadata.headers,
1224                ),
1225                extract_links_from_headers(&metadata.headers),
1226            ),
1227        };
1228
1229        let parser = match ScriptThread::page_headers_available(
1230            self.webview_id,
1231            self.pipeline_id,
1232            metadata,
1233            CanGc::note(),
1234        ) {
1235            Some(parser) => parser,
1236            None => return,
1237        };
1238        if parser.aborted.get() {
1239            return;
1240        }
1241
1242        let _realm = enter_realm(&*parser.document);
1243        let window = parser.document.window();
1244
1245        // From Step 23.8.3 of https://html.spec.whatwg.org/multipage/#navigate
1246        // Let finalSandboxFlags be the union of targetSnapshotParams's sandboxing flags and
1247        // policyContainer's CSP list's CSP-derived sandboxing flags.
1248        //
1249        // TODO: This deviates a bit from the specification, because there isn't a `targetSnapshotParam`
1250        // concept yet.
1251        let final_sandboxing_flag_set = policy_container
1252            .csp_list
1253            .as_ref()
1254            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
1255            .unwrap_or(SandboxingFlagSet::empty())
1256            .union(parser.document.creation_sandboxing_flag_set());
1257
1258        if let Some(endpoints) = endpoints_list {
1259            window.set_endpoints_list(endpoints);
1260        }
1261        self.parser = Some(Trusted::new(&*parser));
1262        self.navigation_params = NavigationParams {
1263            policy_container,
1264            content_type,
1265            final_sandboxing_flag_set,
1266            link_headers,
1267            resource_header: vec![],
1268        };
1269        self.submit_resource_timing();
1270
1271        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
1272        //
1273        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
1274        // e.g., a native rendering of the content or an error message because the specified type is not supported,
1275        // then return the result of creating a document for inline content that doesn't have a DOM given
1276        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
1277        // and navigationParams's user involvement.
1278        if let Some(error) = error {
1279            let page = match error {
1280                NetworkError::SslValidation(reason, bytes) => {
1281                    let page = resources::read_string(Resource::BadCertHTML);
1282                    let page = page.replace("${reason}", &reason);
1283                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
1284                    let page = page.replace("${bytes}", encoded_bytes.as_str());
1285                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
1286                },
1287                NetworkError::Internal(reason) => {
1288                    let page = resources::read_string(Resource::NetErrorHTML);
1289                    page.replace("${reason}", &reason)
1290                },
1291                NetworkError::Crash(details) => {
1292                    let page = resources::read_string(Resource::CrashHTML);
1293                    page.replace("${details}", &details)
1294                },
1295                NetworkError::LoadCancelled => {
1296                    // The next load will show a page
1297                    return;
1298                },
1299            };
1300            self.load_inline_unknown_content(&parser, page);
1301        }
1302    }
1303
1304    fn process_response_chunk(&mut self, _: RequestId, payload: Vec<u8>) {
1305        if self.is_synthesized_document {
1306            return;
1307        }
1308        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
1309            return;
1310        };
1311        if parser.aborted.get() {
1312            return;
1313        }
1314        if !self.has_loaded_document {
1315            // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1316            self.navigation_params
1317                .resource_header
1318                .extend_from_slice(&payload);
1319            // the number of bytes in buffer is greater than or equal to 1445.
1320            if self.navigation_params.resource_header.len() >= 1445 {
1321                self.load_document(CanGc::note());
1322            }
1323        } else {
1324            parser.parse_bytes_chunk(payload, CanGc::note());
1325        }
1326    }
1327
1328    // This method is called via script_thread::handle_fetch_eof, so we must call
1329    // submit_resource_timing in this function
1330    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1331    fn process_response_eof(
1332        mut self,
1333        _: RequestId,
1334        status: Result<ResourceFetchTiming, NetworkError>,
1335    ) {
1336        let parser = match self.parser.as_ref() {
1337            Some(parser) => parser.root(),
1338            None => return,
1339        };
1340        if parser.aborted.get() {
1341            return;
1342        }
1343
1344        if let Err(error) = &status {
1345            // TODO(Savago): we should send a notification to callers #5463.
1346            debug!("Failed to load page URL {}, error: {error:?}", self.url);
1347        }
1348
1349        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1350        //
1351        // the end of the resource is reached.
1352        if !self.has_loaded_document {
1353            self.load_document(CanGc::note());
1354        }
1355
1356        let _realm = enter_realm(&*parser);
1357
1358        if let Ok(resource_timing) = &status {
1359            parser
1360                .document
1361                .set_redirect_count(resource_timing.redirect_count);
1362        }
1363
1364        parser.last_chunk_received.set(true);
1365        if !parser.suspended.get() {
1366            parser.parse_sync(CanGc::note());
1367        }
1368
1369        // TODO: Only update if this is the current document resource.
1370        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
1371        if let Some(pushed_index) = self.pushed_entry_index {
1372            let document = &parser.document;
1373            let performance_entry = PerformanceNavigationTiming::new(
1374                &document.global(),
1375                CrossProcessInstant::now(),
1376                document,
1377                CanGc::note(),
1378            );
1379            document
1380                .global()
1381                .performance()
1382                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1383        }
1384    }
1385
1386    fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec<Violation>) {
1387        let parser = match self.parser.as_ref() {
1388            Some(parser) => parser.root(),
1389            None => return,
1390        };
1391        let document = &parser.document;
1392        let global = &document.global();
1393        // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved
1394        global.report_csp_violations(violations, None, None);
1395    }
1396}
1397
/// Borrowed inputs describing the context of a fragment parse.
///
/// NOTE(review): the field meanings below are inferred from their names and
/// the HTML fragment parsing algorithm — confirm against the code that
/// constructs and consumes this struct.
pub(crate) struct FragmentContext<'a> {
    /// The context element node the fragment is parsed for (presumably).
    pub(crate) context_elem: &'a Node,
    /// The form element to associate with, if any (presumably).
    pub(crate) form_elem: Option<&'a Node>,
    /// Whether scripting is allowed for the context element (presumably).
    pub(crate) context_element_allows_scripting: bool,
}
1403
1404#[cfg_attr(crown, allow(crown::unrooted_must_root))]
1405fn insert(
1406    parent: &Node,
1407    reference_child: Option<&Node>,
1408    child: NodeOrText<Dom<Node>>,
1409    parsing_algorithm: ParsingAlgorithm,
1410    custom_element_reaction_stack: &CustomElementReactionStack,
1411    can_gc: CanGc,
1412) {
1413    match child {
1414        NodeOrText::AppendNode(n) => {
1415            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1416            // applies if this is an element; if not, it may be
1417            // https://html.spec.whatwg.org/multipage/#insert-a-comment
1418            let element_in_non_fragment =
1419                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
1420            if element_in_non_fragment {
1421                custom_element_reaction_stack.push_new_element_queue();
1422            }
1423            parent.InsertBefore(&n, reference_child, can_gc).unwrap();
1424            if element_in_non_fragment {
1425                custom_element_reaction_stack.pop_current_element_queue(can_gc);
1426            }
1427        },
1428        NodeOrText::AppendText(t) => {
1429            // https://html.spec.whatwg.org/multipage/#insert-a-character
1430            let text = reference_child
1431                .and_then(Node::GetPreviousSibling)
1432                .or_else(|| parent.GetLastChild())
1433                .and_then(DomRoot::downcast::<Text>);
1434
1435            if let Some(text) = text {
1436                text.upcast::<CharacterData>().append_data(&t);
1437            } else {
1438                let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc);
1439                parent
1440                    .InsertBefore(text.upcast(), reference_child, can_gc)
1441                    .unwrap();
1442            }
1443        },
1444    }
1445}
1446
/// The tree sink used by the parser: it implements `TreeSink` by creating
/// and manipulating DOM nodes that belong to `document`.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// NOTE(review): presumably the base URL of the document being parsed — confirm.
    #[no_trace]
    base_url: ServoUrl,
    /// The document that created nodes belong to.
    document: Dom<Document>,
    /// Line number passed to `ElementCreator::ParserCreated` when elements are created.
    current_line: Cell<u64>,
    /// NOTE(review): presumably the script element awaiting execution, if any — confirm.
    script: MutNullableDom<HTMLScriptElement>,
    /// Parsing algorithm used when creating elements; `create_element`
    /// substitutes the fragment algorithm when the `template` flag is set.
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack handed to element creation.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
1459
1460impl Sink {
1461    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1462        let x = x.downcast::<Element>().expect("Element node expected");
1463        let y = y.downcast::<Element>().expect("Element node expected");
1464
1465        x.is_in_same_home_subtree(y)
1466    }
1467
1468    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1469        node.GetParentNode().is_some()
1470    }
1471}
1472
1473impl TreeSink for Sink {
1474    type Output = Self;
    /// Finishing the sink hands the sink itself back as the output.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn finish(self) -> Self {
        self
    }
1479
1480    type Handle = Dom<Node>;
1481    type ElemName<'a>
1482        = ExpandedName<'a>
1483    where
1484        Self: 'a;
1485
    /// Returns a handle to the sink's document, upcast to a node.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn get_document(&self) -> Dom<Node> {
        Dom::from_ref(self.document.upcast())
    }
1490
1491    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1492    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1493        let template = target
1494            .downcast::<HTMLTemplateElement>()
1495            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1496        Dom::from_ref(template.Content(CanGc::note()).upcast())
1497    }
1498
    /// Returns whether the two handles compare equal.
    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
        x == y
    }
1502
1503    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1504        let elem = target
1505            .downcast::<Element>()
1506            .expect("tried to get name of non-Element in HTML parsing");
1507        ExpandedName {
1508            ns: elem.namespace(),
1509            local: elem.local_name(),
1510        }
1511    }
1512
1513    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1514    fn create_element(
1515        &self,
1516        name: QualName,
1517        attrs: Vec<Attribute>,
1518        flags: ElementFlags,
1519    ) -> Dom<Node> {
1520        let attrs = attrs
1521            .into_iter()
1522            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1523            .collect();
1524        let parsing_algorithm = if flags.template {
1525            ParsingAlgorithm::Fragment
1526        } else {
1527            self.parsing_algorithm
1528        };
1529        let element = create_element_for_token(
1530            name,
1531            attrs,
1532            &self.document,
1533            ElementCreator::ParserCreated(self.current_line.get()),
1534            parsing_algorithm,
1535            &self.custom_element_reaction_stack,
1536            CanGc::note(),
1537        );
1538        Dom::from_ref(element.upcast())
1539    }
1540
1541    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1542    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1543        let comment = Comment::new(
1544            DOMString::from(String::from(text)),
1545            &self.document,
1546            None,
1547            CanGc::note(),
1548        );
1549        Dom::from_ref(comment.upcast())
1550    }
1551
1552    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1553    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1554        let doc = &*self.document;
1555        let pi = ProcessingInstruction::new(
1556            DOMString::from(String::from(target)),
1557            DOMString::from(String::from(data)),
1558            doc,
1559            CanGc::note(),
1560        );
1561        Dom::from_ref(pi.upcast())
1562    }
1563
1564    fn associate_with_form(
1565        &self,
1566        target: &Dom<Node>,
1567        form: &Dom<Node>,
1568        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1569    ) {
1570        let (element, prev_element) = nodes;
1571        let tree_node = prev_element.map_or(element, |prev| {
1572            if self.has_parent_node(element) {
1573                element
1574            } else {
1575                prev
1576            }
1577        });
1578        if !self.same_tree(tree_node, form) {
1579            return;
1580        }
1581
1582        let node = target;
1583        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1584            .expect("Owner must be a form element");
1585
1586        let elem = node.downcast::<Element>();
1587        let control = elem.and_then(|e| e.as_maybe_form_control());
1588
1589        if let Some(control) = control {
1590            control.set_form_owner_from_parser(&form, CanGc::note());
1591        }
1592    }
1593
    /// Inserts `new_node` into the parent of `sibling`, passing `sibling` as
    /// the reference child to `insert`.
    ///
    /// # Panics
    ///
    /// Panics if `sibling` has no parent node.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
        let parent = sibling
            .GetParentNode()
            .expect("append_before_sibling called on node without parent");

        insert(
            &parent,
            Some(sibling),
            new_node,
            self.parsing_algorithm,
            &self.custom_element_reaction_stack,
            CanGc::note(),
        );
    }
1609
    /// Records a parse error reported by the parser; it is only logged at
    /// debug level and otherwise ignored.
    fn parse_error(&self, msg: Cow<'static, str>) {
        debug!("Parse error: {}", msg);
    }
1613
1614    fn set_quirks_mode(&self, mode: QuirksMode) {
1615        let mode = match mode {
1616            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1617            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1618            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1619        };
1620        self.document.set_quirks_mode(mode);
1621    }
1622
    /// Inserts `child` into `parent` by calling `insert` with no reference
    /// child (`None`).
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
        insert(
            parent,
            None,
            child,
            self.parsing_algorithm,
            &self.custom_element_reaction_stack,
            CanGc::note(),
        );
    }
1634
1635    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1636    fn append_based_on_parent_node(
1637        &self,
1638        elem: &Dom<Node>,
1639        prev_elem: &Dom<Node>,
1640        child: NodeOrText<Dom<Node>>,
1641    ) {
1642        if self.has_parent_node(elem) {
1643            self.append_before_sibling(elem, child);
1644        } else {
1645            self.append(prev_elem, child);
1646        }
1647    }
1648
1649    fn append_doctype_to_document(
1650        &self,
1651        name: StrTendril,
1652        public_id: StrTendril,
1653        system_id: StrTendril,
1654    ) {
1655        let doc = &*self.document;
1656        let doctype = DocumentType::new(
1657            DOMString::from(String::from(name)),
1658            Some(DOMString::from(String::from(public_id))),
1659            Some(DOMString::from(String::from(system_id))),
1660            doc,
1661            CanGc::note(),
1662        );
1663        doc.upcast::<Node>()
1664            .AppendChild(doctype.upcast(), CanGc::note())
1665            .expect("Appending failed");
1666    }
1667
1668    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1669        let elem = target
1670            .downcast::<Element>()
1671            .expect("tried to set attrs on non-Element in HTML parsing");
1672        for attr in attrs {
1673            elem.set_attribute_from_parser(
1674                attr.name,
1675                DOMString::from(String::from(attr.value)),
1676                None,
1677                CanGc::note(),
1678            );
1679        }
1680    }
1681
1682    fn remove_from_parent(&self, target: &Dom<Node>) {
1683        if let Some(ref parent) = target.GetParentNode() {
1684            parent.RemoveChild(target, CanGc::note()).unwrap();
1685        }
1686    }
1687
1688    fn mark_script_already_started(&self, node: &Dom<Node>) {
1689        let script = node.downcast::<HTMLScriptElement>();
1690        if let Some(script) = script {
1691            script.set_already_started(true)
1692        }
1693    }
1694
1695    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1696        while let Some(ref child) = node.GetFirstChild() {
1697            new_parent.AppendChild(child, CanGc::note()).unwrap();
1698        }
1699    }
1700
1701    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1702    /// Specifically, the `<annotation-xml>` cases.
1703    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1704        let elem = handle.downcast::<Element>().unwrap();
1705        elem.get_attribute(&ns!(), &local_name!("encoding"))
1706            .is_some_and(|attr| {
1707                attr.value().eq_ignore_ascii_case("text/html") ||
1708                    attr.value().eq_ignore_ascii_case("application/xhtml+xml")
1709            })
1710    }
1711
    /// Stores `line_number` in the sink's `current_line` cell, which
    /// `create_element` reads when creating parser-created elements.
    fn set_current_line(&self, line_number: u64) {
        self.current_line.set(line_number);
    }
1715
    /// Roots `node` and calls `pop()` on the value `vtable_for` returns for it.
    fn pop(&self, node: &Dom<Node>) {
        let node = DomRoot::from_ref(&**node);
        vtable_for(&node).pop();
    }
1720
1721    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
1722        intended_parent.owner_doc().allow_declarative_shadow_roots()
1723    }
1724
    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
    /// A start tag whose tag name is "template"
    /// Attach shadow path
    ///
    /// Forwards to `attach_declarative_shadow_inner` and returns its result.
    fn attach_declarative_shadow(
        &self,
        host: &Dom<Node>,
        template: &Dom<Node>,
        attributes: &[Attribute],
    ) -> bool {
        attach_declarative_shadow_inner(host, template, attributes)
    }
1736}
1737
1738/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
1739fn create_element_for_token(
1740    name: QualName,
1741    attrs: Vec<ElementAttribute>,
1742    document: &Document,
1743    creator: ElementCreator,
1744    parsing_algorithm: ParsingAlgorithm,
1745    custom_element_reaction_stack: &CustomElementReactionStack,
1746    can_gc: CanGc,
1747) -> DomRoot<Element> {
1748    // Step 3.
1749    let is = attrs
1750        .iter()
1751        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
1752        .map(|attr| LocalName::from(&attr.value));
1753
1754    // Step 4.
1755    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());
1756
1757    // Step 5.
1758    let will_execute_script =
1759        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;
1760
1761    // Step 6.
1762    if will_execute_script {
1763        // Step 6.1.
1764        document.increment_throw_on_dynamic_markup_insertion_counter();
1765        // Step 6.2
1766        if is_execution_stack_empty() {
1767            document.window().perform_a_microtask_checkpoint(can_gc);
1768        }
1769        // Step 6.3
1770        custom_element_reaction_stack.push_new_element_queue()
1771    }
1772
1773    // Step 7.
1774    let creation_mode = if will_execute_script {
1775        CustomElementCreationMode::Synchronous
1776    } else {
1777        CustomElementCreationMode::Asynchronous
1778    };
1779
1780    let element = Element::create(name, is, document, creator, creation_mode, None, can_gc);
1781
1782    // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3
1783    // says to invoke sanitization "when an input element is first created";
1784    // however, since sanitization requires content attributes to function,
1785    // it can't mean that literally.
1786    // Indeed, to make sanitization work correctly, we need to _not_ sanitize
1787    // until after all content attributes have been added
1788
1789    let maybe_input = element.downcast::<HTMLInputElement>();
1790    if let Some(input) = maybe_input {
1791        input.disable_sanitization();
1792    }
1793
1794    // Step 8
1795    for attr in attrs {
1796        element.set_attribute_from_parser(attr.name, attr.value, None, can_gc);
1797    }
1798
1799    // _now_ we can sanitize (and we sanitize now even if the "value"
1800    // attribute isn't present!)
1801    if let Some(input) = maybe_input {
1802        input.enable_sanitization();
1803    }
1804
1805    // Step 9.
1806    if will_execute_script {
1807        // Steps 9.1 - 9.2.
1808        custom_element_reaction_stack.pop_current_element_queue(can_gc);
1809        // Step 9.3.
1810        document.decrement_throw_on_dynamic_markup_insertion_counter();
1811    }
1812
1813    // TODO: Step 10.
1814    // TODO: Step 11.
1815
1816    // Step 12 is handled in `associate_with_form`.
1817
1818    // Step 13.
1819    element
1820}
1821
/// Wraps a tendril `LossyDecoder` whose decoded text is collected by a
/// `NetworkSink`, so that byte chunks can be decoded one at a time.
#[derive(JSTraceable, MallocSizeOf)]
struct NetworkDecoder {
    /// The underlying decoder; its inner sink accumulates the decoded text.
    #[ignore_malloc_size_of = "Defined in tendril"]
    #[custom_trace]
    decoder: LossyDecoder<NetworkSink>,
}
1828
1829impl NetworkDecoder {
1830    fn new(encoding: &'static Encoding) -> Self {
1831        Self {
1832            decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
1833        }
1834    }
1835
1836    fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
1837        self.decoder.process(ByteTendril::from(&*chunk));
1838        std::mem::take(&mut self.decoder.inner_sink_mut().output)
1839    }
1840
1841    fn finish(self) -> StrTendril {
1842        self.decoder.finish()
1843    }
1844}
1845
/// Tendril sink that accumulates decoded text in a single buffer.
#[derive(Default, JSTraceable)]
struct NetworkSink {
    /// Text received so far; `NetworkDecoder::decode` takes it out with
    /// `std::mem::take`, leaving an empty tendril behind.
    #[no_trace]
    output: StrTendril,
}
1851
1852impl TendrilSink<UTF8> for NetworkSink {
1853    type Output = StrTendril;
1854
1855    fn process(&mut self, t: StrTendril) {
1856        if self.output.is_empty() {
1857            self.output = t;
1858        } else {
1859            self.output.push_tendril(&t);
1860        }
1861    }
1862
1863    fn error(&mut self, _desc: Cow<'static, str>) {}
1864
1865    fn finish(self) -> Self::Output {
1866        self.output
1867    }
1868}
1869
1870fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool {
1871    let host_element = host.downcast::<Element>().unwrap();
1872
1873    if host_element.shadow_root().is_some() {
1874        return false;
1875    }
1876
1877    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
1878
1879    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
1880    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
1881    // Step 5. Let delegatesfocus be true if template start tag
1882    // has a shadowrootdelegatesfocus attribute; otherwise false.
1883    // Step 6. Let serializable be true if template start tag
1884    // has a shadowrootserializable attribute; otherwise false.
1885    let mut shadow_root_mode = ShadowRootMode::Open;
1886    let mut clonable = false;
1887    let mut delegatesfocus = false;
1888    let mut serializable = false;
1889
1890    let attributes: Vec<ElementAttribute> = attributes
1891        .iter()
1892        .map(|attr| {
1893            ElementAttribute::new(
1894                attr.name.clone(),
1895                DOMString::from(String::from(attr.value.clone())),
1896            )
1897        })
1898        .collect();
1899
1900    attributes
1901        .iter()
1902        .for_each(|attr: &ElementAttribute| match attr.name.local {
1903            local_name!("shadowrootmode") => {
1904                if attr.value.str().eq_ignore_ascii_case("open") {
1905                    shadow_root_mode = ShadowRootMode::Open;
1906                } else if attr.value.str().eq_ignore_ascii_case("closed") {
1907                    shadow_root_mode = ShadowRootMode::Closed;
1908                } else {
1909                    unreachable!("shadowrootmode value is not open nor closed");
1910                }
1911            },
1912            local_name!("shadowrootclonable") => {
1913                clonable = true;
1914            },
1915            local_name!("shadowrootdelegatesfocus") => {
1916                delegatesfocus = true;
1917            },
1918            local_name!("shadowrootserializable") => {
1919                serializable = true;
1920            },
1921            _ => {},
1922        });
1923
1924    // Step 8.1. Attach a shadow root with declarative shadow host element,
1925    // mode, clonable, serializable, delegatesFocus, and "named".
1926    match host_element.attach_shadow(
1927        IsUserAgentWidget::No,
1928        shadow_root_mode,
1929        clonable,
1930        serializable,
1931        delegatesfocus,
1932        SlotAssignmentMode::Named,
1933        CanGc::note(),
1934    ) {
1935        Ok(shadow_root) => {
1936            // Step 8.3. Set shadow's declarative to true.
1937            shadow_root.set_declarative(true);
1938
1939            // Set 8.4. Set template's template contents property to shadow.
1940            let shadow = shadow_root.upcast::<DocumentFragment>();
1941            template_element.set_contents(Some(shadow));
1942
1943            // Step 8.5. Set shadow’s available to element internals to true.
1944            shadow_root.set_available_to_element_internals(true);
1945
1946            true
1947        },
1948        Err(_) => false,
1949    }
1950}