script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::Cell;
7use std::rc::Rc;
8
9use base::cross_process_instant::CrossProcessInstant;
10use base::id::{PipelineId, WebViewId};
11use base64::Engine as _;
12use base64::engine::general_purpose;
13use content_security_policy::sandboxing_directive::SandboxingFlagSet;
14use devtools_traits::ScriptToDevtoolsControlMsg;
15use dom_struct::dom_struct;
16use embedder_traits::resources::{self, Resource};
17use encoding_rs::Encoding;
18use html5ever::buffer_queue::BufferQueue;
19use html5ever::tendril::fmt::UTF8;
20use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
21use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
22use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
23use hyper_serde::Serde;
24use markup5ever::TokenizerResult;
25use mime::{self, Mime};
26use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
27use net_traits::policy_container::PolicyContainer;
28use net_traits::request::RequestId;
29use net_traits::{
30    FetchMetadata, FetchResponseListener, LoadContext, Metadata, NetworkError, ReferrerPolicy,
31    ResourceFetchTiming, ResourceTimingType,
32};
33use profile_traits::time::{
34    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
35};
36use profile_traits::time_profile;
37use script_traits::DocumentActivity;
38use servo_config::pref;
39use servo_url::ServoUrl;
40use style::context::QuirksMode as ServoQuirksMode;
41use tendril::stream::LossyDecoder;
42
43use crate::document_loader::{DocumentLoader, LoadType};
44use crate::dom::bindings::cell::DomRefCell;
45use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
46    DocumentMethods, DocumentReadyState,
47};
48use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
49use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
50use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
51use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
52use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
53    ShadowRootMode, SlotAssignmentMode,
54};
55use crate::dom::bindings::inheritance::Castable;
56use crate::dom::bindings::refcounted::Trusted;
57use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object};
58use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
59use crate::dom::bindings::settings_stack::is_execution_stack_empty;
60use crate::dom::bindings::str::{DOMString, USVString};
61use crate::dom::characterdata::CharacterData;
62use crate::dom::comment::Comment;
63use crate::dom::csp::{GlobalCspReporting, Violation, parse_csp_list_from_metadata};
64use crate::dom::customelementregistry::CustomElementReactionStack;
65use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
66use crate::dom::documentfragment::DocumentFragment;
67use crate::dom::documenttype::DocumentType;
68use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
69use crate::dom::globalscope::GlobalScope;
70use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
71use crate::dom::html::htmlimageelement::HTMLImageElement;
72use crate::dom::html::htmlinputelement::HTMLInputElement;
73use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
74use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
75use crate::dom::node::{Node, ShadowIncluding};
76use crate::dom::performance::performanceentry::PerformanceEntry;
77use crate::dom::performance::performancenavigationtiming::PerformanceNavigationTiming;
78use crate::dom::processinginstruction::ProcessingInstruction;
79use crate::dom::processingoptions::{
80    LinkHeader, LinkProcessingPhase, extract_links_from_headers, process_link_headers,
81};
82use crate::dom::reportingendpoint::ReportingEndpoint;
83use crate::dom::shadowroot::IsUserAgentWidget;
84use crate::dom::text::Text;
85use crate::dom::types::HTMLMediaElement;
86use crate::dom::virtualmethods::vtable_for;
87use crate::network_listener::PreInvoke;
88use crate::realms::enter_realm;
89use crate::script_runtime::{CanGc, IntroductionType};
90use crate::script_thread::ScriptThread;
91
92mod async_html;
93mod html;
94mod prefetch;
95mod xml;
96
97pub(crate) use html::serialize_html_fragment;
98
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    /// Reflector backing this DOM object (the parser is wrapped through
    /// `reflect_dom_object` in `ServoParser::new`).
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The BOM sniffing state.
    ///
    /// `None` means we've found the BOM, we've found there isn't one, or
    /// we're not parsing from a byte stream. `Some` contains the BOM bytes
    /// found so far.
    bom_sniff: DomRefCell<Option<Vec<u8>>>,
    /// The decoder used for the network input.
    ///
    /// Taken (left as `None`) by `parse_sync` once the last chunk has been received.
    network_decoder: DomRefCell<Option<NetworkDecoder>>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// Input queued for the prefetch tokenizer; every non-empty chunk is pushed
    /// here (for documents with a browsing context) before being fed to it.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    ///
    /// `Some` only when a devtools channel exists and the document has a
    /// browsing context at construction time.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
155
/// A qualified attribute name paired with its value.
pub(crate) struct ElementAttribute {
    /// Qualified name of the attribute.
    name: QualName,
    /// Value of the attribute.
    value: DOMString,
}
160
/// Which parsing algorithm an HTML tokenizer is created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Used when parsing a whole document (`parse_html_document`,
    /// `parse_html_script_input`).
    Normal,
    /// Used when parsing a fragment (`parse_html_fragment`).
    Fragment,
}
166
167impl ElementAttribute {
168    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
169        ElementAttribute { name, value }
170    }
171}
172
173impl ServoParser {
174    pub(crate) fn parser_is_not_active(&self) -> bool {
175        self.can_write()
176    }
177
178    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
179    pub(crate) fn parse_html_document(
180        document: &Document,
181        input: Option<DOMString>,
182        url: ServoUrl,
183        can_gc: CanGc,
184    ) {
185        // Step 1. Set document's type to "html".
186        //
187        // Set by callers of this function and asserted here
188        assert!(document.is_html_document());
189        // Step 2. Create an HTML parser parser, associated with document.
190        let parser = if pref!(dom_servoparser_async_html_tokenizer_enabled) {
191            ServoParser::new(
192                document,
193                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)),
194                ParserKind::Normal,
195                can_gc,
196            )
197        } else {
198            ServoParser::new(
199                document,
200                Tokenizer::Html(self::html::Tokenizer::new(
201                    document,
202                    url,
203                    None,
204                    ParsingAlgorithm::Normal,
205                )),
206                ParserKind::Normal,
207                can_gc,
208            )
209        };
210        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
211        // Step 4. Start parser and let it run until it has consumed all the
212        // characters just inserted into the input stream.
213        //
214        // Set as the document's current parser and initialize with `input`, if given.
215        if let Some(input) = input {
216            parser.parse_complete_string_chunk(String::from(input), can_gc);
217        } else {
218            parser.document.set_current_parser(Some(&parser));
219        }
220    }
221
222    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
223    pub(crate) fn parse_html_fragment(
224        context: &Element,
225        input: DOMString,
226        allow_declarative_shadow_roots: bool,
227        can_gc: CanGc,
228    ) -> impl Iterator<Item = DomRoot<Node>> + use<'_> {
229        let context_node = context.upcast::<Node>();
230        let context_document = context_node.owner_doc();
231        let window = context_document.window();
232        let url = context_document.url();
233
234        // Step 1. Let document be a Document node whose type is "html".
235        let loader = DocumentLoader::new_with_threads(
236            context_document.loader().resource_threads().clone(),
237            Some(url.clone()),
238        );
239        let document = Document::new(
240            window,
241            HasBrowsingContext::No,
242            Some(url.clone()),
243            context_document.origin().clone(),
244            IsHTMLDocument::HTMLDocument,
245            None,
246            None,
247            DocumentActivity::Inactive,
248            DocumentSource::FromParser,
249            loader,
250            None,
251            None,
252            Default::default(),
253            false,
254            allow_declarative_shadow_roots,
255            Some(context_document.insecure_requests_policy()),
256            context_document.has_trustworthy_ancestor_or_current_origin(),
257            context_document.custom_element_reaction_stack(),
258            context_document.creation_sandboxing_flag_set(),
259            can_gc,
260        );
261
262        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
263        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
264        // mode to "limited-quirks".
265        document.set_quirks_mode(context_document.quirks_mode());
266
267        // NOTE: The following steps happened as part of Step 1.
268        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
269        // allow declarative shadow roots to true.
270        // Step 5. Create a new HTML parser, and associate it with document.
271
272        // Step 11.
273        let form = context_node
274            .inclusive_ancestors(ShadowIncluding::No)
275            .find(|element| element.is::<HTMLFormElement>());
276
277        let fragment_context = FragmentContext {
278            context_elem: context_node,
279            form_elem: form.as_deref(),
280            context_element_allows_scripting: context_document.scripting_enabled(),
281        };
282
283        let parser = ServoParser::new(
284            &document,
285            Tokenizer::Html(self::html::Tokenizer::new(
286                &document,
287                url,
288                Some(fragment_context),
289                ParsingAlgorithm::Fragment,
290            )),
291            ParserKind::Normal,
292            can_gc,
293        );
294        parser.parse_complete_string_chunk(String::from(input), can_gc);
295
296        // Step 14.
297        let root_element = document.GetDocumentElement().expect("no document element");
298        FragmentParsingResult {
299            inner: root_element.upcast::<Node>().children(),
300        }
301    }
302
303    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
304        let parser = ServoParser::new(
305            document,
306            Tokenizer::Html(self::html::Tokenizer::new(
307                document,
308                url,
309                None,
310                ParsingAlgorithm::Normal,
311            )),
312            ParserKind::ScriptCreated,
313            CanGc::note(),
314        );
315        *parser.bom_sniff.borrow_mut() = None;
316        document.set_current_parser(Some(&parser));
317    }
318
319    pub(crate) fn parse_xml_document(
320        document: &Document,
321        input: Option<DOMString>,
322        url: ServoUrl,
323        can_gc: CanGc,
324    ) {
325        let parser = ServoParser::new(
326            document,
327            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
328            ParserKind::Normal,
329            can_gc,
330        );
331
332        // Set as the document's current parser and initialize with `input`, if given.
333        if let Some(input) = input {
334            parser.parse_complete_string_chunk(String::from(input), can_gc);
335        } else {
336            parser.document.set_current_parser(Some(&parser));
337        }
338    }
339
    /// Returns the current value of the parser's script nesting level.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    pub(crate) fn script_nesting_level(&self) -> usize {
        self.script_nesting_level.get()
    }
343
    /// Returns whether this parser was constructed as a script-created parser.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
347
348    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
349    /// tag whose tag name is "script"' of
350    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
351    ///
352    /// This first moves everything from the script input to the beginning of
353    /// the network input, effectively resetting the insertion point to just
354    /// before the next character to be consumed.
355    ///
356    ///
357    /// ```text
358    ///     | ... script input ... network input ...
359    ///     ^
360    ///     insertion point
361    /// ```
362    pub(crate) fn resume_with_pending_parsing_blocking_script(
363        &self,
364        script: &HTMLScriptElement,
365        result: ScriptResult,
366        can_gc: CanGc,
367    ) {
368        assert!(self.suspended.get());
369        self.suspended.set(false);
370
371        self.script_input.swap_with(&self.network_input);
372        while let Some(chunk) = self.script_input.pop_front() {
373            self.network_input.push_back(chunk);
374        }
375
376        let script_nesting_level = self.script_nesting_level.get();
377        assert_eq!(script_nesting_level, 0);
378
379        self.script_nesting_level.set(script_nesting_level + 1);
380        script.execute(result, can_gc);
381        self.script_nesting_level.set(script_nesting_level);
382
383        if !self.suspended.get() && !self.aborted.get() {
384            self.parse_sync(can_gc);
385        }
386    }
387
388    pub(crate) fn can_write(&self) -> bool {
389        self.script_created_parser || self.script_nesting_level.get() > 0
390    }
391
392    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
393    pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) {
394        assert!(self.can_write());
395
396        if self.document.has_pending_parsing_blocking_script() {
397            // There is already a pending parsing blocking script so the
398            // parser is suspended, we just append everything to the
399            // script input and abort these steps.
400            self.script_input.push_back(String::from(text).into());
401            return;
402        }
403
404        // There is no pending parsing blocking script, so all previous calls
405        // to document.write() should have seen their entire input tokenized
406        // and process, with nothing pushed to the parser script input.
407        assert!(self.script_input.is_empty());
408
409        let input = BufferQueue::default();
410        input.push_back(String::from(text).into());
411
412        let profiler_chan = self
413            .document
414            .window()
415            .as_global_scope()
416            .time_profiler_chan()
417            .clone();
418        let profiler_metadata = TimerMetadata {
419            url: self.document.url().as_str().into(),
420            iframe: TimerMetadataFrameType::RootWindow,
421            incremental: TimerMetadataReflowType::FirstReflow,
422        };
423        self.tokenize(
424            |tokenizer| {
425                tokenizer.feed(
426                    &input,
427                    can_gc,
428                    profiler_chan.clone(),
429                    profiler_metadata.clone(),
430                )
431            },
432            can_gc,
433        );
434
435        if self.suspended.get() {
436            // Parser got suspended, insert remaining input at end of
437            // script input, following anything written by scripts executed
438            // reentrantly during this call.
439            while let Some(chunk) = input.pop_front() {
440                self.script_input.push_back(chunk);
441            }
442            return;
443        }
444
445        assert!(input.is_empty());
446    }
447
448    // Steps 4-6 of https://html.spec.whatwg.org/multipage/#dom-document-close
449    pub(crate) fn close(&self, can_gc: CanGc) {
450        assert!(self.script_created_parser);
451
452        // Step 4.
453        self.last_chunk_received.set(true);
454
455        if self.suspended.get() {
456            // Step 5.
457            return;
458        }
459
460        // Step 6.
461        self.parse_sync(can_gc);
462    }
463
464    // https://html.spec.whatwg.org/multipage/#abort-a-parser
465    pub(crate) fn abort(&self, can_gc: CanGc) {
466        assert!(!self.aborted.get());
467        self.aborted.set(true);
468
469        // Step 1.
470        self.script_input.replace_with(BufferQueue::default());
471        self.network_input.replace_with(BufferQueue::default());
472
473        // Step 2.
474        self.document
475            .set_ready_state(DocumentReadyState::Interactive, can_gc);
476
477        // Step 3.
478        self.tokenizer.end(can_gc);
479        self.document.set_current_parser(None);
480
481        // Step 4.
482        self.document
483            .set_ready_state(DocumentReadyState::Complete, can_gc);
484    }
485
486    // https://html.spec.whatwg.org/multipage/#active-parser
487    pub(crate) fn is_active(&self) -> bool {
488        self.script_nesting_level() > 0 && !self.aborted.get()
489    }
490
491    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
492    fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self {
493        // Store the whole input for the devtools Sources panel, if the devtools server is running
494        // and we are parsing for a document load (not just things like innerHTML).
495        // TODO: check if a devtools client is actually connected and/or wants the sources?
496        let content_for_devtools = (document.global().devtools_chan().is_some() &&
497            document.has_browsing_context())
498        .then_some(DomRefCell::new(String::new()));
499
500        ServoParser {
501            reflector: Reflector::new(),
502            document: Dom::from_ref(document),
503            bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))),
504            network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
505            network_input: BufferQueue::default(),
506            script_input: BufferQueue::default(),
507            tokenizer,
508            last_chunk_received: Cell::new(false),
509            suspended: Default::default(),
510            script_nesting_level: Default::default(),
511            aborted: Default::default(),
512            script_created_parser: kind == ParserKind::ScriptCreated,
513            prefetch_tokenizer: prefetch::Tokenizer::new(document),
514            prefetch_input: BufferQueue::default(),
515            content_for_devtools,
516        }
517    }
518
519    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
520    fn new(
521        document: &Document,
522        tokenizer: Tokenizer,
523        kind: ParserKind,
524        can_gc: CanGc,
525    ) -> DomRoot<Self> {
526        reflect_dom_object(
527            Box::new(ServoParser::new_inherited(document, tokenizer, kind)),
528            document.window(),
529            can_gc,
530        )
531    }
532
533    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
534        if let Some(mut content_for_devtools) = self
535            .content_for_devtools
536            .as_ref()
537            .map(|content| content.borrow_mut())
538        {
539            // TODO: append these chunks more efficiently
540            content_for_devtools.push_str(chunk.as_ref());
541        }
542
543        if chunk.is_empty() {
544            return;
545        }
546        // Per https://github.com/whatwg/html/issues/1495
547        // stylesheets should not be loaded for documents
548        // without browsing contexts.
549        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
550        // suggests that no content should be preloaded in such a case.
551        // We're conservative, and only prefetch for documents
552        // with browsing contexts.
553        if self.document.browsing_context().is_some() {
554            // Push the chunk into the prefetch input stream,
555            // which is tokenized eagerly, to scan for resources
556            // to prefetch. If the user script uses `document.write()`
557            // to overwrite the network input, this prefetching may
558            // have been wasted, but in most cases it won't.
559            self.prefetch_input.push_back(chunk.clone());
560            self.prefetch_tokenizer.feed(&self.prefetch_input);
561        }
562        // Push the chunk into the network input stream,
563        // which is tokenized lazily.
564        self.network_input.push_back(chunk);
565    }
566
567    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
568        // BOM sniff. This is needed because NetworkDecoder will switch the
569        // encoding based on the BOM, but it won't change
570        // `self.document.encoding` in the process.
571        {
572            let mut bom_sniff = self.bom_sniff.borrow_mut();
573            if let Some(partial_bom) = bom_sniff.as_mut() {
574                if partial_bom.len() + chunk.len() >= 3 {
575                    partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied());
576                    if let Some((encoding, _)) = Encoding::for_bom(partial_bom) {
577                        self.document.set_encoding(encoding);
578                    }
579                    drop(bom_sniff);
580                    *self.bom_sniff.borrow_mut() = None;
581                } else {
582                    partial_bom.extend(chunk.iter().copied());
583                }
584            }
585        }
586
587        // For byte input, we convert it to text using the network decoder.
588        let chunk = self
589            .network_decoder
590            .borrow_mut()
591            .as_mut()
592            .unwrap()
593            .decode(chunk);
594        self.push_tendril_input_chunk(chunk);
595    }
596
597    fn push_string_input_chunk(&self, chunk: String) {
598        // If the input is a string, we don't have a BOM.
599        if self.bom_sniff.borrow().is_some() {
600            *self.bom_sniff.borrow_mut() = None;
601        }
602
603        // The input has already been decoded as a string, so doesn't need
604        // to be decoded by the network decoder again.
605        let chunk = StrTendril::from(chunk);
606        self.push_tendril_input_chunk(chunk);
607    }
608
609    fn parse_sync(&self, can_gc: CanGc) {
610        assert!(self.script_input.is_empty());
611
612        // This parser will continue to parse while there is either pending input or
613        // the parser remains unsuspended.
614
615        if self.last_chunk_received.get() {
616            if let Some(decoder) = self.network_decoder.borrow_mut().take() {
617                let chunk = decoder.finish();
618                if !chunk.is_empty() {
619                    self.network_input.push_back(chunk);
620                }
621            }
622        }
623
624        if self.aborted.get() {
625            return;
626        }
627
628        let profiler_chan = self
629            .document
630            .window()
631            .as_global_scope()
632            .time_profiler_chan()
633            .clone();
634        let profiler_metadata = TimerMetadata {
635            url: self.document.url().as_str().into(),
636            iframe: TimerMetadataFrameType::RootWindow,
637            incremental: TimerMetadataReflowType::FirstReflow,
638        };
639        self.tokenize(
640            |tokenizer| {
641                tokenizer.feed(
642                    &self.network_input,
643                    can_gc,
644                    profiler_chan.clone(),
645                    profiler_metadata.clone(),
646                )
647            },
648            can_gc,
649        );
650
651        if self.suspended.get() {
652            return;
653        }
654
655        assert!(self.network_input.is_empty());
656
657        if self.last_chunk_received.get() {
658            self.finish(can_gc);
659        }
660    }
661
662    fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) {
663        self.document.set_current_parser(Some(self));
664        self.push_string_input_chunk(input);
665        self.last_chunk_received.set(true);
666        if !self.suspended.get() {
667            self.parse_sync(can_gc);
668        }
669    }
670
671    fn parse_bytes_chunk(&self, input: Vec<u8>, can_gc: CanGc) {
672        let _realm = enter_realm(&*self.document);
673        self.document.set_current_parser(Some(self));
674        self.push_bytes_input_chunk(input);
675        if !self.suspended.get() {
676            self.parse_sync(can_gc);
677        }
678    }
679
680    fn tokenize<F>(&self, feed: F, can_gc: CanGc)
681    where
682        F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
683    {
684        loop {
685            assert!(!self.suspended.get());
686            assert!(!self.aborted.get());
687
688            self.document.window().reflow_if_reflow_timer_expired();
689            let script = match feed(&self.tokenizer) {
690                TokenizerResult::Done => return,
691                TokenizerResult::Script(script) => script,
692            };
693
694            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
695            // branch "An end tag whose tag name is "script"
696            // The spec says to perform the microtask checkpoint before
697            // setting the insertion mode back from Text, but this is not
698            // possible with the way servo and html5ever currently
699            // relate to each other, and hopefully it is not observable.
700            if is_execution_stack_empty() {
701                self.document
702                    .window()
703                    .as_global_scope()
704                    .perform_a_microtask_checkpoint(can_gc);
705            }
706
707            let script_nesting_level = self.script_nesting_level.get();
708
709            self.script_nesting_level.set(script_nesting_level + 1);
710            script.set_initial_script_text();
711            let introduction_type_override =
712                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
713            script.prepare(introduction_type_override, can_gc);
714            self.script_nesting_level.set(script_nesting_level);
715
716            if self.document.has_pending_parsing_blocking_script() {
717                self.suspended.set(true);
718                return;
719            }
720            if self.aborted.get() {
721                return;
722            }
723        }
724    }
725
726    /// <https://html.spec.whatwg.org/multipage/#the-end>
727    fn finish(&self, can_gc: CanGc) {
728        assert!(!self.suspended.get());
729        assert!(self.last_chunk_received.get());
730        assert!(self.script_input.is_empty());
731        assert!(self.network_input.is_empty());
732        assert!(self.network_decoder.borrow().is_none());
733
734        // Step 1.
735        self.document
736            .set_ready_state(DocumentReadyState::Interactive, can_gc);
737
738        // Step 2.
739        self.tokenizer.end(can_gc);
740        self.document.set_current_parser(None);
741
742        // Steps 3-12 are in another castle, namely finish_load.
743        let url = self.tokenizer.url().clone();
744        self.document.finish_load(LoadType::PageSource(url), can_gc);
745
746        // Send the source contents to devtools, if needed.
747        if let Some(content_for_devtools) = self
748            .content_for_devtools
749            .as_ref()
750            .map(|content| content.take())
751        {
752            let global = self.document.global();
753            let chan = global.devtools_chan().expect("Guaranteed by new");
754            let pipeline_id = self.document.global().pipeline_id();
755            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
756                pipeline_id,
757                content_for_devtools,
758            ));
759        }
760    }
761}
762
/// Iterator wrapper returned by `ServoParser::parse_html_fragment`.
///
/// It wraps the children iterator of the parsed document's root element.
struct FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    /// The wrapped iterator of nodes.
    inner: I,
}
769
770impl<I> Iterator for FragmentParsingResult<I>
771where
772    I: Iterator<Item = DomRoot<Node>>,
773{
774    type Item = DomRoot<Node>;
775
776    fn next(&mut self) -> Option<DomRoot<Node>> {
777        let next = self.inner.next()?;
778        next.remove_self(CanGc::note());
779        Some(next)
780    }
781
782    fn size_hint(&self) -> (usize, Option<usize>) {
783        self.inner.size_hint()
784    }
785}
786
/// How a parser came into existence; determines the value of
/// `ServoParser::script_created_parser`.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    /// Any parser that is not script-created.
    Normal,
    /// A script-created parser (see `ServoParser::parse_html_script_input`).
    ScriptCreated,
}
792
/// The concrete tokenizer driving a `ServoParser`.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// The HTML tokenizer from the `html` submodule.
    Html(self::html::Tokenizer),
    /// The HTML tokenizer from the `async_html` submodule, used for documents
    /// when the `dom_servoparser_async_html_tokenizer_enabled` pref is set.
    AsyncHtml(self::async_html::Tokenizer),
    /// The XML tokenizer from the `xml` submodule.
    Xml(self::xml::Tokenizer),
}
800
801impl Tokenizer {
802    fn feed(
803        &self,
804        input: &BufferQueue,
805        can_gc: CanGc,
806        profiler_chan: ProfilerChan,
807        profiler_metadata: TimerMetadata,
808    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
809        match *self {
810            Tokenizer::Html(ref tokenizer) => time_profile!(
811                ProfilerCategory::ScriptParseHTML,
812                Some(profiler_metadata),
813                profiler_chan,
814                || tokenizer.feed(input),
815            ),
816            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
817                ProfilerCategory::ScriptParseHTML,
818                Some(profiler_metadata),
819                profiler_chan,
820                || tokenizer.feed(input, can_gc),
821            ),
822            Tokenizer::Xml(ref tokenizer) => time_profile!(
823                ProfilerCategory::ScriptParseXML,
824                Some(profiler_metadata),
825                profiler_chan,
826                || tokenizer.feed(input),
827            ),
828        }
829    }
830
831    fn end(&self, can_gc: CanGc) {
832        match *self {
833            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
834            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc),
835            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
836        }
837    }
838
839    fn url(&self) -> &ServoUrl {
840        match *self {
841            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
842            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
843            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
844        }
845    }
846
847    fn set_plaintext_state(&self) {
848        match *self {
849            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
850            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
851            Tokenizer::Xml(_) => unimplemented!(),
852        }
853    }
854}
855
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// This does not have all the fields of the specification struct, but mimics
/// its intent when used in the "loading a document" algorithms.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// Content type of this document, if known; otherwise it has to be sniffed.
    content_type: Option<Mime>,
    /// Link headers extracted from the response.
    link_headers: Vec<LinkHeader>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    ///
    /// Response bytes buffered until the document is loaded; they are handed to
    /// the MIME classifier and afterwards fed to the parser.
    resource_header: Vec<u8>,
}
871
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request. `None` until the response
    /// headers have been processed.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document, i.e. one whose markup is
    /// generated here (media documents and inline error/unknown-content pages).
    /// Response chunks are ignored once this is set.
    is_synthesized_document: bool,
    /// Whether a document has already been loaded (relevant for checking the
    /// resource header): until then response bytes are buffered for sniffing.
    has_loaded_document: bool,
    /// The [`WebViewId`] of the `WebView` associated with this document.
    webview_id: WebViewId,
    /// The [`PipelineId`] of the `Pipeline` associated with this document.
    pipeline_id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Timing data for this resource.
    resource_timing: ResourceFetchTiming,
    /// Index returned when the navigation timing entry was queued on the
    /// global's `Performance` object; used to update that entry at EOF.
    pushed_entry_index: Option<usize>,
    /// Parameters required by the document load algorithms.
    navigation_params: NavigationParams,
}
894
895impl ParserContext {
896    pub(crate) fn new(
897        webview_id: WebViewId,
898        pipeline_id: PipelineId,
899        url: ServoUrl,
900        creation_sandboxing_flag_set: SandboxingFlagSet,
901    ) -> ParserContext {
902        ParserContext {
903            parser: None,
904            is_synthesized_document: false,
905            has_loaded_document: false,
906            webview_id,
907            pipeline_id,
908            url,
909            resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation),
910            pushed_entry_index: None,
911            navigation_params: NavigationParams {
912                policy_container: Default::default(),
913                content_type: None,
914                link_headers: vec![],
915                final_sandboxing_flag_set: creation_sandboxing_flag_set,
916                resource_header: vec![],
917            },
918        }
919    }
920
921    pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
922        let Some(policy_container) = policy_container else {
923            return;
924        };
925        self.navigation_params.policy_container = policy_container.clone();
926    }
927
928    /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
929    fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
930        // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's blob URL entry's environment's policy container.
931        // TODO
932        // Step 2. Let result be a new policy container.
933        // Step 7. Return result.
934        PolicyContainer {
935            // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response.
936            csp_list: parse_csp_list_from_metadata(&metadata.headers),
937            // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY]
938            referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers),
939        }
940    }
941
942    /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
943    fn initialize_document_object(&self, document: &Document) {
944        // Step 9. Let document be a new Document, with
945        document.set_policy_container(self.navigation_params.policy_container.clone());
946        document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set);
947        // Step 17. Process link headers given document, navigationParams's response, and "pre-media".
948        process_link_headers(
949            &self.navigation_params.link_headers,
950            document,
951            LinkProcessingPhase::PreMedia,
952        );
953    }
954
955    /// Part of various load document methods
956    fn process_link_headers_in_media_phase_with_task(&mut self, document: &Document) {
957        // The first task that the networking task source places on the task queue
958        // while fetching runs must process link headers given document,
959        // navigationParams's response, and "media", after the task has been processed by the HTML parser.
960        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
961        if !link_headers.is_empty() {
962            let window = document.window();
963            let document = Trusted::new(document);
964            window
965                .upcast::<GlobalScope>()
966                .task_manager()
967                .networking_task_source()
968                .queue(task!(process_link_headers_task: move || {
969                    process_link_headers(&link_headers, &document.root(), LinkProcessingPhase::Media);
970                }));
971        }
972    }
973
974    /// <https://html.spec.whatwg.org/multipage/#loading-a-document>
975    fn load_document(&mut self, can_gc: CanGc) {
976        assert!(!self.has_loaded_document);
977        self.has_loaded_document = true;
978        let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
979            return;
980        };
981        // Step 1. Let type be the computed type of navigationParams's response.
982        let content_type = &self.navigation_params.content_type;
983        let mime_type = MimeClassifier::default().classify(
984            LoadContext::Browsing,
985            NoSniffFlag::Off,
986            ApacheBugFlag::from_content_type(content_type.as_ref()),
987            content_type,
988            &self.navigation_params.resource_header,
989        );
990        // Step 2. If the user agent has been configured to process resources of the given type using
991        // some mechanism other than rendering the content in a navigable, then skip this step.
992        // Otherwise, if the type is one of the following types:
993        let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
994            let page = format!(
995                "<html><body><p>Unknown content type ({}).</p></body></html>",
996                &mime_type,
997            );
998            self.load_inline_unknown_content(parser, page);
999            return;
1000        };
1001        match media_type {
1002            // Return the result of loading an HTML document, given navigationParams.
1003            MediaType::Html => self.load_html_document(parser),
1004            // Return the result of loading an XML document given navigationParams and type.
1005            MediaType::Xml => self.load_xml_document(parser),
1006            // Return the result of loading a text document given navigationParams and type.
1007            MediaType::JavaScript | MediaType::Json | MediaType::Text | MediaType::Css => {
1008                self.load_text_document(parser)
1009            },
1010            // Return the result of loading a media document given navigationParams and type.
1011            MediaType::Image | MediaType::AudioVideo => {
1012                self.load_media_document(parser, media_type, &mime_type);
1013                return;
1014            },
1015            MediaType::Font => {
1016                let page = format!(
1017                    "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
1018                    &mime_type,
1019                );
1020                self.load_inline_unknown_content(parser, page);
1021                return;
1022            },
1023        };
1024
1025        parser.parse_bytes_chunk(
1026            std::mem::take(&mut self.navigation_params.resource_header),
1027            can_gc,
1028        );
1029    }
1030
1031    /// <https://html.spec.whatwg.org/multipage/#navigate-html>
1032    fn load_html_document(&mut self, parser: &ServoParser) {
1033        // Step 1. Let document be the result of creating and initializing a
1034        // Document object given "html", "text/html", and navigationParams.
1035        self.initialize_document_object(&parser.document);
1036        // The first task that the networking task source places on the task queue while fetching
1037        // runs must process link headers given document, navigationParams's response, and "media",
1038        // after the task has been processed by the HTML parser.
1039        self.process_link_headers_in_media_phase_with_task(&parser.document);
1040    }
1041
1042    /// <https://html.spec.whatwg.org/multipage/#read-xml>
1043    fn load_xml_document(&mut self, parser: &ServoParser) {
1044        // When faced with displaying an XML file inline, provided navigation params navigationParams
1045        // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
1046        // XML Media Types, DOM, and other relevant specifications to create and initialize a
1047        // Document object document, given "xml", type, and navigationParams, and return that Document.
1048        // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
1049        self.initialize_document_object(&parser.document);
1050        // The first task that the networking task source places on the task queue while fetching
1051        // runs must process link headers given document, navigationParams's response, and "media",
1052        // after the task has been processed by the XML parser.
1053        self.process_link_headers_in_media_phase_with_task(&parser.document);
1054    }
1055
1056    /// <https://html.spec.whatwg.org/multipage/#navigate-text>
1057    fn load_text_document(&mut self, parser: &ServoParser) {
1058        // Step 4. Create an HTML parser and associate it with the document.
1059        // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by
1060        // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state.
1061        // Each task that the networking task source places on the task queue while fetching runs must then
1062        // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform
1063        // the appropriate processing of the input stream.
1064        let page = "<pre>\n".into();
1065        parser.push_string_input_chunk(page);
1066        parser.parse_sync(CanGc::note());
1067        parser.tokenizer.set_plaintext_state();
1068        // The first task that the networking task source places on the task queue while fetching
1069        // runs must process link headers given document, navigationParams's response, and "media",
1070        // after the task has been processed by the HTML parser.
1071        self.process_link_headers_in_media_phase_with_task(&parser.document);
1072    }
1073
1074    /// <https://html.spec.whatwg.org/multipage/#navigate-media>
1075    fn load_media_document(
1076        &mut self,
1077        parser: &ServoParser,
1078        media_type: MediaType,
1079        mime_type: &Mime,
1080    ) {
1081        // Step 8. Act as if the user agent had stopped parsing document.
1082        self.is_synthesized_document = true;
1083        // Step 3. Populate with html/head/body given document.
1084        let page = "<html><body></body></html>".into();
1085        parser.push_string_input_chunk(page);
1086        parser.parse_sync(CanGc::note());
1087
1088        let doc = &parser.document;
1089        // Step 5. Set the appropriate attribute of the element host element, as described below,
1090        // to the address of the image, video, or audio resource.
1091        let node = if media_type == MediaType::Image {
1092            let img = Element::create(
1093                QualName::new(None, ns!(html), local_name!("img")),
1094                None,
1095                doc,
1096                ElementCreator::ParserCreated(1),
1097                CustomElementCreationMode::Asynchronous,
1098                None,
1099                CanGc::note(),
1100            );
1101            let img = DomRoot::downcast::<HTMLImageElement>(img).unwrap();
1102            img.SetSrc(USVString(self.url.to_string()));
1103            DomRoot::upcast::<Node>(img)
1104        } else if mime_type.type_() == mime::AUDIO {
1105            let audio = Element::create(
1106                QualName::new(None, ns!(html), local_name!("audio")),
1107                None,
1108                doc,
1109                ElementCreator::ParserCreated(1),
1110                CustomElementCreationMode::Asynchronous,
1111                None,
1112                CanGc::note(),
1113            );
1114            let audio = DomRoot::downcast::<HTMLMediaElement>(audio).unwrap();
1115            audio.SetSrc(USVString(self.url.to_string()));
1116            DomRoot::upcast::<Node>(audio)
1117        } else {
1118            let video = Element::create(
1119                QualName::new(None, ns!(html), local_name!("video")),
1120                None,
1121                doc,
1122                ElementCreator::ParserCreated(1),
1123                CustomElementCreationMode::Asynchronous,
1124                None,
1125                CanGc::note(),
1126            );
1127            let video = DomRoot::downcast::<HTMLMediaElement>(video).unwrap();
1128            video.SetSrc(USVString(self.url.to_string()));
1129            DomRoot::upcast::<Node>(video)
1130        };
1131        // Step 4. Append an element host element for the media, as described below, to the body element.
1132        let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
1133        doc_body
1134            .AppendChild(&node, CanGc::note())
1135            .expect("Appending failed");
1136        // Step 7. Process link headers given document, navigationParams's response, and "media".
1137        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1138        process_link_headers(&link_headers, doc, LinkProcessingPhase::Media);
1139    }
1140
    /// <https://html.spec.whatwg.org/multipage/#read-ua-inline>
    ///
    /// Displays `page` (a complete HTML string generated by the caller) instead of
    /// the fetched resource: marks the context as a synthesized document, pushes
    /// `page` into the parser's input and parses it synchronously.
    fn load_inline_unknown_content(&mut self, parser: &ServoParser, page: String) {
        // Once set, `process_response_chunk` ignores any further response bytes.
        self.is_synthesized_document = true;
        parser.push_string_input_chunk(page);
        parser.parse_sync(CanGc::note());
    }
1147}
1148
1149impl FetchResponseListener for ParserContext {
    /// No-op: this listener does nothing when the request body is processed.
    fn process_request_body(&mut self, _: RequestId) {}

    /// No-op: this listener does nothing when the end of the request is reached.
    fn process_request_eof(&mut self, _: RequestId) {}
1153
1154    fn process_response(&mut self, _: RequestId, meta_result: Result<FetchMetadata, NetworkError>) {
1155        let (metadata, error) = match meta_result {
1156            Ok(meta) => (
1157                Some(match meta {
1158                    FetchMetadata::Unfiltered(m) => m,
1159                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
1160                }),
1161                None,
1162            ),
1163            Err(error) => (
1164                // Check variant without moving
1165                match &error {
1166                    NetworkError::SslValidation(..) |
1167                    NetworkError::Internal(..) |
1168                    NetworkError::Crash(..) => {
1169                        let mut meta = Metadata::default(self.url.clone());
1170                        let mime: Option<Mime> = "text/html".parse().ok();
1171                        meta.set_content_type(mime.as_ref());
1172                        Some(meta)
1173                    },
1174                    _ => None,
1175                },
1176                Some(error),
1177            ),
1178        };
1179        let content_type: Option<Mime> = metadata
1180            .clone()
1181            .and_then(|meta| meta.content_type)
1182            .map(Serde::into_inner)
1183            .map(Into::into);
1184
1185        let (policy_container, endpoints_list, link_headers) = match metadata.as_ref() {
1186            None => (PolicyContainer::default(), None, vec![]),
1187            Some(metadata) => (
1188                Self::create_policy_container_from_fetch_response(metadata),
1189                ReportingEndpoint::parse_reporting_endpoints_header(
1190                    &self.url.clone(),
1191                    &metadata.headers,
1192                ),
1193                extract_links_from_headers(&metadata.headers),
1194            ),
1195        };
1196
1197        let parser = match ScriptThread::page_headers_available(
1198            self.webview_id,
1199            self.pipeline_id,
1200            metadata,
1201            CanGc::note(),
1202        ) {
1203            Some(parser) => parser,
1204            None => return,
1205        };
1206        if parser.aborted.get() {
1207            return;
1208        }
1209
1210        let _realm = enter_realm(&*parser.document);
1211        let window = parser.document.window();
1212
1213        // From Step 23.8.3 of https://html.spec.whatwg.org/multipage/#navigate
1214        // Let finalSandboxFlags be the union of targetSnapshotParams's sandboxing flags and
1215        // policyContainer's CSP list's CSP-derived sandboxing flags.
1216        //
1217        // TODO: This deviates a bit from the specification, because there isn't a `targetSnapshotParam`
1218        // concept yet.
1219        let final_sandboxing_flag_set = policy_container
1220            .csp_list
1221            .as_ref()
1222            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
1223            .unwrap_or(SandboxingFlagSet::empty())
1224            .union(parser.document.creation_sandboxing_flag_set());
1225
1226        if let Some(endpoints) = endpoints_list {
1227            window.set_endpoints_list(endpoints);
1228        }
1229        self.parser = Some(Trusted::new(&*parser));
1230        self.navigation_params = NavigationParams {
1231            policy_container,
1232            content_type,
1233            final_sandboxing_flag_set,
1234            link_headers,
1235            resource_header: vec![],
1236        };
1237        self.submit_resource_timing();
1238
1239        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
1240        //
1241        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
1242        // e.g., a native rendering of the content or an error message because the specified type is not supported,
1243        // then return the result of creating a document for inline content that doesn't have a DOM given
1244        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
1245        // and navigationParams's user involvement.
1246        if let Some(error) = error {
1247            let page = match error {
1248                NetworkError::SslValidation(reason, bytes) => {
1249                    let page = resources::read_string(Resource::BadCertHTML);
1250                    let page = page.replace("${reason}", &reason);
1251                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
1252                    let page = page.replace("${bytes}", encoded_bytes.as_str());
1253                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
1254                },
1255                NetworkError::Internal(reason) => {
1256                    let page = resources::read_string(Resource::NetErrorHTML);
1257                    page.replace("${reason}", &reason)
1258                },
1259                NetworkError::Crash(details) => {
1260                    let page = resources::read_string(Resource::CrashHTML);
1261                    page.replace("${details}", &details)
1262                },
1263                NetworkError::LoadCancelled => {
1264                    // The next load will show a page
1265                    return;
1266                },
1267            };
1268            self.load_inline_unknown_content(&parser, page);
1269        }
1270    }
1271
1272    fn process_response_chunk(&mut self, _: RequestId, payload: Vec<u8>) {
1273        if self.is_synthesized_document {
1274            return;
1275        }
1276        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
1277            return;
1278        };
1279        if parser.aborted.get() {
1280            return;
1281        }
1282        if !self.has_loaded_document {
1283            // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1284            self.navigation_params
1285                .resource_header
1286                .extend_from_slice(&payload);
1287            // the number of bytes in buffer is greater than or equal to 1445.
1288            if self.navigation_params.resource_header.len() >= 1445 {
1289                self.load_document(CanGc::note());
1290            }
1291        } else {
1292            parser.parse_bytes_chunk(payload, CanGc::note());
1293        }
1294    }
1295
1296    // This method is called via script_thread::handle_fetch_eof, so we must call
1297    // submit_resource_timing in this function
1298    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1299    fn process_response_eof(
1300        &mut self,
1301        _: RequestId,
1302        status: Result<ResourceFetchTiming, NetworkError>,
1303    ) {
1304        let parser = match self.parser.as_ref() {
1305            Some(parser) => parser.root(),
1306            None => return,
1307        };
1308        if parser.aborted.get() {
1309            return;
1310        }
1311
1312        match status {
1313            // are we throwing this away or can we use it?
1314            Ok(_) => (),
1315            // TODO(Savago): we should send a notification to callers #5463.
1316            Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err),
1317        }
1318
1319        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1320        //
1321        // the end of the resource is reached.
1322        if !self.has_loaded_document {
1323            self.load_document(CanGc::note());
1324        }
1325
1326        let _realm = enter_realm(&*parser);
1327
1328        parser
1329            .document
1330            .set_redirect_count(self.resource_timing.redirect_count);
1331
1332        parser.last_chunk_received.set(true);
1333        if !parser.suspended.get() {
1334            parser.parse_sync(CanGc::note());
1335        }
1336
1337        // TODO: Only update if this is the current document resource.
1338        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
1339        if let Some(pushed_index) = self.pushed_entry_index {
1340            let document = &parser.document;
1341            let performance_entry = PerformanceNavigationTiming::new(
1342                &document.global(),
1343                CrossProcessInstant::now(),
1344                document,
1345                CanGc::note(),
1346            );
1347            document
1348                .global()
1349                .performance()
1350                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1351        }
1352    }
1353
    /// Returns a mutable reference to the timing data stored for this resource.
    fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming {
        &mut self.resource_timing
    }

    /// Returns a shared reference to the timing data stored for this resource.
    fn resource_timing(&self) -> &ResourceFetchTiming {
        &self.resource_timing
    }
1361
1362    // store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1363    fn submit_resource_timing(&mut self) {
1364        let parser = match self.parser.as_ref() {
1365            Some(parser) => parser.root(),
1366            None => return,
1367        };
1368        if parser.aborted.get() {
1369            return;
1370        }
1371
1372        let document = &parser.document;
1373
1374        // TODO: Pass a proper fetch start time here.
1375        let performance_entry = PerformanceNavigationTiming::new(
1376            &document.global(),
1377            CrossProcessInstant::now(),
1378            document,
1379            CanGc::note(),
1380        );
1381        self.pushed_entry_index = document.global().performance().queue_entry(
1382            performance_entry.upcast::<PerformanceEntry>(),
1383            CanGc::note(),
1384        );
1385    }
1386
1387    fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec<Violation>) {
1388        let parser = match self.parser.as_ref() {
1389            Some(parser) => parser.root(),
1390            None => return,
1391        };
1392        let document = &parser.document;
1393        let global = &document.global();
1394        // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved
1395        global.report_csp_violations(violations, None, None);
1396    }
1397}
1398
// `ParserContext` overrides nothing from `PreInvoke`; the empty impl relies on
// whatever defaults the trait provides.
impl PreInvoke for ParserContext {}
1400
/// Borrowed inputs describing the context in which a fragment is parsed.
/// NOTE(review): presumably mirrors the inputs of the HTML fragment parsing
/// algorithm — confirm against the callers.
pub(crate) struct FragmentContext<'a> {
    /// The context element's node.
    pub(crate) context_elem: &'a Node,
    /// The form element's node, if there is one.
    pub(crate) form_elem: Option<&'a Node>,
    /// Whether the context element allows scripting.
    pub(crate) context_element_allows_scripting: bool,
}
1406
1407#[cfg_attr(crown, allow(crown::unrooted_must_root))]
1408fn insert(
1409    parent: &Node,
1410    reference_child: Option<&Node>,
1411    child: NodeOrText<Dom<Node>>,
1412    parsing_algorithm: ParsingAlgorithm,
1413    custom_element_reaction_stack: &CustomElementReactionStack,
1414    can_gc: CanGc,
1415) {
1416    match child {
1417        NodeOrText::AppendNode(n) => {
1418            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1419            // applies if this is an element; if not, it may be
1420            // https://html.spec.whatwg.org/multipage/#insert-a-comment
1421            let element_in_non_fragment =
1422                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
1423            if element_in_non_fragment {
1424                custom_element_reaction_stack.push_new_element_queue();
1425            }
1426            parent.InsertBefore(&n, reference_child, can_gc).unwrap();
1427            if element_in_non_fragment {
1428                custom_element_reaction_stack.pop_current_element_queue(can_gc);
1429            }
1430        },
1431        NodeOrText::AppendText(t) => {
1432            // https://html.spec.whatwg.org/multipage/#insert-a-character
1433            let text = reference_child
1434                .and_then(Node::GetPreviousSibling)
1435                .or_else(|| parent.GetLastChild())
1436                .and_then(DomRoot::downcast::<Text>);
1437
1438            if let Some(text) = text {
1439                text.upcast::<CharacterData>().append_data(&t);
1440            } else {
1441                let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc);
1442                parent
1443                    .InsertBefore(text.upcast(), reference_child, can_gc)
1444                    .unwrap();
1445            }
1446        },
1447    }
1448}
1449
/// The tree sink through which the tokenizer builds the DOM: its `TreeSink`
/// implementation creates and looks up nodes on behalf of html5ever.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// NOTE(review): presumably the base URL of the document being parsed — confirm.
    #[no_trace]
    base_url: ServoUrl,
    /// The document that created nodes belong to.
    document: Dom<Document>,
    /// Line number recorded on elements created by the parser
    /// (passed to `ElementCreator::ParserCreated`).
    current_line: Cell<u64>,
    /// NOTE(review): presumably the script element waiting to be handled by the
    /// parser, if any — confirm.
    script: MutNullableDom<HTMLScriptElement>,
    /// The parsing algorithm used when creating elements, except for elements
    /// flagged as templates, which always use the fragment algorithm.
    parsing_algorithm: ParsingAlgorithm,
    /// The custom element reaction stack handed to element creation.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
1462
1463impl Sink {
1464    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1465        let x = x.downcast::<Element>().expect("Element node expected");
1466        let y = y.downcast::<Element>().expect("Element node expected");
1467
1468        x.is_in_same_home_subtree(y)
1469    }
1470
1471    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1472        node.GetParentNode().is_some()
1473    }
1474}
1475
1476impl TreeSink for Sink {
    /// Finishing the sink yields the sink itself.
    type Output = Self;
    /// Consumes the sink and returns it unchanged.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn finish(self) -> Self {
        self
    }

    /// Nodes are handed to html5ever as `Dom<Node>` handles.
    type Handle = Dom<Node>;
    /// Element names are exposed as borrowed expanded names.
    type ElemName<'a>
        = ExpandedName<'a>
    where
        Self: 'a;
1488
1489    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1490    fn get_document(&self) -> Dom<Node> {
1491        Dom::from_ref(self.document.upcast())
1492    }
1493
1494    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1495    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1496        let template = target
1497            .downcast::<HTMLTemplateElement>()
1498            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1499        Dom::from_ref(template.Content(CanGc::note()).upcast())
1500    }
1501
    /// Returns whether the handles `x` and `y` compare equal.
    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
        x == y
    }
1505
1506    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1507        let elem = target
1508            .downcast::<Element>()
1509            .expect("tried to get name of non-Element in HTML parsing");
1510        ExpandedName {
1511            ns: elem.namespace(),
1512            local: elem.local_name(),
1513        }
1514    }
1515
1516    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1517    fn create_element(
1518        &self,
1519        name: QualName,
1520        attrs: Vec<Attribute>,
1521        flags: ElementFlags,
1522    ) -> Dom<Node> {
1523        let attrs = attrs
1524            .into_iter()
1525            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1526            .collect();
1527        let parsing_algorithm = if flags.template {
1528            ParsingAlgorithm::Fragment
1529        } else {
1530            self.parsing_algorithm
1531        };
1532        let element = create_element_for_token(
1533            name,
1534            attrs,
1535            &self.document,
1536            ElementCreator::ParserCreated(self.current_line.get()),
1537            parsing_algorithm,
1538            &self.custom_element_reaction_stack,
1539            CanGc::note(),
1540        );
1541        Dom::from_ref(element.upcast())
1542    }
1543
1544    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1545    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1546        let comment = Comment::new(
1547            DOMString::from(String::from(text)),
1548            &self.document,
1549            None,
1550            CanGc::note(),
1551        );
1552        Dom::from_ref(comment.upcast())
1553    }
1554
1555    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1556    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1557        let doc = &*self.document;
1558        let pi = ProcessingInstruction::new(
1559            DOMString::from(String::from(target)),
1560            DOMString::from(String::from(data)),
1561            doc,
1562            CanGc::note(),
1563        );
1564        Dom::from_ref(pi.upcast())
1565    }
1566
1567    fn associate_with_form(
1568        &self,
1569        target: &Dom<Node>,
1570        form: &Dom<Node>,
1571        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1572    ) {
1573        let (element, prev_element) = nodes;
1574        let tree_node = prev_element.map_or(element, |prev| {
1575            if self.has_parent_node(element) {
1576                element
1577            } else {
1578                prev
1579            }
1580        });
1581        if !self.same_tree(tree_node, form) {
1582            return;
1583        }
1584
1585        let node = target;
1586        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1587            .expect("Owner must be a form element");
1588
1589        let elem = node.downcast::<Element>();
1590        let control = elem.and_then(|e| e.as_maybe_form_control());
1591
1592        if let Some(control) = control {
1593            control.set_form_owner_from_parser(&form, CanGc::note());
1594        }
1595    }
1596
1597    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1598    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
1599        let parent = sibling
1600            .GetParentNode()
1601            .expect("append_before_sibling called on node without parent");
1602
1603        insert(
1604            &parent,
1605            Some(sibling),
1606            new_node,
1607            self.parsing_algorithm,
1608            &self.custom_element_reaction_stack,
1609            CanGc::note(),
1610        );
1611    }
1612
1613    fn parse_error(&self, msg: Cow<'static, str>) {
1614        debug!("Parse error: {}", msg);
1615    }
1616
1617    fn set_quirks_mode(&self, mode: QuirksMode) {
1618        let mode = match mode {
1619            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1620            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1621            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1622        };
1623        self.document.set_quirks_mode(mode);
1624    }
1625
1626    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1627    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
1628        insert(
1629            parent,
1630            None,
1631            child,
1632            self.parsing_algorithm,
1633            &self.custom_element_reaction_stack,
1634            CanGc::note(),
1635        );
1636    }
1637
1638    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1639    fn append_based_on_parent_node(
1640        &self,
1641        elem: &Dom<Node>,
1642        prev_elem: &Dom<Node>,
1643        child: NodeOrText<Dom<Node>>,
1644    ) {
1645        if self.has_parent_node(elem) {
1646            self.append_before_sibling(elem, child);
1647        } else {
1648            self.append(prev_elem, child);
1649        }
1650    }
1651
1652    fn append_doctype_to_document(
1653        &self,
1654        name: StrTendril,
1655        public_id: StrTendril,
1656        system_id: StrTendril,
1657    ) {
1658        let doc = &*self.document;
1659        let doctype = DocumentType::new(
1660            DOMString::from(String::from(name)),
1661            Some(DOMString::from(String::from(public_id))),
1662            Some(DOMString::from(String::from(system_id))),
1663            doc,
1664            CanGc::note(),
1665        );
1666        doc.upcast::<Node>()
1667            .AppendChild(doctype.upcast(), CanGc::note())
1668            .expect("Appending failed");
1669    }
1670
1671    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1672        let elem = target
1673            .downcast::<Element>()
1674            .expect("tried to set attrs on non-Element in HTML parsing");
1675        for attr in attrs {
1676            elem.set_attribute_from_parser(
1677                attr.name,
1678                DOMString::from(String::from(attr.value)),
1679                None,
1680                CanGc::note(),
1681            );
1682        }
1683    }
1684
1685    fn remove_from_parent(&self, target: &Dom<Node>) {
1686        if let Some(ref parent) = target.GetParentNode() {
1687            parent.RemoveChild(target, CanGc::note()).unwrap();
1688        }
1689    }
1690
1691    fn mark_script_already_started(&self, node: &Dom<Node>) {
1692        let script = node.downcast::<HTMLScriptElement>();
1693        if let Some(script) = script {
1694            script.set_already_started(true)
1695        }
1696    }
1697
1698    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1699        while let Some(ref child) = node.GetFirstChild() {
1700            new_parent.AppendChild(child, CanGc::note()).unwrap();
1701        }
1702    }
1703
1704    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1705    /// Specifically, the `<annotation-xml>` cases.
1706    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1707        let elem = handle.downcast::<Element>().unwrap();
1708        elem.get_attribute(&ns!(), &local_name!("encoding"))
1709            .is_some_and(|attr| {
1710                attr.value().eq_ignore_ascii_case("text/html") ||
1711                    attr.value().eq_ignore_ascii_case("application/xhtml+xml")
1712            })
1713    }
1714
1715    fn set_current_line(&self, line_number: u64) {
1716        self.current_line.set(line_number);
1717    }
1718
1719    fn pop(&self, node: &Dom<Node>) {
1720        let node = DomRoot::from_ref(&**node);
1721        vtable_for(&node).pop();
1722    }
1723
1724    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
1725        intended_parent.owner_doc().allow_declarative_shadow_roots()
1726    }
1727
1728    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
1729    /// A start tag whose tag name is "template"
1730    /// Attach shadow path
1731    fn attach_declarative_shadow(
1732        &self,
1733        host: &Dom<Node>,
1734        template: &Dom<Node>,
1735        attributes: &[Attribute],
1736    ) -> bool {
1737        attach_declarative_shadow_inner(host, template, attributes)
1738    }
1739}
1740
1741/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
1742fn create_element_for_token(
1743    name: QualName,
1744    attrs: Vec<ElementAttribute>,
1745    document: &Document,
1746    creator: ElementCreator,
1747    parsing_algorithm: ParsingAlgorithm,
1748    custom_element_reaction_stack: &CustomElementReactionStack,
1749    can_gc: CanGc,
1750) -> DomRoot<Element> {
1751    // Step 3.
1752    let is = attrs
1753        .iter()
1754        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
1755        .map(|attr| LocalName::from(&attr.value));
1756
1757    // Step 4.
1758    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());
1759
1760    // Step 5.
1761    let will_execute_script =
1762        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;
1763
1764    // Step 6.
1765    if will_execute_script {
1766        // Step 6.1.
1767        document.increment_throw_on_dynamic_markup_insertion_counter();
1768        // Step 6.2
1769        if is_execution_stack_empty() {
1770            document
1771                .window()
1772                .as_global_scope()
1773                .perform_a_microtask_checkpoint(can_gc);
1774        }
1775        // Step 6.3
1776        custom_element_reaction_stack.push_new_element_queue()
1777    }
1778
1779    // Step 7.
1780    let creation_mode = if will_execute_script {
1781        CustomElementCreationMode::Synchronous
1782    } else {
1783        CustomElementCreationMode::Asynchronous
1784    };
1785
1786    let element = Element::create(name, is, document, creator, creation_mode, None, can_gc);
1787
1788    // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3
1789    // says to invoke sanitization "when an input element is first created";
1790    // however, since sanitization requires content attributes to function,
1791    // it can't mean that literally.
1792    // Indeed, to make sanitization work correctly, we need to _not_ sanitize
1793    // until after all content attributes have been added
1794
1795    let maybe_input = element.downcast::<HTMLInputElement>();
1796    if let Some(input) = maybe_input {
1797        input.disable_sanitization();
1798    }
1799
1800    // Step 8
1801    for attr in attrs {
1802        element.set_attribute_from_parser(attr.name, attr.value, None, can_gc);
1803    }
1804
1805    // _now_ we can sanitize (and we sanitize now even if the "value"
1806    // attribute isn't present!)
1807    if let Some(input) = maybe_input {
1808        input.enable_sanitization();
1809    }
1810
1811    // Step 9.
1812    if will_execute_script {
1813        // Steps 9.1 - 9.2.
1814        custom_element_reaction_stack.pop_current_element_queue(can_gc);
1815        // Step 9.3.
1816        document.decrement_throw_on_dynamic_markup_insertion_counter();
1817    }
1818
1819    // TODO: Step 10.
1820    // TODO: Step 11.
1821
1822    // Step 12 is handled in `associate_with_form`.
1823
1824    // Step 13.
1825    element
1826}
1827
/// Wraps a tendril `LossyDecoder` whose output is collected by a
/// [`NetworkSink`].
#[derive(JSTraceable, MallocSizeOf)]
struct NetworkDecoder {
    /// The lossy decoder; decoded text accumulates in its inner `NetworkSink`.
    #[ignore_malloc_size_of = "Defined in tendril"]
    #[custom_trace]
    decoder: LossyDecoder<NetworkSink>,
}
1834
1835impl NetworkDecoder {
1836    fn new(encoding: &'static Encoding) -> Self {
1837        Self {
1838            decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
1839        }
1840    }
1841
1842    fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
1843        self.decoder.process(ByteTendril::from(&*chunk));
1844        std::mem::take(&mut self.decoder.inner_sink_mut().output)
1845    }
1846
1847    fn finish(self) -> StrTendril {
1848        self.decoder.finish()
1849    }
1850}
1851
/// Tendril sink that gathers decoded text into a single `StrTendril`.
#[derive(Default, JSTraceable)]
struct NetworkSink {
    /// Text received so far; `NetworkDecoder::decode` takes it out after each
    /// processed chunk.
    #[no_trace]
    output: StrTendril,
}
1857
1858impl TendrilSink<UTF8> for NetworkSink {
1859    type Output = StrTendril;
1860
1861    fn process(&mut self, t: StrTendril) {
1862        if self.output.is_empty() {
1863            self.output = t;
1864        } else {
1865            self.output.push_tendril(&t);
1866        }
1867    }
1868
1869    fn error(&mut self, _desc: Cow<'static, str>) {}
1870
1871    fn finish(self) -> Self::Output {
1872        self.output
1873    }
1874}
1875
1876fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool {
1877    let host_element = host.downcast::<Element>().unwrap();
1878
1879    if host_element.shadow_root().is_some() {
1880        return false;
1881    }
1882
1883    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
1884
1885    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
1886    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
1887    // Step 5. Let delegatesfocus be true if template start tag
1888    // has a shadowrootdelegatesfocus attribute; otherwise false.
1889    // Step 6. Let serializable be true if template start tag
1890    // has a shadowrootserializable attribute; otherwise false.
1891    let mut shadow_root_mode = ShadowRootMode::Open;
1892    let mut clonable = false;
1893    let mut delegatesfocus = false;
1894    let mut serializable = false;
1895
1896    let attributes: Vec<ElementAttribute> = attributes
1897        .iter()
1898        .map(|attr| {
1899            ElementAttribute::new(
1900                attr.name.clone(),
1901                DOMString::from(String::from(attr.value.clone())),
1902            )
1903        })
1904        .collect();
1905
1906    attributes
1907        .iter()
1908        .for_each(|attr: &ElementAttribute| match attr.name.local {
1909            local_name!("shadowrootmode") => {
1910                if attr.value.str().eq_ignore_ascii_case("open") {
1911                    shadow_root_mode = ShadowRootMode::Open;
1912                } else if attr.value.str().eq_ignore_ascii_case("closed") {
1913                    shadow_root_mode = ShadowRootMode::Closed;
1914                } else {
1915                    unreachable!("shadowrootmode value is not open nor closed");
1916                }
1917            },
1918            local_name!("shadowrootclonable") => {
1919                clonable = true;
1920            },
1921            local_name!("shadowrootdelegatesfocus") => {
1922                delegatesfocus = true;
1923            },
1924            local_name!("shadowrootserializable") => {
1925                serializable = true;
1926            },
1927            _ => {},
1928        });
1929
1930    // Step 8.1. Attach a shadow root with declarative shadow host element,
1931    // mode, clonable, serializable, delegatesFocus, and "named".
1932    match host_element.attach_shadow(
1933        IsUserAgentWidget::No,
1934        shadow_root_mode,
1935        clonable,
1936        serializable,
1937        delegatesfocus,
1938        SlotAssignmentMode::Named,
1939        CanGc::note(),
1940    ) {
1941        Ok(shadow_root) => {
1942            // Step 8.3. Set shadow's declarative to true.
1943            shadow_root.set_declarative(true);
1944
1945            // Set 8.4. Set template's template contents property to shadow.
1946            let shadow = shadow_root.upcast::<DocumentFragment>();
1947            template_element.set_contents(Some(shadow));
1948
1949            // Step 8.5. Set shadow’s available to element internals to true.
1950            shadow_root.set_available_to_element_internals(true);
1951
1952            true
1953        },
1954        Err(_) => false,
1955    }
1956}