script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::Cell;
7use std::rc::Rc;
8
9use base::cross_process_instant::CrossProcessInstant;
10use base::id::PipelineId;
11use base64::Engine as _;
12use base64::engine::general_purpose;
13use content_security_policy::sandboxing_directive::SandboxingFlagSet;
14use devtools_traits::ScriptToDevtoolsControlMsg;
15use dom_struct::dom_struct;
16use embedder_traits::resources::{self, Resource};
17use encoding_rs::Encoding;
18use html5ever::buffer_queue::BufferQueue;
19use html5ever::tendril::fmt::UTF8;
20use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
21use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
22use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
23use hyper_serde::Serde;
24use markup5ever::TokenizerResult;
25use mime::{self, Mime};
26use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
27use net_traits::policy_container::PolicyContainer;
28use net_traits::request::RequestId;
29use net_traits::{
30    FetchMetadata, FetchResponseListener, LoadContext, Metadata, NetworkError, ReferrerPolicy,
31    ResourceFetchTiming, ResourceTimingType,
32};
33use profile_traits::time::{
34    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
35};
36use profile_traits::time_profile;
37use script_traits::DocumentActivity;
38use servo_config::pref;
39use servo_url::ServoUrl;
40use style::context::QuirksMode as ServoQuirksMode;
41use tendril::stream::LossyDecoder;
42
43use crate::document_loader::{DocumentLoader, LoadType};
44use crate::dom::bindings::cell::DomRefCell;
45use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
46    DocumentMethods, DocumentReadyState,
47};
48use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
49use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
50use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
51use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
52use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
53    ShadowRootMode, SlotAssignmentMode,
54};
55use crate::dom::bindings::inheritance::Castable;
56use crate::dom::bindings::refcounted::Trusted;
57use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object};
58use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
59use crate::dom::bindings::settings_stack::is_execution_stack_empty;
60use crate::dom::bindings::str::{DOMString, USVString};
61use crate::dom::characterdata::CharacterData;
62use crate::dom::comment::Comment;
63use crate::dom::csp::{GlobalCspReporting, Violation, parse_csp_list_from_metadata};
64use crate::dom::customelementregistry::CustomElementReactionStack;
65use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
66use crate::dom::documentfragment::DocumentFragment;
67use crate::dom::documenttype::DocumentType;
68use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
69use crate::dom::globalscope::GlobalScope;
70use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
71use crate::dom::html::htmlimageelement::HTMLImageElement;
72use crate::dom::html::htmlinputelement::HTMLInputElement;
73use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
74use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
75use crate::dom::node::{Node, ShadowIncluding};
76use crate::dom::performanceentry::PerformanceEntry;
77use crate::dom::performancenavigationtiming::PerformanceNavigationTiming;
78use crate::dom::processinginstruction::ProcessingInstruction;
79use crate::dom::processingoptions::{
80    LinkHeader, LinkProcessingPhase, extract_links_from_headers, process_link_headers,
81};
82use crate::dom::reportingendpoint::ReportingEndpoint;
83use crate::dom::shadowroot::IsUserAgentWidget;
84use crate::dom::text::Text;
85use crate::dom::types::HTMLMediaElement;
86use crate::dom::virtualmethods::vtable_for;
87use crate::network_listener::PreInvoke;
88use crate::realms::enter_realm;
89use crate::script_runtime::{CanGc, IntroductionType};
90use crate::script_thread::ScriptThread;
91
92mod async_html;
93mod html;
94mod prefetch;
95mod xml;
96
97pub(crate) use html::serialize_html_fragment;
98
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    /// The DOM reflector of this object, created with `Reflector::new()`.
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The BOM sniffing state.
    ///
    /// `None` means we've found the BOM, we've found there isn't one, or
    /// we're not parsing from a byte stream. `Some` contains the BOM bytes
    /// found so far.
    bom_sniff: DomRefCell<Option<Vec<u8>>>,
    /// The decoder used for the network input.
    ///
    /// Taken (leaving `None`) by `parse_sync` once the last chunk has been
    /// received, so that the decoder can be finished exactly once.
    network_decoder: DomRefCell<Option<NetworkDecoder>>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// Input queue fed to `prefetch_tokenizer`. It receives a copy of every
    /// non-empty input chunk when the document has a browsing context.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    ///
    /// `Some` only when a devtools channel exists and the document has a
    /// browsing context at construction time; the accumulated text is sent
    /// to devtools when parsing finishes.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
155
/// An attribute's qualified name paired with its value.
///
/// Built through [`ElementAttribute::new`]; both fields are private to this module.
pub(crate) struct ElementAttribute {
    /// The qualified name of the attribute.
    name: QualName,
    /// The attribute value.
    value: DOMString,
}
160
/// Which parsing algorithm an HTML tokenizer is created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Used when parsing a whole document (`parse_html_document`,
    /// `parse_html_script_input`).
    Normal,
    /// Used by `parse_html_fragment`, together with a fragment context.
    Fragment,
}
166
167impl ElementAttribute {
168    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
169        ElementAttribute { name, value }
170    }
171}
172
173impl ServoParser {
    /// Returns exactly what [`ServoParser::can_write`] returns, i.e. `true` when this
    /// is a script-created parser or the script nesting level is greater than zero.
    ///
    /// NOTE(review): despite the name, this is not the negation of `is_active()`;
    /// confirm the intended meaning against the callers.
    pub(crate) fn parser_is_not_active(&self) -> bool {
        self.can_write()
    }
177
178    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
179    pub(crate) fn parse_html_document(
180        document: &Document,
181        input: Option<DOMString>,
182        url: ServoUrl,
183        can_gc: CanGc,
184    ) {
185        // Step 1. Set document's type to "html".
186        //
187        // Set by callers of this function and asserted here
188        assert!(document.is_html_document());
189        // Step 2. Create an HTML parser parser, associated with document.
190        let parser = if pref!(dom_servoparser_async_html_tokenizer_enabled) {
191            ServoParser::new(
192                document,
193                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)),
194                ParserKind::Normal,
195                can_gc,
196            )
197        } else {
198            ServoParser::new(
199                document,
200                Tokenizer::Html(self::html::Tokenizer::new(
201                    document,
202                    url,
203                    None,
204                    ParsingAlgorithm::Normal,
205                )),
206                ParserKind::Normal,
207                can_gc,
208            )
209        };
210        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
211        // Step 4. Start parser and let it run until it has consumed all the
212        // characters just inserted into the input stream.
213        //
214        // Set as the document's current parser and initialize with `input`, if given.
215        if let Some(input) = input {
216            parser.parse_complete_string_chunk(String::from(input), can_gc);
217        } else {
218            parser.document.set_current_parser(Some(&parser));
219        }
220    }
221
    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
    ///
    /// Parses `input` as an HTML fragment in the context of `context`, using a fresh
    /// document created from the context element's owner document (same window, URL,
    /// origin and quirks mode). Returns an iterator over the children of the resulting
    /// document element; each node is removed from its parent as it is yielded.
    ///
    /// Panics if the temporary document ends up without a document element.
    pub(crate) fn parse_html_fragment(
        context: &Element,
        input: DOMString,
        allow_declarative_shadow_roots: bool,
        can_gc: CanGc,
    ) -> impl Iterator<Item = DomRoot<Node>> + use<'_> {
        let context_node = context.upcast::<Node>();
        let context_document = context_node.owner_doc();
        let window = context_document.window();
        let url = context_document.url();

        // Step 1. Let document be a Document node whose type is "html".
        let loader = DocumentLoader::new_with_threads(
            context_document.loader().resource_threads().clone(),
            Some(url.clone()),
        );
        let document = Document::new(
            window,
            HasBrowsingContext::No,
            Some(url.clone()),
            context_document.origin().clone(),
            IsHTMLDocument::HTMLDocument,
            None,
            None,
            DocumentActivity::Inactive,
            DocumentSource::FromParser,
            loader,
            None,
            None,
            Default::default(),
            false,
            allow_declarative_shadow_roots,
            Some(context_document.insecure_requests_policy()),
            context_document.has_trustworthy_ancestor_or_current_origin(),
            context_document.custom_element_reaction_stack(),
            context_document.creation_sandboxing_flag_set(),
            can_gc,
        );

        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
        // mode to "limited-quirks".
        //
        // Both cases are covered by copying the quirks mode over verbatim.
        document.set_quirks_mode(context_document.quirks_mode());

        // NOTE: The following steps happened as part of Step 1.
        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
        // allow declarative shadow roots to true.
        // Step 5. Create a new HTML parser, and associate it with document.

        // Step 11. Find the nearest inclusive ancestor of the context node (without
        // crossing shadow boundaries) that is a form element, if any.
        let form = context_node
            .inclusive_ancestors(ShadowIncluding::No)
            .find(|element| element.is::<HTMLFormElement>());

        let fragment_context = FragmentContext {
            context_elem: context_node,
            form_elem: form.as_deref(),
            context_element_allows_scripting: context_document.scripting_enabled(),
        };

        let parser = ServoParser::new(
            &document,
            Tokenizer::Html(self::html::Tokenizer::new(
                &document,
                url,
                Some(fragment_context),
                ParsingAlgorithm::Fragment,
            )),
            ParserKind::Normal,
            can_gc,
        );
        // The whole fragment is available up front, so parse it as one complete chunk.
        parser.parse_complete_string_chunk(String::from(input), can_gc);

        // Step 14. Hand back the children of the document element.
        let root_element = document.GetDocumentElement().expect("no document element");
        FragmentParsingResult {
            inner: root_element.upcast::<Node>().children(),
        }
    }
302
    /// Creates a script-created HTML parser (`ParserKind::ScriptCreated`) for `document`
    /// and installs it as the document's current parser without feeding it any input.
    ///
    /// BOM sniffing is disabled up front by clearing `bom_sniff`.
    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
        let parser = ServoParser::new(
            document,
            Tokenizer::Html(self::html::Tokenizer::new(
                document,
                url,
                None,
                ParsingAlgorithm::Normal,
            )),
            ParserKind::ScriptCreated,
            CanGc::note(),
        );
        // `None` marks BOM sniffing as finished / not applicable.
        *parser.bom_sniff.borrow_mut() = None;
        document.set_current_parser(Some(&parser));
    }
318
319    pub(crate) fn parse_xml_document(
320        document: &Document,
321        input: Option<DOMString>,
322        url: ServoUrl,
323        can_gc: CanGc,
324    ) {
325        let parser = ServoParser::new(
326            document,
327            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
328            ParserKind::Normal,
329            can_gc,
330        );
331
332        // Set as the document's current parser and initialize with `input`, if given.
333        if let Some(input) = input {
334            parser.parse_complete_string_chunk(String::from(input), can_gc);
335        } else {
336            parser.document.set_current_parser(Some(&parser));
337        }
338    }
339
    /// Returns the current value of the parser's script nesting level.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    pub(crate) fn script_nesting_level(&self) -> usize {
        self.script_nesting_level.get()
    }
343
    /// Whether this parser was created with `ParserKind::ScriptCreated`.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
347
348    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
349    /// tag whose tag name is "script"' of
350    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
351    ///
352    /// This first moves everything from the script input to the beginning of
353    /// the network input, effectively resetting the insertion point to just
354    /// before the next character to be consumed.
355    ///
356    ///
357    /// ```text
358    ///     | ... script input ... network input ...
359    ///     ^
360    ///     insertion point
361    /// ```
362    pub(crate) fn resume_with_pending_parsing_blocking_script(
363        &self,
364        script: &HTMLScriptElement,
365        result: ScriptResult,
366        can_gc: CanGc,
367    ) {
368        assert!(self.suspended.get());
369        self.suspended.set(false);
370
371        self.script_input.swap_with(&self.network_input);
372        while let Some(chunk) = self.script_input.pop_front() {
373            self.network_input.push_back(chunk);
374        }
375
376        let script_nesting_level = self.script_nesting_level.get();
377        assert_eq!(script_nesting_level, 0);
378
379        self.script_nesting_level.set(script_nesting_level + 1);
380        script.execute(result, can_gc);
381        self.script_nesting_level.set(script_nesting_level);
382
383        if !self.suspended.get() && !self.aborted.get() {
384            self.parse_sync(can_gc);
385        }
386    }
387
388    pub(crate) fn can_write(&self) -> bool {
389        self.script_created_parser || self.script_nesting_level.get() > 0
390    }
391
392    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
393    pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) {
394        assert!(self.can_write());
395
396        if self.document.has_pending_parsing_blocking_script() {
397            // There is already a pending parsing blocking script so the
398            // parser is suspended, we just append everything to the
399            // script input and abort these steps.
400            self.script_input.push_back(String::from(text).into());
401            return;
402        }
403
404        // There is no pending parsing blocking script, so all previous calls
405        // to document.write() should have seen their entire input tokenized
406        // and process, with nothing pushed to the parser script input.
407        assert!(self.script_input.is_empty());
408
409        let input = BufferQueue::default();
410        input.push_back(String::from(text).into());
411
412        let profiler_chan = self
413            .document
414            .window()
415            .as_global_scope()
416            .time_profiler_chan()
417            .clone();
418        let profiler_metadata = TimerMetadata {
419            url: self.document.url().as_str().into(),
420            iframe: TimerMetadataFrameType::RootWindow,
421            incremental: TimerMetadataReflowType::FirstReflow,
422        };
423        self.tokenize(
424            |tokenizer| {
425                tokenizer.feed(
426                    &input,
427                    can_gc,
428                    profiler_chan.clone(),
429                    profiler_metadata.clone(),
430                )
431            },
432            can_gc,
433        );
434
435        if self.suspended.get() {
436            // Parser got suspended, insert remaining input at end of
437            // script input, following anything written by scripts executed
438            // reentrantly during this call.
439            while let Some(chunk) = input.pop_front() {
440                self.script_input.push_back(chunk);
441            }
442            return;
443        }
444
445        assert!(input.is_empty());
446    }
447
448    // Steps 4-6 of https://html.spec.whatwg.org/multipage/#dom-document-close
449    pub(crate) fn close(&self, can_gc: CanGc) {
450        assert!(self.script_created_parser);
451
452        // Step 4.
453        self.last_chunk_received.set(true);
454
455        if self.suspended.get() {
456            // Step 5.
457            return;
458        }
459
460        // Step 6.
461        self.parse_sync(can_gc);
462    }
463
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    ///
    /// Marks the parser as aborted, discards all pending script and network input,
    /// ends the tokenizer, detaches the parser from the document and moves the
    /// document's ready state to `Interactive` and then `Complete`.
    ///
    /// Panics if the parser has already been aborted.
    pub(crate) fn abort(&self, can_gc: CanGc) {
        assert!(!self.aborted.get());
        self.aborted.set(true);

        // Step 1. Throw away everything that is still queued on either input stream.
        self.script_input.replace_with(BufferQueue::default());
        self.network_input.replace_with(BufferQueue::default());

        // Step 2.
        self.document
            .set_ready_state(DocumentReadyState::Interactive, can_gc);

        // Step 3. End the tokenizer and unregister this parser from the document.
        self.tokenizer.end(can_gc);
        self.document.set_current_parser(None);

        // Step 4.
        self.document
            .set_ready_state(DocumentReadyState::Complete, can_gc);
    }
485
486    // https://html.spec.whatwg.org/multipage/#active-parser
487    pub(crate) fn is_active(&self) -> bool {
488        self.script_nesting_level() > 0 && !self.aborted.get()
489    }
490
491    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
492    fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self {
493        // Store the whole input for the devtools Sources panel, if the devtools server is running
494        // and we are parsing for a document load (not just things like innerHTML).
495        // TODO: check if a devtools client is actually connected and/or wants the sources?
496        let content_for_devtools = (document.global().devtools_chan().is_some() &&
497            document.has_browsing_context())
498        .then_some(DomRefCell::new(String::new()));
499
500        ServoParser {
501            reflector: Reflector::new(),
502            document: Dom::from_ref(document),
503            bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))),
504            network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
505            network_input: BufferQueue::default(),
506            script_input: BufferQueue::default(),
507            tokenizer,
508            last_chunk_received: Cell::new(false),
509            suspended: Default::default(),
510            script_nesting_level: Default::default(),
511            aborted: Default::default(),
512            script_created_parser: kind == ParserKind::ScriptCreated,
513            prefetch_tokenizer: prefetch::Tokenizer::new(document),
514            prefetch_input: BufferQueue::default(),
515            content_for_devtools,
516        }
517    }
518
519    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
520    fn new(
521        document: &Document,
522        tokenizer: Tokenizer,
523        kind: ParserKind,
524        can_gc: CanGc,
525    ) -> DomRoot<Self> {
526        reflect_dom_object(
527            Box::new(ServoParser::new_inherited(document, tokenizer, kind)),
528            document.window(),
529            can_gc,
530        )
531    }
532
533    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
534        if let Some(mut content_for_devtools) = self
535            .content_for_devtools
536            .as_ref()
537            .map(|content| content.borrow_mut())
538        {
539            // TODO: append these chunks more efficiently
540            content_for_devtools.push_str(chunk.as_ref());
541        }
542
543        if chunk.is_empty() {
544            return;
545        }
546        // Per https://github.com/whatwg/html/issues/1495
547        // stylesheets should not be loaded for documents
548        // without browsing contexts.
549        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
550        // suggests that no content should be preloaded in such a case.
551        // We're conservative, and only prefetch for documents
552        // with browsing contexts.
553        if self.document.browsing_context().is_some() {
554            // Push the chunk into the prefetch input stream,
555            // which is tokenized eagerly, to scan for resources
556            // to prefetch. If the user script uses `document.write()`
557            // to overwrite the network input, this prefetching may
558            // have been wasted, but in most cases it won't.
559            self.prefetch_input.push_back(chunk.clone());
560            self.prefetch_tokenizer.feed(&self.prefetch_input);
561        }
562        // Push the chunk into the network input stream,
563        // which is tokenized lazily.
564        self.network_input.push_back(chunk);
565    }
566
567    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
568        // BOM sniff. This is needed because NetworkDecoder will switch the
569        // encoding based on the BOM, but it won't change
570        // `self.document.encoding` in the process.
571        {
572            let mut bom_sniff = self.bom_sniff.borrow_mut();
573            if let Some(partial_bom) = bom_sniff.as_mut() {
574                if partial_bom.len() + chunk.len() >= 3 {
575                    partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied());
576                    if let Some((encoding, _)) = Encoding::for_bom(partial_bom) {
577                        self.document.set_encoding(encoding);
578                    }
579                    drop(bom_sniff);
580                    *self.bom_sniff.borrow_mut() = None;
581                } else {
582                    partial_bom.extend(chunk.iter().copied());
583                }
584            }
585        }
586
587        // For byte input, we convert it to text using the network decoder.
588        let chunk = self
589            .network_decoder
590            .borrow_mut()
591            .as_mut()
592            .unwrap()
593            .decode(chunk);
594        self.push_tendril_input_chunk(chunk);
595    }
596
597    fn push_string_input_chunk(&self, chunk: String) {
598        // If the input is a string, we don't have a BOM.
599        if self.bom_sniff.borrow().is_some() {
600            *self.bom_sniff.borrow_mut() = None;
601        }
602
603        // The input has already been decoded as a string, so doesn't need
604        // to be decoded by the network decoder again.
605        let chunk = StrTendril::from(chunk);
606        self.push_tendril_input_chunk(chunk);
607    }
608
609    fn parse_sync(&self, can_gc: CanGc) {
610        assert!(self.script_input.is_empty());
611
612        // This parser will continue to parse while there is either pending input or
613        // the parser remains unsuspended.
614
615        if self.last_chunk_received.get() {
616            if let Some(decoder) = self.network_decoder.borrow_mut().take() {
617                let chunk = decoder.finish();
618                if !chunk.is_empty() {
619                    self.network_input.push_back(chunk);
620                }
621            }
622        }
623
624        if self.aborted.get() {
625            return;
626        }
627
628        let profiler_chan = self
629            .document
630            .window()
631            .as_global_scope()
632            .time_profiler_chan()
633            .clone();
634        let profiler_metadata = TimerMetadata {
635            url: self.document.url().as_str().into(),
636            iframe: TimerMetadataFrameType::RootWindow,
637            incremental: TimerMetadataReflowType::FirstReflow,
638        };
639        self.tokenize(
640            |tokenizer| {
641                tokenizer.feed(
642                    &self.network_input,
643                    can_gc,
644                    profiler_chan.clone(),
645                    profiler_metadata.clone(),
646                )
647            },
648            can_gc,
649        );
650
651        if self.suspended.get() {
652            return;
653        }
654
655        assert!(self.network_input.is_empty());
656
657        if self.last_chunk_received.get() {
658            self.finish(can_gc);
659        }
660    }
661
662    fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) {
663        self.document.set_current_parser(Some(self));
664        self.push_string_input_chunk(input);
665        self.last_chunk_received.set(true);
666        if !self.suspended.get() {
667            self.parse_sync(can_gc);
668        }
669    }
670
671    fn parse_bytes_chunk(&self, input: Vec<u8>, can_gc: CanGc) {
672        let _realm = enter_realm(&*self.document);
673        self.document.set_current_parser(Some(self));
674        self.push_bytes_input_chunk(input);
675        if !self.suspended.get() {
676            self.parse_sync(can_gc);
677        }
678    }
679
680    fn tokenize<F>(&self, feed: F, can_gc: CanGc)
681    where
682        F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
683    {
684        loop {
685            assert!(!self.suspended.get());
686            assert!(!self.aborted.get());
687
688            self.document.window().reflow_if_reflow_timer_expired();
689            let script = match feed(&self.tokenizer) {
690                TokenizerResult::Done => return,
691                TokenizerResult::Script(script) => script,
692            };
693
694            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
695            // branch "An end tag whose tag name is "script"
696            // The spec says to perform the microtask checkpoint before
697            // setting the insertion mode back from Text, but this is not
698            // possible with the way servo and html5ever currently
699            // relate to each other, and hopefully it is not observable.
700            if is_execution_stack_empty() {
701                self.document
702                    .window()
703                    .as_global_scope()
704                    .perform_a_microtask_checkpoint(can_gc);
705            }
706
707            let script_nesting_level = self.script_nesting_level.get();
708
709            self.script_nesting_level.set(script_nesting_level + 1);
710            script.set_initial_script_text();
711            let introduction_type_override =
712                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
713            script.prepare(introduction_type_override, can_gc);
714            self.script_nesting_level.set(script_nesting_level);
715
716            if self.document.has_pending_parsing_blocking_script() {
717                self.suspended.set(true);
718                return;
719            }
720            if self.aborted.get() {
721                return;
722            }
723        }
724    }
725
726    // https://html.spec.whatwg.org/multipage/#the-end
727    fn finish(&self, can_gc: CanGc) {
728        assert!(!self.suspended.get());
729        assert!(self.last_chunk_received.get());
730        assert!(self.script_input.is_empty());
731        assert!(self.network_input.is_empty());
732        assert!(self.network_decoder.borrow().is_none());
733
734        // Step 1.
735        self.document
736            .set_ready_state(DocumentReadyState::Interactive, can_gc);
737
738        // Step 2.
739        self.tokenizer.end(can_gc);
740        self.document.set_current_parser(None);
741
742        // Steps 3-12 are in another castle, namely finish_load.
743        let url = self.tokenizer.url().clone();
744        self.document.finish_load(LoadType::PageSource(url), can_gc);
745
746        // Send the source contents to devtools, if needed.
747        if let Some(content_for_devtools) = self
748            .content_for_devtools
749            .as_ref()
750            .map(|content| content.take())
751        {
752            let global = self.document.global();
753            let chan = global.devtools_chan().expect("Guaranteed by new");
754            let pipeline_id = self.document.global().pipeline_id();
755            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
756                pipeline_id,
757                content_for_devtools,
758            ));
759        }
760    }
761}
762
763struct FragmentParsingResult<I>
764where
765    I: Iterator<Item = DomRoot<Node>>,
766{
767    inner: I,
768}
769
770impl<I> Iterator for FragmentParsingResult<I>
771where
772    I: Iterator<Item = DomRoot<Node>>,
773{
774    type Item = DomRoot<Node>;
775
776    fn next(&mut self) -> Option<DomRoot<Node>> {
777        let next = self.inner.next()?;
778        next.remove_self(CanGc::note());
779        Some(next)
780    }
781
782    fn size_hint(&self) -> (usize, Option<usize>) {
783        self.inner.size_hint()
784    }
785}
786
/// How a parser came into existence; determines `ServoParser::script_created_parser`.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    /// A parser that was not created by script.
    Normal,
    /// A script-created parser, as set up by `ServoParser::parse_html_script_input`.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    ScriptCreated,
}
792
/// The tokenizer backends a `ServoParser` can drive.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// The HTML tokenizer from the `html` submodule.
    Html(self::html::Tokenizer),
    /// The HTML tokenizer from the `async_html` submodule, selected through the
    /// `dom_servoparser_async_html_tokenizer_enabled` preference.
    AsyncHtml(self::async_html::Tokenizer),
    /// The XML tokenizer from the `xml` submodule.
    Xml(self::xml::Tokenizer),
}
800
801impl Tokenizer {
802    fn feed(
803        &self,
804        input: &BufferQueue,
805        can_gc: CanGc,
806        profiler_chan: ProfilerChan,
807        profiler_metadata: TimerMetadata,
808    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
809        match *self {
810            Tokenizer::Html(ref tokenizer) => time_profile!(
811                ProfilerCategory::ScriptParseHTML,
812                Some(profiler_metadata),
813                profiler_chan,
814                || tokenizer.feed(input),
815            ),
816            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
817                ProfilerCategory::ScriptParseHTML,
818                Some(profiler_metadata),
819                profiler_chan,
820                || tokenizer.feed(input, can_gc),
821            ),
822            Tokenizer::Xml(ref tokenizer) => time_profile!(
823                ProfilerCategory::ScriptParseXML,
824                Some(profiler_metadata),
825                profiler_chan,
826                || tokenizer.feed(input),
827            ),
828        }
829    }
830
831    fn end(&self, can_gc: CanGc) {
832        match *self {
833            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
834            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc),
835            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
836        }
837    }
838
839    fn url(&self) -> &ServoUrl {
840        match *self {
841            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
842            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
843            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
844        }
845    }
846
847    fn set_plaintext_state(&self) {
848        match *self {
849            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
850            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
851            Tokenizer::Xml(_) => unimplemented!(),
852        }
853    }
854}
855
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// This does not have all of the fields of the specification's struct, but
/// mimics its intent when used in the document-loading spec algorithms.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// Content type of this document, if known; otherwise it has to be sniffed.
    content_type: Option<Mime>,
    /// Link headers from the response.
    link_headers: Vec<LinkHeader>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    ///
    /// Response body bytes buffered until the document has been loaded; they
    /// are used for MIME classification and then handed to the parser.
    resource_header: Vec<u8>,
}
871
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request. `None` until the response
    /// headers have been processed and a parser is available.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document (media document or inline
    /// error/unknown-content page). Response chunks are ignored once set.
    is_synthesized_document: bool,
    /// Whether the document has already been loaded (relevant for deciding
    /// whether incoming bytes still belong to the resource header).
    has_loaded_document: bool,
    /// The pipeline associated with this document.
    id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Timing data for this resource.
    resource_timing: ResourceFetchTiming,
    /// Index of the performance entry queued by `submit_resource_timing`, if
    /// any; used to update that entry when the response ends.
    pushed_entry_index: Option<usize>,
    /// Parameters required by the document-loading algorithms.
    navigation_params: NavigationParams,
}
892
893impl ParserContext {
894    pub(crate) fn new(
895        id: PipelineId,
896        url: ServoUrl,
897        creation_sandboxing_flag_set: SandboxingFlagSet,
898    ) -> ParserContext {
899        ParserContext {
900            parser: None,
901            is_synthesized_document: false,
902            has_loaded_document: false,
903            id,
904            url,
905            resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation),
906            pushed_entry_index: None,
907            navigation_params: NavigationParams {
908                policy_container: Default::default(),
909                content_type: None,
910                link_headers: vec![],
911                final_sandboxing_flag_set: creation_sandboxing_flag_set,
912                resource_header: vec![],
913            },
914        }
915    }
916
917    pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
918        let Some(policy_container) = policy_container else {
919            return;
920        };
921        self.navigation_params.policy_container = policy_container.clone();
922    }
923
924    /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
925    fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
926        // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's blob URL entry's environment's policy container.
927        // TODO
928        // Step 2. Let result be a new policy container.
929        // Step 7. Return result.
930        PolicyContainer {
931            // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response.
932            csp_list: parse_csp_list_from_metadata(&metadata.headers),
933            // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY]
934            referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers),
935        }
936    }
937
938    /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
939    fn initialize_document_object(&self, document: &Document) {
940        // Step 9. Let document be a new Document, with
941        document.set_policy_container(self.navigation_params.policy_container.clone());
942        document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set);
943        // Step 17. Process link headers given document, navigationParams's response, and "pre-media".
944        process_link_headers(
945            &self.navigation_params.link_headers,
946            document,
947            LinkProcessingPhase::PreMedia,
948        );
949    }
950
951    /// Part of various load document methods
952    fn process_link_headers_in_media_phase_with_task(&mut self, document: &Document) {
953        // The first task that the networking task source places on the task queue
954        // while fetching runs must process link headers given document,
955        // navigationParams's response, and "media", after the task has been processed by the HTML parser.
956        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
957        if !link_headers.is_empty() {
958            let window = document.window();
959            let document = Trusted::new(document);
960            window
961                .upcast::<GlobalScope>()
962                .task_manager()
963                .networking_task_source()
964                .queue(task!(process_link_headers_task: move || {
965                    process_link_headers(&link_headers, &document.root(), LinkProcessingPhase::Media);
966                }));
967        }
968    }
969
970    /// <https://html.spec.whatwg.org/multipage/#loading-a-document>
971    fn load_document(&mut self, can_gc: CanGc) {
972        assert!(!self.has_loaded_document);
973        self.has_loaded_document = true;
974        let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
975            return;
976        };
977        // Step 1. Let type be the computed type of navigationParams's response.
978        let content_type = &self.navigation_params.content_type;
979        let mime_type = MimeClassifier::default().classify(
980            LoadContext::Browsing,
981            NoSniffFlag::Off,
982            ApacheBugFlag::from_content_type(content_type.as_ref()),
983            content_type,
984            &self.navigation_params.resource_header,
985        );
986        // Step 2. If the user agent has been configured to process resources of the given type using
987        // some mechanism other than rendering the content in a navigable, then skip this step.
988        // Otherwise, if the type is one of the following types:
989        let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
990            let page = format!(
991                "<html><body><p>Unknown content type ({}).</p></body></html>",
992                &mime_type,
993            );
994            self.load_inline_unknown_content(parser, page);
995            return;
996        };
997        match media_type {
998            // Return the result of loading an HTML document, given navigationParams.
999            MediaType::Html => self.load_html_document(parser),
1000            // Return the result of loading an XML document given navigationParams and type.
1001            MediaType::Xml => self.load_xml_document(parser),
1002            // Return the result of loading a text document given navigationParams and type.
1003            MediaType::JavaScript | MediaType::Json | MediaType::Text | MediaType::Css => {
1004                self.load_text_document(parser)
1005            },
1006            // Return the result of loading a media document given navigationParams and type.
1007            MediaType::Image | MediaType::AudioVideo => {
1008                self.load_media_document(parser, media_type, &mime_type);
1009                return;
1010            },
1011            MediaType::Font => {
1012                let page = format!(
1013                    "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
1014                    &mime_type,
1015                );
1016                self.load_inline_unknown_content(parser, page);
1017                return;
1018            },
1019        };
1020
1021        parser.parse_bytes_chunk(
1022            std::mem::take(&mut self.navigation_params.resource_header),
1023            can_gc,
1024        );
1025    }
1026
1027    /// <https://html.spec.whatwg.org/multipage/#navigate-html>
1028    fn load_html_document(&mut self, parser: &ServoParser) {
1029        // Step 1. Let document be the result of creating and initializing a
1030        // Document object given "html", "text/html", and navigationParams.
1031        self.initialize_document_object(&parser.document);
1032        // The first task that the networking task source places on the task queue while fetching
1033        // runs must process link headers given document, navigationParams's response, and "media",
1034        // after the task has been processed by the HTML parser.
1035        self.process_link_headers_in_media_phase_with_task(&parser.document);
1036    }
1037
1038    /// <https://html.spec.whatwg.org/multipage/#read-xml>
1039    fn load_xml_document(&mut self, parser: &ServoParser) {
1040        // When faced with displaying an XML file inline, provided navigation params navigationParams
1041        // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
1042        // XML Media Types, DOM, and other relevant specifications to create and initialize a
1043        // Document object document, given "xml", type, and navigationParams, and return that Document.
1044        // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
1045        self.initialize_document_object(&parser.document);
1046        // The first task that the networking task source places on the task queue while fetching
1047        // runs must process link headers given document, navigationParams's response, and "media",
1048        // after the task has been processed by the XML parser.
1049        self.process_link_headers_in_media_phase_with_task(&parser.document);
1050    }
1051
1052    /// <https://html.spec.whatwg.org/multipage/#navigate-text>
1053    fn load_text_document(&mut self, parser: &ServoParser) {
1054        // Step 4. Create an HTML parser and associate it with the document.
1055        // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by
1056        // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state.
1057        // Each task that the networking task source places on the task queue while fetching runs must then
1058        // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform
1059        // the appropriate processing of the input stream.
1060        let page = "<pre>\n".into();
1061        parser.push_string_input_chunk(page);
1062        parser.parse_sync(CanGc::note());
1063        parser.tokenizer.set_plaintext_state();
1064        // The first task that the networking task source places on the task queue while fetching
1065        // runs must process link headers given document, navigationParams's response, and "media",
1066        // after the task has been processed by the HTML parser.
1067        self.process_link_headers_in_media_phase_with_task(&parser.document);
1068    }
1069
1070    /// <https://html.spec.whatwg.org/multipage/#navigate-media>
1071    fn load_media_document(
1072        &mut self,
1073        parser: &ServoParser,
1074        media_type: MediaType,
1075        mime_type: &Mime,
1076    ) {
1077        // Step 8. Act as if the user agent had stopped parsing document.
1078        self.is_synthesized_document = true;
1079        // Step 3. Populate with html/head/body given document.
1080        let page = "<html><body></body></html>".into();
1081        parser.push_string_input_chunk(page);
1082        parser.parse_sync(CanGc::note());
1083
1084        let doc = &parser.document;
1085        // Step 5. Set the appropriate attribute of the element host element, as described below,
1086        // to the address of the image, video, or audio resource.
1087        let node = if media_type == MediaType::Image {
1088            let img = Element::create(
1089                QualName::new(None, ns!(html), local_name!("img")),
1090                None,
1091                doc,
1092                ElementCreator::ParserCreated(1),
1093                CustomElementCreationMode::Asynchronous,
1094                None,
1095                CanGc::note(),
1096            );
1097            let img = DomRoot::downcast::<HTMLImageElement>(img).unwrap();
1098            img.SetSrc(USVString(self.url.to_string()));
1099            DomRoot::upcast::<Node>(img)
1100        } else if mime_type.type_() == mime::AUDIO {
1101            let audio = Element::create(
1102                QualName::new(None, ns!(html), local_name!("audio")),
1103                None,
1104                doc,
1105                ElementCreator::ParserCreated(1),
1106                CustomElementCreationMode::Asynchronous,
1107                None,
1108                CanGc::note(),
1109            );
1110            let audio = DomRoot::downcast::<HTMLMediaElement>(audio).unwrap();
1111            audio.SetSrc(USVString(self.url.to_string()));
1112            DomRoot::upcast::<Node>(audio)
1113        } else {
1114            let video = Element::create(
1115                QualName::new(None, ns!(html), local_name!("video")),
1116                None,
1117                doc,
1118                ElementCreator::ParserCreated(1),
1119                CustomElementCreationMode::Asynchronous,
1120                None,
1121                CanGc::note(),
1122            );
1123            let video = DomRoot::downcast::<HTMLMediaElement>(video).unwrap();
1124            video.SetSrc(USVString(self.url.to_string()));
1125            DomRoot::upcast::<Node>(video)
1126        };
1127        // Step 4. Append an element host element for the media, as described below, to the body element.
1128        let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
1129        doc_body
1130            .AppendChild(&node, CanGc::note())
1131            .expect("Appending failed");
1132        // Step 7. Process link headers given document, navigationParams's response, and "media".
1133        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1134        process_link_headers(&link_headers, doc, LinkProcessingPhase::Media);
1135    }
1136
1137    /// <https://html.spec.whatwg.org/multipage/#read-ua-inline>
1138    fn load_inline_unknown_content(&mut self, parser: &ServoParser, page: String) {
1139        self.is_synthesized_document = true;
1140        parser.push_string_input_chunk(page);
1141        parser.parse_sync(CanGc::note());
1142    }
1143}
1144
1145impl FetchResponseListener for ParserContext {
    /// Intentionally a no-op: this listener does nothing with request-body progress.
    fn process_request_body(&mut self, _: RequestId) {}
1147
    /// Intentionally a no-op: this listener does nothing when the request body ends.
    fn process_request_eof(&mut self, _: RequestId) {}
1149
1150    fn process_response(&mut self, _: RequestId, meta_result: Result<FetchMetadata, NetworkError>) {
1151        let (metadata, error) = match meta_result {
1152            Ok(meta) => (
1153                Some(match meta {
1154                    FetchMetadata::Unfiltered(m) => m,
1155                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
1156                }),
1157                None,
1158            ),
1159            Err(error) => (
1160                // Check variant without moving
1161                match &error {
1162                    NetworkError::SslValidation(..) |
1163                    NetworkError::Internal(..) |
1164                    NetworkError::Crash(..) => {
1165                        let mut meta = Metadata::default(self.url.clone());
1166                        let mime: Option<Mime> = "text/html".parse().ok();
1167                        meta.set_content_type(mime.as_ref());
1168                        Some(meta)
1169                    },
1170                    _ => None,
1171                },
1172                Some(error),
1173            ),
1174        };
1175        let content_type: Option<Mime> = metadata
1176            .clone()
1177            .and_then(|meta| meta.content_type)
1178            .map(Serde::into_inner)
1179            .map(Into::into);
1180
1181        let (policy_container, endpoints_list, link_headers) = match metadata.as_ref() {
1182            None => (PolicyContainer::default(), None, vec![]),
1183            Some(metadata) => (
1184                Self::create_policy_container_from_fetch_response(metadata),
1185                ReportingEndpoint::parse_reporting_endpoints_header(
1186                    &self.url.clone(),
1187                    &metadata.headers,
1188                ),
1189                extract_links_from_headers(&metadata.headers),
1190            ),
1191        };
1192
1193        let parser = match ScriptThread::page_headers_available(&self.id, metadata, CanGc::note()) {
1194            Some(parser) => parser,
1195            None => return,
1196        };
1197        if parser.aborted.get() {
1198            return;
1199        }
1200
1201        let _realm = enter_realm(&*parser.document);
1202        let window = parser.document.window();
1203
1204        // From Step 23.8.3 of https://html.spec.whatwg.org/multipage/#navigate
1205        // Let finalSandboxFlags be the union of targetSnapshotParams's sandboxing flags and
1206        // policyContainer's CSP list's CSP-derived sandboxing flags.
1207        //
1208        // TODO: This deviates a bit from the specification, because there isn't a `targetSnapshotParam`
1209        // concept yet.
1210        let final_sandboxing_flag_set = policy_container
1211            .csp_list
1212            .as_ref()
1213            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
1214            .unwrap_or(SandboxingFlagSet::empty())
1215            .union(parser.document.creation_sandboxing_flag_set());
1216
1217        if let Some(endpoints) = endpoints_list {
1218            window.set_endpoints_list(endpoints);
1219        }
1220        self.parser = Some(Trusted::new(&*parser));
1221        self.navigation_params = NavigationParams {
1222            policy_container,
1223            content_type,
1224            final_sandboxing_flag_set,
1225            link_headers,
1226            resource_header: vec![],
1227        };
1228        self.submit_resource_timing();
1229
1230        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
1231        //
1232        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
1233        // e.g., a native rendering of the content or an error message because the specified type is not supported,
1234        // then return the result of creating a document for inline content that doesn't have a DOM given
1235        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
1236        // and navigationParams's user involvement.
1237        if let Some(error) = error {
1238            let page = match error {
1239                NetworkError::SslValidation(reason, bytes) => {
1240                    let page = resources::read_string(Resource::BadCertHTML);
1241                    let page = page.replace("${reason}", &reason);
1242                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
1243                    let page = page.replace("${bytes}", encoded_bytes.as_str());
1244                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
1245                },
1246                NetworkError::Internal(reason) => {
1247                    let page = resources::read_string(Resource::NetErrorHTML);
1248                    page.replace("${reason}", &reason)
1249                },
1250                NetworkError::Crash(details) => {
1251                    let page = resources::read_string(Resource::CrashHTML);
1252                    page.replace("${details}", &details)
1253                },
1254                NetworkError::LoadCancelled => {
1255                    // The next load will show a page
1256                    return;
1257                },
1258            };
1259            self.load_inline_unknown_content(&parser, page);
1260        }
1261    }
1262
1263    fn process_response_chunk(&mut self, _: RequestId, payload: Vec<u8>) {
1264        if self.is_synthesized_document {
1265            return;
1266        }
1267        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
1268            return;
1269        };
1270        if parser.aborted.get() {
1271            return;
1272        }
1273        if !self.has_loaded_document {
1274            // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1275            self.navigation_params
1276                .resource_header
1277                .extend_from_slice(&payload);
1278            // the number of bytes in buffer is greater than or equal to 1445.
1279            if self.navigation_params.resource_header.len() >= 1445 {
1280                self.load_document(CanGc::note());
1281            }
1282        } else {
1283            parser.parse_bytes_chunk(payload, CanGc::note());
1284        }
1285    }
1286
1287    // This method is called via script_thread::handle_fetch_eof, so we must call
1288    // submit_resource_timing in this function
1289    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1290    fn process_response_eof(
1291        &mut self,
1292        _: RequestId,
1293        status: Result<ResourceFetchTiming, NetworkError>,
1294    ) {
1295        let parser = match self.parser.as_ref() {
1296            Some(parser) => parser.root(),
1297            None => return,
1298        };
1299        if parser.aborted.get() {
1300            return;
1301        }
1302
1303        match status {
1304            // are we throwing this away or can we use it?
1305            Ok(_) => (),
1306            // TODO(Savago): we should send a notification to callers #5463.
1307            Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err),
1308        }
1309
1310        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1311        //
1312        // the end of the resource is reached.
1313        if !self.has_loaded_document {
1314            self.load_document(CanGc::note());
1315        }
1316
1317        let _realm = enter_realm(&*parser);
1318
1319        parser
1320            .document
1321            .set_redirect_count(self.resource_timing.redirect_count);
1322
1323        parser.last_chunk_received.set(true);
1324        if !parser.suspended.get() {
1325            parser.parse_sync(CanGc::note());
1326        }
1327
1328        // TODO: Only update if this is the current document resource.
1329        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
1330        if let Some(pushed_index) = self.pushed_entry_index {
1331            let document = &parser.document;
1332            let performance_entry = PerformanceNavigationTiming::new(
1333                &document.global(),
1334                CrossProcessInstant::now(),
1335                document,
1336                CanGc::note(),
1337            );
1338            document
1339                .global()
1340                .performance()
1341                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1342        }
1343    }
1344
    /// Mutable access to the timing data recorded for this fetch.
    fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming {
        &mut self.resource_timing
    }
1348
    /// Shared access to the timing data recorded for this fetch.
    fn resource_timing(&self) -> &ResourceFetchTiming {
        &self.resource_timing
    }
1352
1353    // store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1354    fn submit_resource_timing(&mut self) {
1355        let parser = match self.parser.as_ref() {
1356            Some(parser) => parser.root(),
1357            None => return,
1358        };
1359        if parser.aborted.get() {
1360            return;
1361        }
1362
1363        let document = &parser.document;
1364
1365        // TODO: Pass a proper fetch start time here.
1366        let performance_entry = PerformanceNavigationTiming::new(
1367            &document.global(),
1368            CrossProcessInstant::now(),
1369            document,
1370            CanGc::note(),
1371        );
1372        self.pushed_entry_index = document.global().performance().queue_entry(
1373            performance_entry.upcast::<PerformanceEntry>(),
1374            CanGc::note(),
1375        );
1376    }
1377
1378    fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec<Violation>) {
1379        let parser = match self.parser.as_ref() {
1380            Some(parser) => parser.root(),
1381            None => return,
1382        };
1383        let document = &parser.document;
1384        let global = &document.global();
1385        // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved
1386        global.report_csp_violations(violations, None, None);
1387    }
1388}
1389
// Empty impl: `ParserContext` overrides nothing and relies on whatever
// defaults the `PreInvoke` trait provides.
impl PreInvoke for ParserContext {}
1391
/// Borrowed inputs describing the context in which a fragment is parsed.
// NOTE(review): field semantics below are inferred from the names; confirm
// against the fragment-parsing call sites.
pub(crate) struct FragmentContext<'a> {
    /// The context element node for the fragment.
    pub(crate) context_elem: &'a Node,
    /// The form element node associated with the context, if any.
    pub(crate) form_elem: Option<&'a Node>,
    /// Whether the context element allows scripting.
    pub(crate) context_element_allows_scripting: bool,
}
1397
1398#[cfg_attr(crown, allow(crown::unrooted_must_root))]
1399fn insert(
1400    parent: &Node,
1401    reference_child: Option<&Node>,
1402    child: NodeOrText<Dom<Node>>,
1403    parsing_algorithm: ParsingAlgorithm,
1404    custom_element_reaction_stack: &CustomElementReactionStack,
1405    can_gc: CanGc,
1406) {
1407    match child {
1408        NodeOrText::AppendNode(n) => {
1409            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1410            // applies if this is an element; if not, it may be
1411            // https://html.spec.whatwg.org/multipage/#insert-a-comment
1412            let element_in_non_fragment =
1413                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
1414            if element_in_non_fragment {
1415                custom_element_reaction_stack.push_new_element_queue();
1416            }
1417            parent.InsertBefore(&n, reference_child, can_gc).unwrap();
1418            if element_in_non_fragment {
1419                custom_element_reaction_stack.pop_current_element_queue(can_gc);
1420            }
1421        },
1422        NodeOrText::AppendText(t) => {
1423            // https://html.spec.whatwg.org/multipage/#insert-a-character
1424            let text = reference_child
1425                .and_then(Node::GetPreviousSibling)
1426                .or_else(|| parent.GetLastChild())
1427                .and_then(DomRoot::downcast::<Text>);
1428
1429            if let Some(text) = text {
1430                text.upcast::<CharacterData>().append_data(&t);
1431            } else {
1432                let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc);
1433                parent
1434                    .InsertBefore(text.upcast(), reference_child, can_gc)
1435                    .unwrap();
1436            }
1437        },
1438    }
1439}
1440
/// Tree sink that builds DOM nodes for the tokenizer (see its `TreeSink`
/// implementation).
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// Base URL associated with this sink.
    #[no_trace]
    base_url: ServoUrl,
    /// The document that created nodes belong to.
    document: Dom<Document>,
    /// Line number handed to `ElementCreator::ParserCreated` when an element
    /// is created.
    current_line: Cell<u64>,
    /// A script element held by the sink, if any.
    // NOTE(review): presumably the script awaiting execution — confirm with the users of this field.
    script: MutNullableDom<HTMLScriptElement>,
    /// Parsing algorithm used for created elements (template elements always
    /// use the fragment algorithm).
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack passed along when creating elements.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
1453
1454impl Sink {
1455    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1456        let x = x.downcast::<Element>().expect("Element node expected");
1457        let y = y.downcast::<Element>().expect("Element node expected");
1458
1459        x.is_in_same_home_subtree(y)
1460    }
1461
1462    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1463        node.GetParentNode().is_some()
1464    }
1465}
1466
1467impl TreeSink for Sink {
1468    type Output = Self;
    /// Finishing the sink yields the sink itself (`Output = Self`).
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn finish(self) -> Self {
        self
    }
1473
1474    type Handle = Dom<Node>;
1475    type ElemName<'a>
1476        = ExpandedName<'a>
1477    where
1478        Self: 'a;
1479
1480    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1481    fn get_document(&self) -> Dom<Node> {
1482        Dom::from_ref(self.document.upcast())
1483    }
1484
1485    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1486    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1487        let template = target
1488            .downcast::<HTMLTemplateElement>()
1489            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1490        Dom::from_ref(template.Content(CanGc::note()).upcast())
1491    }
1492
    /// Compares the two handles with `==`.
    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
        x == y
    }
1496
1497    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1498        let elem = target
1499            .downcast::<Element>()
1500            .expect("tried to get name of non-Element in HTML parsing");
1501        ExpandedName {
1502            ns: elem.namespace(),
1503            local: elem.local_name(),
1504        }
1505    }
1506
1507    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1508    fn create_element(
1509        &self,
1510        name: QualName,
1511        attrs: Vec<Attribute>,
1512        flags: ElementFlags,
1513    ) -> Dom<Node> {
1514        let attrs = attrs
1515            .into_iter()
1516            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1517            .collect();
1518        let parsing_algorithm = if flags.template {
1519            ParsingAlgorithm::Fragment
1520        } else {
1521            self.parsing_algorithm
1522        };
1523        let element = create_element_for_token(
1524            name,
1525            attrs,
1526            &self.document,
1527            ElementCreator::ParserCreated(self.current_line.get()),
1528            parsing_algorithm,
1529            &self.custom_element_reaction_stack,
1530            CanGc::note(),
1531        );
1532        Dom::from_ref(element.upcast())
1533    }
1534
1535    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1536    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1537        let comment = Comment::new(
1538            DOMString::from(String::from(text)),
1539            &self.document,
1540            None,
1541            CanGc::note(),
1542        );
1543        Dom::from_ref(comment.upcast())
1544    }
1545
1546    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1547    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1548        let doc = &*self.document;
1549        let pi = ProcessingInstruction::new(
1550            DOMString::from(String::from(target)),
1551            DOMString::from(String::from(data)),
1552            doc,
1553            CanGc::note(),
1554        );
1555        Dom::from_ref(pi.upcast())
1556    }
1557
1558    fn associate_with_form(
1559        &self,
1560        target: &Dom<Node>,
1561        form: &Dom<Node>,
1562        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1563    ) {
1564        let (element, prev_element) = nodes;
1565        let tree_node = prev_element.map_or(element, |prev| {
1566            if self.has_parent_node(element) {
1567                element
1568            } else {
1569                prev
1570            }
1571        });
1572        if !self.same_tree(tree_node, form) {
1573            return;
1574        }
1575
1576        let node = target;
1577        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1578            .expect("Owner must be a form element");
1579
1580        let elem = node.downcast::<Element>();
1581        let control = elem.and_then(|e| e.as_maybe_form_control());
1582
1583        if let Some(control) = control {
1584            control.set_form_owner_from_parser(&form, CanGc::note());
1585        }
1586    }
1587
1588    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1589    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
1590        let parent = sibling
1591            .GetParentNode()
1592            .expect("append_before_sibling called on node without parent");
1593
1594        insert(
1595            &parent,
1596            Some(sibling),
1597            new_node,
1598            self.parsing_algorithm,
1599            &self.custom_element_reaction_stack,
1600            CanGc::note(),
1601        );
1602    }
1603
1604    fn parse_error(&self, msg: Cow<'static, str>) {
1605        debug!("Parse error: {}", msg);
1606    }
1607
1608    fn set_quirks_mode(&self, mode: QuirksMode) {
1609        let mode = match mode {
1610            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1611            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1612            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1613        };
1614        self.document.set_quirks_mode(mode);
1615    }
1616
1617    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1618    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
1619        insert(
1620            parent,
1621            None,
1622            child,
1623            self.parsing_algorithm,
1624            &self.custom_element_reaction_stack,
1625            CanGc::note(),
1626        );
1627    }
1628
1629    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
1630    fn append_based_on_parent_node(
1631        &self,
1632        elem: &Dom<Node>,
1633        prev_elem: &Dom<Node>,
1634        child: NodeOrText<Dom<Node>>,
1635    ) {
1636        if self.has_parent_node(elem) {
1637            self.append_before_sibling(elem, child);
1638        } else {
1639            self.append(prev_elem, child);
1640        }
1641    }
1642
1643    fn append_doctype_to_document(
1644        &self,
1645        name: StrTendril,
1646        public_id: StrTendril,
1647        system_id: StrTendril,
1648    ) {
1649        let doc = &*self.document;
1650        let doctype = DocumentType::new(
1651            DOMString::from(String::from(name)),
1652            Some(DOMString::from(String::from(public_id))),
1653            Some(DOMString::from(String::from(system_id))),
1654            doc,
1655            CanGc::note(),
1656        );
1657        doc.upcast::<Node>()
1658            .AppendChild(doctype.upcast(), CanGc::note())
1659            .expect("Appending failed");
1660    }
1661
1662    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1663        let elem = target
1664            .downcast::<Element>()
1665            .expect("tried to set attrs on non-Element in HTML parsing");
1666        for attr in attrs {
1667            elem.set_attribute_from_parser(
1668                attr.name,
1669                DOMString::from(String::from(attr.value)),
1670                None,
1671                CanGc::note(),
1672            );
1673        }
1674    }
1675
1676    fn remove_from_parent(&self, target: &Dom<Node>) {
1677        if let Some(ref parent) = target.GetParentNode() {
1678            parent.RemoveChild(target, CanGc::note()).unwrap();
1679        }
1680    }
1681
1682    fn mark_script_already_started(&self, node: &Dom<Node>) {
1683        let script = node.downcast::<HTMLScriptElement>();
1684        if let Some(script) = script {
1685            script.set_already_started(true)
1686        }
1687    }
1688
1689    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1690        while let Some(ref child) = node.GetFirstChild() {
1691            new_parent.AppendChild(child, CanGc::note()).unwrap();
1692        }
1693    }
1694
1695    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1696    /// Specifically, the `<annotation-xml>` cases.
1697    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1698        let elem = handle.downcast::<Element>().unwrap();
1699        elem.get_attribute(&ns!(), &local_name!("encoding"))
1700            .is_some_and(|attr| {
1701                attr.value().eq_ignore_ascii_case("text/html") ||
1702                    attr.value().eq_ignore_ascii_case("application/xhtml+xml")
1703            })
1704    }
1705
1706    fn set_current_line(&self, line_number: u64) {
1707        self.current_line.set(line_number);
1708    }
1709
1710    fn pop(&self, node: &Dom<Node>) {
1711        let node = DomRoot::from_ref(&**node);
1712        vtable_for(&node).pop();
1713    }
1714
1715    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
1716        intended_parent.owner_doc().allow_declarative_shadow_roots()
1717    }
1718
1719    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
1720    /// A start tag whose tag name is "template"
1721    /// Attach shadow path
1722    fn attach_declarative_shadow(
1723        &self,
1724        host: &Dom<Node>,
1725        template: &Dom<Node>,
1726        attributes: &[Attribute],
1727    ) -> bool {
1728        attach_declarative_shadow_inner(host, template, attributes)
1729    }
1730}
1731
1732/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
1733fn create_element_for_token(
1734    name: QualName,
1735    attrs: Vec<ElementAttribute>,
1736    document: &Document,
1737    creator: ElementCreator,
1738    parsing_algorithm: ParsingAlgorithm,
1739    custom_element_reaction_stack: &CustomElementReactionStack,
1740    can_gc: CanGc,
1741) -> DomRoot<Element> {
1742    // Step 3.
1743    let is = attrs
1744        .iter()
1745        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
1746        .map(|attr| LocalName::from(&attr.value));
1747
1748    // Step 4.
1749    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());
1750
1751    // Step 5.
1752    let will_execute_script =
1753        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;
1754
1755    // Step 6.
1756    if will_execute_script {
1757        // Step 6.1.
1758        document.increment_throw_on_dynamic_markup_insertion_counter();
1759        // Step 6.2
1760        if is_execution_stack_empty() {
1761            document
1762                .window()
1763                .as_global_scope()
1764                .perform_a_microtask_checkpoint(can_gc);
1765        }
1766        // Step 6.3
1767        custom_element_reaction_stack.push_new_element_queue()
1768    }
1769
1770    // Step 7.
1771    let creation_mode = if will_execute_script {
1772        CustomElementCreationMode::Synchronous
1773    } else {
1774        CustomElementCreationMode::Asynchronous
1775    };
1776
1777    let element = Element::create(name, is, document, creator, creation_mode, None, can_gc);
1778
1779    // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3
1780    // says to invoke sanitization "when an input element is first created";
1781    // however, since sanitization requires content attributes to function,
1782    // it can't mean that literally.
1783    // Indeed, to make sanitization work correctly, we need to _not_ sanitize
1784    // until after all content attributes have been added
1785
1786    let maybe_input = element.downcast::<HTMLInputElement>();
1787    if let Some(input) = maybe_input {
1788        input.disable_sanitization();
1789    }
1790
1791    // Step 8
1792    for attr in attrs {
1793        element.set_attribute_from_parser(attr.name, attr.value, None, can_gc);
1794    }
1795
1796    // _now_ we can sanitize (and we sanitize now even if the "value"
1797    // attribute isn't present!)
1798    if let Some(input) = maybe_input {
1799        input.enable_sanitization();
1800    }
1801
1802    // Step 9.
1803    if will_execute_script {
1804        // Steps 9.1 - 9.2.
1805        custom_element_reaction_stack.pop_current_element_queue(can_gc);
1806        // Step 9.3.
1807        document.decrement_throw_on_dynamic_markup_insertion_counter();
1808    }
1809
1810    // TODO: Step 10.
1811    // TODO: Step 11.
1812
1813    // Step 12 is handled in `associate_with_form`.
1814
1815    // Step 13.
1816    element
1817}
1818
1819#[derive(JSTraceable, MallocSizeOf)]
1820struct NetworkDecoder {
1821    #[ignore_malloc_size_of = "Defined in tendril"]
1822    #[custom_trace]
1823    decoder: LossyDecoder<NetworkSink>,
1824}
1825
1826impl NetworkDecoder {
1827    fn new(encoding: &'static Encoding) -> Self {
1828        Self {
1829            decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
1830        }
1831    }
1832
1833    fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
1834        self.decoder.process(ByteTendril::from(&*chunk));
1835        std::mem::take(&mut self.decoder.inner_sink_mut().output)
1836    }
1837
1838    fn finish(self) -> StrTendril {
1839        self.decoder.finish()
1840    }
1841}
1842
1843#[derive(Default, JSTraceable)]
1844struct NetworkSink {
1845    #[no_trace]
1846    output: StrTendril,
1847}
1848
1849impl TendrilSink<UTF8> for NetworkSink {
1850    type Output = StrTendril;
1851
1852    fn process(&mut self, t: StrTendril) {
1853        if self.output.is_empty() {
1854            self.output = t;
1855        } else {
1856            self.output.push_tendril(&t);
1857        }
1858    }
1859
1860    fn error(&mut self, _desc: Cow<'static, str>) {}
1861
1862    fn finish(self) -> Self::Output {
1863        self.output
1864    }
1865}
1866
1867fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool {
1868    let host_element = host.downcast::<Element>().unwrap();
1869
1870    if host_element.shadow_root().is_some() {
1871        return false;
1872    }
1873
1874    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
1875
1876    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
1877    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
1878    // Step 5. Let delegatesfocus be true if template start tag
1879    // has a shadowrootdelegatesfocus attribute; otherwise false.
1880    // Step 6. Let serializable be true if template start tag
1881    // has a shadowrootserializable attribute; otherwise false.
1882    let mut shadow_root_mode = ShadowRootMode::Open;
1883    let mut clonable = false;
1884    let mut delegatesfocus = false;
1885    let mut serializable = false;
1886
1887    let attributes: Vec<ElementAttribute> = attributes
1888        .iter()
1889        .map(|attr| {
1890            ElementAttribute::new(
1891                attr.name.clone(),
1892                DOMString::from(String::from(attr.value.clone())),
1893            )
1894        })
1895        .collect();
1896
1897    attributes
1898        .iter()
1899        .for_each(|attr: &ElementAttribute| match attr.name.local {
1900            local_name!("shadowrootmode") => {
1901                if attr.value.str().eq_ignore_ascii_case("open") {
1902                    shadow_root_mode = ShadowRootMode::Open;
1903                } else if attr.value.str().eq_ignore_ascii_case("closed") {
1904                    shadow_root_mode = ShadowRootMode::Closed;
1905                } else {
1906                    unreachable!("shadowrootmode value is not open nor closed");
1907                }
1908            },
1909            local_name!("shadowrootclonable") => {
1910                clonable = true;
1911            },
1912            local_name!("shadowrootdelegatesfocus") => {
1913                delegatesfocus = true;
1914            },
1915            local_name!("shadowrootserializable") => {
1916                serializable = true;
1917            },
1918            _ => {},
1919        });
1920
1921    // Step 8.1. Attach a shadow root with declarative shadow host element,
1922    // mode, clonable, serializable, delegatesFocus, and "named".
1923    match host_element.attach_shadow(
1924        IsUserAgentWidget::No,
1925        shadow_root_mode,
1926        clonable,
1927        serializable,
1928        delegatesfocus,
1929        SlotAssignmentMode::Named,
1930        CanGc::note(),
1931    ) {
1932        Ok(shadow_root) => {
1933            // Step 8.3. Set shadow's declarative to true.
1934            shadow_root.set_declarative(true);
1935
1936            // Set 8.4. Set template's template contents property to shadow.
1937            let shadow = shadow_root.upcast::<DocumentFragment>();
1938            template_element.set_contents(Some(shadow));
1939
1940            // Step 8.5. Set shadow’s available to element internals to true.
1941            shadow_root.set_available_to_element_internals(true);
1942
1943            true
1944        },
1945        Err(_) => false,
1946    }
1947}