script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::{Cell, RefCell};
7use std::mem;
8use std::rc::Rc;
9
10use base64::Engine as _;
11use base64::engine::general_purpose;
12use content_security_policy::sandboxing_directive::SandboxingFlagSet;
13use devtools_traits::ScriptToDevtoolsControlMsg;
14use dom_struct::dom_struct;
15use embedder_traits::resources::{self, Resource};
16use encoding_rs::{Encoding, UTF_8};
17use html5ever::buffer_queue::BufferQueue;
18use html5ever::tendril::StrTendril;
19use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
20use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
21use hyper_serde::Serde;
22use markup5ever::TokenizerResult;
23use mime::{self, Mime};
24use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
25use net_traits::policy_container::PolicyContainer;
26use net_traits::request::RequestId;
27use net_traits::{
28    FetchMetadata, LoadContext, Metadata, NetworkError, ReferrerPolicy, ResourceFetchTiming,
29};
30use profile_traits::time::{
31    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
32};
33use profile_traits::time_profile;
34use script_bindings::script_runtime::temp_cx;
35use script_traits::DocumentActivity;
36use servo_base::cross_process_instant::CrossProcessInstant;
37use servo_base::id::{PipelineId, WebViewId};
38use servo_config::pref;
39use servo_constellation_traits::{LoadOrigin, TargetSnapshotParams};
40use servo_url::{MutableOrigin, ServoUrl};
41use style::context::QuirksMode as ServoQuirksMode;
42use tendril::stream::LossyDecoder;
43use tendril::{ByteTendril, TendrilSink};
44
45use crate::document_loader::{DocumentLoader, LoadType};
46use crate::dom::bindings::cell::DomRefCell;
47use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
48    DocumentMethods, DocumentReadyState,
49};
50use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
51use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
52use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
53use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
54use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
55    ShadowRootMode, SlotAssignmentMode,
56};
57use crate::dom::bindings::inheritance::Castable;
58use crate::dom::bindings::refcounted::Trusted;
59use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object};
60use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
61use crate::dom::bindings::settings_stack::is_execution_stack_empty;
62use crate::dom::bindings::str::{DOMString, USVString};
63use crate::dom::characterdata::CharacterData;
64use crate::dom::comment::Comment;
65use crate::dom::csp::{Violation, parse_csp_list_from_metadata};
66use crate::dom::customelementregistry::CustomElementReactionStack;
67use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
68use crate::dom::documentfragment::DocumentFragment;
69use crate::dom::documenttype::DocumentType;
70use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
71use crate::dom::globalscope::GlobalScope;
72use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
73use crate::dom::html::htmlimageelement::HTMLImageElement;
74use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
75use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
76use crate::dom::node::{Node, ShadowIncluding};
77use crate::dom::performance::performanceentry::PerformanceEntry;
78use crate::dom::performance::performancenavigationtiming::PerformanceNavigationTiming;
79use crate::dom::processinginstruction::ProcessingInstruction;
80use crate::dom::processingoptions::{
81    LinkHeader, LinkProcessingPhase, extract_links_from_headers, process_link_headers,
82};
83use crate::dom::reporting::reportingendpoint::ReportingEndpoint;
84use crate::dom::security::csp::CspReporting;
85use crate::dom::security::xframeoptions::check_a_navigation_response_adherence_to_x_frame_options;
86use crate::dom::shadowroot::IsUserAgentWidget;
87use crate::dom::text::Text;
88use crate::dom::types::{HTMLElement, HTMLMediaElement, HTMLOptionElement};
89use crate::dom::virtualmethods::vtable_for;
90use crate::navigation::determine_the_origin;
91use crate::network_listener::FetchResponseListener;
92use crate::realms::{enter_auto_realm, enter_realm};
93use crate::script_runtime::{CanGc, IntroductionType};
94use crate::script_thread::ScriptThread;
95
96mod async_html;
97pub(crate) mod encoding;
98pub(crate) mod html;
99mod prefetch;
100mod xml;
101
102use encoding::{NetworkDecoderState, NetworkSink};
103pub(crate) use html::serialize_html_fragment;
104
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    /// The DOM reflector backing this object.
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The decoder used for the network input.
    network_decoder: DomRefCell<NetworkDecoderState>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// A decoder exclusively for input to the prefetch tokenizer.
    ///
    /// Unlike the actual decoder, this one takes a best guess at the encoding and starts
    /// decoding immediately.
    #[no_trace]
    prefetch_decoder: RefCell<LossyDecoder<NetworkSink>>,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// Decoded text queued for the prefetch tokenizer.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    ///
    /// `None` unless a devtools channel exists and the document has a browsing
    /// context at construction time.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
161
/// A qualified attribute name paired with its value.
pub(crate) struct ElementAttribute {
    /// The qualified name of the attribute.
    name: QualName,
    /// The attribute's value.
    value: DOMString,
}
166
/// Selects which parsing algorithm an HTML tokenizer is created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Whole-document parsing (used by the document and script-created parsers).
    Normal,
    /// Fragment parsing (used by `ServoParser::parse_html_fragment`).
    Fragment,
}
172
173impl ElementAttribute {
174    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
175        ElementAttribute { name, value }
176    }
177}
178
179impl ServoParser {
    /// Returns exactly what [`ServoParser::can_write`] returns.
    ///
    /// NOTE(review): the name suggests the opposite of "active", yet this is
    /// true when the parser is script-created or a script is currently
    /// running under it — confirm the intended meaning against callers.
    pub(crate) fn parser_is_not_active(&self) -> bool {
        self.can_write()
    }
183
184    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
185    pub(crate) fn parse_html_document(
186        document: &Document,
187        input: Option<DOMString>,
188        url: ServoUrl,
189        encoding_hint_from_content_type: Option<&'static Encoding>,
190        encoding_of_container_document: Option<&'static Encoding>,
191        cx: &mut js::context::JSContext,
192    ) {
193        // Step 1. Set document's type to "html".
194        //
195        // Set by callers of this function and asserted here
196        assert!(document.is_html_document());
197
198        // Step 2. Create an HTML parser parser, associated with document.
199        let parser = ServoParser::new(
200            document,
201            if pref!(dom_servoparser_async_html_tokenizer_enabled) {
202                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None))
203            } else {
204                Tokenizer::Html(self::html::Tokenizer::new(
205                    document,
206                    url,
207                    None,
208                    ParsingAlgorithm::Normal,
209                ))
210            },
211            ParserKind::Normal,
212            encoding_hint_from_content_type,
213            encoding_of_container_document,
214            CanGc::from_cx(cx),
215        );
216
217        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
218        // Step 4. Start parser and let it run until it has consumed all the
219        // characters just inserted into the input stream.
220        //
221        // Set as the document's current parser and initialize with `input`, if given.
222        if let Some(input) = input {
223            parser.parse_complete_string_chunk(String::from(input), cx);
224        } else {
225            parser.document.set_current_parser(Some(&parser));
226        }
227    }
228
229    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
230    pub(crate) fn parse_html_fragment<'el>(
231        context: &'el Element,
232        input: DOMString,
233        allow_declarative_shadow_roots: bool,
234        cx: &mut js::context::JSContext,
235    ) -> impl Iterator<Item = DomRoot<Node>> + use<'el> {
236        let context_node = context.upcast::<Node>();
237        let context_document = context_node.owner_doc();
238        let window = context_document.window();
239        let url = context_document.url();
240
241        // Step 1. Let document be a Document node whose type is "html".
242        let loader = DocumentLoader::new_with_threads(
243            context_document.loader().resource_threads().clone(),
244            Some(url.clone()),
245        );
246        let document = Document::new(
247            window,
248            HasBrowsingContext::No,
249            Some(url.clone()),
250            context_document.about_base_url(),
251            context_document.origin().clone(),
252            IsHTMLDocument::HTMLDocument,
253            None,
254            None,
255            DocumentActivity::Inactive,
256            DocumentSource::FromParser,
257            loader,
258            None,
259            None,
260            Default::default(),
261            false,
262            allow_declarative_shadow_roots,
263            Some(context_document.insecure_requests_policy()),
264            context_document.has_trustworthy_ancestor_or_current_origin(),
265            context_document.custom_element_reaction_stack(),
266            context_document.creation_sandboxing_flag_set(),
267            CanGc::from_cx(cx),
268        );
269
270        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
271        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
272        // mode to "limited-quirks".
273        document.set_quirks_mode(context_document.quirks_mode());
274
275        // NOTE: The following steps happened as part of Step 1.
276        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
277        // allow declarative shadow roots to true.
278        // Step 5. Create a new HTML parser, and associate it with document.
279
280        // Step 11.
281        let form = context_node
282            .inclusive_ancestors(ShadowIncluding::No)
283            .find(|element| element.is::<HTMLFormElement>());
284
285        let fragment_context = FragmentContext {
286            context_elem: context_node,
287            form_elem: form.as_deref(),
288            context_element_allows_scripting: context_document.scripting_enabled(),
289        };
290
291        let parser = ServoParser::new(
292            &document,
293            Tokenizer::Html(self::html::Tokenizer::new(
294                &document,
295                url,
296                Some(fragment_context),
297                ParsingAlgorithm::Fragment,
298            )),
299            ParserKind::Normal,
300            None,
301            None,
302            CanGc::from_cx(cx),
303        );
304        parser.parse_complete_string_chunk(String::from(input), cx);
305
306        // Step 14.
307        let root_element = document.GetDocumentElement().expect("no document element");
308        FragmentParsingResult {
309            inner: root_element.upcast::<Node>().children(),
310        }
311    }
312
    /// Creates a script-created HTML parser for `document` and installs it as
    /// the document's current parser.
    ///
    /// No input is fed here; the parser is created with
    /// `ParserKind::ScriptCreated`, so [`ServoParser::can_write`] is true for it
    /// and text is expected to arrive through [`ServoParser::write`].
    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
        let parser = ServoParser::new(
            document,
            // The tokenizer implementation is chosen by preference.
            if pref!(dom_servoparser_async_html_tokenizer_enabled) {
                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None))
            } else {
                Tokenizer::Html(self::html::Tokenizer::new(
                    document,
                    url,
                    None,
                    ParsingAlgorithm::Normal,
                ))
            },
            ParserKind::ScriptCreated,
            // No encoding hints: the input comes from script, not from bytes.
            None,
            None,
            CanGc::note(),
        );
        document.set_current_parser(Some(&parser));
    }
333
334    pub(crate) fn parse_xml_document(
335        document: &Document,
336        input: Option<DOMString>,
337        url: ServoUrl,
338        encoding_hint_from_content_type: Option<&'static Encoding>,
339        cx: &mut js::context::JSContext,
340    ) {
341        let parser = ServoParser::new(
342            document,
343            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
344            ParserKind::Normal,
345            encoding_hint_from_content_type,
346            None,
347            CanGc::from_cx(cx),
348        );
349
350        // Set as the document's current parser and initialize with `input`, if given.
351        if let Some(input) = input {
352            parser.parse_complete_string_chunk(String::from(input), cx);
353        } else {
354            parser.document.set_current_parser(Some(&parser));
355        }
356    }
357
    /// Returns the current
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>.
    pub(crate) fn script_nesting_level(&self) -> usize {
        self.script_nesting_level.get()
    }
361
    /// Whether this is a
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>.
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
365
366    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
367    /// tag whose tag name is "script"' of
368    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
369    ///
370    /// This first moves everything from the script input to the beginning of
371    /// the network input, effectively resetting the insertion point to just
372    /// before the next character to be consumed.
373    ///
374    ///
375    /// ```text
376    ///     | ... script input ... network input ...
377    ///     ^
378    ///     insertion point
379    /// ```
380    pub(crate) fn resume_with_pending_parsing_blocking_script(
381        &self,
382        script: &HTMLScriptElement,
383        result: ScriptResult,
384        cx: &mut js::context::JSContext,
385    ) {
386        assert!(self.suspended.get());
387        self.suspended.set(false);
388
389        self.script_input.swap_with(&self.network_input);
390        while let Some(chunk) = self.script_input.pop_front() {
391            self.network_input.push_back(chunk);
392        }
393
394        let script_nesting_level = self.script_nesting_level.get();
395        assert_eq!(script_nesting_level, 0);
396
397        self.script_nesting_level.set(script_nesting_level + 1);
398        script.execute(cx, result);
399        self.script_nesting_level.set(script_nesting_level);
400
401        if !self.suspended.get() && !self.aborted.get() {
402            self.parse_sync(cx);
403        }
404    }
405
    /// Whether [`ServoParser::write`] may be called: true for a script-created
    /// parser, or while a script is running under this parser (script nesting
    /// level above zero).
    pub(crate) fn can_write(&self) -> bool {
        self.script_created_parser || self.script_nesting_level.get() > 0
    }
409
410    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
411    pub(crate) fn write(&self, text: DOMString, cx: &mut js::context::JSContext) {
412        assert!(self.can_write());
413
414        if self.document.has_pending_parsing_blocking_script() {
415            // There is already a pending parsing blocking script so the
416            // parser is suspended, we just append everything to the
417            // script input and abort these steps.
418            self.script_input.push_back(String::from(text).into());
419            return;
420        }
421
422        // There is no pending parsing blocking script, so all previous calls
423        // to document.write() should have seen their entire input tokenized
424        // and process, with nothing pushed to the parser script input.
425        assert!(self.script_input.is_empty());
426
427        let input = BufferQueue::default();
428        input.push_back(String::from(text).into());
429
430        let profiler_chan = self
431            .document
432            .window()
433            .as_global_scope()
434            .time_profiler_chan()
435            .clone();
436        let profiler_metadata = TimerMetadata {
437            url: self.document.url().as_str().into(),
438            iframe: TimerMetadataFrameType::RootWindow,
439            incremental: TimerMetadataReflowType::FirstReflow,
440        };
441        self.tokenize(
442            |cx, tokenizer| {
443                tokenizer.feed(&input, cx, profiler_chan.clone(), profiler_metadata.clone())
444            },
445            cx,
446        );
447
448        if self.suspended.get() {
449            // Parser got suspended, insert remaining input at end of
450            // script input, following anything written by scripts executed
451            // reentrantly during this call.
452            while let Some(chunk) = input.pop_front() {
453                self.script_input.push_back(chunk);
454            }
455            return;
456        }
457
458        assert!(input.is_empty());
459    }
460
461    /// Steps 4-6 of <https://html.spec.whatwg.org/multipage/#dom-document-close>
462    pub(crate) fn close(&self, cx: &mut js::context::JSContext) {
463        assert!(self.script_created_parser);
464
465        // Step 4. Insert an explicit "EOF" character at the end of the parser's input stream.
466        self.last_chunk_received.set(true);
467
468        // Step 5. If this's pending parsing-blocking script is not null, then return.
469        if self.suspended.get() {
470            return;
471        }
472
473        // Step 6. Run the tokenizer, processing resulting tokens as they are emitted,
474        // and stopping when the tokenizer reaches the explicit "EOF" character or spins the event loop.
475        self.parse_sync(cx);
476    }
477
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    ///
    /// Marks the parser as aborted, discards all queued input, ends the
    /// tokenizer, detaches the parser from its document and moves the
    /// document's ready state to interactive and then complete.
    ///
    /// # Panics
    ///
    /// Panics if the parser was already aborted.
    pub(crate) fn abort(&self, cx: &mut js::context::JSContext) {
        assert!(!self.aborted.get());
        self.aborted.set(true);

        // Step 1.
        // Discard everything still queued in both input streams.
        self.script_input.replace_with(BufferQueue::default());
        self.network_input.replace_with(BufferQueue::default());

        // Step 2.
        self.document
            .set_ready_state(DocumentReadyState::Interactive, CanGc::from_cx(cx));

        // Step 3.
        // End the tokenizer and detach this parser from the document.
        self.tokenizer.end(cx);
        self.document.set_current_parser(None);

        // Step 4.
        self.document
            .set_ready_state(DocumentReadyState::Complete, CanGc::from_cx(cx));
    }
499
    /// <https://html.spec.whatwg.org/multipage/#active-parser>
    ///
    /// True while a script is running under this parser (script nesting level
    /// above zero) and the parser has not been aborted.
    pub(crate) fn is_active(&self) -> bool {
        self.script_nesting_level() > 0 && !self.aborted.get()
    }
504
    /// Returns the current line as reported by the underlying tokenizer.
    pub(crate) fn get_current_line(&self) -> u32 {
        self.tokenizer.get_current_line()
    }
508
509    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
510    fn new_inherited(
511        document: &Document,
512        tokenizer: Tokenizer,
513        kind: ParserKind,
514        encoding_hint_from_content_type: Option<&'static Encoding>,
515        encoding_of_container_document: Option<&'static Encoding>,
516    ) -> Self {
517        // Store the whole input for the devtools Sources panel, if the devtools server is running
518        // and we are parsing for a document load (not just things like innerHTML).
519        // TODO: check if a devtools client is actually connected and/or wants the sources?
520        let content_for_devtools = (document.global().devtools_chan().is_some() &&
521            document.has_browsing_context())
522        .then_some(DomRefCell::new(String::new()));
523
524        ServoParser {
525            reflector: Reflector::new(),
526            document: Dom::from_ref(document),
527            network_decoder: DomRefCell::new(NetworkDecoderState::new(
528                encoding_hint_from_content_type,
529                encoding_of_container_document,
530            )),
531            network_input: BufferQueue::default(),
532            script_input: BufferQueue::default(),
533            tokenizer,
534            last_chunk_received: Cell::new(false),
535            suspended: Default::default(),
536            script_nesting_level: Default::default(),
537            aborted: Default::default(),
538            script_created_parser: kind == ParserKind::ScriptCreated,
539            prefetch_decoder: RefCell::new(LossyDecoder::new_encoding_rs(
540                encoding_hint_from_content_type.unwrap_or(UTF_8),
541                Default::default(),
542            )),
543            prefetch_tokenizer: prefetch::Tokenizer::new(document),
544            prefetch_input: BufferQueue::default(),
545            content_for_devtools,
546        }
547    }
548
    /// Creates a parser for `document` and reflects it into the document's
    /// window, returning the rooted result.
    ///
    /// All arguments besides `can_gc` are forwarded unchanged to
    /// [`ServoParser::new_inherited`].
    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
    fn new(
        document: &Document,
        tokenizer: Tokenizer,
        kind: ParserKind,
        encoding_hint_from_content_type: Option<&'static Encoding>,
        encoding_of_container_document: Option<&'static Encoding>,
        can_gc: CanGc,
    ) -> DomRoot<Self> {
        reflect_dom_object(
            Box::new(ServoParser::new_inherited(
                document,
                tokenizer,
                kind,
                encoding_hint_from_content_type,
                encoding_of_container_document,
            )),
            document.window(),
            can_gc,
        )
    }
570
571    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
572        if let Some(mut content_for_devtools) = self
573            .content_for_devtools
574            .as_ref()
575            .map(|content| content.borrow_mut())
576        {
577            // TODO: append these chunks more efficiently
578            content_for_devtools.push_str(chunk.as_ref());
579        }
580
581        if chunk.is_empty() {
582            return;
583        }
584
585        // Push the chunk into the network input stream,
586        // which is tokenized lazily.
587        self.network_input.push_back(chunk);
588    }
589
590    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
591        // For byte input, we convert it to text using the network decoder.
592        if let Some(decoded_chunk) = self
593            .network_decoder
594            .borrow_mut()
595            .push(&chunk, &self.document)
596        {
597            self.push_tendril_input_chunk(decoded_chunk);
598        }
599
600        if self.should_prefetch() {
601            // Push the chunk into the prefetch input stream,
602            // which is tokenized eagerly, to scan for resources
603            // to prefetch. If the user script uses `document.write()`
604            // to overwrite the network input, this prefetching may
605            // have been wasted, but in most cases it won't.
606            let mut prefetch_decoder = self.prefetch_decoder.borrow_mut();
607            prefetch_decoder.process(ByteTendril::from(&*chunk));
608
609            self.prefetch_input
610                .push_back(mem::take(&mut prefetch_decoder.inner_sink_mut().output));
611            self.prefetch_tokenizer.feed(&self.prefetch_input);
612        }
613    }
614
    /// Whether network input should also be scanned for resources to prefetch.
    ///
    /// True only when the document has a browsing context.
    fn should_prefetch(&self) -> bool {
        // Per https://github.com/whatwg/html/issues/1495
        // stylesheets should not be loaded for documents
        // without browsing contexts.
        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
        // suggests that no content should be preloaded in such a case.
        // We're conservative, and only prefetch for documents
        // with browsing contexts.
        self.document.browsing_context().is_some()
    }
625
626    fn push_string_input_chunk(&self, chunk: String) {
627        // The input has already been decoded as a string, so doesn't need
628        // to be decoded by the network decoder again.
629        let chunk = StrTendril::from(chunk);
630        self.push_tendril_input_chunk(chunk);
631    }
632
633    fn parse_sync(&self, cx: &mut js::context::JSContext) {
634        assert!(self.script_input.is_empty());
635
636        // This parser will continue to parse while there is either pending input or
637        // the parser remains unsuspended.
638
639        if self.last_chunk_received.get() {
640            let chunk = self.network_decoder.borrow_mut().finish(&self.document);
641            if !chunk.is_empty() {
642                self.push_tendril_input_chunk(chunk);
643            }
644        }
645
646        if self.aborted.get() {
647            return;
648        }
649
650        let profiler_chan = self
651            .document
652            .window()
653            .as_global_scope()
654            .time_profiler_chan()
655            .clone();
656        let profiler_metadata = TimerMetadata {
657            url: self.document.url().as_str().into(),
658            iframe: TimerMetadataFrameType::RootWindow,
659            incremental: TimerMetadataReflowType::FirstReflow,
660        };
661        self.tokenize(
662            |cx, tokenizer| {
663                tokenizer.feed(
664                    &self.network_input,
665                    cx,
666                    profiler_chan.clone(),
667                    profiler_metadata.clone(),
668                )
669            },
670            cx,
671        );
672
673        if self.suspended.get() {
674            return;
675        }
676
677        assert!(self.network_input.is_empty());
678
679        if self.last_chunk_received.get() {
680            self.finish(cx);
681        }
682    }
683
684    fn parse_complete_string_chunk(&self, input: String, cx: &mut js::context::JSContext) {
685        self.document.set_current_parser(Some(self));
686        self.push_string_input_chunk(input);
687        self.last_chunk_received.set(true);
688        if !self.suspended.get() {
689            self.parse_sync(cx);
690        }
691    }
692
693    fn parse_bytes_chunk(&self, input: Vec<u8>, cx: &mut js::context::JSContext) {
694        let _realm = enter_realm(&*self.document);
695        self.document.set_current_parser(Some(self));
696        self.push_bytes_input_chunk(input);
697        if !self.suspended.get() {
698            self.parse_sync(cx);
699        }
700    }
701
    /// Drives the tokenizer by calling `feed` repeatedly until it reports that
    /// it is done, or until a script suspends or aborts the parser.
    ///
    /// Each time the tokenizer hands back a script element, the script is
    /// prepared with the script nesting level raised by one. An encoding
    /// indicator result is ignored and feeding simply continues.
    ///
    /// # Panics
    ///
    /// Panics if the parser is suspended or aborted when an iteration starts.
    fn tokenize<F>(&self, feed: F, cx: &mut js::context::JSContext)
    where
        F: Fn(
            &mut js::context::JSContext,
            &Tokenizer,
        ) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
    {
        loop {
            assert!(!self.suspended.get());
            assert!(!self.aborted.get());

            self.document.window().reflow_if_reflow_timer_expired();
            let script = match feed(cx, &self.tokenizer) {
                // All of the available input was consumed.
                TokenizerResult::Done => return,
                // Nothing to do for an encoding indicator here; keep feeding.
                TokenizerResult::EncodingIndicator(_) => continue,
                // The tokenizer stopped at a script that must be handled now.
                TokenizerResult::Script(script) => script,
            };

            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
            // branch "An end tag whose tag name is "script"
            // The spec says to perform the microtask checkpoint before
            // setting the insertion mode back from Text, but this is not
            // possible with the way servo and html5ever currently
            // relate to each other, and hopefully it is not observable.
            if is_execution_stack_empty() {
                self.document.window().perform_a_microtask_checkpoint(cx);
            }

            let script_nesting_level = self.script_nesting_level.get();

            // Prepare the script one nesting level deeper, then restore the level.
            self.script_nesting_level.set(script_nesting_level + 1);
            script.set_initial_script_text();
            // A script found while another script is already running under
            // this parser is flagged as an injected script.
            let introduction_type_override =
                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
            script.prepare(cx, introduction_type_override);
            self.script_nesting_level.set(script_nesting_level);

            // Preparing the script may have left a parsing-blocking script
            // pending, in which case the parser is suspended.
            if self.document.has_pending_parsing_blocking_script() {
                self.suspended.set(true);
                return;
            }
            // The script may also have aborted the parser.
            if self.aborted.get() {
                return;
            }
        }
    }
748
749    /// <https://html.spec.whatwg.org/multipage/#the-end>
750    fn finish(&self, cx: &mut js::context::JSContext) {
751        assert!(!self.suspended.get());
752        assert!(self.last_chunk_received.get());
753        assert!(self.script_input.is_empty());
754        assert!(self.network_input.is_empty());
755        assert!(self.network_decoder.borrow().is_finished());
756
757        // Step 1.
758        self.document
759            .set_ready_state(DocumentReadyState::Interactive, CanGc::from_cx(cx));
760
761        // Step 2.
762        self.tokenizer.end(cx);
763        self.document.set_current_parser(None);
764
765        // Steps 3-12 are in another castle, namely finish_load.
766        let url = self.tokenizer.url().clone();
767        self.document.finish_load(LoadType::PageSource(url), cx);
768
769        // Send the source contents to devtools, if needed.
770        if let Some(content_for_devtools) = self
771            .content_for_devtools
772            .as_ref()
773            .map(|content| content.take())
774        {
775            let global = self.document.global();
776            let chan = global.devtools_chan().expect("Guaranteed by new");
777            let pipeline_id = self.document.global().pipeline_id();
778            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
779                pipeline_id,
780                content_for_devtools,
781            ));
782        }
783    }
784}
785
786struct FragmentParsingResult<I>
787where
788    I: Iterator<Item = DomRoot<Node>>,
789{
790    inner: I,
791}
792
793impl<I> Iterator for FragmentParsingResult<I>
794where
795    I: Iterator<Item = DomRoot<Node>>,
796{
797    type Item = DomRoot<Node>;
798
799    #[expect(unsafe_code)]
800    fn next(&mut self) -> Option<DomRoot<Node>> {
801        let mut cx = unsafe { script_bindings::script_runtime::temp_cx() };
802        let cx = &mut cx;
803
804        let next = self.inner.next()?;
805        next.remove_self(cx);
806        Some(next)
807    }
808
809    fn size_hint(&self) -> (usize, Option<usize>) {
810        self.inner.size_hint()
811    }
812}
813
/// Tags how a parser came into existence.
///
/// NOTE(review): the variant descriptions below are inferred from the variant
/// names only; confirm against the code that constructs and matches on them.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    /// Presumably a parser created for an ordinary document load.
    Normal,
    /// Presumably a parser created on behalf of script (e.g. `document.open()`).
    ScriptCreated,
}
819
/// The concrete tokenizer backing a parser.
///
/// Each variant wraps the `Tokenizer` type of one of the sibling `html`,
/// `async_html` and `xml` modules; the methods on this enum forward to
/// whichever one is active.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// Tokenizer from the `html` module.
    Html(self::html::Tokenizer),
    /// Tokenizer from the `async_html` module.
    AsyncHtml(self::async_html::Tokenizer),
    /// Tokenizer from the `xml` module.
    Xml(self::xml::Tokenizer),
}
827
828impl Tokenizer {
829    fn feed(
830        &self,
831        input: &BufferQueue,
832        cx: &mut js::context::JSContext,
833        profiler_chan: ProfilerChan,
834        profiler_metadata: TimerMetadata,
835    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
836        match *self {
837            Tokenizer::Html(ref tokenizer) => time_profile!(
838                ProfilerCategory::ScriptParseHTML,
839                Some(profiler_metadata),
840                profiler_chan,
841                || tokenizer.feed(input),
842            ),
843            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
844                ProfilerCategory::ScriptParseHTML,
845                Some(profiler_metadata),
846                profiler_chan,
847                || tokenizer.feed(input, cx),
848            ),
849            Tokenizer::Xml(ref tokenizer) => time_profile!(
850                ProfilerCategory::ScriptParseXML,
851                Some(profiler_metadata),
852                profiler_chan,
853                || tokenizer.feed(input),
854            ),
855        }
856    }
857
858    fn end(&self, cx: &mut js::context::JSContext) {
859        match *self {
860            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
861            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(cx),
862            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
863        }
864    }
865
866    fn url(&self) -> &ServoUrl {
867        match *self {
868            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
869            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
870            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
871        }
872    }
873
874    fn set_plaintext_state(&self) {
875        match *self {
876            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
877            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
878            Tokenizer::Xml(_) => unimplemented!(),
879        }
880    }
881
882    fn get_current_line(&self) -> u32 {
883        match *self {
884            Tokenizer::Html(ref tokenizer) => tokenizer.get_current_line(),
885            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.get_current_line(),
886            Tokenizer::Xml(ref tokenizer) => tokenizer.get_current_line(),
887        }
888    }
889}
890
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// A pared-down stand-in for the spec's navigation params: it does not carry
/// every field the spec defines, only the values the document-loading
/// algorithms in this file read.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// Content type of this document, if known. It is handed to the MIME
    /// classifier together with `resource_header` when the document is loaded.
    content_type: Option<Mime>,
    /// Link headers extracted from the response headers.
    link_headers: Vec<LinkHeader>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// Leading bytes of the response body, buffered for sniffing and later
    /// replayed into the parser.
    ///
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    resource_header: Vec<u8>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-about-base-url>
    about_base_url: Option<ServoUrl>,
}
908
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request. `None` until the response
    /// headers have been processed and a parser has been obtained.
    parser: Option<Trusted<ServoParser>>,
    /// Is this a synthesized document, i.e. one whose markup is generated
    /// here (media documents and inline error / unknown-content pages).
    /// Response chunks are ignored once this is set.
    is_synthesized_document: bool,
    /// Has a document already been loaded (relevant for checking the resource header).
    /// Set by `load_document`; until then response chunks are buffered in
    /// `navigation_params.resource_header`.
    has_loaded_document: bool,
    /// The [`WebViewId`] of the `WebView` associated with this document.
    webview_id: WebViewId,
    /// The [`PipelineId`] of the `Pipeline` associated with this document.
    pipeline_id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Index returned when the navigation timing entry was queued on the
    /// global's performance buffer, if any.
    pushed_entry_index: Option<usize>,
    /// Params required in document load algorithms.
    navigation_params: NavigationParams,
    /// To report CSP violations to the global that initiated the navigation
    parent_info: Option<PipelineId>,
    /// Snapshot taken for the navigation target; its sandboxing flags are
    /// unioned with the CSP-derived flags when the response is processed.
    target_snapshot_params: TargetSnapshotParams,
    /// Who initiated the load. For `LoadOrigin::Script` the contained snapshot
    /// supplies the source origin when determining the document's origin.
    load_origin: LoadOrigin,
}
933
934impl ParserContext {
935    pub(crate) fn new(
936        webview_id: WebViewId,
937        pipeline_id: PipelineId,
938        url: ServoUrl,
939        creation_sandboxing_flag_set: SandboxingFlagSet,
940        parent_info: Option<PipelineId>,
941        target_snapshot_params: TargetSnapshotParams,
942        load_origin: LoadOrigin,
943    ) -> ParserContext {
944        ParserContext {
945            parser: None,
946            is_synthesized_document: false,
947            has_loaded_document: false,
948            webview_id,
949            pipeline_id,
950            url,
951            parent_info,
952            pushed_entry_index: None,
953            navigation_params: NavigationParams {
954                policy_container: Default::default(),
955                content_type: None,
956                link_headers: vec![],
957                final_sandboxing_flag_set: creation_sandboxing_flag_set,
958                resource_header: vec![],
959                about_base_url: Default::default(),
960            },
961            target_snapshot_params,
962            load_origin,
963        }
964    }
965
966    pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
967        let Some(policy_container) = policy_container else {
968            return;
969        };
970        self.navigation_params.policy_container = policy_container.clone();
971    }
972
    /// Replaces the about base URL stored in the navigation params
    /// (`None` clears it).
    pub(crate) fn set_about_base_url(&mut self, about_base_url: Option<ServoUrl>) {
        self.navigation_params.about_base_url = about_base_url;
    }
976
977    pub(crate) fn get_document(&self) -> Option<DomRoot<Document>> {
978        self.parser
979            .as_ref()
980            .map(|parser| parser.root().document.as_rooted())
981    }
982
    /// Returns the stored parent pipeline id, if any.
    pub(crate) fn parent_info(&self) -> Option<PipelineId> {
        self.parent_info
    }
986
987    /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
988    fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
989        // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's blob URL entry's environment's policy container.
990        // TODO
991        // Step 2. Let result be a new policy container.
992        // Step 7. Return result.
993        PolicyContainer {
994            // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response.
995            csp_list: parse_csp_list_from_metadata(&metadata.headers),
996            // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY]
997            referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers),
998        }
999    }
1000
1001    /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
1002    fn initialize_document_object(&self, document: &Document) {
1003        // Step 9. Let document be a new Document, with
1004        document.set_policy_container(self.navigation_params.policy_container.clone());
1005        document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set);
1006        document.set_about_base_url(self.navigation_params.about_base_url.clone());
1007        // Step 17. Process link headers given document, navigationParams's response, and "pre-media".
1008        process_link_headers(
1009            &self.navigation_params.link_headers,
1010            document,
1011            LinkProcessingPhase::PreMedia,
1012        );
1013    }
1014
1015    /// Part of various load document methods
1016    fn process_link_headers_in_media_phase_with_task(&mut self, document: &Document) {
1017        // The first task that the networking task source places on the task queue
1018        // while fetching runs must process link headers given document,
1019        // navigationParams's response, and "media", after the task has been processed by the HTML parser.
1020        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1021        if !link_headers.is_empty() {
1022            let window = document.window();
1023            let document = Trusted::new(document);
1024            window
1025                .upcast::<GlobalScope>()
1026                .task_manager()
1027                .networking_task_source()
1028                .queue(task!(process_link_headers_task: move || {
1029                    process_link_headers(&link_headers, &document.root(), LinkProcessingPhase::Media);
1030                }));
1031        }
1032    }
1033
1034    /// <https://html.spec.whatwg.org/multipage/#loading-a-document>
1035    fn load_document(&mut self, cx: &mut js::context::JSContext) {
1036        assert!(!self.has_loaded_document);
1037        self.has_loaded_document = true;
1038        let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
1039            return;
1040        };
1041        // Step 1. Let type be the computed type of navigationParams's response.
1042        let content_type = &self.navigation_params.content_type;
1043        let mime_type = MimeClassifier::default().classify(
1044            LoadContext::Browsing,
1045            NoSniffFlag::Off,
1046            ApacheBugFlag::from_content_type(content_type.as_ref()),
1047            content_type,
1048            &self.navigation_params.resource_header,
1049        );
1050        // Step 2. If the user agent has been configured to process resources of the given type using
1051        // some mechanism other than rendering the content in a navigable, then skip this step.
1052        // Otherwise, if the type is one of the following types:
1053        let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
1054            let page = format!(
1055                "<html><body><p>Unknown content type ({}).</p></body></html>",
1056                &mime_type,
1057            );
1058            self.load_inline_unknown_content(parser, page, cx);
1059            return;
1060        };
1061        match media_type {
1062            // Return the result of loading an HTML document, given navigationParams.
1063            MediaType::Html => self.load_html_document(parser),
1064            // Return the result of loading an XML document given navigationParams and type.
1065            MediaType::Xml => self.load_xml_document(parser),
1066            // Return the result of loading a text document given navigationParams and type.
1067            MediaType::JavaScript | MediaType::Text | MediaType::Css => {
1068                self.load_text_document(parser, cx)
1069            },
1070            // Return the result of loading a json document given navigationParams and type.
1071            MediaType::Json => self.load_json_document(parser, cx),
1072            // Return the result of loading a media document given navigationParams and type.
1073            MediaType::Image | MediaType::AudioVideo => {
1074                self.load_media_document(parser, media_type, &mime_type, cx);
1075                return;
1076            },
1077            MediaType::Font => {
1078                let page = format!(
1079                    "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
1080                    &mime_type,
1081                );
1082                self.load_inline_unknown_content(parser, page, cx);
1083                return;
1084            },
1085        };
1086
1087        parser.parse_bytes_chunk(
1088            std::mem::take(&mut self.navigation_params.resource_header),
1089            cx,
1090        );
1091    }
1092
1093    /// <https://html.spec.whatwg.org/multipage/#navigate-html>
1094    fn load_html_document(&mut self, parser: &ServoParser) {
1095        // Step 1. Let document be the result of creating and initializing a
1096        // Document object given "html", "text/html", and navigationParams.
1097        self.initialize_document_object(&parser.document);
1098        // The first task that the networking task source places on the task queue while fetching
1099        // runs must process link headers given document, navigationParams's response, and "media",
1100        // after the task has been processed by the HTML parser.
1101        self.process_link_headers_in_media_phase_with_task(&parser.document);
1102    }
1103
1104    /// <https://html.spec.whatwg.org/multipage/#read-xml>
1105    fn load_xml_document(&mut self, parser: &ServoParser) {
1106        // When faced with displaying an XML file inline, provided navigation params navigationParams
1107        // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
1108        // XML Media Types, DOM, and other relevant specifications to create and initialize a
1109        // Document object document, given "xml", type, and navigationParams, and return that Document.
1110        // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
1111        self.initialize_document_object(&parser.document);
1112        // The first task that the networking task source places on the task queue while fetching
1113        // runs must process link headers given document, navigationParams's response, and "media",
1114        // after the task has been processed by the XML parser.
1115        self.process_link_headers_in_media_phase_with_task(&parser.document);
1116    }
1117
1118    /// <https://html.spec.whatwg.org/multipage/#navigate-text>
1119    fn load_text_document(&mut self, parser: &ServoParser, cx: &mut js::context::JSContext) {
1120        // Step 1. Let document be the result of creating and initializing a Document
1121        // object given "html", type, and navigationParams.
1122        self.initialize_document_object(&parser.document);
1123        // Step 4. Create an HTML parser and associate it with the document.
1124        // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by
1125        // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state.
1126        // Each task that the networking task source places on the task queue while fetching runs must then
1127        // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform
1128        // the appropriate processing of the input stream.
1129        let page = "<pre>\n".into();
1130        parser.push_string_input_chunk(page);
1131        parser.parse_sync(cx);
1132        parser.tokenizer.set_plaintext_state();
1133        // The first task that the networking task source places on the task queue while fetching
1134        // runs must process link headers given document, navigationParams's response, and "media",
1135        // after the task has been processed by the HTML parser.
1136        self.process_link_headers_in_media_phase_with_task(&parser.document);
1137    }
1138
1139    /// <https://html.spec.whatwg.org/multipage/#navigate-media>
1140    fn load_media_document(
1141        &mut self,
1142        parser: &ServoParser,
1143        media_type: MediaType,
1144        mime_type: &Mime,
1145        cx: &mut js::context::JSContext,
1146    ) {
1147        // Step 1. Let document be the result of creating and initializing a Document
1148        // object given "html", type, and navigationParams.
1149        self.initialize_document_object(&parser.document);
1150        // Step 8. Act as if the user agent had stopped parsing document.
1151        self.is_synthesized_document = true;
1152        parser.last_chunk_received.set(true);
1153        // Step 3. Populate with html/head/body given document.
1154        let page = "<html><body></body></html>".into();
1155        parser.push_string_input_chunk(page);
1156        parser.parse_sync(cx);
1157
1158        let doc = &parser.document;
1159        // Step 5. Set the appropriate attribute of the element host element, as described below,
1160        // to the address of the image, video, or audio resource.
1161        let node = if media_type == MediaType::Image {
1162            let img = Element::create(
1163                cx,
1164                QualName::new(None, ns!(html), local_name!("img")),
1165                None,
1166                doc,
1167                ElementCreator::ParserCreated(1),
1168                CustomElementCreationMode::Asynchronous,
1169                None,
1170            );
1171            let img = DomRoot::downcast::<HTMLImageElement>(img).unwrap();
1172            img.SetSrc(USVString(self.url.to_string()));
1173            DomRoot::upcast::<Node>(img)
1174        } else if mime_type.type_() == mime::AUDIO {
1175            let audio = Element::create(
1176                cx,
1177                QualName::new(None, ns!(html), local_name!("audio")),
1178                None,
1179                doc,
1180                ElementCreator::ParserCreated(1),
1181                CustomElementCreationMode::Asynchronous,
1182                None,
1183            );
1184            let audio = DomRoot::downcast::<HTMLMediaElement>(audio).unwrap();
1185            audio.SetControls(true);
1186            audio.SetSrc(USVString(self.url.to_string()));
1187            DomRoot::upcast::<Node>(audio)
1188        } else {
1189            let video = Element::create(
1190                cx,
1191                QualName::new(None, ns!(html), local_name!("video")),
1192                None,
1193                doc,
1194                ElementCreator::ParserCreated(1),
1195                CustomElementCreationMode::Asynchronous,
1196                None,
1197            );
1198            let video = DomRoot::downcast::<HTMLMediaElement>(video).unwrap();
1199            video.SetControls(true);
1200            video.SetSrc(USVString(self.url.to_string()));
1201            DomRoot::upcast::<Node>(video)
1202        };
1203        // Step 4. Append an element host element for the media, as described below, to the body element.
1204        let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
1205        doc_body.AppendChild(cx, &node).expect("Appending failed");
1206        // Step 7. Process link headers given document, navigationParams's response, and "media".
1207        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1208        process_link_headers(&link_headers, doc, LinkProcessingPhase::Media);
1209    }
1210
1211    /// Load a JSON document with a pretty-printing, interactive viewer.
1212    fn load_json_document(&mut self, parser: &ServoParser, cx: &mut js::context::JSContext) {
1213        self.initialize_document_object(&parser.document);
1214        parser.push_string_input_chunk(resources::read_string(Resource::JsonViewerHTML));
1215        parser.parse_sync(cx);
1216        parser.tokenizer.set_plaintext_state();
1217        self.process_link_headers_in_media_phase_with_task(&parser.document);
1218    }
1219
    /// Replaces the document content with the generated markup in `page`.
    ///
    /// Marks this context as a synthesized document, marks the parser's
    /// document as internal, and parses `page` as the final chunk of input.
    ///
    /// <https://html.spec.whatwg.org/multipage/#navigate-ua-inline>
    fn load_inline_unknown_content(
        &mut self,
        parser: &ServoParser,
        page: String,
        cx: &mut js::context::JSContext,
    ) {
        // Later response chunks are ignored for synthesized documents.
        self.is_synthesized_document = true;
        parser.document.mark_as_internal();
        parser.push_string_input_chunk(page);
        // Step 7. Act as if the user agent had stopped parsing document.
        parser.last_chunk_received.set(true);
        parser.parse_sync(cx);
    }
1234
1235    /// Store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1236    fn submit_resource_timing(&mut self) {
1237        let Some(parser) = self.parser.as_ref() else {
1238            return;
1239        };
1240        let parser = parser.root();
1241        if parser.aborted.get() {
1242            return;
1243        }
1244
1245        let document = &parser.document;
1246
1247        // TODO: Pass a proper fetch start time here.
1248        let performance_entry = PerformanceNavigationTiming::new(
1249            &document.global(),
1250            CrossProcessInstant::now(),
1251            document,
1252            CanGc::note(),
1253        );
1254        self.pushed_entry_index = document
1255            .global()
1256            .performance()
1257            .queue_entry(performance_entry.upcast::<PerformanceEntry>());
1258    }
1259}
1260
1261impl FetchResponseListener for ParserContext {
    /// Intentionally a no-op: this listener does nothing with request-body progress.
    fn process_request_body(&mut self, _: RequestId) {}
1263
1264    /// Implements parts of
1265    /// <https://html.spec.whatwg.org/multipage/#attempt-to-populate-the-history-entry's-document>
1266    fn process_response(
1267        &mut self,
1268        cx: &mut js::context::JSContext,
1269        _: RequestId,
1270        meta_result: Result<FetchMetadata, NetworkError>,
1271    ) {
1272        let (metadata, mut error) = match meta_result {
1273            Ok(meta) => (
1274                Some(match meta {
1275                    FetchMetadata::Unfiltered(m) => m,
1276                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
1277                }),
1278                None,
1279            ),
1280            Err(error) => (
1281                // Check variant without moving
1282                match &error {
1283                    NetworkError::LoadCancelled => {
1284                        return;
1285                    },
1286                    _ => {
1287                        let mut meta = Metadata::default(self.url.clone());
1288                        let mime: Option<Mime> = "text/html".parse().ok();
1289                        meta.set_content_type(mime.as_ref());
1290                        Some(meta)
1291                    },
1292                },
1293                Some(error),
1294            ),
1295        };
1296        let content_type: Option<Mime> = metadata
1297            .clone()
1298            .and_then(|meta| meta.content_type)
1299            .map(Serde::into_inner)
1300            .map(Into::into);
1301
1302        // <https://html.spec.whatwg.org/multipage/#create-navigation-params-by-fetching>
1303        // Step 21.9. Set responsePolicyContainer to the result of creating a
1304        // policy container from a fetch response given response and request's
1305        // reserved client.
1306        let (policy_container, endpoints_list, link_headers) = match metadata.as_ref() {
1307            None => (PolicyContainer::default(), None, vec![]),
1308            Some(metadata) => (
1309                Self::create_policy_container_from_fetch_response(metadata),
1310                ReportingEndpoint::parse_reporting_endpoints_header(
1311                    &self.url.clone(),
1312                    &metadata.headers,
1313                ),
1314                extract_links_from_headers(&metadata.headers),
1315            ),
1316        };
1317
1318        // Step 21.10. Set finalSandboxFlags to the union of targetSnapshotParams's
1319        // sandboxing flags and responsePolicyContainer's CSP list's CSP-derived
1320        // sandboxing flags.
1321        let final_sandboxing_flag_set = policy_container
1322            .csp_list
1323            .as_ref()
1324            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
1325            .unwrap_or(SandboxingFlagSet::empty())
1326            .union(self.target_snapshot_params.sandboxing_flags);
1327
1328        // Step 21.11. Set responseOrigin to the result of determining the origin
1329        // given response's URL, finalSandboxFlags, and entry's document state's
1330        // initiator origin.
1331        let source_origin = match self.load_origin {
1332            LoadOrigin::Script(ref snapshot) => {
1333                Some(MutableOrigin::from_snapshot(snapshot.clone()))
1334            },
1335            _ => None,
1336        };
1337        let origin = determine_the_origin(
1338            metadata.as_ref().map(|metadata| &metadata.final_url),
1339            final_sandboxing_flag_set,
1340            source_origin,
1341        );
1342
1343        let parser = match ScriptThread::page_headers_available(
1344            self.webview_id,
1345            self.pipeline_id,
1346            metadata.as_ref(),
1347            origin.clone(),
1348            cx,
1349        ) {
1350            Some(parser) => parser,
1351            None => return,
1352        };
1353        if parser.aborted.get() {
1354            return;
1355        }
1356
1357        let mut realm = enter_auto_realm(cx, &*parser.document);
1358        let cx = &mut realm;
1359        let document = &parser.document;
1360        let window = document.window();
1361
1362        // https://html.spec.whatwg.org/multipage/#attempt-to-populate-the-history-entry%27s-document
1363        // Step 4. Otherwise, if any of the following are true:
1364        if
1365        // navigationParams is null;
1366        // TODO
1367        // the result of should navigation response to navigation request of
1368        // type in target be blocked by Content Security Policy? given
1369        // navigationParams's request, navigationParams's response, navigationParams's policy container's CSP list,
1370        // cspNavigationType, and navigable is "Blocked";
1371        policy_container.csp_list.should_navigation_response_to_navigation_request_be_blocked(
1372            window,
1373            self.url.clone().into_url(),
1374            &origin.immutable().clone().into_url_origin(),
1375        )
1376        // navigationParams's reserved environment is non-null and the result of
1377        // checking a navigation response's adherence to its embedder policy given navigationParams's response,
1378        // navigable, and navigationParams's policy container's embedder policy is false; or
1379        // TODO
1380        // the result of checking a navigation response's adherence to `X-Frame-Options`
1381        // given navigationParams's response, navigable, navigationParams's policy container's CSP list,
1382        // and navigationParams's origin is false,
1383        || !check_a_navigation_response_adherence_to_x_frame_options(
1384            window,
1385            policy_container.csp_list.as_ref(),
1386            &origin,
1387            metadata
1388                .as_ref()
1389                .and_then(|metadata| metadata.headers.as_ref()),
1390        ) {
1391            // Step 4.1. Set entry's document state's document to the result of creating a document for inline content
1392            // that doesn't have a DOM, given navigable, null, navTimingType, and userInvolvement.
1393            // The inline content should indicate to the user the sort of error that occurred.
1394            error = Some(NetworkError::ContentSecurityPolicy);
1395            // Step 4.2. Make document unsalvageable given entry's document state's document and "navigation-failure".
1396            document.make_document_unsalvageable();
1397            // Step 4.3. Set saveExtraDocumentState to false.
1398            // TODO
1399            // Step 4.4. If navigationParams is not null, then:
1400            // TODO
1401        }
1402
1403        if let Some(endpoints) = endpoints_list {
1404            window.set_endpoints_list(endpoints);
1405        }
1406        self.parser = Some(Trusted::new(&*parser));
1407        self.navigation_params = NavigationParams {
1408            policy_container,
1409            content_type,
1410            final_sandboxing_flag_set,
1411            link_headers,
1412            about_base_url: document.about_base_url(),
1413            resource_header: vec![],
1414        };
1415        self.submit_resource_timing();
1416
1417        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
1418        //
1419        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
1420        // e.g., a native rendering of the content or an error message because the specified type is not supported,
1421        // then return the result of creating a document for inline content that doesn't have a DOM given
1422        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
1423        // and navigationParams's user involvement.
1424        if let Some(error) = error {
1425            let page = match error {
1426                NetworkError::SslValidation(reason, bytes) => {
1427                    let page = resources::read_string(Resource::BadCertHTML);
1428                    let page = page.replace("${reason}", &reason);
1429                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
1430                    let page = page.replace("${bytes}", encoded_bytes.as_str());
1431                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
1432                },
1433                NetworkError::BlobURLStoreError(reason) |
1434                NetworkError::WebsocketConnectionFailure(reason) |
1435                NetworkError::HttpError(reason) |
1436                NetworkError::ResourceLoadError(reason) |
1437                NetworkError::MimeType(reason) => {
1438                    let page = resources::read_string(Resource::NetErrorHTML);
1439                    page.replace("${reason}", &reason)
1440                },
1441                NetworkError::Crash(details) => {
1442                    let page = resources::read_string(Resource::CrashHTML);
1443                    page.replace("${details}", &details)
1444                },
1445                NetworkError::UnsupportedScheme |
1446                NetworkError::CorsGeneral |
1447                NetworkError::CrossOriginResponse |
1448                NetworkError::CorsCredentials |
1449                NetworkError::CorsAllowMethods |
1450                NetworkError::CorsAllowHeaders |
1451                NetworkError::CorsMethod |
1452                NetworkError::CorsAuthorization |
1453                NetworkError::CorsHeaders |
1454                NetworkError::ConnectionFailure |
1455                NetworkError::RedirectError |
1456                NetworkError::TooManyRedirects |
1457                NetworkError::TooManyInFlightKeepAliveRequests |
1458                NetworkError::InvalidMethod |
1459                NetworkError::ContentSecurityPolicy |
1460                NetworkError::Nosniff |
1461                NetworkError::SubresourceIntegrity |
1462                NetworkError::MixedContent |
1463                NetworkError::CacheError |
1464                NetworkError::InvalidPort |
1465                NetworkError::LocalDirectoryError |
1466                NetworkError::PartialResponseToNonRangeRequestError |
1467                NetworkError::ProtocolHandlerSubstitutionError |
1468                NetworkError::DecompressionError => {
1469                    let page = resources::read_string(Resource::NetErrorHTML);
1470                    page.replace("${reason}", &format!("{:?}", error))
1471                },
1472                NetworkError::LoadCancelled => {
1473                    // The next load will show a page
1474                    return;
1475                },
1476            };
1477            self.load_inline_unknown_content(&parser, page, cx);
1478        }
1479    }
1480
1481    fn process_response_chunk(
1482        &mut self,
1483        cx: &mut js::context::JSContext,
1484        _: RequestId,
1485        payload: Vec<u8>,
1486    ) {
1487        if self.is_synthesized_document {
1488            return;
1489        }
1490        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
1491            return;
1492        };
1493        if parser.aborted.get() {
1494            return;
1495        }
1496        if !self.has_loaded_document {
1497            // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1498            self.navigation_params
1499                .resource_header
1500                .extend_from_slice(&payload);
1501            // the number of bytes in buffer is greater than or equal to 1445.
1502            if self.navigation_params.resource_header.len() >= 1445 {
1503                self.load_document(cx);
1504            }
1505        } else {
1506            parser.parse_bytes_chunk(payload, cx);
1507        }
1508    }
1509
1510    // This method is called via script_thread::handle_fetch_eof, so we must call
1511    // submit_resource_timing in this function
1512    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1513    fn process_response_eof(
1514        mut self,
1515        cx: &mut js::context::JSContext,
1516        _: RequestId,
1517        status: Result<(), NetworkError>,
1518        timing: ResourceFetchTiming,
1519    ) {
1520        let parser = match self.parser.as_ref() {
1521            Some(parser) => parser.root(),
1522            None => return,
1523        };
1524        if parser.aborted.get() || self.is_synthesized_document {
1525            return;
1526        }
1527
1528        if let Err(error) = &status {
1529            // TODO(Savago): we should send a notification to callers #5463.
1530            debug!("Failed to load page URL {}, error: {error:?}", self.url);
1531        }
1532
1533        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1534        //
1535        // the end of the resource is reached.
1536        if !self.has_loaded_document {
1537            self.load_document(cx);
1538        }
1539
1540        let mut realm = enter_auto_realm(cx, &*parser);
1541        let cx = &mut realm;
1542
1543        if status.is_ok() {
1544            parser.document.set_redirect_count(timing.redirect_count);
1545        }
1546
1547        parser.last_chunk_received.set(true);
1548        if !parser.suspended.get() {
1549            parser.parse_sync(cx);
1550        }
1551
1552        // TODO: Only update if this is the current document resource.
1553        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
1554        if let Some(pushed_index) = self.pushed_entry_index {
1555            let document = &parser.document;
1556            let performance_entry = PerformanceNavigationTiming::new(
1557                &document.global(),
1558                CrossProcessInstant::now(),
1559                document,
1560                CanGc::from_cx(cx),
1561            );
1562            document
1563                .global()
1564                .performance()
1565                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1566        }
1567    }
1568
    /// This listener is never expected to receive CSP violations: per the panic
    /// message, the script thread handles reporting them for parser contexts, so
    /// reaching this method panics.
    fn process_csp_violations(&mut self, _: RequestId, _: Vec<Violation>) {
        unreachable!("Script_thread should handle reporting violations for parser contexts");
    }
1572}
1573
/// Borrowed inputs describing the context in which a fragment is parsed.
///
/// NOTE(review): the fields are not used in this part of the file; the descriptions
/// below are inferred from their names and should be confirmed against the users of
/// this struct.
pub(crate) struct FragmentContext<'a> {
    /// Presumably the context element of the fragment parsing algorithm.
    pub(crate) context_elem: &'a Node,
    /// Presumably the form element to associate parsed controls with, if any.
    pub(crate) form_elem: Option<&'a Node>,
    /// Presumably whether scripting is enabled for the context element.
    pub(crate) context_element_allows_scripting: bool,
}
1579
1580#[cfg_attr(crown, expect(crown::unrooted_must_root))]
1581fn insert(
1582    cx: &mut js::context::JSContext,
1583    parent: &Node,
1584    reference_child: Option<&Node>,
1585    child: NodeOrText<Dom<Node>>,
1586    parsing_algorithm: ParsingAlgorithm,
1587    custom_element_reaction_stack: &CustomElementReactionStack,
1588) {
1589    match child {
1590        NodeOrText::AppendNode(n) => {
1591            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1592            // applies if this is an element; if not, it may be
1593            // https://html.spec.whatwg.org/multipage/#insert-a-comment
1594            let element_in_non_fragment =
1595                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
1596            if element_in_non_fragment {
1597                custom_element_reaction_stack.push_new_element_queue();
1598            }
1599            parent.InsertBefore(cx, &n, reference_child).unwrap();
1600            if element_in_non_fragment {
1601                custom_element_reaction_stack.pop_current_element_queue(cx);
1602            }
1603        },
1604        NodeOrText::AppendText(t) => {
1605            // https://html.spec.whatwg.org/multipage/#insert-a-character
1606            let text = reference_child
1607                .and_then(Node::GetPreviousSibling)
1608                .or_else(|| parent.GetLastChild())
1609                .and_then(DomRoot::downcast::<Text>);
1610
1611            if let Some(text) = text {
1612                text.upcast::<CharacterData>().append_data(&t);
1613            } else {
1614                let text = Text::new(cx, String::from(t).into(), &parent.owner_doc());
1615                parent
1616                    .InsertBefore(cx, text.upcast(), reference_child)
1617                    .unwrap();
1618            }
1619        },
1620    }
1621}
1622
/// The `TreeSink` implementation that builds DOM nodes for the HTML parser.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// NOTE(review): not read in this part of the file; presumably the base URL of
    /// the document being parsed — confirm against the constructor.
    #[no_trace]
    base_url: ServoUrl,
    /// The document that created nodes belong to and that `get_document` exposes as
    /// the root handle.
    document: Dom<Document>,
    /// Line number last reported through `set_current_line`; recorded on elements
    /// created by the parser.
    current_line: Cell<u64>,
    /// NOTE(review): not touched in this part of the file; presumably a script
    /// element the parser has to hand back to its caller — confirm.
    script: MutNullableDom<HTMLScriptElement>,
    /// Which parsing algorithm is running; fragment parsing skips the custom element
    /// reaction queue handling during insertion.
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack used when creating and inserting elements.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
1635
1636impl Sink {
1637    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1638        let x = x.downcast::<Element>().expect("Element node expected");
1639        let y = y.downcast::<Element>().expect("Element node expected");
1640
1641        x.is_in_same_home_subtree(y)
1642    }
1643
1644    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1645        node.GetParentNode().is_some()
1646    }
1647}
1648
1649impl TreeSink for Sink {
1650    type Output = Self;
1651    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1652    fn finish(self) -> Self {
1653        self
1654    }
1655
1656    type Handle = Dom<Node>;
1657    type ElemName<'a>
1658        = ExpandedName<'a>
1659    where
1660        Self: 'a;
1661
1662    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1663    fn get_document(&self) -> Dom<Node> {
1664        Dom::from_ref(self.document.upcast())
1665    }
1666
1667    #[expect(unsafe_code)]
1668    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1669    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1670        // TODO: https://github.com/servo/servo/issues/42839
1671        let mut cx = unsafe { temp_cx() };
1672        let cx = &mut cx;
1673        let template = target
1674            .downcast::<HTMLTemplateElement>()
1675            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1676        Dom::from_ref(template.Content(cx).upcast())
1677    }
1678
1679    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1680        x == y
1681    }
1682
1683    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1684        let elem = target
1685            .downcast::<Element>()
1686            .expect("tried to get name of non-Element in HTML parsing");
1687        ExpandedName {
1688            ns: elem.namespace(),
1689            local: elem.local_name(),
1690        }
1691    }
1692
1693    #[expect(unsafe_code)]
1694    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1695    fn create_element(
1696        &self,
1697        name: QualName,
1698        attrs: Vec<Attribute>,
1699        flags: ElementFlags,
1700    ) -> Dom<Node> {
1701        // TODO: https://github.com/servo/servo/issues/42839
1702        let mut cx = unsafe { temp_cx() };
1703        let cx = &mut cx;
1704        let attrs = attrs
1705            .into_iter()
1706            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1707            .collect();
1708        let parsing_algorithm = if flags.template {
1709            ParsingAlgorithm::Fragment
1710        } else {
1711            self.parsing_algorithm
1712        };
1713        let element = create_element_for_token(
1714            name,
1715            attrs,
1716            &self.document,
1717            ElementCreator::ParserCreated(self.current_line.get()),
1718            parsing_algorithm,
1719            &self.custom_element_reaction_stack,
1720            flags.had_duplicate_attributes,
1721            cx,
1722        );
1723        Dom::from_ref(element.upcast())
1724    }
1725
1726    #[expect(unsafe_code)]
1727    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1728    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1729        // TODO: https://github.com/servo/servo/issues/42839
1730        let mut cx = unsafe { temp_cx() };
1731        let cx = &mut cx;
1732        let comment = Comment::new(
1733            cx,
1734            DOMString::from(String::from(text)),
1735            &self.document,
1736            None,
1737        );
1738        Dom::from_ref(comment.upcast())
1739    }
1740
1741    #[expect(unsafe_code)]
1742    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1743    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1744        // TODO: https://github.com/servo/servo/issues/42839
1745        let mut cx = unsafe { temp_cx() };
1746        let cx = &mut cx;
1747        let doc = &*self.document;
1748        let pi = ProcessingInstruction::new(
1749            cx,
1750            DOMString::from(String::from(target)),
1751            DOMString::from(String::from(data)),
1752            doc,
1753        );
1754        Dom::from_ref(pi.upcast())
1755    }
1756
1757    fn associate_with_form(
1758        &self,
1759        target: &Dom<Node>,
1760        form: &Dom<Node>,
1761        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1762    ) {
1763        let (element, prev_element) = nodes;
1764        let tree_node = prev_element.map_or(element, |prev| {
1765            if self.has_parent_node(element) {
1766                element
1767            } else {
1768                prev
1769            }
1770        });
1771        if !self.same_tree(tree_node, form) {
1772            return;
1773        }
1774
1775        let node = target;
1776        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1777            .expect("Owner must be a form element");
1778
1779        let elem = node.downcast::<Element>();
1780        let control = elem.and_then(|e| e.as_maybe_form_control());
1781
1782        if let Some(control) = control {
1783            control.set_form_owner_from_parser(&form, CanGc::note());
1784        }
1785    }
1786
1787    #[expect(unsafe_code)]
1788    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1789    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
1790        // TODO: https://github.com/servo/servo/issues/42839
1791        let mut cx = unsafe { temp_cx() };
1792        let cx = &mut cx;
1793
1794        let parent = sibling
1795            .GetParentNode()
1796            .expect("append_before_sibling called on node without parent");
1797
1798        insert(
1799            cx,
1800            &parent,
1801            Some(sibling),
1802            new_node,
1803            self.parsing_algorithm,
1804            &self.custom_element_reaction_stack,
1805        );
1806    }
1807
1808    fn parse_error(&self, msg: Cow<'static, str>) {
1809        debug!("Parse error: {}", msg);
1810    }
1811
1812    fn set_quirks_mode(&self, mode: QuirksMode) {
1813        let mode = match mode {
1814            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1815            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1816            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1817        };
1818        self.document.set_quirks_mode(mode);
1819    }
1820
1821    #[expect(unsafe_code)]
1822    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1823    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
1824        // TODO: https://github.com/servo/servo/issues/42839
1825        let mut cx = unsafe { temp_cx() };
1826        let cx = &mut cx;
1827
1828        insert(
1829            cx,
1830            parent,
1831            None,
1832            child,
1833            self.parsing_algorithm,
1834            &self.custom_element_reaction_stack,
1835        );
1836    }
1837
1838    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1839    fn append_based_on_parent_node(
1840        &self,
1841        elem: &Dom<Node>,
1842        prev_elem: &Dom<Node>,
1843        child: NodeOrText<Dom<Node>>,
1844    ) {
1845        if self.has_parent_node(elem) {
1846            self.append_before_sibling(elem, child);
1847        } else {
1848            self.append(prev_elem, child);
1849        }
1850    }
1851
1852    #[expect(unsafe_code)]
1853    fn append_doctype_to_document(
1854        &self,
1855        name: StrTendril,
1856        public_id: StrTendril,
1857        system_id: StrTendril,
1858    ) {
1859        // TODO: https://github.com/servo/servo/issues/42839
1860        let mut cx = unsafe { temp_cx() };
1861        let cx = &mut cx;
1862
1863        let doc = &*self.document;
1864        let doctype = DocumentType::new(
1865            cx,
1866            DOMString::from(String::from(name)),
1867            Some(DOMString::from(String::from(public_id))),
1868            Some(DOMString::from(String::from(system_id))),
1869            doc,
1870        );
1871        doc.upcast::<Node>()
1872            .AppendChild(cx, doctype.upcast())
1873            .expect("Appending failed");
1874    }
1875
1876    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1877        let elem = target
1878            .downcast::<Element>()
1879            .expect("tried to set attrs on non-Element in HTML parsing");
1880        for attr in attrs {
1881            elem.set_attribute_from_parser(
1882                attr.name,
1883                DOMString::from(String::from(attr.value)),
1884                None,
1885                CanGc::note(),
1886            );
1887        }
1888    }
1889
1890    #[expect(unsafe_code)]
1891    fn remove_from_parent(&self, target: &Dom<Node>) {
1892        // TODO: https://github.com/servo/servo/issues/42839
1893        let mut cx = unsafe { temp_cx() };
1894        let cx = &mut cx;
1895
1896        if let Some(ref parent) = target.GetParentNode() {
1897            parent.RemoveChild(cx, target).unwrap();
1898        }
1899    }
1900
1901    fn mark_script_already_started(&self, node: &Dom<Node>) {
1902        let script = node.downcast::<HTMLScriptElement>();
1903        if let Some(script) = script {
1904            script.set_already_started(true)
1905        }
1906    }
1907
1908    #[expect(unsafe_code)]
1909    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1910        // TODO: https://github.com/servo/servo/issues/42839
1911        let mut cx = unsafe { temp_cx() };
1912        let cx = &mut cx;
1913
1914        while let Some(ref child) = node.GetFirstChild() {
1915            new_parent.AppendChild(cx, child).unwrap();
1916        }
1917    }
1918
1919    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1920    /// Specifically, the `<annotation-xml>` cases.
1921    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1922        let elem = handle.downcast::<Element>().unwrap();
1923        elem.get_attribute(&local_name!("encoding"))
1924            .is_some_and(|attr| {
1925                attr.value().eq_ignore_ascii_case("text/html") ||
1926                    attr.value().eq_ignore_ascii_case("application/xhtml+xml")
1927            })
1928    }
1929
1930    fn set_current_line(&self, line_number: u64) {
1931        self.current_line.set(line_number);
1932    }
1933
1934    fn pop(&self, node: &Dom<Node>) {
1935        let node = DomRoot::from_ref(&**node);
1936        vtable_for(&node).pop();
1937    }
1938
1939    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
1940        intended_parent.owner_doc().allow_declarative_shadow_roots()
1941    }
1942
1943    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
1944    /// A start tag whose tag name is "template"
1945    /// Attach shadow path
1946    #[expect(unsafe_code)]
1947    fn attach_declarative_shadow(
1948        &self,
1949        host: &Dom<Node>,
1950        template: &Dom<Node>,
1951        attributes: &[Attribute],
1952    ) -> bool {
1953        // TODO: https://github.com/servo/servo/issues/42839
1954        let mut cx = unsafe { temp_cx() };
1955        let cx = &mut cx;
1956
1957        attach_declarative_shadow_inner(cx, host, template, attributes)
1958    }
1959
1960    #[expect(unsafe_code)]
1961    fn maybe_clone_an_option_into_selectedcontent(&self, option: &Self::Handle) {
1962        // TODO: https://github.com/servo/servo/issues/42839
1963        let mut cx = unsafe { temp_cx() };
1964        let cx = &mut cx;
1965
1966        let Some(option) = option.downcast::<HTMLOptionElement>() else {
1967            if cfg!(debug_assertions) {
1968                unreachable!();
1969            }
1970            log::error!(
1971                "Received non-option element in maybe_clone_an_option_into_selectedcontent"
1972            );
1973            return;
1974        };
1975
1976        option.maybe_clone_an_option_into_selectedcontent(cx)
1977    }
1978}
1979
/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
///
/// Creates the element named `name` in `document`, sets `attrs` on it and returns it.
///
/// When a custom element definition is found for the name (taking the `is` attribute
/// into account) and `parsing_algorithm` is not the fragment algorithm, the element
/// is created synchronously: the creation is bracketed by the document's
/// throw-on-dynamic-markup-insertion counter and by a fresh element queue on
/// `custom_element_reaction_stack`. Otherwise it is created asynchronously.
/// `had_duplicate_attributes` is recorded on the element for later CSP nonce checks.
#[expect(clippy::too_many_arguments)]
fn create_element_for_token(
    name: QualName,
    attrs: Vec<ElementAttribute>,
    document: &Document,
    creator: ElementCreator,
    parsing_algorithm: ParsingAlgorithm,
    custom_element_reaction_stack: &CustomElementReactionStack,
    had_duplicate_attributes: bool,
    cx: &mut js::context::JSContext,
) -> DomRoot<Element> {
    // Step 1. If the active speculative HTML parser is not null, then return the result
    // of creating a speculative mock element given namespace, token's tag name, and
    // token's attributes.
    // TODO: Implement

    // Step 2: Otherwise, optionally create a speculative mock element given namespace,
    // token's tag name, and token's attributes
    // TODO: Implement.

    // Step 3. Let document be intendedParent's node document.
    // Passed as argument.

    // Step 4. Let localName be token's tag name.
    // Passed as argument

    // Step 5. Let is be the value of the "is" attribute in token, if such an attribute
    // exists; otherwise null.
    let is = attrs
        .iter()
        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
        .map(|attr| LocalName::from(&attr.value));

    // Step 6. Let registry be the result of looking up a custom element registry given intendedParent.
    // TODO: Implement registries other than `Document`.

    // Step 7. Let definition be the result of looking up a custom element definition
    // given registry, namespace, localName, and is.
    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());

    // Step 8. Let willExecuteScript be true if definition is non-null and the parser was
    // not created as part of the HTML fragment parsing algorithm; otherwise false.
    let will_execute_script =
        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;

    // Step 9. If willExecuteScript is true:
    if will_execute_script {
        // Step 9.1. Increment document's throw-on-dynamic-markup-insertion counter.
        document.increment_throw_on_dynamic_markup_insertion_counter();
        // Step 9.2. If the JavaScript execution context stack is empty, then perform a
        // microtask checkpoint.
        if is_execution_stack_empty() {
            document.window().perform_a_microtask_checkpoint(cx);
        }
        // Step 9.3. Push a new element queue onto document's relevant agent's custom
        // element reactions stack.
        custom_element_reaction_stack.push_new_element_queue()
    }

    // Step 10. Let element be the result of creating an element given document,
    // localName, namespace, null, is, willExecuteScript, and registry.
    let creation_mode = if will_execute_script {
        CustomElementCreationMode::Synchronous
    } else {
        CustomElementCreationMode::Asynchronous
    };
    let element = Element::create(cx, name, is, document, creator, creation_mode, None);

    // Step 11. Append each attribute in the given token to element.
    for attr in attrs {
        element.set_attribute_from_parser(attr.name, attr.value, None, CanGc::from_cx(cx));
    }

    // Record if the tokenizer saw duplicate attributes on this element,
    // used for CSP nonce validation (step 3 of "is element nonceable").
    if had_duplicate_attributes {
        element.set_had_duplicate_attributes();
    }

    // Step 12. If willExecuteScript is true:
    if will_execute_script {
        // Step 12.1. Let queue be the result of popping from document's relevant agent's
        // custom element reactions stack. (This will be the same element queue as was
        // pushed above.)
        // Step 12.2 Invoke custom element reactions in queue.
        custom_element_reaction_stack.pop_current_element_queue(cx);
        // Step 12.3. Decrement document's throw-on-dynamic-markup-insertion counter.
        document.decrement_throw_on_dynamic_markup_insertion_counter();
    }

    // Step 13. If element has an xmlns attribute in the XMLNS namespace whose value is
    // not exactly the same as the element's namespace, that is a parse error. Similarly,
    // if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not
    // the XLink Namespace, that is a parse error.
    // TODO: Implement.

    // Step 14. If element is a resettable element and not a form-associated custom
    // element, then invoke its reset algorithm. (This initializes the element's value and
    // checkedness based on the element's attributes.)
    if let Some(html_element) = element.downcast::<HTMLElement>() {
        if element.is_resettable() && !html_element.is_form_associated_custom_element() {
            element.reset(CanGc::from_cx(cx));
        }
    }

    // Step 15. If element is a form-associated element and not a form-associated custom
    // element, the form element pointer is not null, there is no template element on the
    // stack of open elements, element is either not listed or doesn't have a form attribute,
    // and the intendedParent is in the same tree as the element pointed to by the form
    // element pointer, then associate element with the form element pointed to by the form
    // element pointer and set element's parser inserted flag.
    // TODO: Implement

    // Step 16. Return element.
    element
}
2097
2098fn attach_declarative_shadow_inner(
2099    cx: &mut js::context::JSContext,
2100    host: &Node,
2101    template: &Node,
2102    attributes: &[Attribute],
2103) -> bool {
2104    let host_element = host.downcast::<Element>().unwrap();
2105
2106    if host_element.shadow_root().is_some() {
2107        return false;
2108    }
2109
2110    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
2111
2112    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
2113    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
2114    // Step 5. Let delegatesfocus be true if template start tag
2115    // has a shadowrootdelegatesfocus attribute; otherwise false.
2116    // Step 6. Let serializable be true if template start tag
2117    // has a shadowrootserializable attribute; otherwise false.
2118    let mut shadow_root_mode = ShadowRootMode::Open;
2119    let mut clonable = false;
2120    let mut delegatesfocus = false;
2121    let mut serializable = false;
2122
2123    let attributes: Vec<ElementAttribute> = attributes
2124        .iter()
2125        .map(|attr| {
2126            ElementAttribute::new(
2127                attr.name.clone(),
2128                DOMString::from(String::from(attr.value.clone())),
2129            )
2130        })
2131        .collect();
2132
2133    attributes
2134        .iter()
2135        .for_each(|attr: &ElementAttribute| match attr.name.local {
2136            local_name!("shadowrootmode") => {
2137                if attr.value.str().eq_ignore_ascii_case("open") {
2138                    shadow_root_mode = ShadowRootMode::Open;
2139                } else if attr.value.str().eq_ignore_ascii_case("closed") {
2140                    shadow_root_mode = ShadowRootMode::Closed;
2141                } else {
2142                    unreachable!("shadowrootmode value is not open nor closed");
2143                }
2144            },
2145            local_name!("shadowrootclonable") => {
2146                clonable = true;
2147            },
2148            local_name!("shadowrootdelegatesfocus") => {
2149                delegatesfocus = true;
2150            },
2151            local_name!("shadowrootserializable") => {
2152                serializable = true;
2153            },
2154            _ => {},
2155        });
2156
2157    // Step 8.1. Attach a shadow root with declarative shadow host element,
2158    // mode, clonable, serializable, delegatesFocus, and "named".
2159    match host_element.attach_shadow(
2160        cx,
2161        IsUserAgentWidget::No,
2162        shadow_root_mode,
2163        clonable,
2164        serializable,
2165        delegatesfocus,
2166        SlotAssignmentMode::Named,
2167    ) {
2168        Ok(shadow_root) => {
2169            // Step 8.3. Set shadow's declarative to true.
2170            shadow_root.set_declarative(true);
2171
2172            // Set 8.4. Set template's template contents property to shadow.
2173            let shadow = shadow_root.upcast::<DocumentFragment>();
2174            template_element.set_contents(Some(shadow));
2175
2176            // Step 8.5. Set shadow’s available to element internals to true.
2177            shadow_root.set_available_to_element_internals(true);
2178
2179            true
2180        },
2181        Err(_) => false,
2182    }
2183}