Skip to main content

script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::{Cell, RefCell};
7use std::mem;
8use std::rc::Rc;
9
10use base64::Engine as _;
11use base64::engine::general_purpose;
12use content_security_policy::sandboxing_directive::SandboxingFlagSet;
13use devtools_traits::ScriptToDevtoolsControlMsg;
14use dom_struct::dom_struct;
15use embedder_traits::resources::{self, Resource};
16use encoding_rs::{Encoding, UTF_8};
17use html5ever::buffer_queue::BufferQueue;
18use html5ever::tendril::StrTendril;
19use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
20use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
21use hyper_serde::Serde;
22use js::context::JSContext;
23use markup5ever::TokenizerResult;
24use mime::{self, Mime};
25use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
26use net_traits::policy_container::PolicyContainer;
27use net_traits::request::RequestId;
28use net_traits::{
29    FetchMetadata, LoadContext, Metadata, NetworkError, ReferrerPolicy, ResourceFetchTiming,
30};
31use profile_traits::time::{
32    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
33};
34use profile_traits::time_profile;
35use script_bindings::cell::DomRefCell;
36use script_bindings::reflector::{Reflector, reflect_dom_object};
37use script_bindings::script_runtime::temp_cx;
38use script_traits::DocumentActivity;
39use servo_base::cross_process_instant::CrossProcessInstant;
40use servo_base::id::{PipelineId, WebViewId};
41use servo_config::pref;
42use servo_constellation_traits::{LoadOrigin, TargetSnapshotParams};
43use servo_url::{MutableOrigin, ServoUrl};
44use style::context::QuirksMode as ServoQuirksMode;
45use tendril::stream::LossyDecoder;
46use tendril::{ByteTendril, TendrilSink};
47
48use crate::document_loader::{DocumentLoader, LoadType};
49use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
50    DocumentMethods, DocumentReadyState,
51};
52use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
53use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
54use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
55use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
56use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
57    ShadowRootMode, SlotAssignmentMode,
58};
59use crate::dom::bindings::inheritance::Castable;
60use crate::dom::bindings::refcounted::Trusted;
61use crate::dom::bindings::reflector::DomGlobal;
62use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
63use crate::dom::bindings::settings_stack::is_execution_stack_empty;
64use crate::dom::bindings::str::{DOMString, USVString};
65use crate::dom::characterdata::CharacterData;
66use crate::dom::comment::Comment;
67use crate::dom::csp::{Violation, parse_csp_list_from_metadata};
68use crate::dom::customelementregistry::CustomElementReactionStack;
69use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
70use crate::dom::documentfragment::DocumentFragment;
71use crate::dom::documenttype::DocumentType;
72use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
73use crate::dom::globalscope::GlobalScope;
74use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
75use crate::dom::html::htmlimageelement::HTMLImageElement;
76use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
77use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
78use crate::dom::iterators::ShadowIncluding;
79use crate::dom::node::Node;
80use crate::dom::performance::performanceentry::PerformanceEntry;
81use crate::dom::performance::performancenavigationtiming::PerformanceNavigationTiming;
82use crate::dom::processinginstruction::ProcessingInstruction;
83use crate::dom::processingoptions::{
84    LinkHeader, LinkProcessingPhase, extract_links_from_headers, process_link_headers,
85};
86use crate::dom::reporting::reportingendpoint::ReportingEndpoint;
87use crate::dom::security::csp::CspReporting;
88use crate::dom::security::xframeoptions::check_a_navigation_response_adherence_to_x_frame_options;
89use crate::dom::shadowroot::IsUserAgentWidget;
90use crate::dom::text::Text;
91use crate::dom::types::{HTMLElement, HTMLMediaElement, HTMLOptionElement};
92use crate::dom::virtualmethods::vtable_for;
93use crate::navigation::determine_the_origin;
94use crate::network_listener::FetchResponseListener;
95use crate::realms::{enter_auto_realm, enter_realm};
96use crate::script_runtime::{CanGc, IntroductionType};
97use crate::script_thread::ScriptThread;
98
99mod async_html;
100pub(crate) mod encoding;
101pub(crate) mod html;
102mod prefetch;
103mod xml;
104
105use encoding::{NetworkDecoderState, NetworkSink};
106pub(crate) use html::serialize_html_fragment;
107
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The decoder used for the network input.
    network_decoder: DomRefCell<NetworkDecoderState>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    ///
    /// Also set once a complete string chunk has been pushed, and by `close()`.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    ///
    /// Set when the document has a pending parsing-blocking script after a script was
    /// prepared, and cleared again when parsing resumes with that script.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#stop-parsing>
    stopped: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// A decoder exclusively for input to the prefetch tokenizer.
    ///
    /// Unlike the actual decoder, this one takes a best guess at the encoding and starts
    /// decoding immediately.
    #[no_trace]
    prefetch_decoder: RefCell<LossyDecoder<NetworkSink>>,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    // Decoded input queued for the prefetch tokenizer; it is fed to the prefetch
    // tokenizer right after every push.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    // The whole input as a string, if needed for the devtools Sources panel.
    // `None` unless a devtools channel exists and the document has a browsing context.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
166
/// An attribute expressed as a qualified name paired with its value.
pub(crate) struct ElementAttribute {
    // Qualified name of the attribute.
    name: QualName,
    // Value of the attribute.
    value: DOMString,
}
171
/// Which parsing algorithm an HTML tokenizer is created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    // Used when parsing a whole document.
    Normal,
    // Used when parsing an HTML fragment against a context element.
    Fragment,
}
177
178impl ElementAttribute {
179    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
180        ElementAttribute { name, value }
181    }
182}
183
184impl ServoParser {
    /// Returns exactly what [`ServoParser::can_write`] returns: `true` when this parser is
    /// script-created or its script nesting level is greater than zero.
    // NOTE(review): the method name and the delegated predicate read with opposite
    // polarity; confirm the intended meaning against the callers.
    pub(crate) fn parser_is_not_active(&self) -> bool {
        self.can_write()
    }
188
189    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
190    pub(crate) fn parse_html_document(
191        cx: &mut JSContext,
192        document: &Document,
193        input: Option<DOMString>,
194        url: ServoUrl,
195        encoding_hint_from_content_type: Option<&'static Encoding>,
196        encoding_of_container_document: Option<&'static Encoding>,
197    ) {
198        // Step 1. Set document's type to "html".
199        //
200        // Set by callers of this function and asserted here
201        assert!(document.is_html_document());
202
203        // Step 2. Create an HTML parser parser, associated with document.
204        let parser = ServoParser::new(
205            document,
206            if pref!(dom_servoparser_async_html_tokenizer_enabled) {
207                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None))
208            } else {
209                Tokenizer::Html(self::html::Tokenizer::new(
210                    document,
211                    url,
212                    None,
213                    ParsingAlgorithm::Normal,
214                ))
215            },
216            ParserKind::Normal,
217            encoding_hint_from_content_type,
218            encoding_of_container_document,
219            CanGc::from_cx(cx),
220        );
221
222        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
223        // Step 4. Start parser and let it run until it has consumed all the
224        // characters just inserted into the input stream.
225        //
226        // Set as the document's current parser and initialize with `input`, if given.
227        if let Some(input) = input {
228            parser.parse_complete_string_chunk(cx, String::from(input));
229        } else {
230            parser.document.set_current_parser(Some(&parser));
231        }
232    }
233
234    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
235    pub(crate) fn parse_html_fragment<'el>(
236        cx: &mut JSContext,
237        context: &'el Element,
238        input: DOMString,
239        allow_declarative_shadow_roots: bool,
240    ) -> impl Iterator<Item = DomRoot<Node>> + use<'el> {
241        let context_node = context.upcast::<Node>();
242        let context_document = context_node.owner_doc();
243        let window = context_document.window();
244        let url = context_document.url();
245
246        // Step 1. Let document be a Document node whose type is "html".
247        let loader = DocumentLoader::new_with_threads(
248            context_document.loader().resource_threads().clone(),
249            Some(url.clone()),
250        );
251        let document = Document::new(
252            window,
253            HasBrowsingContext::No,
254            Some(url.clone()),
255            context_document.about_base_url(),
256            context_document.origin().clone(),
257            IsHTMLDocument::HTMLDocument,
258            None,
259            None,
260            DocumentActivity::Inactive,
261            DocumentSource::FromParser,
262            loader,
263            None,
264            None,
265            Default::default(),
266            false,
267            allow_declarative_shadow_roots,
268            Some(context_document.insecure_requests_policy()),
269            context_document.has_trustworthy_ancestor_or_current_origin(),
270            context_document.custom_element_reaction_stack(),
271            context_document.creation_sandboxing_flag_set(),
272            CanGc::from_cx(cx),
273        );
274
275        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
276        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
277        // mode to "limited-quirks".
278        document.set_quirks_mode(context_document.quirks_mode());
279
280        // NOTE: The following steps happened as part of Step 1.
281        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
282        // allow declarative shadow roots to true.
283        // Step 5. Create a new HTML parser, and associate it with document.
284
285        // Step 11.
286        let form = context_node
287            .inclusive_ancestors(ShadowIncluding::No)
288            .find(|element| element.is::<HTMLFormElement>());
289
290        let fragment_context = FragmentContext {
291            context_elem: context_node,
292            form_elem: form.as_deref(),
293            context_element_allows_scripting: context_document.scripting_enabled(),
294        };
295
296        let parser = ServoParser::new(
297            &document,
298            Tokenizer::Html(self::html::Tokenizer::new(
299                &document,
300                url,
301                Some(fragment_context),
302                ParsingAlgorithm::Fragment,
303            )),
304            ParserKind::Normal,
305            None,
306            None,
307            CanGc::from_cx(cx),
308        );
309        parser.parse_complete_string_chunk(cx, String::from(input));
310
311        // Step 14.
312        let root_element = document.GetDocumentElement().expect("no document element");
313        FragmentParsingResult {
314            inner: root_element.upcast::<Node>().children(),
315        }
316    }
317
318    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
319        let parser = ServoParser::new(
320            document,
321            if pref!(dom_servoparser_async_html_tokenizer_enabled) {
322                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None))
323            } else {
324                Tokenizer::Html(self::html::Tokenizer::new(
325                    document,
326                    url,
327                    None,
328                    ParsingAlgorithm::Normal,
329                ))
330            },
331            ParserKind::ScriptCreated,
332            None,
333            None,
334            CanGc::deprecated_note(),
335        );
336        document.set_current_parser(Some(&parser));
337    }
338
339    pub(crate) fn parse_xml_document(
340        cx: &mut JSContext,
341        document: &Document,
342        input: Option<DOMString>,
343        url: ServoUrl,
344        encoding_hint_from_content_type: Option<&'static Encoding>,
345    ) {
346        let parser = ServoParser::new(
347            document,
348            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
349            ParserKind::Normal,
350            encoding_hint_from_content_type,
351            None,
352            CanGc::from_cx(cx),
353        );
354
355        // Set as the document's current parser and initialize with `input`, if given.
356        if let Some(input) = input {
357            parser.parse_complete_string_chunk(cx, String::from(input));
358        } else {
359            parser.document.set_current_parser(Some(&parser));
360        }
361    }
362
    /// Returns the current value of this parser's script nesting level.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    pub(crate) fn script_nesting_level(&self) -> usize {
        self.script_nesting_level.get()
    }
366
    /// Returns whether this parser was created as a script-created parser.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
370
371    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
372    /// tag whose tag name is "script"' of
373    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
374    ///
375    /// This first moves everything from the script input to the beginning of
376    /// the network input, effectively resetting the insertion point to just
377    /// before the next character to be consumed.
378    ///
379    ///
380    /// ```text
381    ///     | ... script input ... network input ...
382    ///     ^
383    ///     insertion point
384    /// ```
385    pub(crate) fn resume_with_pending_parsing_blocking_script(
386        &self,
387        cx: &mut JSContext,
388        script: &HTMLScriptElement,
389        result: ScriptResult,
390    ) {
391        assert!(self.suspended.get());
392        self.suspended.set(false);
393
394        self.script_input.swap_with(&self.network_input);
395        while let Some(chunk) = self.script_input.pop_front() {
396            self.network_input.push_back(chunk);
397        }
398
399        let script_nesting_level = self.script_nesting_level.get();
400        assert_eq!(script_nesting_level, 0);
401
402        self.script_nesting_level.set(script_nesting_level + 1);
403        script.execute(cx, result);
404        self.script_nesting_level.set(script_nesting_level);
405
406        if !self.suspended.get() && !self.aborted.get() {
407            self.parse_sync(cx);
408        }
409    }
410
411    pub(crate) fn can_write(&self) -> bool {
412        self.script_created_parser || self.script_nesting_level.get() > 0
413    }
414
415    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
416    pub(crate) fn write(&self, cx: &mut JSContext, text: DOMString) {
417        assert!(self.can_write());
418
419        if self.document.has_pending_parsing_blocking_script() {
420            // There is already a pending parsing blocking script so the
421            // parser is suspended, we just append everything to the
422            // script input and abort these steps.
423            self.script_input.push_back(String::from(text).into());
424            return;
425        }
426
427        // There is no pending parsing blocking script, so all previous calls
428        // to document.write() should have seen their entire input tokenized
429        // and process, with nothing pushed to the parser script input.
430        assert!(self.script_input.is_empty());
431
432        let input = BufferQueue::default();
433        input.push_back(String::from(text).into());
434
435        let profiler_chan = self
436            .document
437            .window()
438            .as_global_scope()
439            .time_profiler_chan()
440            .clone();
441        let profiler_metadata = TimerMetadata {
442            url: self.document.url().as_str().into(),
443            iframe: TimerMetadataFrameType::RootWindow,
444            incremental: TimerMetadataReflowType::FirstReflow,
445        };
446        self.tokenize(cx, |cx, tokenizer| {
447            tokenizer.feed(cx, &input, profiler_chan.clone(), profiler_metadata.clone())
448        });
449
450        if self.suspended.get() {
451            // Parser got suspended, insert remaining input at end of
452            // script input, following anything written by scripts executed
453            // reentrantly during this call.
454            while let Some(chunk) = input.pop_front() {
455                self.script_input.push_back(chunk);
456            }
457            return;
458        }
459
460        assert!(input.is_empty());
461    }
462
463    /// Steps 4-6 of <https://html.spec.whatwg.org/multipage/#dom-document-close>
464    pub(crate) fn close(&self, cx: &mut JSContext) {
465        assert!(self.script_created_parser);
466
467        // Step 4. Insert an explicit "EOF" character at the end of the parser's input stream.
468        self.last_chunk_received.set(true);
469
470        // Step 5. If this's pending parsing-blocking script is not null, then return.
471        if self.suspended.get() {
472            return;
473        }
474
475        // Step 6. Run the tokenizer, processing resulting tokens as they are emitted,
476        // and stopping when the tokenizer reaches the explicit "EOF" character or spins the event loop.
477        self.parse_sync(cx);
478    }
479
480    // https://html.spec.whatwg.org/multipage/#abort-a-parser
481    pub(crate) fn abort(&self, cx: &mut JSContext) {
482        assert!(!self.aborted.get());
483        self.aborted.set(true);
484
485        // Step 1.
486        self.script_input.replace_with(BufferQueue::default());
487        self.network_input.replace_with(BufferQueue::default());
488
489        // Step 2.
490        self.document
491            .set_ready_state(cx, DocumentReadyState::Interactive);
492
493        // Step 3.
494        self.tokenizer.end(cx);
495        self.document.set_current_parser(None);
496
497        // Step 4.
498        self.document
499            .set_ready_state(cx, DocumentReadyState::Complete);
500    }
501
    /// Returns the current line as reported by this parser's tokenizer.
    pub(crate) fn get_current_line(&self) -> u32 {
        self.tokenizer.get_current_line()
    }
505
506    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
507    fn new_inherited(
508        document: &Document,
509        tokenizer: Tokenizer,
510        kind: ParserKind,
511        encoding_hint_from_content_type: Option<&'static Encoding>,
512        encoding_of_container_document: Option<&'static Encoding>,
513    ) -> Self {
514        // Store the whole input for the devtools Sources panel, if the devtools server is running
515        // and we are parsing for a document load (not just things like innerHTML).
516        // TODO: check if a devtools client is actually connected and/or wants the sources?
517        let content_for_devtools = (document.global().devtools_chan().is_some() &&
518            document.has_browsing_context())
519        .then_some(DomRefCell::new(String::new()));
520
521        ServoParser {
522            reflector: Reflector::new(),
523            document: Dom::from_ref(document),
524            network_decoder: DomRefCell::new(NetworkDecoderState::new(
525                encoding_hint_from_content_type,
526                encoding_of_container_document,
527            )),
528            network_input: BufferQueue::default(),
529            script_input: BufferQueue::default(),
530            tokenizer,
531            last_chunk_received: Cell::new(false),
532            suspended: Default::default(),
533            script_nesting_level: Default::default(),
534            aborted: Default::default(),
535            stopped: Default::default(),
536            script_created_parser: kind == ParserKind::ScriptCreated,
537            prefetch_decoder: RefCell::new(LossyDecoder::new_encoding_rs(
538                encoding_hint_from_content_type.unwrap_or(UTF_8),
539                Default::default(),
540            )),
541            prefetch_tokenizer: prefetch::Tokenizer::new(document),
542            prefetch_input: BufferQueue::default(),
543            content_for_devtools,
544        }
545    }
546
547    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
548    fn new(
549        document: &Document,
550        tokenizer: Tokenizer,
551        kind: ParserKind,
552        encoding_hint_from_content_type: Option<&'static Encoding>,
553        encoding_of_container_document: Option<&'static Encoding>,
554        can_gc: CanGc,
555    ) -> DomRoot<Self> {
556        reflect_dom_object(
557            Box::new(ServoParser::new_inherited(
558                document,
559                tokenizer,
560                kind,
561                encoding_hint_from_content_type,
562                encoding_of_container_document,
563            )),
564            document.window(),
565            can_gc,
566        )
567    }
568
569    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
570        if let Some(mut content_for_devtools) = self
571            .content_for_devtools
572            .as_ref()
573            .map(|content| content.borrow_mut())
574        {
575            // TODO: append these chunks more efficiently
576            content_for_devtools.push_str(chunk.as_ref());
577        }
578
579        if chunk.is_empty() {
580            return;
581        }
582
583        // Push the chunk into the network input stream,
584        // which is tokenized lazily.
585        self.network_input.push_back(chunk);
586    }
587
588    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
589        // For byte input, we convert it to text using the network decoder.
590        if let Some(decoded_chunk) = self
591            .network_decoder
592            .borrow_mut()
593            .push(&chunk, &self.document)
594        {
595            self.push_tendril_input_chunk(decoded_chunk);
596        }
597
598        if self.should_prefetch() {
599            // Push the chunk into the prefetch input stream,
600            // which is tokenized eagerly, to scan for resources
601            // to prefetch. If the user script uses `document.write()`
602            // to overwrite the network input, this prefetching may
603            // have been wasted, but in most cases it won't.
604            let mut prefetch_decoder = self.prefetch_decoder.borrow_mut();
605            prefetch_decoder.process(ByteTendril::from(&*chunk));
606
607            self.prefetch_input
608                .push_back(mem::take(&mut prefetch_decoder.inner_sink_mut().output));
609            self.prefetch_tokenizer.feed(&self.prefetch_input);
610        }
611    }
612
    /// Returns whether incoming bytes should also be scanned for resources to prefetch,
    /// which is the case only when the document has a browsing context.
    fn should_prefetch(&self) -> bool {
        // Per https://github.com/whatwg/html/issues/1495
        // stylesheets should not be loaded for documents
        // without browsing contexts.
        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
        // suggests that no content should be preloaded in such a case.
        // We're conservative, and only prefetch for documents
        // with browsing contexts.
        self.document.browsing_context().is_some()
    }
623
624    fn push_string_input_chunk(&self, chunk: String) {
625        // The input has already been decoded as a string, so doesn't need
626        // to be decoded by the network decoder again.
627        let chunk = StrTendril::from(chunk);
628        self.push_tendril_input_chunk(chunk);
629    }
630
631    fn parse_sync(&self, cx: &mut JSContext) {
632        assert!(self.script_input.is_empty());
633
634        // This parser will continue to parse while there is either pending input or
635        // the parser remains unsuspended.
636
637        if self.last_chunk_received.get() {
638            let chunk = self.network_decoder.borrow_mut().finish(&self.document);
639            if !chunk.is_empty() {
640                self.push_tendril_input_chunk(chunk);
641            }
642        }
643
644        if self.aborted.get() {
645            return;
646        }
647
648        let profiler_chan = self
649            .document
650            .window()
651            .as_global_scope()
652            .time_profiler_chan()
653            .clone();
654        let profiler_metadata = TimerMetadata {
655            url: self.document.url().as_str().into(),
656            iframe: TimerMetadataFrameType::RootWindow,
657            incremental: TimerMetadataReflowType::FirstReflow,
658        };
659        self.tokenize(cx, |cx, tokenizer| {
660            tokenizer.feed(
661                cx,
662                &self.network_input,
663                profiler_chan.clone(),
664                profiler_metadata.clone(),
665            )
666        });
667
668        if self.suspended.get() {
669            return;
670        }
671
672        assert!(self.network_input.is_empty());
673
674        if self.last_chunk_received.get() {
675            self.finish(cx);
676        }
677    }
678
679    fn parse_complete_string_chunk(&self, cx: &mut JSContext, input: String) {
680        self.document.set_current_parser(Some(self));
681        self.push_string_input_chunk(input);
682        self.last_chunk_received.set(true);
683        if !self.suspended.get() {
684            self.parse_sync(cx);
685        }
686    }
687
688    fn parse_bytes_chunk(&self, cx: &mut JSContext, input: Vec<u8>) {
689        let _realm = enter_realm(&*self.document);
690        self.document.set_current_parser(Some(self));
691        self.push_bytes_input_chunk(input);
692        if !self.suspended.get() {
693            self.parse_sync(cx);
694        }
695    }
696
697    fn tokenize<F>(&self, cx: &mut JSContext, feed: F)
698    where
699        F: Fn(&mut JSContext, &Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
700    {
701        loop {
702            assert!(!self.suspended.get());
703            assert!(!self.aborted.get());
704
705            self.document.window().reflow_if_reflow_timer_expired(cx);
706            let script = match feed(cx, &self.tokenizer) {
707                TokenizerResult::Done => return,
708                TokenizerResult::EncodingIndicator(_) => continue,
709                TokenizerResult::Script(script) => script,
710            };
711
712            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
713            // branch "An end tag whose tag name is "script"
714            // The spec says to perform the microtask checkpoint before
715            // setting the insertion mode back from Text, but this is not
716            // possible with the way servo and html5ever currently
717            // relate to each other, and hopefully it is not observable.
718            if is_execution_stack_empty() {
719                self.document.window().perform_a_microtask_checkpoint(cx);
720            }
721
722            let script_nesting_level = self.script_nesting_level.get();
723
724            self.script_nesting_level.set(script_nesting_level + 1);
725            script.set_initial_script_text();
726            let introduction_type_override =
727                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
728            script.prepare(cx, introduction_type_override);
729            self.script_nesting_level.set(script_nesting_level);
730
731            if self.document.has_pending_parsing_blocking_script() {
732                self.suspended.set(true);
733                return;
734            }
735            if self.aborted.get() {
736                return;
737            }
738        }
739    }
740
    /// Returns whether this parser has been aborted, i.e. whether `abort()` has run.
    ///
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    pub(crate) fn has_aborted(&self) -> bool {
        self.aborted.get()
    }
745
    /// Returns whether this parser has stopped parsing; the flag is set by `finish()`.
    ///
    /// <https://html.spec.whatwg.org/multipage/#stop-parsing>
    pub(crate) fn has_stopped(&self) -> bool {
        self.stopped.get()
    }
750
751    /// <https://html.spec.whatwg.org/multipage/#the-end>
752    fn finish(&self, cx: &mut JSContext) {
753        assert!(!self.suspended.get());
754        assert!(self.last_chunk_received.get());
755        assert!(self.script_input.is_empty());
756        assert!(self.network_input.is_empty());
757        assert!(self.network_decoder.borrow().is_finished());
758
759        self.stopped.set(true);
760
761        // Step 1. If the active speculative HTML parser is not null,
762        // then stop the speculative HTML parser and return.
763        // TODO
764
765        // Step 2. Set the insertion point to undefined.
766        self.document.set_current_parser(None);
767
768        // Step 3. Update the current document readiness to "interactive".
769        self.document
770            .set_ready_state(cx, DocumentReadyState::Interactive);
771
772        // Step 4. Pop all the nodes off the stack of open elements.
773        self.tokenizer.end(cx);
774
775        // Steps 5-11 are in another castle, namely finish_load.
776        let url = self.tokenizer.url().clone();
777        self.document.finish_load(LoadType::PageSource(url), cx);
778
779        // Send the source contents to devtools, if needed.
780        if let Some(content_for_devtools) = self
781            .content_for_devtools
782            .as_ref()
783            .map(|content| content.take())
784        {
785            let global = self.document.global();
786            let chan = global.devtools_chan().expect("Guaranteed by new");
787            let pipeline_id = self.document.global().pipeline_id();
788            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
789                pipeline_id,
790                content_for_devtools,
791            ));
792        }
793    }
794}
795
/// Iterator wrapper around `inner`, an iterator of rooted [`Node`]s.
///
/// Its `Iterator` implementation calls `remove_self` on every node before yielding it.
struct FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    /// The wrapped node iterator.
    inner: I,
}
802
803impl<I> Iterator for FragmentParsingResult<I>
804where
805    I: Iterator<Item = DomRoot<Node>>,
806{
807    type Item = DomRoot<Node>;
808
809    #[expect(unsafe_code)]
810    fn next(&mut self) -> Option<DomRoot<Node>> {
811        let mut cx = unsafe { script_bindings::script_runtime::temp_cx() };
812        let cx = &mut cx;
813
814        let next = self.inner.next()?;
815        next.remove_self(cx);
816        Some(next)
817    }
818
819    fn size_hint(&self) -> (usize, Option<usize>) {
820        self.inner.size_hint()
821    }
822}
823
/// The kind of a parser.
// NOTE(review): presumably `ScriptCreated` marks parsers created on behalf of script
// rather than a regular document load; confirm against the code that constructs it.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    Normal,
    ScriptCreated,
}
829
/// The tokenizer backend driving a parser; each variant wraps the tokenizer
/// type of the corresponding submodule.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// Tokenizer from the `html` submodule.
    Html(self::html::Tokenizer),
    /// Tokenizer from the `async_html` submodule.
    AsyncHtml(self::async_html::Tokenizer),
    /// Tokenizer from the `xml` submodule.
    Xml(self::xml::Tokenizer),
}
837
838impl Tokenizer {
839    fn feed(
840        &self,
841        cx: &mut JSContext,
842        input: &BufferQueue,
843        profiler_chan: ProfilerChan,
844        profiler_metadata: TimerMetadata,
845    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
846        match *self {
847            Tokenizer::Html(ref tokenizer) => time_profile!(
848                ProfilerCategory::ScriptParseHTML,
849                Some(profiler_metadata),
850                profiler_chan,
851                || tokenizer.feed(input),
852            ),
853            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
854                ProfilerCategory::ScriptParseHTML,
855                Some(profiler_metadata),
856                profiler_chan,
857                || tokenizer.feed(input, cx),
858            ),
859            Tokenizer::Xml(ref tokenizer) => time_profile!(
860                ProfilerCategory::ScriptParseXML,
861                Some(profiler_metadata),
862                profiler_chan,
863                || tokenizer.feed(input),
864            ),
865        }
866    }
867
868    fn end(&self, cx: &mut JSContext) {
869        match *self {
870            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
871            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(cx),
872            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
873        }
874    }
875
876    fn url(&self) -> &ServoUrl {
877        match *self {
878            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
879            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
880            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
881        }
882    }
883
884    fn set_plaintext_state(&self) {
885        match *self {
886            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
887            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
888            Tokenizer::Xml(_) => unimplemented!(),
889        }
890    }
891
892    fn get_current_line(&self) -> u32 {
893        match *self {
894            Tokenizer::Html(ref tokenizer) => tokenizer.get_current_line(),
895            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.get_current_line(),
896            Tokenizer::Xml(ref tokenizer) => tokenizer.get_current_line(),
897        }
898    }
899}
900
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// This does not mirror the fields of the spec struct exactly, but mimics its
/// intent when used by the document-loading spec algorithms.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// Content type of this document, if known; otherwise it needs to be sniffed.
    content_type: Option<Mime>,
    /// Link headers from the response.
    link_headers: Vec<LinkHeader>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// Bytes handed to the MIME classifier when computing the document type.
    ///
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    resource_header: Vec<u8>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-about-base-url>
    about_base_url: Option<ServoUrl>,
}
918
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request. `None` until `process_response` stores it.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document (set when loading media documents
    /// and inline error/unknown-content pages).
    is_synthesized_document: bool,
    /// Whether a document has already been loaded (relevant for checking the resource header).
    /// Set by `load_document`, which asserts that it was not set before.
    has_loaded_document: bool,
    /// The [`WebViewId`] of the `WebView` associated with this document.
    webview_id: WebViewId,
    /// The [`PipelineId`] of the `Pipeline` associated with this document.
    pipeline_id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Index returned when the navigation timing entry was queued in
    /// `submit_resource_timing`.
    pushed_entry_index: Option<usize>,
    /// Params required in document load algorithms.
    navigation_params: NavigationParams,
    /// To report CSP violations to the global that initiated the navigation.
    parent_info: Option<PipelineId>,
    /// Snapshot params of the navigation target; its sandboxing flags are merged
    /// into the final sandboxing flag set in `process_response`.
    target_snapshot_params: TargetSnapshotParams,
    /// Origin of the load; a `LoadOrigin::Script` snapshot is used as the source
    /// origin when determining the response origin in `process_response`.
    load_origin: LoadOrigin,
}
943
944impl ParserContext {
945    pub(crate) fn new(
946        webview_id: WebViewId,
947        pipeline_id: PipelineId,
948        url: ServoUrl,
949        creation_sandboxing_flag_set: SandboxingFlagSet,
950        parent_info: Option<PipelineId>,
951        target_snapshot_params: TargetSnapshotParams,
952        load_origin: LoadOrigin,
953    ) -> ParserContext {
954        ParserContext {
955            parser: None,
956            is_synthesized_document: false,
957            has_loaded_document: false,
958            webview_id,
959            pipeline_id,
960            url,
961            parent_info,
962            pushed_entry_index: None,
963            navigation_params: NavigationParams {
964                policy_container: Default::default(),
965                content_type: None,
966                link_headers: vec![],
967                final_sandboxing_flag_set: creation_sandboxing_flag_set,
968                resource_header: vec![],
969                about_base_url: Default::default(),
970            },
971            target_snapshot_params,
972            load_origin,
973        }
974    }
975
976    pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
977        let Some(policy_container) = policy_container else {
978            return;
979        };
980        self.navigation_params.policy_container = policy_container.clone();
981    }
982
    /// Overwrites the about base URL stored in the navigation params (`None` clears it).
    pub(crate) fn set_about_base_url(&mut self, about_base_url: Option<ServoUrl>) {
        self.navigation_params.about_base_url = about_base_url;
    }
986
987    pub(crate) fn get_document(&self) -> Option<DomRoot<Document>> {
988        self.parser
989            .as_ref()
990            .map(|parser| parser.root().document.as_rooted())
991    }
992
    /// Returns the stored `parent_info` pipeline id, if any.
    pub(crate) fn parent_info(&self) -> Option<PipelineId> {
        self.parent_info
    }
996
997    /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
998    fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
999        // TODO Step 1. If response's URL's scheme is "blob", then return a clone of response's
1000        // URL's blob URL entry's environment's policy container.
1001
1002        // Step 2. Let result be a new policy container.
1003        // TODO Step 6. Parse Integrity-Policy headers with response and result.
1004        // Step 7. Return result.
1005        PolicyContainer {
1006            // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response.
1007            csp_list: parse_csp_list_from_metadata(&metadata.headers),
1008            // TODO Step 4. If environment is non-null, then set result's embedder policy to the
1009            // result of obtaining an embedder policy given response and environment.
1010            // Otherwise, set it to "unsafe-none".
1011            embedder_policy: Default::default(),
1012            // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY]
1013            referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers),
1014        }
1015    }
1016
1017    /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
1018    fn initialize_document_object(&self, document: &Document) {
1019        // Step 9. Let document be a new Document, with
1020        document.set_policy_container(self.navigation_params.policy_container.clone());
1021        document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set);
1022        document.set_about_base_url(self.navigation_params.about_base_url.clone());
1023        // Step 17. Process link headers given document, navigationParams's response, and "pre-media".
1024        process_link_headers(
1025            &self.navigation_params.link_headers,
1026            document,
1027            LinkProcessingPhase::PreMedia,
1028        );
1029    }
1030
1031    /// Part of various load document methods
1032    fn process_link_headers_in_media_phase_with_task(&mut self, document: &Document) {
1033        // The first task that the networking task source places on the task queue
1034        // while fetching runs must process link headers given document,
1035        // navigationParams's response, and "media", after the task has been processed by the HTML parser.
1036        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1037        if !link_headers.is_empty() {
1038            let window = document.window();
1039            let document = Trusted::new(document);
1040            window
1041                .upcast::<GlobalScope>()
1042                .task_manager()
1043                .networking_task_source()
1044                .queue(task!(process_link_headers_task: move || {
1045                    process_link_headers(&link_headers, &document.root(), LinkProcessingPhase::Media);
1046                }));
1047        }
1048    }
1049
1050    /// <https://html.spec.whatwg.org/multipage/#loading-a-document>
1051    fn load_document(&mut self, cx: &mut JSContext) {
1052        assert!(!self.has_loaded_document);
1053        self.has_loaded_document = true;
1054        let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
1055            return;
1056        };
1057        // Step 1. Let type be the computed type of navigationParams's response.
1058        let content_type = &self.navigation_params.content_type;
1059        let mime_type = MimeClassifier::default().classify(
1060            LoadContext::Browsing,
1061            NoSniffFlag::Off,
1062            ApacheBugFlag::from_content_type(content_type.as_ref()),
1063            content_type,
1064            &self.navigation_params.resource_header,
1065        );
1066        // Step 2. If the user agent has been configured to process resources of the given type using
1067        // some mechanism other than rendering the content in a navigable, then skip this step.
1068        // Otherwise, if the type is one of the following types:
1069        let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
1070            let page = format!(
1071                "<html><body><p>Unknown content type ({}).</p></body></html>",
1072                &mime_type,
1073            );
1074            self.load_inline_unknown_content(cx, parser, page);
1075            return;
1076        };
1077        match media_type {
1078            // Return the result of loading an HTML document, given navigationParams.
1079            MediaType::Html => self.load_html_document(parser),
1080            // Return the result of loading an XML document given navigationParams and type.
1081            MediaType::Xml => self.load_xml_document(parser),
1082            // Return the result of loading a text document given navigationParams and type.
1083            MediaType::JavaScript | MediaType::Text | MediaType::Css => {
1084                self.load_text_document(cx, parser)
1085            },
1086            // Return the result of loading a json document given navigationParams and type.
1087            MediaType::Json => self.load_json_document(cx, parser),
1088            // Return the result of loading a media document given navigationParams and type.
1089            MediaType::Image | MediaType::AudioVideo => {
1090                self.load_media_document(cx, parser, media_type, &mime_type);
1091                return;
1092            },
1093            MediaType::Font => {
1094                let page = format!(
1095                    "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
1096                    &mime_type,
1097                );
1098                self.load_inline_unknown_content(cx, parser, page);
1099                return;
1100            },
1101        };
1102
1103        parser.parse_bytes_chunk(
1104            cx,
1105            std::mem::take(&mut self.navigation_params.resource_header),
1106        );
1107    }
1108
1109    /// <https://html.spec.whatwg.org/multipage/#navigate-html>
1110    fn load_html_document(&mut self, parser: &ServoParser) {
1111        // Step 1. Let document be the result of creating and initializing a
1112        // Document object given "html", "text/html", and navigationParams.
1113        self.initialize_document_object(&parser.document);
1114        // The first task that the networking task source places on the task queue while fetching
1115        // runs must process link headers given document, navigationParams's response, and "media",
1116        // after the task has been processed by the HTML parser.
1117        self.process_link_headers_in_media_phase_with_task(&parser.document);
1118    }
1119
1120    /// <https://html.spec.whatwg.org/multipage/#read-xml>
1121    fn load_xml_document(&mut self, parser: &ServoParser) {
1122        // When faced with displaying an XML file inline, provided navigation params navigationParams
1123        // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
1124        // XML Media Types, DOM, and other relevant specifications to create and initialize a
1125        // Document object document, given "xml", type, and navigationParams, and return that Document.
1126        // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
1127        self.initialize_document_object(&parser.document);
1128        // The first task that the networking task source places on the task queue while fetching
1129        // runs must process link headers given document, navigationParams's response, and "media",
1130        // after the task has been processed by the XML parser.
1131        self.process_link_headers_in_media_phase_with_task(&parser.document);
1132    }
1133
1134    /// <https://html.spec.whatwg.org/multipage/#navigate-text>
1135    fn load_text_document(&mut self, cx: &mut JSContext, parser: &ServoParser) {
1136        // Step 1. Let document be the result of creating and initializing a Document
1137        // object given "html", type, and navigationParams.
1138        self.initialize_document_object(&parser.document);
1139        // Step 4. Create an HTML parser and associate it with the document.
1140        // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by
1141        // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state.
1142        // Each task that the networking task source places on the task queue while fetching runs must then
1143        // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform
1144        // the appropriate processing of the input stream.
1145        let page = "<pre>\n".into();
1146        parser.push_string_input_chunk(page);
1147        parser.parse_sync(cx);
1148        parser.tokenizer.set_plaintext_state();
1149        // The first task that the networking task source places on the task queue while fetching
1150        // runs must process link headers given document, navigationParams's response, and "media",
1151        // after the task has been processed by the HTML parser.
1152        self.process_link_headers_in_media_phase_with_task(&parser.document);
1153    }
1154
1155    /// <https://html.spec.whatwg.org/multipage/#navigate-media>
1156    fn load_media_document(
1157        &mut self,
1158        cx: &mut JSContext,
1159        parser: &ServoParser,
1160        media_type: MediaType,
1161        mime_type: &Mime,
1162    ) {
1163        // Step 1. Let document be the result of creating and initializing a Document
1164        // object given "html", type, and navigationParams.
1165        self.initialize_document_object(&parser.document);
1166        // Step 8. Act as if the user agent had stopped parsing document.
1167        self.is_synthesized_document = true;
1168        parser.last_chunk_received.set(true);
1169        // Step 3. Populate with html/head/body given document.
1170        let page = "<html><body></body></html>".into();
1171        parser.push_string_input_chunk(page);
1172        parser.parse_sync(cx);
1173
1174        let doc = &parser.document;
1175        // Step 5. Set the appropriate attribute of the element host element, as described below,
1176        // to the address of the image, video, or audio resource.
1177        let node = if media_type == MediaType::Image {
1178            let img = Element::create(
1179                cx,
1180                QualName::new(None, ns!(html), local_name!("img")),
1181                None,
1182                doc,
1183                ElementCreator::ParserCreated(1),
1184                CustomElementCreationMode::Asynchronous,
1185                None,
1186            );
1187            let img = DomRoot::downcast::<HTMLImageElement>(img).unwrap();
1188            img.SetSrc(cx, USVString(self.url.to_string()));
1189            DomRoot::upcast::<Node>(img)
1190        } else if mime_type.type_() == mime::AUDIO {
1191            let audio = Element::create(
1192                cx,
1193                QualName::new(None, ns!(html), local_name!("audio")),
1194                None,
1195                doc,
1196                ElementCreator::ParserCreated(1),
1197                CustomElementCreationMode::Asynchronous,
1198                None,
1199            );
1200            let audio = DomRoot::downcast::<HTMLMediaElement>(audio).unwrap();
1201            audio.SetControls(cx, true);
1202            audio.SetSrc(cx, USVString(self.url.to_string()));
1203            DomRoot::upcast::<Node>(audio)
1204        } else {
1205            let video = Element::create(
1206                cx,
1207                QualName::new(None, ns!(html), local_name!("video")),
1208                None,
1209                doc,
1210                ElementCreator::ParserCreated(1),
1211                CustomElementCreationMode::Asynchronous,
1212                None,
1213            );
1214            let video = DomRoot::downcast::<HTMLMediaElement>(video).unwrap();
1215            video.SetControls(cx, true);
1216            video.SetSrc(cx, USVString(self.url.to_string()));
1217            DomRoot::upcast::<Node>(video)
1218        };
1219        // Step 4. Append an element host element for the media, as described below, to the body element.
1220        let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
1221        doc_body.AppendChild(cx, &node).expect("Appending failed");
1222        // Step 7. Process link headers given document, navigationParams's response, and "media".
1223        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1224        process_link_headers(&link_headers, doc, LinkProcessingPhase::Media);
1225    }
1226
1227    /// Load a JSON document with a pretty-printing, interactive viewer.
1228    fn load_json_document(&mut self, cx: &mut JSContext, parser: &ServoParser) {
1229        self.initialize_document_object(&parser.document);
1230        parser.push_string_input_chunk(resources::read_string(Resource::JsonViewerHTML));
1231        parser.parse_sync(cx);
1232        parser.tokenizer.set_plaintext_state();
1233        self.process_link_headers_in_media_phase_with_task(&parser.document);
1234    }
1235
1236    /// <https://html.spec.whatwg.org/multipage/#navigate-ua-inline>
1237    fn load_inline_unknown_content(
1238        &mut self,
1239        cx: &mut JSContext,
1240        parser: &ServoParser,
1241        page: String,
1242    ) {
1243        self.is_synthesized_document = true;
1244        parser.document.mark_as_internal();
1245        parser.push_string_input_chunk(page);
1246        // Step 7. Act as if the user agent had stopped parsing document.
1247        parser.last_chunk_received.set(true);
1248        parser.parse_sync(cx);
1249    }
1250
1251    /// Store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1252    fn submit_resource_timing(&mut self) {
1253        let Some(parser) = self.parser.as_ref() else {
1254            return;
1255        };
1256        let parser = parser.root();
1257        if parser.aborted.get() {
1258            return;
1259        }
1260
1261        let document = &parser.document;
1262
1263        // TODO: Pass a proper fetch start time here.
1264        let performance_entry = PerformanceNavigationTiming::new(
1265            &document.global(),
1266            CrossProcessInstant::now(),
1267            document,
1268            CanGc::deprecated_note(),
1269        );
1270        self.pushed_entry_index = document
1271            .global()
1272            .performance()
1273            .queue_entry(performance_entry.upcast::<PerformanceEntry>());
1274    }
1275}
1276
1277impl FetchResponseListener for ParserContext {
    /// No-op: this listener ignores the request-body notification.
    fn process_request_body(&mut self, _: RequestId) {}
1279
1280    /// Implements parts of
1281    /// <https://html.spec.whatwg.org/multipage/#attempt-to-populate-the-history-entry's-document>
1282    fn process_response(
1283        &mut self,
1284        cx: &mut JSContext,
1285        _: RequestId,
1286        meta_result: Result<FetchMetadata, NetworkError>,
1287    ) {
1288        let (metadata, mut error) = match meta_result {
1289            Ok(meta) => (
1290                Some(match meta {
1291                    FetchMetadata::Unfiltered(m) => m,
1292                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
1293                }),
1294                None,
1295            ),
1296            Err(error) => (
1297                // Check variant without moving
1298                match &error {
1299                    NetworkError::LoadCancelled => {
1300                        return;
1301                    },
1302                    _ => {
1303                        let mut meta = Metadata::default(self.url.clone());
1304                        let mime: Option<Mime> = "text/html".parse().ok();
1305                        meta.set_content_type(mime.as_ref());
1306                        Some(meta)
1307                    },
1308                },
1309                Some(error),
1310            ),
1311        };
1312        let content_type: Option<Mime> = metadata
1313            .clone()
1314            .and_then(|meta| meta.content_type)
1315            .map(Serde::into_inner)
1316            .map(Into::into);
1317
1318        // <https://html.spec.whatwg.org/multipage/#create-navigation-params-by-fetching>
1319        // Step 21.9. Set responsePolicyContainer to the result of creating a
1320        // policy container from a fetch response given response and request's
1321        // reserved client.
1322        let (policy_container, endpoints_list, link_headers) = match metadata.as_ref() {
1323            None => (PolicyContainer::default(), None, vec![]),
1324            Some(metadata) => (
1325                Self::create_policy_container_from_fetch_response(metadata),
1326                ReportingEndpoint::parse_reporting_endpoints_header(
1327                    &self.url.clone(),
1328                    &metadata.headers,
1329                ),
1330                extract_links_from_headers(&metadata.headers),
1331            ),
1332        };
1333
1334        // Step 21.10. Set finalSandboxFlags to the union of targetSnapshotParams's
1335        // sandboxing flags and responsePolicyContainer's CSP list's CSP-derived
1336        // sandboxing flags.
1337        let final_sandboxing_flag_set = policy_container
1338            .csp_list
1339            .as_ref()
1340            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
1341            .unwrap_or(SandboxingFlagSet::empty())
1342            .union(self.target_snapshot_params.sandboxing_flags);
1343
1344        // Step 21.11. Set responseOrigin to the result of determining the origin
1345        // given response's URL, finalSandboxFlags, and entry's document state's
1346        // initiator origin.
1347        let source_origin = match self.load_origin {
1348            LoadOrigin::Script(ref snapshot) => {
1349                Some(MutableOrigin::from_snapshot(snapshot.clone()))
1350            },
1351            _ => None,
1352        };
1353        let origin = determine_the_origin(
1354            metadata.as_ref().map(|metadata| &metadata.final_url),
1355            final_sandboxing_flag_set,
1356            source_origin,
1357        );
1358
1359        let parser = match ScriptThread::page_headers_available(
1360            self.webview_id,
1361            self.pipeline_id,
1362            metadata.as_ref(),
1363            origin.clone(),
1364            cx,
1365        ) {
1366            Some(parser) => parser,
1367            None => return,
1368        };
1369        if parser.aborted.get() {
1370            return;
1371        }
1372
1373        let mut realm = enter_auto_realm(cx, &*parser.document);
1374        let cx = &mut realm;
1375        let document = &parser.document;
1376        let window = document.window();
1377
1378        // https://html.spec.whatwg.org/multipage/#attempt-to-populate-the-history-entry%27s-document
1379        // Step 4. Otherwise, if any of the following are true:
1380        if
1381        // navigationParams is null;
1382        // TODO
1383        // the result of should navigation response to navigation request of
1384        // type in target be blocked by Content Security Policy? given
1385        // navigationParams's request, navigationParams's response, navigationParams's policy container's CSP list,
1386        // cspNavigationType, and navigable is "Blocked";
1387        policy_container.csp_list.should_navigation_response_to_navigation_request_be_blocked(
1388            window,
1389            self.url.clone().into_url(),
1390            &origin.immutable().clone().into_url_origin(),
1391        )
1392        // navigationParams's reserved environment is non-null and the result of
1393        // checking a navigation response's adherence to its embedder policy given navigationParams's response,
1394        // navigable, and navigationParams's policy container's embedder policy is false; or
1395        // TODO
1396        // the result of checking a navigation response's adherence to `X-Frame-Options`
1397        // given navigationParams's response, navigable, navigationParams's policy container's CSP list,
1398        // and navigationParams's origin is false,
1399        || !check_a_navigation_response_adherence_to_x_frame_options(
1400            window,
1401            policy_container.csp_list.as_ref(),
1402            &origin,
1403            metadata
1404                .as_ref()
1405                .and_then(|metadata| metadata.headers.as_ref()),
1406        ) {
1407            // Step 4.1. Set entry's document state's document to the result of creating a document for inline content
1408            // that doesn't have a DOM, given navigable, null, navTimingType, and userInvolvement.
1409            // The inline content should indicate to the user the sort of error that occurred.
1410            error = Some(NetworkError::ContentSecurityPolicy);
1411            // Step 4.2. Make document unsalvageable given entry's document state's document and "navigation-failure".
1412            document.make_document_unsalvageable();
1413            // Step 4.3. Set saveExtraDocumentState to false.
1414            // TODO
1415            // Step 4.4. If navigationParams is not null, then:
1416            // TODO
1417        }
1418
1419        if let Some(endpoints) = endpoints_list {
1420            window.set_endpoints_list(endpoints);
1421        }
1422        self.parser = Some(Trusted::new(&*parser));
1423        self.navigation_params = NavigationParams {
1424            policy_container,
1425            content_type,
1426            final_sandboxing_flag_set,
1427            link_headers,
1428            about_base_url: document.about_base_url(),
1429            resource_header: vec![],
1430        };
1431        self.submit_resource_timing();
1432
1433        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
1434        //
1435        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
1436        // e.g., a native rendering of the content or an error message because the specified type is not supported,
1437        // then return the result of creating a document for inline content that doesn't have a DOM given
1438        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
1439        // and navigationParams's user involvement.
1440        if let Some(error) = error {
1441            let page = match error {
1442                NetworkError::SslValidation(reason, bytes) => {
1443                    let page = resources::read_string(Resource::BadCertHTML);
1444                    let page = page.replace("${reason}", &reason);
1445                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
1446                    let page = page.replace("${bytes}", encoded_bytes.as_str());
1447                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
1448                },
1449                NetworkError::BlobURLStoreError(reason) |
1450                NetworkError::WebsocketConnectionFailure(reason) |
1451                NetworkError::HttpError(reason) |
1452                NetworkError::ResourceLoadError(reason) |
1453                NetworkError::MimeType(reason) => {
1454                    let page = resources::read_string(Resource::NetErrorHTML);
1455                    page.replace("${reason}", &reason)
1456                },
1457                NetworkError::Crash(details) => {
1458                    let page = resources::read_string(Resource::CrashHTML);
1459                    page.replace("${details}", &details)
1460                },
1461                NetworkError::UnsupportedScheme |
1462                NetworkError::CorsGeneral |
1463                NetworkError::CrossOriginResponse |
1464                NetworkError::CorsCredentials |
1465                NetworkError::CorsAllowMethods |
1466                NetworkError::CorsAllowHeaders |
1467                NetworkError::CorsMethod |
1468                NetworkError::CorsAuthorization |
1469                NetworkError::CorsHeaders |
1470                NetworkError::ConnectionFailure |
1471                NetworkError::RedirectError |
1472                NetworkError::TooManyRedirects |
1473                NetworkError::TooManyInFlightKeepAliveRequests |
1474                NetworkError::InvalidMethod |
1475                NetworkError::ContentSecurityPolicy |
1476                NetworkError::Nosniff |
1477                NetworkError::SubresourceIntegrity |
1478                NetworkError::MixedContent |
1479                NetworkError::CacheError |
1480                NetworkError::InvalidPort |
1481                NetworkError::LocalDirectoryError |
1482                NetworkError::PartialResponseToNonRangeRequestError |
1483                NetworkError::ProtocolHandlerSubstitutionError |
1484                NetworkError::DecompressionError => {
1485                    let page = resources::read_string(Resource::NetErrorHTML);
1486                    page.replace("${reason}", &format!("{:?}", error))
1487                },
1488                NetworkError::LoadCancelled => {
1489                    // The next load will show a page
1490                    return;
1491                },
1492            };
1493            self.load_inline_unknown_content(cx, &parser, page);
1494        }
1495    }
1496
1497    fn process_response_chunk(&mut self, cx: &mut JSContext, _: RequestId, payload: Vec<u8>) {
1498        if self.is_synthesized_document {
1499            return;
1500        }
1501        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
1502            return;
1503        };
1504        if parser.aborted.get() {
1505            return;
1506        }
1507        if !self.has_loaded_document {
1508            // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1509            self.navigation_params
1510                .resource_header
1511                .extend_from_slice(&payload);
1512            // the number of bytes in buffer is greater than or equal to 1445.
1513            if self.navigation_params.resource_header.len() >= 1445 {
1514                self.load_document(cx);
1515            }
1516        } else {
1517            parser.parse_bytes_chunk(cx, payload);
1518        }
1519    }
1520
1521    // This method is called via script_thread::handle_fetch_eof, so we must call
1522    // submit_resource_timing in this function
1523    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1524    fn process_response_eof(
1525        mut self,
1526        cx: &mut JSContext,
1527        _: RequestId,
1528        status: Result<(), NetworkError>,
1529        timing: ResourceFetchTiming,
1530    ) {
1531        let parser = match self.parser.as_ref() {
1532            Some(parser) => parser.root(),
1533            None => return,
1534        };
1535        if parser.aborted.get() || self.is_synthesized_document {
1536            return;
1537        }
1538
1539        if let Err(error) = &status {
1540            // TODO(Savago): we should send a notification to callers #5463.
1541            debug!("Failed to load page URL {}, error: {error:?}", self.url);
1542        }
1543
1544        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1545        //
1546        // the end of the resource is reached.
1547        if !self.has_loaded_document {
1548            self.load_document(cx);
1549        }
1550
1551        let mut realm = enter_auto_realm(cx, &*parser);
1552        let cx = &mut realm;
1553
1554        if status.is_ok() {
1555            parser.document.set_resource_fetch_timing(timing);
1556        }
1557
1558        parser.last_chunk_received.set(true);
1559        if !parser.suspended.get() {
1560            parser.parse_sync(cx);
1561        }
1562
1563        // TODO: Only update if this is the current document resource.
1564        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
1565        if let Some(pushed_index) = self.pushed_entry_index {
1566            let document = &parser.document;
1567            let performance_entry = PerformanceNavigationTiming::new(
1568                &document.global(),
1569                CrossProcessInstant::now(),
1570                document,
1571                CanGc::from_cx(cx),
1572            );
1573            document
1574                .global()
1575                .performance()
1576                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1577        }
1578    }
1579
1580    fn process_csp_violations(&mut self, _: RequestId, _: Vec<Violation>) {
1581        unreachable!("Script_thread should handle reporting violations for parser contexts");
1582    }
1583}
1584
/// Borrowed inputs bundled together for parsing a fragment.
///
/// NOTE(review): presumably mirrors the inputs of the HTML fragment parsing algorithm;
/// the code consuming this struct is not visible here, so confirm against its users.
pub(crate) struct FragmentContext<'a> {
    /// Node acting as the context element.
    pub(crate) context_elem: &'a Node,
    /// Optional form node associated with the context (presumably the form owner or
    /// nearest form ancestor of the context element; confirm against callers).
    pub(crate) form_elem: Option<&'a Node>,
    /// Whether the context element allows scripting.
    pub(crate) context_element_allows_scripting: bool,
}
1590
1591#[cfg_attr(crown, expect(crown::unrooted_must_root))]
1592fn insert(
1593    cx: &mut JSContext,
1594    parent: &Node,
1595    reference_child: Option<&Node>,
1596    child: NodeOrText<Dom<Node>>,
1597    parsing_algorithm: ParsingAlgorithm,
1598    custom_element_reaction_stack: &CustomElementReactionStack,
1599) {
1600    match child {
1601        NodeOrText::AppendNode(n) => {
1602            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1603            // applies if this is an element; if not, it may be
1604            // https://html.spec.whatwg.org/multipage/#insert-a-comment
1605            let element_in_non_fragment =
1606                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
1607            if element_in_non_fragment {
1608                custom_element_reaction_stack.push_new_element_queue();
1609            }
1610            parent.InsertBefore(cx, &n, reference_child).unwrap();
1611            if element_in_non_fragment {
1612                custom_element_reaction_stack.pop_current_element_queue(cx);
1613            }
1614        },
1615        NodeOrText::AppendText(t) => {
1616            // https://html.spec.whatwg.org/multipage/#insert-a-character
1617            let text = reference_child
1618                .and_then(Node::GetPreviousSibling)
1619                .or_else(|| parent.GetLastChild())
1620                .and_then(DomRoot::downcast::<Text>);
1621
1622            if let Some(text) = text {
1623                text.upcast::<CharacterData>().append_data(cx, &t);
1624            } else {
1625                let text = Text::new(cx, String::from(t).into(), &parent.owner_doc());
1626                parent
1627                    .InsertBefore(cx, text.upcast(), reference_child)
1628                    .unwrap();
1629            }
1630        },
1631    }
1632}
1633
/// Tree sink state used to build DOM nodes while parsing (see its `TreeSink`
/// implementation).
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// NOTE(review): presumably the base URL of the document being parsed; it is not
    /// read in the visible part of this file, so confirm against its users.
    #[no_trace]
    base_url: ServoUrl,
    /// Document that nodes are created for; also the node handed out by `get_document`.
    document: Dom<Document>,
    /// Line number last reported through `set_current_line`; recorded on elements
    /// created by the parser.
    current_line: Cell<u64>,
    /// NOTE(review): presumably the script element the parser is waiting on; not used
    /// in the visible part of this file, so confirm against its users.
    script: MutNullableDom<HTMLScriptElement>,
    /// Parsing algorithm in use; forwarded when creating elements and inserting nodes.
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack passed along when creating elements and
    /// inserting nodes.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
1646
1647impl Sink {
1648    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1649        let x = x.downcast::<Element>().expect("Element node expected");
1650        let y = y.downcast::<Element>().expect("Element node expected");
1651
1652        x.is_in_same_home_subtree(y)
1653    }
1654
1655    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1656        node.GetParentNode().is_some()
1657    }
1658}
1659
1660impl TreeSink for Sink {
1661    type Output = Self;
1662
1663    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1664    fn finish(self) -> Self {
1665        self
1666    }
1667
1668    type Handle = Dom<Node>;
1669    type ElemName<'a>
1670        = ExpandedName<'a>
1671    where
1672        Self: 'a;
1673
1674    fn get_document(&self) -> Dom<Node> {
1675        Dom::from_ref(self.document.upcast())
1676    }
1677
1678    #[expect(unsafe_code)]
1679    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1680        // TODO: https://github.com/servo/servo/issues/42839
1681        let mut cx = unsafe { temp_cx() };
1682        let cx = &mut cx;
1683        let template = target
1684            .downcast::<HTMLTemplateElement>()
1685            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1686        Dom::from_ref(template.Content(cx).upcast())
1687    }
1688
1689    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1690        x == y
1691    }
1692
1693    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1694        let elem = target
1695            .downcast::<Element>()
1696            .expect("tried to get name of non-Element in HTML parsing");
1697        ExpandedName {
1698            ns: elem.namespace(),
1699            local: elem.local_name(),
1700        }
1701    }
1702
1703    #[expect(unsafe_code)]
1704    fn create_element(
1705        &self,
1706        name: QualName,
1707        attrs: Vec<Attribute>,
1708        flags: ElementFlags,
1709    ) -> Dom<Node> {
1710        // TODO: https://github.com/servo/servo/issues/42839
1711        let mut cx = unsafe { temp_cx() };
1712        let cx = &mut cx;
1713        let attrs = attrs
1714            .into_iter()
1715            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1716            .collect();
1717        let parsing_algorithm = if flags.template {
1718            ParsingAlgorithm::Fragment
1719        } else {
1720            self.parsing_algorithm
1721        };
1722        let element = create_element_for_token(
1723            cx,
1724            name,
1725            attrs,
1726            &self.document,
1727            ElementCreator::ParserCreated(self.current_line.get()),
1728            parsing_algorithm,
1729            &self.custom_element_reaction_stack,
1730            flags.had_duplicate_attributes,
1731        );
1732        Dom::from_ref(element.upcast())
1733    }
1734
1735    #[expect(unsafe_code)]
1736    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1737        // TODO: https://github.com/servo/servo/issues/42839
1738        let mut cx = unsafe { temp_cx() };
1739        let cx = &mut cx;
1740        let comment = Comment::new(
1741            cx,
1742            DOMString::from(String::from(text)),
1743            &self.document,
1744            None,
1745        );
1746        Dom::from_ref(comment.upcast())
1747    }
1748
1749    #[expect(unsafe_code)]
1750    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1751        // TODO: https://github.com/servo/servo/issues/42839
1752        let mut cx = unsafe { temp_cx() };
1753        let cx = &mut cx;
1754        let doc = &*self.document;
1755        let pi = ProcessingInstruction::new(
1756            cx,
1757            DOMString::from(String::from(target)),
1758            DOMString::from(String::from(data)),
1759            doc,
1760        );
1761        Dom::from_ref(pi.upcast())
1762    }
1763
1764    #[expect(unsafe_code)]
1765    fn associate_with_form(
1766        &self,
1767        target: &Dom<Node>,
1768        form: &Dom<Node>,
1769        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1770    ) {
1771        // TODO: https://github.com/servo/servo/issues/42839
1772        let mut cx = unsafe { temp_cx() };
1773        let cx = &mut cx;
1774        let (element, prev_element) = nodes;
1775        let tree_node = prev_element.map_or(element, |prev| {
1776            if self.has_parent_node(element) {
1777                element
1778            } else {
1779                prev
1780            }
1781        });
1782        if !self.same_tree(tree_node, form) {
1783            return;
1784        }
1785
1786        let node = target;
1787        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1788            .expect("Owner must be a form element");
1789
1790        let elem = node.downcast::<Element>();
1791        let control = elem.and_then(|e| e.as_maybe_form_control());
1792
1793        if let Some(control) = control {
1794            control.set_form_owner_from_parser(cx, &form);
1795        }
1796    }
1797
1798    #[expect(unsafe_code)]
1799    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1800    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
1801        // TODO: https://github.com/servo/servo/issues/42839
1802        let mut cx = unsafe { temp_cx() };
1803        let cx = &mut cx;
1804
1805        let parent = sibling
1806            .GetParentNode()
1807            .expect("append_before_sibling called on node without parent");
1808
1809        insert(
1810            cx,
1811            &parent,
1812            Some(sibling),
1813            new_node,
1814            self.parsing_algorithm,
1815            &self.custom_element_reaction_stack,
1816        );
1817    }
1818
1819    fn parse_error(&self, msg: Cow<'static, str>) {
1820        debug!("Parse error: {}", msg);
1821    }
1822
1823    fn set_quirks_mode(&self, mode: QuirksMode) {
1824        let mode = match mode {
1825            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1826            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1827            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1828        };
1829        self.document.set_quirks_mode(mode);
1830    }
1831
1832    #[expect(unsafe_code)]
1833    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1834    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
1835        // TODO: https://github.com/servo/servo/issues/42839
1836        let mut cx = unsafe { temp_cx() };
1837        let cx = &mut cx;
1838
1839        insert(
1840            cx,
1841            parent,
1842            None,
1843            child,
1844            self.parsing_algorithm,
1845            &self.custom_element_reaction_stack,
1846        );
1847    }
1848
1849    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1850    fn append_based_on_parent_node(
1851        &self,
1852        elem: &Dom<Node>,
1853        prev_elem: &Dom<Node>,
1854        child: NodeOrText<Dom<Node>>,
1855    ) {
1856        if self.has_parent_node(elem) {
1857            self.append_before_sibling(elem, child);
1858        } else {
1859            self.append(prev_elem, child);
1860        }
1861    }
1862
1863    #[expect(unsafe_code)]
1864    fn append_doctype_to_document(
1865        &self,
1866        name: StrTendril,
1867        public_id: StrTendril,
1868        system_id: StrTendril,
1869    ) {
1870        // TODO: https://github.com/servo/servo/issues/42839
1871        let mut cx = unsafe { temp_cx() };
1872        let cx = &mut cx;
1873
1874        let doc = &*self.document;
1875        let doctype = DocumentType::new(
1876            cx,
1877            DOMString::from(String::from(name)),
1878            Some(DOMString::from(String::from(public_id))),
1879            Some(DOMString::from(String::from(system_id))),
1880            doc,
1881        );
1882        doc.upcast::<Node>()
1883            .AppendChild(cx, doctype.upcast())
1884            .expect("Appending failed");
1885    }
1886
1887    #[expect(unsafe_code)]
1888    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1889        // TODO: https://github.com/servo/servo/issues/42839
1890        let mut cx = unsafe { temp_cx() };
1891        let cx = &mut cx;
1892
1893        let elem = target
1894            .downcast::<Element>()
1895            .expect("tried to set attrs on non-Element in HTML parsing");
1896        for attr in attrs {
1897            elem.set_attribute_from_parser(
1898                cx,
1899                attr.name,
1900                DOMString::from(String::from(attr.value)),
1901                None,
1902            );
1903        }
1904    }
1905
1906    #[expect(unsafe_code)]
1907    fn remove_from_parent(&self, target: &Dom<Node>) {
1908        // TODO: https://github.com/servo/servo/issues/42839
1909        let mut cx = unsafe { temp_cx() };
1910        let cx = &mut cx;
1911
1912        if let Some(ref parent) = target.GetParentNode() {
1913            parent.RemoveChild(cx, target).unwrap();
1914        }
1915    }
1916
1917    fn mark_script_already_started(&self, node: &Dom<Node>) {
1918        let script = node.downcast::<HTMLScriptElement>();
1919        if let Some(script) = script {
1920            script.set_already_started(true)
1921        }
1922    }
1923
1924    #[expect(unsafe_code)]
1925    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1926        // TODO: https://github.com/servo/servo/issues/42839
1927        let mut cx = unsafe { temp_cx() };
1928        let cx = &mut cx;
1929
1930        while let Some(ref child) = node.GetFirstChild() {
1931            new_parent.AppendChild(cx, child).unwrap();
1932        }
1933    }
1934
1935    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1936    /// Specifically, the `<annotation-xml>` cases.
1937    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1938        let elem = handle.downcast::<Element>().unwrap();
1939        elem.get_attribute_string_value(&local_name!("encoding"))
1940            .is_some_and(|value| {
1941                value.eq_ignore_ascii_case("text/html") ||
1942                    value.eq_ignore_ascii_case("application/xhtml+xml")
1943            })
1944    }
1945
1946    fn set_current_line(&self, line_number: u64) {
1947        self.current_line.set(line_number);
1948    }
1949
1950    #[expect(unsafe_code)]
1951    fn pop(&self, node: &Dom<Node>) {
1952        // TODO: https://github.com/servo/servo/issues/42839
1953        let mut cx = unsafe { temp_cx() };
1954        let cx = &mut cx;
1955
1956        let node = DomRoot::from_ref(&**node);
1957        vtable_for(&node).pop(cx);
1958    }
1959
1960    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
1961        intended_parent.owner_doc().allow_declarative_shadow_roots()
1962    }
1963
1964    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
1965    /// A start tag whose tag name is "template"
1966    /// Attach shadow path
1967    #[expect(unsafe_code)]
1968    fn attach_declarative_shadow(
1969        &self,
1970        host: &Dom<Node>,
1971        template: &Dom<Node>,
1972        attributes: &[Attribute],
1973    ) -> bool {
1974        // TODO: https://github.com/servo/servo/issues/42839
1975        let mut cx = unsafe { temp_cx() };
1976        let cx = &mut cx;
1977
1978        attach_declarative_shadow_inner(cx, host, template, attributes)
1979    }
1980
1981    #[expect(unsafe_code)]
1982    fn maybe_clone_an_option_into_selectedcontent(&self, option: &Self::Handle) {
1983        // TODO: https://github.com/servo/servo/issues/42839
1984        let mut cx = unsafe { temp_cx() };
1985        let cx = &mut cx;
1986
1987        let Some(option) = option.downcast::<HTMLOptionElement>() else {
1988            if cfg!(debug_assertions) {
1989                unreachable!();
1990            }
1991            log::error!(
1992                "Received non-option element in maybe_clone_an_option_into_selectedcontent"
1993            );
1994            return;
1995        };
1996
1997        option.maybe_clone_an_option_into_selectedcontent(cx)
1998    }
1999}
2000
2001/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
2002#[expect(clippy::too_many_arguments)]
2003fn create_element_for_token(
2004    cx: &mut JSContext,
2005    name: QualName,
2006    attrs: Vec<ElementAttribute>,
2007    document: &Document,
2008    creator: ElementCreator,
2009    parsing_algorithm: ParsingAlgorithm,
2010    custom_element_reaction_stack: &CustomElementReactionStack,
2011    had_duplicate_attributes: bool,
2012) -> DomRoot<Element> {
2013    // Step 1. If the active speculative HTML parser is not null, then return the result
2014    // of creating a speculative mock element given namespace, token's tag name, and
2015    // token's attributes.
2016    // TODO: Implement
2017
2018    // Step 2: Otherwise, optionally create a speculative mock element given namespace,
2019    // token's tag name, and token's attributes
2020    // TODO: Implement.
2021
2022    // Step 3. Let document be intendedParent's node document.
2023    // Passed as argument.
2024
2025    // Step 4. Let localName be token's tag name.
2026    // Passed as argument
2027
2028    // Step 5. Let is be the value of the "is" attribute in token, if such an attribute
2029    // exists; otherwise null.
2030    let is = attrs
2031        .iter()
2032        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
2033        .map(|attr| LocalName::from(&attr.value));
2034
2035    // Step 6. Let registry be the result of looking up a custom element registry given intendedParent.
2036    // TODO: Implement registries other than `Document`.
2037
2038    // Step 7. Let definition be the result of looking up a custom element definition
2039    // given registry, namespace, localName, and is.
2040    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());
2041
2042    // Step 8. Let willExecuteScript be true if definition is non-null and the parser was
2043    // not created as part of the HTML fragment parsing algorithm; otherwise false.
2044    let will_execute_script =
2045        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;
2046
2047    // Step 9. If willExecuteScript is true:
2048    if will_execute_script {
2049        // Step 9.1. Increment document's throw-on-dynamic-markup-insertion counter.
2050        document.increment_throw_on_dynamic_markup_insertion_counter();
2051        // Step 6.2. If the JavaScript execution context stack is empty, then perform a
2052        // microtask checkpoint.
2053        if is_execution_stack_empty() {
2054            document.window().perform_a_microtask_checkpoint(cx);
2055        }
2056        // Step 9.3. Push a new element queue onto document's relevant agent's custom
2057        // element reactions stack.
2058        custom_element_reaction_stack.push_new_element_queue()
2059    }
2060
2061    // Step 10. Let element be the result of creating an element given document,
2062    // localName, namespace, null, is, willExecuteScript, and registry.
2063    let creation_mode = if will_execute_script {
2064        CustomElementCreationMode::Synchronous
2065    } else {
2066        CustomElementCreationMode::Asynchronous
2067    };
2068    let element = Element::create(cx, name, is, document, creator, creation_mode, None);
2069
2070    // Step 11. Append each attribute in the given token to element.
2071    for attr in attrs {
2072        element.set_attribute_from_parser(cx, attr.name, attr.value, None);
2073    }
2074
2075    // Record if the tokenizer saw duplicate attributes on this element,
2076    // used for CSP nonce validation (step 3 of "is element nonceable").
2077    if had_duplicate_attributes {
2078        element.set_had_duplicate_attributes();
2079    }
2080
2081    // Step 12. If willExecuteScript is true:
2082    if will_execute_script {
2083        // Step 12.1. Let queue be the result of popping from document's relevant agent's
2084        // custom element reactions stack. (This will be the same element queue as was
2085        // pushed above.)
2086        // Step 12.2 Invoke custom element reactions in queue.
2087        custom_element_reaction_stack.pop_current_element_queue(cx);
2088        // Step 12.3. Decrement document's throw-on-dynamic-markup-insertion counter.
2089        document.decrement_throw_on_dynamic_markup_insertion_counter();
2090    }
2091
2092    // Step 13. If element has an xmlns attribute in the XMLNS namespace whose value is
2093    // not exactly the same as the element's namespace, that is a parse error. Similarly,
2094    // if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not
2095    // the XLink Namespace, that is a parse error.
2096    // TODO: Implement.
2097
2098    // Step 14. If element is a resettable element and not a form-associated custom
2099    // element, then invoke its reset algorithm. (This initializes the element's value and
2100    // checkedness based on the element's attributes.)
2101    if let Some(html_element) = element.downcast::<HTMLElement>() &&
2102        element.is_resettable() &&
2103        !html_element.is_form_associated_custom_element()
2104    {
2105        element.reset(cx);
2106    }
2107
2108    // Step 15. If element is a form-associated element and not a form-associated custom
2109    // element, the form element pointer is not null, there is no template element on the
2110    // stack of open elements, element is either not listed or doesn't have a form attribute,
2111    // and the intendedParent is in the same tree as the element pointed to by the form
2112    // element pointer, then associate element with the form element pointed to by the form
2113    // element pointer and set element's parser inserted flag.
2114    // TODO: Implement
2115
2116    // Step 16. Return element.
2117    element
2118}
2119
2120fn attach_declarative_shadow_inner(
2121    cx: &mut JSContext,
2122    host: &Node,
2123    template: &Node,
2124    attributes: &[Attribute],
2125) -> bool {
2126    let host_element = host.downcast::<Element>().unwrap();
2127
2128    if host_element.shadow_root().is_some() {
2129        return false;
2130    }
2131
2132    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
2133
2134    // Step 3. Let mode be templateStartTag's shadowrootmode attribute's value.
2135    // Step 4. Let slotAssignment be "named".
2136    // Step 5. If templateStartTag's shadowrootslotassignment attribute is in
2137    // the Manual state, then set slotAssignment to "manual".
2138    // Step 6. Let clonable be true if templateStartTag has a shadowrootclonable attribute; otherwise false.
2139    // Step 7. Let serializable be true if templateStartTag has a shadowrootserializable
2140    // attribute; otherwise false.
2141    // Step 8. Let delegatesFocus be true if templateStartTag has a shadowrootdelegatesfocus
2142    // attribute; otherwise false.
2143    let mut shadow_root_mode = ShadowRootMode::Open;
2144    let mut slot_assignment_mode = SlotAssignmentMode::Named;
2145    let mut clonable = false;
2146    let mut delegatesfocus = false;
2147    let mut serializable = false;
2148
2149    attributes
2150        .iter()
2151        .for_each(|attr: &Attribute| match attr.name.local {
2152            local_name!("shadowrootmode") => {
2153                if attr.value.eq_ignore_ascii_case("open") {
2154                    shadow_root_mode = ShadowRootMode::Open;
2155                } else if attr.value.eq_ignore_ascii_case("closed") {
2156                    shadow_root_mode = ShadowRootMode::Closed;
2157                } else {
2158                    unreachable!("shadowrootmode value is not open nor closed");
2159                }
2160            },
2161            local_name!("shadowrootclonable") => {
2162                clonable = true;
2163            },
2164            local_name!("shadowrootdelegatesfocus") => {
2165                delegatesfocus = true;
2166            },
2167            local_name!("shadowrootserializable") => {
2168                serializable = true;
2169            },
2170            local_name!("shadowrootslotassignment") => {
2171                if attr.value.eq_ignore_ascii_case("manual") {
2172                    slot_assignment_mode = SlotAssignmentMode::Manual;
2173                }
2174            },
2175            _ => {},
2176        });
2177
2178    // Step 8.1. Attach a shadow root with declarative shadow host element,
2179    // mode, clonable, serializable, delegatesFocus, and "named".
2180    match host_element.attach_shadow(
2181        cx,
2182        IsUserAgentWidget::No,
2183        shadow_root_mode,
2184        clonable,
2185        serializable,
2186        delegatesfocus,
2187        slot_assignment_mode,
2188    ) {
2189        Ok(shadow_root) => {
2190            // Step 8.3. Set shadow's declarative to true.
2191            shadow_root.set_declarative(true);
2192
2193            // Set 8.4. Set template's template contents property to shadow.
2194            let shadow = shadow_root.upcast::<DocumentFragment>();
2195            template_element.set_contents(Some(shadow));
2196
2197            // Step 8.5. Set shadow’s available to element internals to true.
2198            shadow_root.set_available_to_element_internals(true);
2199
2200            true
2201        },
2202        Err(_) => false,
2203    }
2204}