Skip to main content

script/dom/servoparser/
mod.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5use std::borrow::Cow;
6use std::cell::{Cell, RefCell};
7use std::mem;
8use std::rc::Rc;
9
10use base64::Engine as _;
11use base64::engine::general_purpose;
12use content_security_policy::sandboxing_directive::SandboxingFlagSet;
13use devtools_traits::ScriptToDevtoolsControlMsg;
14use dom_struct::dom_struct;
15use embedder_traits::resources::{self, Resource};
16use encoding_rs::{Encoding, UTF_8};
17use html5ever::buffer_queue::BufferQueue;
18use html5ever::tendril::StrTendril;
19use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
20use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
21use hyper_serde::Serde;
22use js::context::JSContext;
23use markup5ever::TokenizerResult;
24use mime::{self, Mime};
25use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
26use net_traits::policy_container::PolicyContainer;
27use net_traits::request::RequestId;
28use net_traits::{
29    FetchMetadata, LoadContext, Metadata, NetworkError, ReferrerPolicy, ResourceFetchTiming,
30};
31use profile_traits::time::{
32    ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
33};
34use profile_traits::time_profile;
35use script_bindings::cell::DomRefCell;
36use script_bindings::reflector::{Reflector, reflect_dom_object};
37use script_bindings::script_runtime::temp_cx;
38use script_traits::DocumentActivity;
39use servo_base::id::{PipelineId, WebViewId};
40use servo_config::pref;
41use servo_constellation_traits::{LoadOrigin, TargetSnapshotParams};
42use servo_url::{MutableOrigin, ServoUrl};
43use style::context::QuirksMode as ServoQuirksMode;
44use tendril::stream::LossyDecoder;
45use tendril::{ByteTendril, TendrilSink};
46
47use crate::document_loader::{DocumentLoader, LoadType};
48use crate::dom::SuppressObserver;
49use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
50    DocumentMethods, DocumentReadyState,
51};
52use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
53use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
54use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
55use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
56use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
57    ShadowRootMode, SlotAssignmentMode,
58};
59use crate::dom::bindings::inheritance::Castable;
60use crate::dom::bindings::refcounted::Trusted;
61use crate::dom::bindings::reflector::DomGlobal;
62use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
63use crate::dom::bindings::settings_stack::is_execution_stack_empty;
64use crate::dom::bindings::str::{DOMString, USVString};
65use crate::dom::characterdata::CharacterData;
66use crate::dom::comment::Comment;
67use crate::dom::csp::{Violation, parse_csp_list_from_metadata};
68use crate::dom::customelementregistry::CustomElementReactionStack;
69use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
70use crate::dom::documentfragment::DocumentFragment;
71use crate::dom::documenttype::DocumentType;
72use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
73use crate::dom::globalscope::GlobalScope;
74use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
75use crate::dom::html::htmlimageelement::HTMLImageElement;
76use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
77use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
78use crate::dom::iterators::ShadowIncluding;
79use crate::dom::node::Node;
80use crate::dom::node::virtualmethods::vtable_for;
81use crate::dom::performance::performanceentry::PerformanceEntry;
82use crate::dom::performance::performancenavigationtiming::PerformanceNavigationTiming;
83use crate::dom::processinginstruction::ProcessingInstruction;
84use crate::dom::processingoptions::{
85    LinkHeader, LinkProcessingPhase, extract_links_from_headers, process_link_headers,
86};
87use crate::dom::reporting::reportingendpoint::ReportingEndpoint;
88use crate::dom::security::csp::CspReporting;
89use crate::dom::security::xframeoptions::check_a_navigation_response_adherence_to_x_frame_options;
90use crate::dom::shadowroot::IsUserAgentWidget;
91use crate::dom::text::Text;
92use crate::dom::types::{HTMLElement, HTMLMediaElement, HTMLOptionElement};
93use crate::navigation::determine_the_origin;
94use crate::network_listener::FetchResponseListener;
95use crate::realms::enter_auto_realm;
96use crate::script_runtime::{CanGc, IntroductionType};
97use crate::script_thread::ScriptThread;
98
99mod async_html;
100pub(crate) mod encoding;
101pub(crate) mod html;
102mod prefetch;
103mod xml;
104
105use encoding::{NetworkDecoderState, NetworkSink};
106pub(crate) use html::serialize_html_fragment;
107
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    /// The reflector of this DOM object, set up through `reflect_dom_object` in `new`.
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The decoder used for the network input.
    network_decoder: DomRefCell<NetworkDecoderState>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#stop-parsing>
    stopped: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    ///
    /// Set when the parser is constructed with `ParserKind::ScriptCreated`.
    script_created_parser: bool,
    /// A decoder exclusively for input to the prefetch tokenizer.
    ///
    /// Unlike the actual decoder, this one takes a best guess at the encoding and starts
    /// decoding immediately.
    #[no_trace]
    prefetch_decoder: RefCell<LossyDecoder<NetworkSink>>,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// Output of the prefetch decoder, queued up as input for the prefetch tokenizer.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    ///
    /// `None` unless a devtools channel exists and the document has a browsing context
    /// at the time the parser is created.
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
166
/// A qualified attribute name paired with its value.
pub(crate) struct ElementAttribute {
    /// The qualified name of the attribute.
    name: QualName,
    /// The value of the attribute.
    value: DOMString,
}
171
/// Tells an HTML tokenizer which parsing algorithm it is being created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Regular document parsing; passed by `ServoParser::parse_html_document` and
    /// `ServoParser::parse_html_script_input`.
    Normal,
    /// Fragment parsing; passed by `ServoParser::parse_html_fragment`.
    Fragment,
}
177
178impl ElementAttribute {
179    pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
180        ElementAttribute { name, value }
181    }
182}
183
184impl ServoParser {
    /// Returns exactly what [`ServoParser::can_write`] returns: `true` when this parser is
    /// script-created or its script nesting level is greater than zero.
    // NOTE(review): the name reads as "not active" while the value is the write check —
    // confirm against the callers that this is the intended meaning.
    pub(crate) fn parser_is_not_active(&self) -> bool {
        self.can_write()
    }
188
189    /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
190    pub(crate) fn parse_html_document(
191        cx: &mut JSContext,
192        document: &Document,
193        input: Option<DOMString>,
194        url: ServoUrl,
195        encoding_hint_from_content_type: Option<&'static Encoding>,
196        encoding_of_container_document: Option<&'static Encoding>,
197    ) {
198        // Step 1. Set document's type to "html".
199        //
200        // Set by callers of this function and asserted here
201        assert!(document.is_html_document());
202
203        // Step 2. Create an HTML parser parser, associated with document.
204        let parser = ServoParser::new(
205            document,
206            if pref!(dom_servoparser_async_html_tokenizer_enabled) {
207                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None))
208            } else {
209                Tokenizer::Html(self::html::Tokenizer::new(
210                    document,
211                    url,
212                    None,
213                    ParsingAlgorithm::Normal,
214                ))
215            },
216            ParserKind::Normal,
217            encoding_hint_from_content_type,
218            encoding_of_container_document,
219            CanGc::from_cx(cx),
220        );
221
222        // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
223        // Step 4. Start parser and let it run until it has consumed all the
224        // characters just inserted into the input stream.
225        //
226        // Set as the document's current parser and initialize with `input`, if given.
227        if let Some(input) = input {
228            parser.parse_complete_string_chunk(cx, String::from(input));
229        } else {
230            parser.document.set_current_parser(Some(&parser));
231        }
232    }
233
    /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
    ///
    /// Parses `input` with the fragment algorithm, using `context` as the context element.
    ///
    /// A fresh, inactive HTML document without a browsing context is created for the
    /// parse; it takes its URL, origin, quirks mode and several other settings from the
    /// node document of `context`. The returned iterator walks the children of that
    /// document's root element, and each node is removed from its parent (via
    /// `remove_self`) at the moment it is yielded.
    ///
    /// Panics if the parsed document ends up without a document element.
    pub(crate) fn parse_html_fragment<'el>(
        cx: &mut JSContext,
        context: &'el Element,
        input: DOMString,
        allow_declarative_shadow_roots: bool,
    ) -> impl Iterator<Item = DomRoot<Node>> + use<'el> {
        let context_node = context.upcast::<Node>();
        let context_document = context_node.owner_doc();
        let window = context_document.window();
        let url = context_document.url();

        // Step 1. Let document be a Document node whose type is "html".
        //
        // The loader shares the resource threads of the context document.
        let loader = DocumentLoader::new_with_threads(
            context_document.loader().resource_threads().clone(),
            Some(url.clone()),
        );
        let document = Document::new(
            cx,
            window,
            HasBrowsingContext::No,
            Some(url.clone()),
            context_document.about_base_url(),
            context_document.origin().clone(),
            IsHTMLDocument::HTMLDocument,
            None,
            None,
            DocumentActivity::Inactive,
            DocumentSource::FromParser,
            loader,
            None,
            None,
            Default::default(),
            false,
            allow_declarative_shadow_roots,
            Some(context_document.insecure_requests_policy()),
            context_document.has_trustworthy_ancestor_or_current_origin(),
            context_document.custom_element_reaction_stack(),
            context_document.creation_sandboxing_flag_set(),
            context_document.pipeline_id(),
            context_document.image_cache(),
        );

        // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
        // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
        // mode to "limited-quirks".
        document.set_quirks_mode(context_document.quirks_mode());

        // NOTE: The following steps happened as part of Step 1.
        // Step 4. If allowDeclarativeShadowRoots is true, then set document's
        // allow declarative shadow roots to true.
        // Step 5. Create a new HTML parser, and associate it with document.

        // Step 11.
        // Find the nearest form element among the context node and its ancestors
        // (shadow roots are not crossed).
        let form = context_node
            .inclusive_ancestors(ShadowIncluding::No)
            .find(|element| element.is::<HTMLFormElement>());

        let fragment_context = FragmentContext {
            context_elem: context_node,
            form_elem: form.as_deref(),
            context_element_allows_scripting: context_document.scripting_enabled(),
        };

        // Fragment parsing always uses the synchronous HTML tokenizer and has no
        // encoding hints, since the input is already a string.
        let parser = ServoParser::new(
            &document,
            Tokenizer::Html(self::html::Tokenizer::new(
                &document,
                url,
                Some(fragment_context),
                ParsingAlgorithm::Fragment,
            )),
            ParserKind::Normal,
            None,
            None,
            CanGc::from_cx(cx),
        );
        parser.parse_complete_string_chunk(cx, String::from(input));

        // Step 14.
        // Hand back the children of the root element of the temporary document.
        let root_element = document.GetDocumentElement().expect("no document element");
        FragmentParsingResult {
            inner: root_element.upcast::<Node>().children(),
        }
    }
319
320    pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
321        let parser = ServoParser::new(
322            document,
323            if pref!(dom_servoparser_async_html_tokenizer_enabled) {
324                Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None))
325            } else {
326                Tokenizer::Html(self::html::Tokenizer::new(
327                    document,
328                    url,
329                    None,
330                    ParsingAlgorithm::Normal,
331                ))
332            },
333            ParserKind::ScriptCreated,
334            None,
335            None,
336            CanGc::deprecated_note(),
337        );
338        document.set_current_parser(Some(&parser));
339    }
340
341    pub(crate) fn parse_xml_document(
342        cx: &mut JSContext,
343        document: &Document,
344        input: Option<DOMString>,
345        url: ServoUrl,
346        encoding_hint_from_content_type: Option<&'static Encoding>,
347    ) {
348        let parser = ServoParser::new(
349            document,
350            Tokenizer::Xml(self::xml::Tokenizer::new(document, url)),
351            ParserKind::Normal,
352            encoding_hint_from_content_type,
353            None,
354            CanGc::from_cx(cx),
355        );
356
357        // Set as the document's current parser and initialize with `input`, if given.
358        if let Some(input) = input {
359            parser.parse_complete_string_chunk(cx, String::from(input));
360        } else {
361            parser.document.set_current_parser(Some(&parser));
362        }
363    }
364
    /// Returns the current script nesting level of this parser.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    pub(crate) fn script_nesting_level(&self) -> usize {
        self.script_nesting_level.get()
    }
368
    /// Returns whether this parser was created as a script-created parser.
    ///
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    pub(crate) fn is_script_created(&self) -> bool {
        self.script_created_parser
    }
372
    /// Corresponds to the latter part of the "Otherwise" branch of the 'An end
    /// tag whose tag name is "script"' of
    /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
    ///
    /// This first moves everything from the script input to the beginning of
    /// the network input, effectively resetting the insertion point to just
    /// before the next character to be consumed.
    ///
    ///
    /// ```text
    ///     | ... script input ... network input ...
    ///     ^
    ///     insertion point
    /// ```
    ///
    /// `script` is then executed with `result`, and parsing continues synchronously
    /// unless the parser got suspended again or was aborted while the script ran.
    ///
    /// Panics if the parser is not suspended, or if the script nesting level is not zero.
    pub(crate) fn resume_with_pending_parsing_blocking_script(
        &self,
        cx: &mut JSContext,
        script: &HTMLScriptElement,
        result: ScriptResult,
    ) {
        assert!(self.suspended.get());
        self.suspended.set(false);

        // After the swap the network queue holds the script input; appending the
        // swapped-out chunks (the former network input) behind it yields
        // "script input, then network input" and leaves the script queue empty.
        self.script_input.swap_with(&self.network_input);
        while let Some(chunk) = self.script_input.pop_front() {
            self.network_input.push_back(chunk);
        }

        let script_nesting_level = self.script_nesting_level.get();
        assert_eq!(script_nesting_level, 0);

        // The nesting level is raised only for the duration of the script execution.
        self.script_nesting_level.set(script_nesting_level + 1);
        script.execute(cx, result);
        self.script_nesting_level.set(script_nesting_level);

        // The script may have suspended the parser again or aborted it.
        if !self.suspended.get() && !self.aborted.get() {
            self.parse_sync(cx);
        }
    }
412
    /// Returns whether [`ServoParser::write`] may be called: the parser must either be
    /// script-created or have a script nesting level greater than zero.
    pub(crate) fn can_write(&self) -> bool {
        self.script_created_parser || self.script_nesting_level.get() > 0
    }
416
417    /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
418    pub(crate) fn write(&self, cx: &mut JSContext, text: DOMString) {
419        assert!(self.can_write());
420
421        if self.document.has_pending_parsing_blocking_script() {
422            // There is already a pending parsing blocking script so the
423            // parser is suspended, we just append everything to the
424            // script input and abort these steps.
425            self.script_input.push_back(String::from(text).into());
426            return;
427        }
428
429        // There is no pending parsing blocking script, so all previous calls
430        // to document.write() should have seen their entire input tokenized
431        // and process, with nothing pushed to the parser script input.
432        assert!(self.script_input.is_empty());
433
434        let input = BufferQueue::default();
435        input.push_back(String::from(text).into());
436
437        let profiler_chan = self
438            .document
439            .window()
440            .as_global_scope()
441            .time_profiler_chan()
442            .clone();
443        let profiler_metadata = TimerMetadata {
444            url: self.document.url().as_str().into(),
445            iframe: TimerMetadataFrameType::RootWindow,
446            incremental: TimerMetadataReflowType::FirstReflow,
447        };
448        self.tokenize(cx, |cx, tokenizer| {
449            tokenizer.feed(cx, &input, profiler_chan.clone(), profiler_metadata.clone())
450        });
451
452        if self.suspended.get() {
453            // Parser got suspended, insert remaining input at end of
454            // script input, following anything written by scripts executed
455            // reentrantly during this call.
456            while let Some(chunk) = input.pop_front() {
457                self.script_input.push_back(chunk);
458            }
459            return;
460        }
461
462        assert!(input.is_empty());
463    }
464
465    /// Steps 4-6 of <https://html.spec.whatwg.org/multipage/#dom-document-close>
466    pub(crate) fn close(&self, cx: &mut JSContext) {
467        assert!(self.script_created_parser);
468
469        // Step 4. Insert an explicit "EOF" character at the end of the parser's input stream.
470        self.last_chunk_received.set(true);
471
472        // Step 5. If this's pending parsing-blocking script is not null, then return.
473        if self.suspended.get() {
474            return;
475        }
476
477        // Step 6. Run the tokenizer, processing resulting tokens as they are emitted,
478        // and stopping when the tokenizer reaches the explicit "EOF" character or spins the event loop.
479        self.parse_sync(cx);
480    }
481
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    ///
    /// Marks the parser as aborted, drops all queued input, ends the tokenizer and
    /// detaches the parser from its document, moving the document's ready state to
    /// "interactive" and then "complete" along the way.
    ///
    /// Panics if the parser has already been aborted.
    pub(crate) fn abort(&self, cx: &mut JSContext) {
        assert!(!self.aborted.get());
        self.aborted.set(true);

        // Step 1.
        // Throw away everything still queued in both input streams.
        self.script_input.replace_with(BufferQueue::default());
        self.network_input.replace_with(BufferQueue::default());

        // Step 2.
        self.document
            .set_ready_state(cx, DocumentReadyState::Interactive);

        // Step 3.
        // End the tokenizer and unregister this parser from the document.
        self.tokenizer.end(cx);
        self.document.set_current_parser(None);

        // Step 4.
        self.document
            .set_ready_state(cx, DocumentReadyState::Complete);
    }
503
    /// Returns the current line number as reported by the underlying tokenizer.
    pub(crate) fn get_current_line(&self) -> u32 {
        self.tokenizer.get_current_line()
    }
507
508    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
509    fn new_inherited(
510        document: &Document,
511        tokenizer: Tokenizer,
512        kind: ParserKind,
513        encoding_hint_from_content_type: Option<&'static Encoding>,
514        encoding_of_container_document: Option<&'static Encoding>,
515    ) -> Self {
516        // Store the whole input for the devtools Sources panel, if the devtools server is running
517        // and we are parsing for a document load (not just things like innerHTML).
518        // TODO: check if a devtools client is actually connected and/or wants the sources?
519        let content_for_devtools = (document.global().devtools_chan().is_some() &&
520            document.has_browsing_context())
521        .then_some(DomRefCell::new(String::new()));
522
523        ServoParser {
524            reflector: Reflector::new(),
525            document: Dom::from_ref(document),
526            network_decoder: DomRefCell::new(NetworkDecoderState::new(
527                encoding_hint_from_content_type,
528                encoding_of_container_document,
529            )),
530            network_input: BufferQueue::default(),
531            script_input: BufferQueue::default(),
532            tokenizer,
533            last_chunk_received: Cell::new(false),
534            suspended: Default::default(),
535            script_nesting_level: Default::default(),
536            aborted: Default::default(),
537            stopped: Default::default(),
538            script_created_parser: kind == ParserKind::ScriptCreated,
539            prefetch_decoder: RefCell::new(LossyDecoder::new_encoding_rs(
540                encoding_hint_from_content_type.unwrap_or(UTF_8),
541                Default::default(),
542            )),
543            prefetch_tokenizer: prefetch::Tokenizer::new(document),
544            prefetch_input: BufferQueue::default(),
545            content_for_devtools,
546        }
547    }
548
549    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
550    fn new(
551        document: &Document,
552        tokenizer: Tokenizer,
553        kind: ParserKind,
554        encoding_hint_from_content_type: Option<&'static Encoding>,
555        encoding_of_container_document: Option<&'static Encoding>,
556        can_gc: CanGc,
557    ) -> DomRoot<Self> {
558        reflect_dom_object(
559            Box::new(ServoParser::new_inherited(
560                document,
561                tokenizer,
562                kind,
563                encoding_hint_from_content_type,
564                encoding_of_container_document,
565            )),
566            document.window(),
567            can_gc,
568        )
569    }
570
571    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
572        if let Some(mut content_for_devtools) = self
573            .content_for_devtools
574            .as_ref()
575            .map(|content| content.borrow_mut())
576        {
577            // TODO: append these chunks more efficiently
578            content_for_devtools.push_str(chunk.as_ref());
579        }
580
581        if chunk.is_empty() {
582            return;
583        }
584
585        // Push the chunk into the network input stream,
586        // which is tokenized lazily.
587        self.network_input.push_back(chunk);
588    }
589
590    fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
591        // For byte input, we convert it to text using the network decoder.
592        if let Some(decoded_chunk) = self
593            .network_decoder
594            .borrow_mut()
595            .push(&chunk, &self.document)
596        {
597            self.push_tendril_input_chunk(decoded_chunk);
598        }
599
600        if self.should_prefetch() {
601            // Push the chunk into the prefetch input stream,
602            // which is tokenized eagerly, to scan for resources
603            // to prefetch. If the user script uses `document.write()`
604            // to overwrite the network input, this prefetching may
605            // have been wasted, but in most cases it won't.
606            let mut prefetch_decoder = self.prefetch_decoder.borrow_mut();
607            prefetch_decoder.process(ByteTendril::from(&*chunk));
608
609            self.prefetch_input
610                .push_back(mem::take(&mut prefetch_decoder.inner_sink_mut().output));
611            self.prefetch_tokenizer.feed(&self.prefetch_input);
612        }
613    }
614
    /// Returns whether network input should also be scanned for resources to prefetch,
    /// which is the case only when the document has a browsing context.
    fn should_prefetch(&self) -> bool {
        // Per https://github.com/whatwg/html/issues/1495
        // stylesheets should not be loaded for documents
        // without browsing contexts.
        // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
        // suggests that no content should be preloaded in such a case.
        // We're conservative, and only prefetch for documents
        // with browsing contexts.
        self.document.browsing_context().is_some()
    }
625
626    fn push_string_input_chunk(&self, chunk: String) {
627        // The input has already been decoded as a string, so doesn't need
628        // to be decoded by the network decoder again.
629        let chunk = StrTendril::from(chunk);
630        self.push_tendril_input_chunk(chunk);
631    }
632
    /// Feeds the queued network input to the tokenizer.
    ///
    /// Once the last chunk has been received, the network decoder is finished first and
    /// any remaining decoded text is queued. Nothing is tokenized if the parser has been
    /// aborted. If tokenizing ends without the parser being suspended, the network input
    /// must be fully consumed, and the parse is finished when no more input is expected.
    ///
    /// Panics if the script input is not empty on entry.
    fn parse_sync(&self, cx: &mut JSContext) {
        assert!(self.script_input.is_empty());

        // This parser will continue to parse while there is either pending input or
        // the parser remains unsuspended.

        if self.last_chunk_received.get() {
            // No more bytes will arrive: collect whatever the decoder still holds.
            let chunk = self.network_decoder.borrow_mut().finish(&self.document);
            if !chunk.is_empty() {
                self.push_tendril_input_chunk(chunk);
            }
        }

        if self.aborted.get() {
            return;
        }

        let profiler_chan = self
            .document
            .window()
            .as_global_scope()
            .time_profiler_chan()
            .clone();
        let profiler_metadata = TimerMetadata {
            url: self.document.url().as_str().into(),
            iframe: TimerMetadataFrameType::RootWindow,
            incremental: TimerMetadataReflowType::FirstReflow,
        };
        self.tokenize(cx, |cx, tokenizer| {
            tokenizer.feed(
                cx,
                &self.network_input,
                profiler_chan.clone(),
                profiler_metadata.clone(),
            )
        });

        // A suspended parser keeps its remaining input for when it is resumed.
        if self.suspended.get() {
            return;
        }

        assert!(self.network_input.is_empty());

        if self.last_chunk_received.get() {
            self.finish(cx);
        }
    }
680
681    fn parse_complete_string_chunk(&self, cx: &mut JSContext, input: String) {
682        self.document.set_current_parser(Some(self));
683        self.push_string_input_chunk(input);
684        self.last_chunk_received.set(true);
685        if !self.suspended.get() {
686            self.parse_sync(cx);
687        }
688    }
689
    /// Registers this parser as the document's current parser, queues a chunk of raw
    /// network bytes, and parses unless the parser is suspended. Unlike
    /// `parse_complete_string_chunk`, this does not mark the input as complete.
    fn parse_bytes_chunk(&self, cx: &mut JSContext, input: Vec<u8>) {
        // Enter the document's realm; `cx` is shadowed by the realm's context for the
        // remainder of this function.
        let mut realm = enter_auto_realm(cx, &*self.document);
        let cx = &mut realm.current_realm();
        self.document.set_current_parser(Some(self));
        self.push_bytes_input_chunk(input);
        if !self.suspended.get() {
            self.parse_sync(cx);
        }
    }
699
    /// Drives the tokenizer by calling `feed` repeatedly.
    ///
    /// The loop ends when `feed` reports `TokenizerResult::Done`, or when a script
    /// returned by `feed` leaves a pending parsing-blocking script behind (the parser is
    /// then marked suspended) or aborts the parser. An encoding indicator result simply
    /// restarts the loop.
    ///
    /// Panics if the parser is suspended or aborted at the start of an iteration.
    fn tokenize<F>(&self, cx: &mut JSContext, feed: F)
    where
        F: Fn(&mut JSContext, &Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
    {
        loop {
            assert!(!self.suspended.get());
            assert!(!self.aborted.get());

            self.document.window().reflow_if_reflow_timer_expired(cx);
            let script = match feed(cx, &self.tokenizer) {
                TokenizerResult::Done => return,
                TokenizerResult::EncodingIndicator(_) => continue,
                TokenizerResult::Script(script) => script,
            };

            // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
            // branch "An end tag whose tag name is "script"
            // The spec says to perform the microtask checkpoint before
            // setting the insertion mode back from Text, but this is not
            // possible with the way servo and html5ever currently
            // relate to each other, and hopefully it is not observable.
            if is_execution_stack_empty() {
                self.document.window().perform_a_microtask_checkpoint(cx);
            }

            let script_nesting_level = self.script_nesting_level.get();

            // The nesting level is raised only while the script is being prepared.
            self.script_nesting_level.set(script_nesting_level + 1);
            script.set_initial_script_text();
            // A script encountered while another script is already running (nesting
            // level above zero before the increment) gets the injected-script
            // introduction type.
            let introduction_type_override =
                (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
            script.prepare(cx, introduction_type_override);
            self.script_nesting_level.set(script_nesting_level);

            // The pending-script check comes first, so the parser is marked suspended
            // even if it was also aborted.
            if self.document.has_pending_parsing_blocking_script() {
                self.suspended.set(true);
                return;
            }
            if self.aborted.get() {
                return;
            }
        }
    }
743
    /// Returns whether [`ServoParser::abort`] has been called on this parser.
    ///
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    pub(crate) fn has_aborted(&self) -> bool {
        self.aborted.get()
    }
748
    /// Returns whether this parser has stopped, i.e. whether `finish` has run.
    ///
    /// <https://html.spec.whatwg.org/multipage/#stop-parsing>
    pub(crate) fn has_stopped(&self) -> bool {
        self.stopped.get()
    }
753
    /// <https://html.spec.whatwg.org/multipage/#the-end>
    ///
    /// Ends the parse: marks the parser as stopped, ends the tokenizer, moves the
    /// document to the "interactive" ready state, detaches the parser from the document
    /// and finishes the page-source load. If source text was collected for devtools, it
    /// is sent to the devtools channel afterwards.
    ///
    /// Panics unless the parser is unsuspended, has received its last chunk, has no
    /// queued input left and its network decoder is finished.
    fn finish(&self, cx: &mut JSContext) {
        assert!(!self.suspended.get());
        assert!(self.last_chunk_received.get());
        assert!(self.script_input.is_empty());
        assert!(self.network_input.is_empty());
        assert!(self.network_decoder.borrow().is_finished());

        self.stopped.set(true);

        // Step 1. If the active speculative HTML parser is not null,
        // then stop the speculative HTML parser and return.
        // TODO
        // Step 2. Set the insertion point to undefined.
        self.tokenizer.end(cx);
        // Step 3. Update the current document readiness to "interactive".
        self.document
            .set_ready_state(cx, DocumentReadyState::Interactive);
        // Step 4. Pop all the nodes off the stack of open elements.
        self.document.set_current_parser(None);
        // Step 5. While the list of scripts that will execute when the document has finished parsing is not empty:
        self.document.start_the_end_loading_phase();
        let url = self.tokenizer.url().clone();
        self.document.finish_load(LoadType::PageSource(url), cx);

        // Send the source contents to devtools, if needed.
        // Taking the string leaves the collected buffer empty.
        if let Some(content_for_devtools) = self
            .content_for_devtools
            .as_ref()
            .map(|content| content.take())
        {
            let global = self.document.global();
            // Source text is only collected when a devtools channel existed at construction.
            let chan = global.devtools_chan().expect("Guaranteed by new");
            let pipeline_id = self.document.global().pipeline_id();
            // A failed send is deliberately ignored.
            let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
                pipeline_id,
                content_for_devtools,
            ));
        }
    }
794}
795
/// Iterator adaptor around an inner iterator of rooted [`Node`]s.
///
/// Its [`Iterator`] implementation calls `remove_self` on every node before
/// yielding it.
struct FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    /// The wrapped iterator that supplies the nodes.
    inner: I,
}
802
803impl<I> Iterator for FragmentParsingResult<I>
804where
805    I: Iterator<Item = DomRoot<Node>>,
806{
807    type Item = DomRoot<Node>;
808
809    #[expect(unsafe_code)]
810    fn next(&mut self) -> Option<DomRoot<Node>> {
811        let mut cx = unsafe { script_bindings::script_runtime::temp_cx() };
812        let cx = &mut cx;
813
814        let next = self.inner.next()?;
815        next.remove_self(cx);
816        Some(next)
817    }
818
819    fn size_hint(&self) -> (usize, Option<usize>) {
820        self.inner.size_hint()
821    }
822}
823
/// The kind of a parser.
///
/// NOTE(review): `ScriptCreated` presumably corresponds to the HTML spec's
/// "script-created parser" — confirm against the code that constructs parsers.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    Normal,
    ScriptCreated,
}
829
/// The concrete tokenizer backing a parser: one variant per tokenizer
/// implementation in the `html`, `async_html` and `xml` submodules.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// Tokenizer from the `html` submodule.
    Html(self::html::Tokenizer),
    /// Tokenizer from the `async_html` submodule.
    AsyncHtml(self::async_html::Tokenizer),
    /// Tokenizer from the `xml` submodule.
    Xml(self::xml::Tokenizer),
}
837
838impl Tokenizer {
839    fn feed(
840        &self,
841        cx: &mut JSContext,
842        input: &BufferQueue,
843        profiler_chan: ProfilerChan,
844        profiler_metadata: TimerMetadata,
845    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
846        match *self {
847            Tokenizer::Html(ref tokenizer) => time_profile!(
848                ProfilerCategory::ScriptParseHTML,
849                Some(profiler_metadata),
850                profiler_chan,
851                || tokenizer.feed(input),
852            ),
853            Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
854                ProfilerCategory::ScriptParseHTML,
855                Some(profiler_metadata),
856                profiler_chan,
857                || tokenizer.feed(input, cx),
858            ),
859            Tokenizer::Xml(ref tokenizer) => time_profile!(
860                ProfilerCategory::ScriptParseXML,
861                Some(profiler_metadata),
862                profiler_chan,
863                || tokenizer.feed(input),
864            ),
865        }
866    }
867
868    fn end(&self, cx: &mut JSContext) {
869        match *self {
870            Tokenizer::Html(ref tokenizer) => tokenizer.end(),
871            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(cx),
872            Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
873        }
874    }
875
876    fn url(&self) -> &ServoUrl {
877        match *self {
878            Tokenizer::Html(ref tokenizer) => tokenizer.url(),
879            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
880            Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
881        }
882    }
883
884    fn set_plaintext_state(&self) {
885        match *self {
886            Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
887            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
888            Tokenizer::Xml(_) => unimplemented!(),
889        }
890    }
891
892    fn get_current_line(&self) -> u32 {
893        match *self {
894            Tokenizer::Html(ref tokenizer) => tokenizer.get_current_line(),
895            Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.get_current_line(),
896            Tokenizer::Xml(ref tokenizer) => tokenizer.get_current_line(),
897        }
898    }
899}
900
/// Response-derived state consumed by the document loading algorithms.
///
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// This does not mirror the fields of the spec struct, but mimics its intent
/// when used in the "loading a document" spec algorithms.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// Content type of this document, if known; otherwise it has to be sniffed.
    content_type: Option<Mime>,
    /// Link headers from the response.
    link_headers: Vec<LinkHeader>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// Leading bytes of the response, used for MIME sniffing.
    ///
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    resource_header: Vec<u8>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-about-base-url>
    about_base_url: Option<ServoUrl>,
}
918
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request; `None` until `process_response`
    /// has obtained one.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document (set when a media document or
    /// inline error/unknown content is generated).
    is_synthesized_document: bool,
    /// Has a document already been loaded (relevant for checking the resource header).
    has_loaded_document: bool,
    /// The [`WebViewId`] of the `WebView` associated with this document.
    webview_id: WebViewId,
    /// The [`PipelineId`] of the `Pipeline` associated with this document.
    pipeline_id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Index returned when the navigation timing entry was queued in
    /// `submit_resource_timing`.
    pushed_entry_index: Option<usize>,
    /// Params required in the document load algorithms.
    navigation_params: NavigationParams,
    /// To report CSP violations to the global that initiated the navigation.
    parent_info: Option<PipelineId>,
    /// Snapshot params of the navigation target; their sandboxing flags are
    /// unioned into the final sandboxing flag set in `process_response`.
    target_snapshot_params: TargetSnapshotParams,
    /// Origin of the load; a `LoadOrigin::Script` snapshot is used as the
    /// source origin when determining the document's origin.
    load_origin: LoadOrigin,
}
943
944impl ParserContext {
945    pub(crate) fn new(
946        webview_id: WebViewId,
947        pipeline_id: PipelineId,
948        url: ServoUrl,
949        creation_sandboxing_flag_set: SandboxingFlagSet,
950        parent_info: Option<PipelineId>,
951        target_snapshot_params: TargetSnapshotParams,
952        load_origin: LoadOrigin,
953    ) -> ParserContext {
954        ParserContext {
955            parser: None,
956            is_synthesized_document: false,
957            has_loaded_document: false,
958            webview_id,
959            pipeline_id,
960            url,
961            parent_info,
962            pushed_entry_index: None,
963            navigation_params: NavigationParams {
964                policy_container: Default::default(),
965                content_type: None,
966                link_headers: vec![],
967                final_sandboxing_flag_set: creation_sandboxing_flag_set,
968                resource_header: vec![],
969                about_base_url: Default::default(),
970            },
971            target_snapshot_params,
972            load_origin,
973        }
974    }
975
976    pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
977        let Some(policy_container) = policy_container else {
978            return;
979        };
980        self.navigation_params.policy_container = policy_container.clone();
981    }
982
    /// Replaces the about base URL stored in the navigation params
    /// (`None` clears it).
    pub(crate) fn set_about_base_url(&mut self, about_base_url: Option<ServoUrl>) {
        self.navigation_params.about_base_url = about_base_url;
    }
986
987    pub(crate) fn get_document(&self) -> Option<DomRoot<Document>> {
988        self.parser
989            .as_ref()
990            .map(|parser| parser.root().document.as_rooted())
991    }
992
    /// Returns the stored `parent_info` pipeline id, if any.
    pub(crate) fn parent_info(&self) -> Option<PipelineId> {
        self.parent_info
    }
996
997    /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
998    fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
999        // TODO Step 1. If response's URL's scheme is "blob", then return a clone of response's
1000        // URL's blob URL entry's environment's policy container.
1001
1002        // Step 2. Let result be a new policy container.
1003        // TODO Step 6. Parse Integrity-Policy headers with response and result.
1004        // Step 7. Return result.
1005        PolicyContainer {
1006            // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response.
1007            csp_list: parse_csp_list_from_metadata(&metadata.headers),
1008            // TODO Step 4. If environment is non-null, then set result's embedder policy to the
1009            // result of obtaining an embedder policy given response and environment.
1010            // Otherwise, set it to "unsafe-none".
1011            embedder_policy: Default::default(),
1012            // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY]
1013            referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers),
1014        }
1015    }
1016
1017    /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
1018    fn initialize_document_object(&self, document: &Document) {
1019        // Step 9. Let document be a new Document, with
1020        document.set_policy_container(self.navigation_params.policy_container.clone());
1021        document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set);
1022        document.set_about_base_url(self.navigation_params.about_base_url.clone());
1023        // Step 17. Process link headers given document, navigationParams's response, and "pre-media".
1024        process_link_headers(
1025            &self.navigation_params.link_headers,
1026            document,
1027            LinkProcessingPhase::PreMedia,
1028        );
1029    }
1030
1031    /// Part of various load document methods
1032    fn process_link_headers_in_media_phase_with_task(&mut self, document: &Document) {
1033        // The first task that the networking task source places on the task queue
1034        // while fetching runs must process link headers given document,
1035        // navigationParams's response, and "media", after the task has been processed by the HTML parser.
1036        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1037        if !link_headers.is_empty() {
1038            let window = document.window();
1039            let document = Trusted::new(document);
1040            window
1041                .upcast::<GlobalScope>()
1042                .task_manager()
1043                .networking_task_source()
1044                .queue(task!(process_link_headers_task: move || {
1045                    process_link_headers(&link_headers, &document.root(), LinkProcessingPhase::Media);
1046                }));
1047        }
1048    }
1049
1050    /// <https://html.spec.whatwg.org/multipage/#loading-a-document>
1051    fn load_document(&mut self, cx: &mut JSContext) {
1052        assert!(!self.has_loaded_document);
1053        self.has_loaded_document = true;
1054        let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
1055            return;
1056        };
1057        // Step 1. Let type be the computed type of navigationParams's response.
1058        let content_type = &self.navigation_params.content_type;
1059        let mime_type = MimeClassifier::default().classify(
1060            LoadContext::Browsing,
1061            NoSniffFlag::Off,
1062            ApacheBugFlag::from_content_type(content_type.as_ref()),
1063            content_type,
1064            &self.navigation_params.resource_header,
1065        );
1066        // Step 2. If the user agent has been configured to process resources of the given type using
1067        // some mechanism other than rendering the content in a navigable, then skip this step.
1068        // Otherwise, if the type is one of the following types:
1069        let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
1070            let page = format!(
1071                "<html><body><p>Unknown content type ({}).</p></body></html>",
1072                &mime_type,
1073            );
1074            self.load_inline_unknown_content(cx, parser, page);
1075            return;
1076        };
1077        match media_type {
1078            // Return the result of loading an HTML document, given navigationParams.
1079            MediaType::Html => self.load_html_document(parser),
1080            // Return the result of loading an XML document given navigationParams and type.
1081            MediaType::Xml => self.load_xml_document(parser),
1082            // Return the result of loading a text document given navigationParams and type.
1083            MediaType::JavaScript | MediaType::Text | MediaType::Css => {
1084                self.load_text_document(cx, parser)
1085            },
1086            // Return the result of loading a json document given navigationParams and type.
1087            MediaType::Json => self.load_json_document(cx, parser),
1088            // Return the result of loading a media document given navigationParams and type.
1089            MediaType::Image | MediaType::AudioVideo => {
1090                self.load_media_document(cx, parser, media_type, &mime_type);
1091                return;
1092            },
1093            MediaType::Font => {
1094                let page = format!(
1095                    "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
1096                    &mime_type,
1097                );
1098                self.load_inline_unknown_content(cx, parser, page);
1099                return;
1100            },
1101        };
1102
1103        parser.parse_bytes_chunk(
1104            cx,
1105            std::mem::take(&mut self.navigation_params.resource_header),
1106        );
1107    }
1108
1109    /// <https://html.spec.whatwg.org/multipage/#navigate-html>
1110    fn load_html_document(&mut self, parser: &ServoParser) {
1111        // Step 1. Let document be the result of creating and initializing a
1112        // Document object given "html", "text/html", and navigationParams.
1113        self.initialize_document_object(&parser.document);
1114        // The first task that the networking task source places on the task queue while fetching
1115        // runs must process link headers given document, navigationParams's response, and "media",
1116        // after the task has been processed by the HTML parser.
1117        self.process_link_headers_in_media_phase_with_task(&parser.document);
1118    }
1119
1120    /// <https://html.spec.whatwg.org/multipage/#read-xml>
1121    fn load_xml_document(&mut self, parser: &ServoParser) {
1122        // When faced with displaying an XML file inline, provided navigation params navigationParams
1123        // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
1124        // XML Media Types, DOM, and other relevant specifications to create and initialize a
1125        // Document object document, given "xml", type, and navigationParams, and return that Document.
1126        // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
1127        self.initialize_document_object(&parser.document);
1128        // The first task that the networking task source places on the task queue while fetching
1129        // runs must process link headers given document, navigationParams's response, and "media",
1130        // after the task has been processed by the XML parser.
1131        self.process_link_headers_in_media_phase_with_task(&parser.document);
1132    }
1133
1134    /// <https://html.spec.whatwg.org/multipage/#navigate-text>
1135    fn load_text_document(&mut self, cx: &mut JSContext, parser: &ServoParser) {
1136        // Step 1. Let document be the result of creating and initializing a Document
1137        // object given "html", type, and navigationParams.
1138        self.initialize_document_object(&parser.document);
1139        // Step 4. Create an HTML parser and associate it with the document.
1140        // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by
1141        // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state.
1142        // Each task that the networking task source places on the task queue while fetching runs must then
1143        // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform
1144        // the appropriate processing of the input stream.
1145        let page = "<pre>\n".into();
1146        parser.push_string_input_chunk(page);
1147        parser.parse_sync(cx);
1148        parser.tokenizer.set_plaintext_state();
1149        // The first task that the networking task source places on the task queue while fetching
1150        // runs must process link headers given document, navigationParams's response, and "media",
1151        // after the task has been processed by the HTML parser.
1152        self.process_link_headers_in_media_phase_with_task(&parser.document);
1153    }
1154
1155    /// <https://html.spec.whatwg.org/multipage/#navigate-media>
1156    fn load_media_document(
1157        &mut self,
1158        cx: &mut JSContext,
1159        parser: &ServoParser,
1160        media_type: MediaType,
1161        mime_type: &Mime,
1162    ) {
1163        // Step 1. Let document be the result of creating and initializing a Document
1164        // object given "html", type, and navigationParams.
1165        self.initialize_document_object(&parser.document);
1166        // Step 8. Act as if the user agent had stopped parsing document.
1167        self.is_synthesized_document = true;
1168        parser.last_chunk_received.set(true);
1169        // Step 3. Populate with html/head/body given document.
1170        let page = "<html><body></body></html>".into();
1171        parser.push_string_input_chunk(page);
1172        parser.parse_sync(cx);
1173
1174        let doc = &parser.document;
1175        // Step 5. Set the appropriate attribute of the element host element, as described below,
1176        // to the address of the image, video, or audio resource.
1177        let node = if media_type == MediaType::Image {
1178            let img = Element::create(
1179                cx,
1180                QualName::new(None, ns!(html), local_name!("img")),
1181                None,
1182                doc,
1183                ElementCreator::ParserCreated(1),
1184                CustomElementCreationMode::Asynchronous,
1185                None,
1186            );
1187            let img = DomRoot::downcast::<HTMLImageElement>(img).unwrap();
1188            img.SetSrc(cx, USVString(self.url.to_string()));
1189            DomRoot::upcast::<Node>(img)
1190        } else if mime_type.type_() == mime::AUDIO {
1191            let audio = Element::create(
1192                cx,
1193                QualName::new(None, ns!(html), local_name!("audio")),
1194                None,
1195                doc,
1196                ElementCreator::ParserCreated(1),
1197                CustomElementCreationMode::Asynchronous,
1198                None,
1199            );
1200            let audio = DomRoot::downcast::<HTMLMediaElement>(audio).unwrap();
1201            audio.SetControls(cx, true);
1202            audio.SetSrc(cx, USVString(self.url.to_string()));
1203            DomRoot::upcast::<Node>(audio)
1204        } else {
1205            let video = Element::create(
1206                cx,
1207                QualName::new(None, ns!(html), local_name!("video")),
1208                None,
1209                doc,
1210                ElementCreator::ParserCreated(1),
1211                CustomElementCreationMode::Asynchronous,
1212                None,
1213            );
1214            let video = DomRoot::downcast::<HTMLMediaElement>(video).unwrap();
1215            video.SetControls(cx, true);
1216            video.SetSrc(cx, USVString(self.url.to_string()));
1217            DomRoot::upcast::<Node>(video)
1218        };
1219        // Step 4. Append an element host element for the media, as described below, to the body element.
1220        let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
1221        doc_body.AppendChild(cx, &node).expect("Appending failed");
1222        // Step 7. Process link headers given document, navigationParams's response, and "media".
1223        let link_headers = std::mem::take(&mut self.navigation_params.link_headers);
1224        process_link_headers(&link_headers, doc, LinkProcessingPhase::Media);
1225    }
1226
1227    /// Load a JSON document with a pretty-printing, interactive viewer.
1228    fn load_json_document(&mut self, cx: &mut JSContext, parser: &ServoParser) {
1229        self.initialize_document_object(&parser.document);
1230        parser.push_string_input_chunk(resources::read_string(Resource::JsonViewerHTML));
1231        parser.parse_sync(cx);
1232        parser.tokenizer.set_plaintext_state();
1233        self.process_link_headers_in_media_phase_with_task(&parser.document);
1234    }
1235
    /// Replaces the document's content with the locally generated markup `page`
    /// (used for error pages and unsupported content types).
    ///
    /// Marks this context as a synthesized document and the parser's document
    /// as internal, then parses `page` synchronously as the last chunk.
    ///
    /// <https://html.spec.whatwg.org/multipage/#navigate-ua-inline>
    fn load_inline_unknown_content(
        &mut self,
        cx: &mut JSContext,
        parser: &ServoParser,
        page: String,
    ) {
        self.is_synthesized_document = true;
        parser.document.mark_as_internal();
        parser.push_string_input_chunk(page);
        // Step 7. Act as if the user agent had stopped parsing document.
        // The flag has to be set before `parse_sync` runs.
        parser.last_chunk_received.set(true);
        parser.parse_sync(cx);
    }
1250
1251    /// Store a PerformanceNavigationTiming entry in the globalscope's Performance buffer
1252    fn submit_resource_timing(&mut self, cx: &mut JSContext) {
1253        let Some(parser) = self.parser.as_ref() else {
1254            return;
1255        };
1256        let parser = parser.root();
1257        if parser.aborted.get() {
1258            return;
1259        }
1260
1261        let document = &parser.document;
1262
1263        let performance_entry = PerformanceNavigationTiming::new(cx, &document.global(), document);
1264        self.pushed_entry_index = document
1265            .global()
1266            .performance()
1267            .queue_entry(performance_entry.upcast::<PerformanceEntry>());
1268    }
1269}
1270
1271impl FetchResponseListener for ParserContext {
    /// Intentionally a no-op in this listener implementation.
    fn process_request_body(&mut self, _: RequestId) {}
1273
1274    /// Implements parts of
1275    /// <https://html.spec.whatwg.org/multipage/#attempt-to-populate-the-history-entry's-document>
1276    fn process_response(
1277        &mut self,
1278        cx: &mut JSContext,
1279        _: RequestId,
1280        meta_result: Result<FetchMetadata, NetworkError>,
1281    ) {
1282        let (metadata, mut error) = match meta_result {
1283            Ok(meta) => (
1284                Some(match meta {
1285                    FetchMetadata::Unfiltered(m) => m,
1286                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
1287                }),
1288                None,
1289            ),
1290            Err(error) => (
1291                // Check variant without moving
1292                match &error {
1293                    NetworkError::LoadCancelled => {
1294                        return;
1295                    },
1296                    _ => {
1297                        let mut meta = Metadata::default(self.url.clone());
1298                        let mime: Option<Mime> = "text/html".parse().ok();
1299                        meta.set_content_type(mime.as_ref());
1300                        Some(meta)
1301                    },
1302                },
1303                Some(error),
1304            ),
1305        };
1306        let content_type: Option<Mime> = metadata
1307            .clone()
1308            .and_then(|meta| meta.content_type)
1309            .map(Serde::into_inner)
1310            .map(Into::into);
1311
1312        // <https://html.spec.whatwg.org/multipage/#create-navigation-params-by-fetching>
1313        // Step 21.9. Set responsePolicyContainer to the result of creating a
1314        // policy container from a fetch response given response and request's
1315        // reserved client.
1316        let (policy_container, endpoints_list, link_headers) = match metadata.as_ref() {
1317            None => (PolicyContainer::default(), None, vec![]),
1318            Some(metadata) => (
1319                Self::create_policy_container_from_fetch_response(metadata),
1320                ReportingEndpoint::parse_reporting_endpoints_header(
1321                    &self.url.clone(),
1322                    &metadata.headers,
1323                ),
1324                extract_links_from_headers(&metadata.headers),
1325            ),
1326        };
1327
1328        // Step 21.10. Set finalSandboxFlags to the union of targetSnapshotParams's
1329        // sandboxing flags and responsePolicyContainer's CSP list's CSP-derived
1330        // sandboxing flags.
1331        let final_sandboxing_flag_set = policy_container
1332            .csp_list
1333            .as_ref()
1334            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
1335            .unwrap_or(SandboxingFlagSet::empty())
1336            .union(self.target_snapshot_params.sandboxing_flags);
1337
1338        // Step 21.11. Set responseOrigin to the result of determining the origin
1339        // given response's URL, finalSandboxFlags, and entry's document state's
1340        // initiator origin.
1341        let source_origin = match self.load_origin {
1342            LoadOrigin::Script(ref snapshot) => {
1343                Some(MutableOrigin::from_snapshot(snapshot.clone()))
1344            },
1345            _ => None,
1346        };
1347        let origin = determine_the_origin(
1348            metadata.as_ref().map(|metadata| &metadata.final_url),
1349            final_sandboxing_flag_set,
1350            source_origin,
1351        );
1352
1353        let parser = match ScriptThread::page_headers_available(
1354            self.webview_id,
1355            self.pipeline_id,
1356            metadata.as_ref(),
1357            origin.clone(),
1358            cx,
1359        ) {
1360            Some(parser) => parser,
1361            None => return,
1362        };
1363        if parser.aborted.get() {
1364            return;
1365        }
1366
1367        let mut realm = enter_auto_realm(cx, &*parser.document);
1368        let cx = &mut realm;
1369        let document = &parser.document;
1370        let window = document.window();
1371
1372        // https://html.spec.whatwg.org/multipage/#attempt-to-populate-the-history-entry%27s-document
1373        // Step 4. Otherwise, if any of the following are true:
1374        if
1375        // navigationParams is null;
1376        // TODO
1377        // the result of should navigation response to navigation request of
1378        // type in target be blocked by Content Security Policy? given
1379        // navigationParams's request, navigationParams's response, navigationParams's policy container's CSP list,
1380        // cspNavigationType, and navigable is "Blocked";
1381        policy_container.csp_list.should_navigation_response_to_navigation_request_be_blocked(
1382            cx,
1383            window,
1384            self.url.clone().into_url(),
1385            &origin.immutable().clone().into_url_origin(),
1386        )
1387        // navigationParams's reserved environment is non-null and the result of
1388        // checking a navigation response's adherence to its embedder policy given navigationParams's response,
1389        // navigable, and navigationParams's policy container's embedder policy is false; or
1390        // TODO
1391        // the result of checking a navigation response's adherence to `X-Frame-Options`
1392        // given navigationParams's response, navigable, navigationParams's policy container's CSP list,
1393        // and navigationParams's origin is false,
1394        || !check_a_navigation_response_adherence_to_x_frame_options(
1395            window,
1396            policy_container.csp_list.as_ref(),
1397            &origin,
1398            metadata
1399                .as_ref()
1400                .and_then(|metadata| metadata.headers.as_ref()),
1401        ) {
1402            // Step 4.1. Set entry's document state's document to the result of creating a document for inline content
1403            // that doesn't have a DOM, given navigable, null, navTimingType, and userInvolvement.
1404            // The inline content should indicate to the user the sort of error that occurred.
1405            error = Some(NetworkError::ContentSecurityPolicy);
1406            // Step 4.2. Make document unsalvageable given entry's document state's document and "navigation-failure".
1407            document.make_document_unsalvageable();
1408            // Step 4.3. Set saveExtraDocumentState to false.
1409            // TODO
1410            // Step 4.4. If navigationParams is not null, then:
1411            // TODO
1412        }
1413
1414        if let Some(endpoints) = endpoints_list {
1415            window.set_endpoints_list(endpoints);
1416        }
1417        self.parser = Some(Trusted::new(&*parser));
1418        self.navigation_params = NavigationParams {
1419            policy_container,
1420            content_type,
1421            final_sandboxing_flag_set,
1422            link_headers,
1423            about_base_url: document.about_base_url(),
1424            resource_header: vec![],
1425        };
1426        self.submit_resource_timing(cx);
1427
1428        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
1429        //
1430        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
1431        // e.g., a native rendering of the content or an error message because the specified type is not supported,
1432        // then return the result of creating a document for inline content that doesn't have a DOM given
1433        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
1434        // and navigationParams's user involvement.
1435        if let Some(error) = error {
1436            let page = match error {
1437                NetworkError::SslValidation(reason, bytes) => {
1438                    let page = resources::read_string(Resource::BadCertHTML);
1439                    let page = page.replace("${reason}", &reason);
1440                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
1441                    let page = page.replace("${bytes}", encoded_bytes.as_str());
1442                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
1443                },
1444                NetworkError::BlobURLStoreError(reason) |
1445                NetworkError::WebsocketConnectionFailure(reason) |
1446                NetworkError::HttpError(reason) |
1447                NetworkError::ResourceLoadError(reason) |
1448                NetworkError::MimeType(reason) => {
1449                    let page = resources::read_string(Resource::NetErrorHTML);
1450                    page.replace("${reason}", &reason)
1451                },
1452                NetworkError::Crash(details) => {
1453                    let page = resources::read_string(Resource::CrashHTML);
1454                    page.replace("${details}", &details)
1455                },
1456                NetworkError::UnsupportedScheme |
1457                NetworkError::CorsGeneral |
1458                NetworkError::CrossOriginResponse |
1459                NetworkError::CorsCredentials |
1460                NetworkError::CorsAllowMethods |
1461                NetworkError::CorsAllowHeaders |
1462                NetworkError::CorsMethod |
1463                NetworkError::CorsAuthorization |
1464                NetworkError::CorsHeaders |
1465                NetworkError::ConnectionFailure |
1466                NetworkError::RedirectError |
1467                NetworkError::TooManyRedirects |
1468                NetworkError::TooManyInFlightKeepAliveRequests |
1469                NetworkError::InvalidMethod |
1470                NetworkError::ContentSecurityPolicy |
1471                NetworkError::Nosniff |
1472                NetworkError::SubresourceIntegrity |
1473                NetworkError::MixedContent |
1474                NetworkError::CacheError |
1475                NetworkError::InvalidPort |
1476                NetworkError::LocalDirectoryError |
1477                NetworkError::PartialResponseToNonRangeRequestError |
1478                NetworkError::ProtocolHandlerSubstitutionError |
1479                NetworkError::DecompressionError => {
1480                    let page = resources::read_string(Resource::NetErrorHTML);
1481                    page.replace("${reason}", &format!("{:?}", error))
1482                },
1483                NetworkError::LoadCancelled => {
1484                    // The next load will show a page
1485                    return;
1486                },
1487            };
1488            self.load_inline_unknown_content(cx, &parser, page);
1489        }
1490    }
1491
1492    fn process_response_chunk(&mut self, cx: &mut JSContext, _: RequestId, payload: Vec<u8>) {
1493        if self.is_synthesized_document {
1494            return;
1495        }
1496        let Some(parser) = self.parser.as_ref().map(|p| p.root()) else {
1497            return;
1498        };
1499        if parser.aborted.get() {
1500            return;
1501        }
1502        if !self.has_loaded_document {
1503            // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1504            self.navigation_params
1505                .resource_header
1506                .extend_from_slice(&payload);
1507            // the number of bytes in buffer is greater than or equal to 1445.
1508            if self.navigation_params.resource_header.len() >= 1445 {
1509                self.load_document(cx);
1510            }
1511        } else {
1512            parser.parse_bytes_chunk(cx, payload);
1513        }
1514    }
1515
1516    // This method is called via script_thread::handle_fetch_eof, so we must call
1517    // submit_resource_timing in this function
1518    // Resource listeners are called via net_traits::Action::process, which handles submission for them
1519    fn process_response_eof(
1520        mut self,
1521        cx: &mut JSContext,
1522        _: RequestId,
1523        status: Result<(), NetworkError>,
1524        timing: ResourceFetchTiming,
1525    ) {
1526        let parser = match self.parser.as_ref() {
1527            Some(parser) => parser.root(),
1528            None => return,
1529        };
1530        if parser.aborted.get() || self.is_synthesized_document {
1531            return;
1532        }
1533
1534        if let Err(error) = &status {
1535            // TODO(Savago): we should send a notification to callers #5463.
1536            debug!("Failed to load page URL {}, error: {error:?}", self.url);
1537        }
1538
1539        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
1540        //
1541        // the end of the resource is reached.
1542        if !self.has_loaded_document {
1543            self.load_document(cx);
1544        }
1545
1546        let mut realm = enter_auto_realm(cx, &*parser);
1547        let cx = &mut realm;
1548
1549        if status.is_ok() {
1550            parser.document.set_resource_fetch_timing(timing);
1551        }
1552
1553        parser.last_chunk_received.set(true);
1554        if !parser.suspended.get() {
1555            parser.parse_sync(cx);
1556        }
1557
1558        // TODO: Only update if this is the current document resource.
1559        if let Some(pushed_index) = self.pushed_entry_index {
1560            let document = &parser.document;
1561            let performance_entry =
1562                PerformanceNavigationTiming::new(cx, &document.global(), document);
1563            document
1564                .global()
1565                .performance()
1566                .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
1567        }
1568    }
1569
1570    fn process_csp_violations(&mut self, _: &mut JSContext, _: RequestId, _: Vec<Violation>) {
1571        unreachable!("Script_thread should handle reporting violations for parser contexts");
1572    }
1573}
1574
1575pub(crate) struct FragmentContext<'a> {
1576    pub(crate) context_elem: &'a Node,
1577    pub(crate) form_elem: Option<&'a Node>,
1578    pub(crate) context_element_allows_scripting: bool,
1579}
1580
1581/// <https://html.spec.whatwg.org/multipage/#insert-an-element-at-the-adjusted-insertion-location>
1582#[cfg_attr(crown, expect(crown::unrooted_must_root))]
1583fn insert_an_element_at_the_adjusted_insertion_location(
1584    cx: &mut JSContext,
1585    node_to_insert: Dom<Node>,
1586    adjusted_insertion_location_parent: &Node,
1587    adjusted_insertion_location_child: Option<&Node>,
1588    parsing_algorithm: ParsingAlgorithm,
1589    custom_element_reaction_stack: &CustomElementReactionStack,
1590) {
1591    // Step 1: Let the adjusted insertion location be the appropriate place for inserting a node.
1592    //
1593    // Note: This is handled as part of the input.
1594
1595    // Step 2: If it is not possible to insert element at the adjusted insertion location,
1596    // abort these steps.
1597    if Node::ensure_pre_insertion_validity(
1598        cx.no_gc(),
1599        &node_to_insert,
1600        adjusted_insertion_location_parent,
1601        adjusted_insertion_location_child,
1602    )
1603    .is_err()
1604    {
1605        return;
1606    }
1607
1608    // Step 3. If the parser was not created as part of the HTML fragment parsing algorithm,
1609    // then push a new element queue onto element's relevant agent's custom element reactions
1610    // stack.
1611    let element_in_non_fragment =
1612        parsing_algorithm != ParsingAlgorithm::Fragment && node_to_insert.is::<Element>();
1613    if element_in_non_fragment {
1614        custom_element_reaction_stack.push_new_element_queue();
1615    }
1616
1617    // Step 4: Insert element at the adjusted insertion location.
1618    Node::insert(
1619        cx,
1620        &node_to_insert,
1621        adjusted_insertion_location_parent,
1622        adjusted_insertion_location_child,
1623        SuppressObserver::Unsuppressed,
1624    );
1625
1626    // Step 5: If the parser was not created as part of the HTML fragment parsing algorithm,
1627    // then pop the element queue from element's relevant agent's custom element reactions
1628    // stack, and invoke custom element reactions in that queue.
1629    //
1630    // Note: Handled as part of `pop_current_element_queue()`.
1631    if element_in_non_fragment {
1632        custom_element_reaction_stack.pop_current_element_queue(cx);
1633    }
1634}
1635
1636#[cfg_attr(crown, expect(crown::unrooted_must_root))]
1637fn insert(
1638    cx: &mut JSContext,
1639    parent: &Node,
1640    reference_child: Option<&Node>,
1641    child: NodeOrText<Dom<Node>>,
1642    parsing_algorithm: ParsingAlgorithm,
1643    custom_element_reaction_stack: &CustomElementReactionStack,
1644) {
1645    match child {
1646        NodeOrText::AppendNode(node) => {
1647            // This encompasses two parts of the specification:
1648            //  - https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
1649            //  - https://html.spec.whatwg.org/multipage/#insert-a-comment
1650            //
1651            // TODO: This part of the code should match the specification more closely.
1652            insert_an_element_at_the_adjusted_insertion_location(
1653                cx,
1654                node,
1655                parent,
1656                reference_child,
1657                parsing_algorithm,
1658                custom_element_reaction_stack,
1659            );
1660        },
1661        NodeOrText::AppendText(t) => {
1662            // https://html.spec.whatwg.org/multipage/#insert-a-character
1663            let text = reference_child
1664                .and_then(Node::GetPreviousSibling)
1665                .or_else(|| parent.GetLastChild())
1666                .and_then(DomRoot::downcast::<Text>);
1667
1668            if let Some(text) = text {
1669                text.upcast::<CharacterData>().append_data(cx, &t);
1670            } else {
1671                let text = Text::new(cx, String::from(t).into(), &parent.owner_doc());
1672                parent
1673                    .InsertBefore(cx, text.upcast(), reference_child)
1674                    .unwrap();
1675            }
1676        },
1677    }
1678}
1679
1680#[derive(JSTraceable, MallocSizeOf)]
1681#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
1682pub(crate) struct Sink {
1683    #[no_trace]
1684    base_url: ServoUrl,
1685    document: Dom<Document>,
1686    current_line: Cell<u64>,
1687    script: MutNullableDom<HTMLScriptElement>,
1688    parsing_algorithm: ParsingAlgorithm,
1689    #[conditional_malloc_size_of]
1690    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
1691}
1692
1693impl Sink {
1694    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1695        let x = x.downcast::<Element>().expect("Element node expected");
1696        let y = y.downcast::<Element>().expect("Element node expected");
1697
1698        x.is_in_same_home_subtree(y)
1699    }
1700
1701    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
1702        node.GetParentNode().is_some()
1703    }
1704}
1705
1706impl TreeSink for Sink {
1707    type Output = Self;
1708
1709    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1710    fn finish(self) -> Self {
1711        self
1712    }
1713
1714    type Handle = Dom<Node>;
1715    type ElemName<'a>
1716        = ExpandedName<'a>
1717    where
1718        Self: 'a;
1719
1720    fn get_document(&self) -> Dom<Node> {
1721        Dom::from_ref(self.document.upcast())
1722    }
1723
1724    #[expect(unsafe_code)]
1725    fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
1726        // TODO: https://github.com/servo/servo/issues/42839
1727        let mut cx = unsafe { temp_cx() };
1728        let cx = &mut cx;
1729        let template = target
1730            .downcast::<HTMLTemplateElement>()
1731            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
1732        Dom::from_ref(template.Content(cx).upcast())
1733    }
1734
1735    fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
1736        x == y
1737    }
1738
1739    fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
1740        let elem = target
1741            .downcast::<Element>()
1742            .expect("tried to get name of non-Element in HTML parsing");
1743        ExpandedName {
1744            ns: elem.namespace(),
1745            local: elem.local_name(),
1746        }
1747    }
1748
1749    #[expect(unsafe_code)]
1750    fn create_element(
1751        &self,
1752        name: QualName,
1753        attrs: Vec<Attribute>,
1754        flags: ElementFlags,
1755    ) -> Dom<Node> {
1756        // TODO: https://github.com/servo/servo/issues/42839
1757        let mut cx = unsafe { temp_cx() };
1758        let cx = &mut cx;
1759        let attrs = attrs
1760            .into_iter()
1761            .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
1762            .collect();
1763        let parsing_algorithm = if flags.template {
1764            ParsingAlgorithm::Fragment
1765        } else {
1766            self.parsing_algorithm
1767        };
1768        let element = create_element_for_token(
1769            cx,
1770            name,
1771            attrs,
1772            &self.document,
1773            ElementCreator::ParserCreated(self.current_line.get()),
1774            parsing_algorithm,
1775            &self.custom_element_reaction_stack,
1776            flags.had_duplicate_attributes,
1777        );
1778        Dom::from_ref(element.upcast())
1779    }
1780
1781    #[expect(unsafe_code)]
1782    fn create_comment(&self, text: StrTendril) -> Dom<Node> {
1783        // TODO: https://github.com/servo/servo/issues/42839
1784        let mut cx = unsafe { temp_cx() };
1785        let cx = &mut cx;
1786        let comment = Comment::new(
1787            cx,
1788            DOMString::from(String::from(text)),
1789            &self.document,
1790            None,
1791        );
1792        Dom::from_ref(comment.upcast())
1793    }
1794
1795    #[expect(unsafe_code)]
1796    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
1797        // TODO: https://github.com/servo/servo/issues/42839
1798        let mut cx = unsafe { temp_cx() };
1799        let cx = &mut cx;
1800        let doc = &*self.document;
1801        let pi = ProcessingInstruction::new(
1802            cx,
1803            DOMString::from(String::from(target)),
1804            DOMString::from(String::from(data)),
1805            doc,
1806        );
1807        Dom::from_ref(pi.upcast())
1808    }
1809
1810    #[expect(unsafe_code)]
1811    fn associate_with_form(
1812        &self,
1813        target: &Dom<Node>,
1814        form: &Dom<Node>,
1815        nodes: (&Dom<Node>, Option<&Dom<Node>>),
1816    ) {
1817        // TODO: https://github.com/servo/servo/issues/42839
1818        let mut cx = unsafe { temp_cx() };
1819        let cx = &mut cx;
1820        let (element, prev_element) = nodes;
1821        let tree_node = prev_element.map_or(element, |prev| {
1822            if self.has_parent_node(element) {
1823                element
1824            } else {
1825                prev
1826            }
1827        });
1828        if !self.same_tree(tree_node, form) {
1829            return;
1830        }
1831
1832        let node = target;
1833        let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
1834            .expect("Owner must be a form element");
1835
1836        let elem = node.downcast::<Element>();
1837        let control = elem.and_then(|e| e.as_maybe_form_control());
1838
1839        if let Some(control) = control {
1840            control.set_form_owner_from_parser(cx, &form);
1841        }
1842    }
1843
1844    #[expect(unsafe_code)]
1845    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1846    fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
1847        // TODO: https://github.com/servo/servo/issues/42839
1848        let mut cx = unsafe { temp_cx() };
1849        let cx = &mut cx;
1850
1851        let parent = sibling
1852            .GetParentNode()
1853            .expect("append_before_sibling called on node without parent");
1854
1855        insert(
1856            cx,
1857            &parent,
1858            Some(sibling),
1859            new_node,
1860            self.parsing_algorithm,
1861            &self.custom_element_reaction_stack,
1862        );
1863    }
1864
1865    fn parse_error(&self, msg: Cow<'static, str>) {
1866        debug!("Parse error: {}", msg);
1867    }
1868
1869    fn set_quirks_mode(&self, mode: QuirksMode) {
1870        let mode = match mode {
1871            QuirksMode::Quirks => ServoQuirksMode::Quirks,
1872            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
1873            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
1874        };
1875        self.document.set_quirks_mode(mode);
1876    }
1877
1878    #[expect(unsafe_code)]
1879    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1880    fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
1881        // TODO: https://github.com/servo/servo/issues/42839
1882        let mut cx = unsafe { temp_cx() };
1883        let cx = &mut cx;
1884
1885        insert(
1886            cx,
1887            parent,
1888            None,
1889            child,
1890            self.parsing_algorithm,
1891            &self.custom_element_reaction_stack,
1892        );
1893    }
1894
1895    #[cfg_attr(crown, expect(crown::unrooted_must_root))]
1896    fn append_based_on_parent_node(
1897        &self,
1898        elem: &Dom<Node>,
1899        prev_elem: &Dom<Node>,
1900        child: NodeOrText<Dom<Node>>,
1901    ) {
1902        if self.has_parent_node(elem) {
1903            self.append_before_sibling(elem, child);
1904        } else {
1905            self.append(prev_elem, child);
1906        }
1907    }
1908
1909    #[expect(unsafe_code)]
1910    fn append_doctype_to_document(
1911        &self,
1912        name: StrTendril,
1913        public_id: StrTendril,
1914        system_id: StrTendril,
1915    ) {
1916        // TODO: https://github.com/servo/servo/issues/42839
1917        let mut cx = unsafe { temp_cx() };
1918        let cx = &mut cx;
1919
1920        let doc = &*self.document;
1921        let doctype = DocumentType::new(
1922            cx,
1923            DOMString::from(String::from(name)),
1924            Some(DOMString::from(String::from(public_id))),
1925            Some(DOMString::from(String::from(system_id))),
1926            doc,
1927        );
1928        doc.upcast::<Node>()
1929            .AppendChild(cx, doctype.upcast())
1930            .expect("Appending failed");
1931    }
1932
1933    #[expect(unsafe_code)]
1934    fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
1935        // TODO: https://github.com/servo/servo/issues/42839
1936        let mut cx = unsafe { temp_cx() };
1937        let cx = &mut cx;
1938
1939        let elem = target
1940            .downcast::<Element>()
1941            .expect("tried to set attrs on non-Element in HTML parsing");
1942        for attr in attrs {
1943            elem.set_attribute_from_parser(
1944                cx,
1945                attr.name,
1946                DOMString::from(String::from(attr.value)),
1947                None,
1948            );
1949        }
1950    }
1951
1952    #[expect(unsafe_code)]
1953    fn remove_from_parent(&self, target: &Dom<Node>) {
1954        // TODO: https://github.com/servo/servo/issues/42839
1955        let mut cx = unsafe { temp_cx() };
1956        let cx = &mut cx;
1957
1958        if let Some(ref parent) = target.GetParentNode() {
1959            parent.RemoveChild(cx, target).unwrap();
1960        }
1961    }
1962
1963    fn mark_script_already_started(&self, node: &Dom<Node>) {
1964        let script = node.downcast::<HTMLScriptElement>();
1965        if let Some(script) = script {
1966            script.set_already_started(true)
1967        }
1968    }
1969
1970    #[expect(unsafe_code)]
1971    fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
1972        // TODO: https://github.com/servo/servo/issues/42839
1973        let mut cx = unsafe { temp_cx() };
1974        let cx = &mut cx;
1975
1976        while let Some(ref child) = node.GetFirstChild() {
1977            new_parent.AppendChild(cx, child).unwrap();
1978        }
1979    }
1980
1981    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1982    /// Specifically, the `<annotation-xml>` cases.
1983    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
1984        let elem = handle.downcast::<Element>().unwrap();
1985        elem.get_attribute_string_value(&local_name!("encoding"))
1986            .is_some_and(|value| {
1987                value.eq_ignore_ascii_case("text/html") ||
1988                    value.eq_ignore_ascii_case("application/xhtml+xml")
1989            })
1990    }
1991
1992    fn set_current_line(&self, line_number: u64) {
1993        self.current_line.set(line_number);
1994    }
1995
1996    #[expect(unsafe_code)]
1997    fn pop(&self, node: &Dom<Node>) {
1998        // TODO: https://github.com/servo/servo/issues/42839
1999        let mut cx = unsafe { temp_cx() };
2000        let cx = &mut cx;
2001
2002        let node = DomRoot::from_ref(&**node);
2003        vtable_for(&node).pop(cx);
2004    }
2005
2006    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
2007        intended_parent.owner_doc().allow_declarative_shadow_roots()
2008    }
2009
2010    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
2011    /// A start tag whose tag name is "template"
2012    /// Attach shadow path
2013    #[expect(unsafe_code)]
2014    fn attach_declarative_shadow(
2015        &self,
2016        host: &Dom<Node>,
2017        template: &Dom<Node>,
2018        attributes: &[Attribute],
2019    ) -> bool {
2020        // TODO: https://github.com/servo/servo/issues/42839
2021        let mut cx = unsafe { temp_cx() };
2022        let cx = &mut cx;
2023
2024        attach_declarative_shadow_inner(cx, host, template, attributes)
2025    }
2026
2027    #[expect(unsafe_code)]
2028    fn maybe_clone_an_option_into_selectedcontent(&self, option: &Self::Handle) {
2029        // TODO: https://github.com/servo/servo/issues/42839
2030        let mut cx = unsafe { temp_cx() };
2031        let cx = &mut cx;
2032
2033        let Some(option) = option.downcast::<HTMLOptionElement>() else {
2034            if cfg!(debug_assertions) {
2035                unreachable!();
2036            }
2037            log::error!(
2038                "Received non-option element in maybe_clone_an_option_into_selectedcontent"
2039            );
2040            return;
2041        };
2042
2043        option.maybe_clone_an_option_into_selectedcontent(cx)
2044    }
2045}
2046
2047/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
2048#[expect(clippy::too_many_arguments)]
2049fn create_element_for_token(
2050    cx: &mut JSContext,
2051    name: QualName,
2052    attrs: Vec<ElementAttribute>,
2053    document: &Document,
2054    creator: ElementCreator,
2055    parsing_algorithm: ParsingAlgorithm,
2056    custom_element_reaction_stack: &CustomElementReactionStack,
2057    had_duplicate_attributes: bool,
2058) -> DomRoot<Element> {
2059    // Step 1. If the active speculative HTML parser is not null, then return the result
2060    // of creating a speculative mock element given namespace, token's tag name, and
2061    // token's attributes.
2062    // TODO: Implement
2063
2064    // Step 2: Otherwise, optionally create a speculative mock element given namespace,
2065    // token's tag name, and token's attributes
2066    // TODO: Implement.
2067
2068    // Step 3. Let document be intendedParent's node document.
2069    // Passed as argument.
2070
2071    // Step 4. Let localName be token's tag name.
2072    // Passed as argument
2073
2074    // Step 5. Let is be the value of the "is" attribute in token, if such an attribute
2075    // exists; otherwise null.
2076    let is = attrs
2077        .iter()
2078        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
2079        .map(|attr| LocalName::from(&attr.value));
2080
2081    // Step 6. Let registry be the result of looking up a custom element registry given intendedParent.
2082    // TODO: Implement registries other than `Document`.
2083
2084    // Step 7. Let definition be the result of looking up a custom element definition
2085    // given registry, namespace, localName, and is.
2086    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());
2087
2088    // Step 8. Let willExecuteScript be true if definition is non-null and the parser was
2089    // not created as part of the HTML fragment parsing algorithm; otherwise false.
2090    let will_execute_script =
2091        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;
2092
2093    // Step 9. If willExecuteScript is true:
2094    if will_execute_script {
2095        // Step 9.1. Increment document's throw-on-dynamic-markup-insertion counter.
2096        document.increment_throw_on_dynamic_markup_insertion_counter();
2097        // Step 6.2. If the JavaScript execution context stack is empty, then perform a
2098        // microtask checkpoint.
2099        if is_execution_stack_empty() {
2100            document.window().perform_a_microtask_checkpoint(cx);
2101        }
2102        // Step 9.3. Push a new element queue onto document's relevant agent's custom
2103        // element reactions stack.
2104        custom_element_reaction_stack.push_new_element_queue()
2105    }
2106
2107    // Step 10. Let element be the result of creating an element given document,
2108    // localName, namespace, null, is, willExecuteScript, and registry.
2109    let creation_mode = if will_execute_script {
2110        CustomElementCreationMode::Synchronous
2111    } else {
2112        CustomElementCreationMode::Asynchronous
2113    };
2114    let element = Element::create(cx, name, is, document, creator, creation_mode, None);
2115
2116    // Step 11. Append each attribute in the given token to element.
2117    for attr in attrs {
2118        element.set_attribute_from_parser(cx, attr.name, attr.value, None);
2119    }
2120
2121    // Record if the tokenizer saw duplicate attributes on this element,
2122    // used for CSP nonce validation (step 3 of "is element nonceable").
2123    if had_duplicate_attributes {
2124        element.set_had_duplicate_attributes();
2125    }
2126
2127    // Step 12. If willExecuteScript is true:
2128    if will_execute_script {
2129        // Step 12.1. Let queue be the result of popping from document's relevant agent's
2130        // custom element reactions stack. (This will be the same element queue as was
2131        // pushed above.)
2132        // Step 12.2 Invoke custom element reactions in queue.
2133        custom_element_reaction_stack.pop_current_element_queue(cx);
2134        // Step 12.3. Decrement document's throw-on-dynamic-markup-insertion counter.
2135        document.decrement_throw_on_dynamic_markup_insertion_counter();
2136    }
2137
2138    // Step 13. If element has an xmlns attribute in the XMLNS namespace whose value is
2139    // not exactly the same as the element's namespace, that is a parse error. Similarly,
2140    // if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not
2141    // the XLink Namespace, that is a parse error.
2142    // TODO: Implement.
2143
2144    // Step 14. If element is a resettable element and not a form-associated custom
2145    // element, then invoke its reset algorithm. (This initializes the element's value and
2146    // checkedness based on the element's attributes.)
2147    if let Some(html_element) = element.downcast::<HTMLElement>() &&
2148        element.is_resettable() &&
2149        !html_element.is_form_associated_custom_element()
2150    {
2151        element.reset(cx);
2152    }
2153
2154    // Step 15. If element is a form-associated element and not a form-associated custom
2155    // element, the form element pointer is not null, there is no template element on the
2156    // stack of open elements, element is either not listed or doesn't have a form attribute,
2157    // and the intendedParent is in the same tree as the element pointed to by the form
2158    // element pointer, then associate element with the form element pointed to by the form
2159    // element pointer and set element's parser inserted flag.
2160    // TODO: Implement
2161
2162    // Step 16. Return element.
2163    element
2164}
2165
2166fn attach_declarative_shadow_inner(
2167    cx: &mut JSContext,
2168    host: &Node,
2169    template: &Node,
2170    attributes: &[Attribute],
2171) -> bool {
2172    let host_element = host.downcast::<Element>().unwrap();
2173
2174    if host_element.shadow_root().is_some() {
2175        return false;
2176    }
2177
2178    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();
2179
2180    // Step 3. Let mode be templateStartTag's shadowrootmode attribute's value.
2181    // Step 4. Let slotAssignment be "named".
2182    // Step 5. If templateStartTag's shadowrootslotassignment attribute is in
2183    // the Manual state, then set slotAssignment to "manual".
2184    // Step 6. Let clonable be true if templateStartTag has a shadowrootclonable attribute; otherwise false.
2185    // Step 7. Let serializable be true if templateStartTag has a shadowrootserializable
2186    // attribute; otherwise false.
2187    // Step 8. Let delegatesFocus be true if templateStartTag has a shadowrootdelegatesfocus
2188    // attribute; otherwise false.
2189    let mut shadow_root_mode = ShadowRootMode::Open;
2190    let mut slot_assignment_mode = SlotAssignmentMode::Named;
2191    let mut clonable = false;
2192    let mut delegatesfocus = false;
2193    let mut serializable = false;
2194
2195    attributes
2196        .iter()
2197        .for_each(|attr: &Attribute| match attr.name.local {
2198            local_name!("shadowrootmode") => {
2199                if attr.value.eq_ignore_ascii_case("open") {
2200                    shadow_root_mode = ShadowRootMode::Open;
2201                } else if attr.value.eq_ignore_ascii_case("closed") {
2202                    shadow_root_mode = ShadowRootMode::Closed;
2203                } else {
2204                    unreachable!("shadowrootmode value is not open nor closed");
2205                }
2206            },
2207            local_name!("shadowrootclonable") => {
2208                clonable = true;
2209            },
2210            local_name!("shadowrootdelegatesfocus") => {
2211                delegatesfocus = true;
2212            },
2213            local_name!("shadowrootserializable") => {
2214                serializable = true;
2215            },
2216            local_name!("shadowrootslotassignment") => {
2217                if attr.value.eq_ignore_ascii_case("manual") {
2218                    slot_assignment_mode = SlotAssignmentMode::Manual;
2219                }
2220            },
2221            _ => {},
2222        });
2223
2224    // Step 8.1. Attach a shadow root with declarative shadow host element,
2225    // mode, clonable, serializable, delegatesFocus, and "named".
2226    match host_element.attach_shadow(
2227        cx,
2228        IsUserAgentWidget::No,
2229        shadow_root_mode,
2230        clonable,
2231        serializable,
2232        delegatesfocus,
2233        slot_assignment_mode,
2234    ) {
2235        Ok(shadow_root) => {
2236            // Step 8.3. Set shadow's declarative to true.
2237            shadow_root.set_declarative(true);
2238
2239            // Set 8.4. Set template's template contents property to shadow.
2240            let shadow = shadow_root.upcast::<DocumentFragment>();
2241            template_element.set_contents(Some(shadow));
2242
2243            // Step 8.5. Set shadow’s available to element internals to true.
2244            shadow_root.set_available_to_element_internals(true);
2245
2246            true
2247        },
2248        Err(_) => false,
2249    }
2250}