script/dom/servoparser/
async_html.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![cfg_attr(crown, allow(crown::unrooted_must_root))]
6
7use std::borrow::Cow;
8use std::cell::{Cell, Ref, RefCell, RefMut};
9use std::collections::HashMap;
10use std::collections::vec_deque::VecDeque;
11use std::rc::Rc;
12use std::thread;
13
14use crossbeam_channel::{Receiver, Sender, unbounded};
15use html5ever::buffer_queue::BufferQueue;
16use html5ever::tendril::fmt::UTF8;
17use html5ever::tendril::{SendTendril, StrTendril, Tendril};
18use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
19use html5ever::tree_builder::{
20    ElementFlags, NodeOrText as HtmlNodeOrText, QuirksMode, TreeBuilder, TreeBuilderOpts, TreeSink,
21};
22use html5ever::{Attribute as HtmlAttribute, ExpandedName, QualName, local_name, ns};
23use markup5ever::TokenizerResult;
24use servo_url::ServoUrl;
25use style::context::QuirksMode as ServoQuirksMode;
26
27use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
28use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
29use crate::dom::bindings::inheritance::Castable;
30use crate::dom::bindings::root::{Dom, DomRoot};
31use crate::dom::bindings::str::DOMString;
32use crate::dom::comment::Comment;
33use crate::dom::customelementregistry::CustomElementReactionStack;
34use crate::dom::document::Document;
35use crate::dom::documenttype::DocumentType;
36use crate::dom::element::{Element, ElementCreator};
37use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
38use crate::dom::html::htmlscriptelement::HTMLScriptElement;
39use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
40use crate::dom::node::Node;
41use crate::dom::processinginstruction::ProcessingInstruction;
42use crate::dom::servoparser::{
43    ElementAttribute, ParsingAlgorithm, attach_declarative_shadow_inner, create_element_for_token,
44};
45use crate::dom::virtualmethods::vtable_for;
46use crate::script_runtime::CanGc;
47
/// Identifier that names a DOM node on both sides of the parser/main-thread channel.
///
/// The `Sink` hands these ids out, and the `Tokenizer` maps each one to the `Dom<Node>` it
/// stands for. Id `0` is reserved for the document node.
type ParseNodeId = usize;
49
50#[derive(Clone, JSTraceable, MallocSizeOf)]
51pub(crate) struct ParseNode {
52    id: ParseNodeId,
53    #[no_trace]
54    qual_name: Option<QualName>,
55}
56
57#[derive(JSTraceable, MallocSizeOf)]
58enum NodeOrText {
59    Node(ParseNode),
60    Text(String),
61}
62
63#[derive(JSTraceable, MallocSizeOf)]
64struct Attribute {
65    #[no_trace]
66    name: QualName,
67    value: String,
68}
69
70#[derive(JSTraceable, MallocSizeOf)]
71enum ParseOperation {
72    GetTemplateContents {
73        target: ParseNodeId,
74        contents: ParseNodeId,
75    },
76
77    CreateElement {
78        node: ParseNodeId,
79        #[no_trace]
80        name: QualName,
81        attrs: Vec<Attribute>,
82        current_line: u64,
83    },
84
85    CreateComment {
86        text: String,
87        node: ParseNodeId,
88    },
89    AppendBeforeSibling {
90        sibling: ParseNodeId,
91        node: NodeOrText,
92    },
93    AppendBasedOnParentNode {
94        element: ParseNodeId,
95        prev_element: ParseNodeId,
96        node: NodeOrText,
97    },
98    Append {
99        parent: ParseNodeId,
100        node: NodeOrText,
101    },
102
103    AppendDoctypeToDocument {
104        name: String,
105        public_id: String,
106        system_id: String,
107    },
108
109    AddAttrsIfMissing {
110        target: ParseNodeId,
111        attrs: Vec<Attribute>,
112    },
113    RemoveFromParent {
114        target: ParseNodeId,
115    },
116    MarkScriptAlreadyStarted {
117        node: ParseNodeId,
118    },
119    ReparentChildren {
120        parent: ParseNodeId,
121        new_parent: ParseNodeId,
122    },
123
124    AssociateWithForm {
125        target: ParseNodeId,
126        form: ParseNodeId,
127        element: ParseNodeId,
128        prev_element: Option<ParseNodeId>,
129    },
130
131    CreatePI {
132        node: ParseNodeId,
133        target: String,
134        data: String,
135    },
136
137    Pop {
138        node: ParseNodeId,
139    },
140
141    SetQuirksMode {
142        #[ignore_malloc_size_of = "Defined in style"]
143        #[no_trace]
144        mode: ServoQuirksMode,
145    },
146
147    AttachDeclarativeShadowRoot {
148        location: ParseNodeId,
149        template: ParseNodeId,
150        attributes: Vec<Attribute>,
151        /// Used to notify the parser thread whether or not attaching the shadow root succeeded
152        #[no_trace]
153        sender: Sender<bool>,
154    },
155}
156
157#[derive(MallocSizeOf)]
158enum ToTokenizerMsg {
159    // From HtmlTokenizer
160    TokenizerResultDone {
161        #[ignore_malloc_size_of = "Defined in html5ever"]
162        updated_input: VecDeque<SendTendril<UTF8>>,
163    },
164    TokenizerResultScript {
165        script: ParseNode,
166        #[ignore_malloc_size_of = "Defined in html5ever"]
167        updated_input: VecDeque<SendTendril<UTF8>>,
168    },
169    End, // Sent to Tokenizer to signify HtmlTokenizer's end method has returned
170
171    // From Sink
172    ProcessOperation(ParseOperation),
173}
174
175#[derive(MallocSizeOf)]
176enum ToHtmlTokenizerMsg {
177    Feed {
178        #[ignore_malloc_size_of = "Defined in html5ever"]
179        input: VecDeque<SendTendril<UTF8>>,
180    },
181    End,
182    SetPlainTextState,
183}
184
185fn create_buffer_queue(mut buffers: VecDeque<SendTendril<UTF8>>) -> BufferQueue {
186    let buffer_queue = BufferQueue::default();
187    while let Some(st) = buffers.pop_front() {
188        buffer_queue.push_back(StrTendril::from(st));
189    }
190    buffer_queue
191}
192
193// The async HTML Tokenizer consists of two separate types working together: the Tokenizer
194// (defined below), which lives on the main thread, and the HtmlTokenizer, defined in html5ever, which
195// lives on the parser thread.
196// Steps:
197// 1. A call to Tokenizer::new will spin up a new parser thread, creating an HtmlTokenizer instance,
198//    which starts listening for messages from Tokenizer.
199// 2. Upon receiving an input from ServoParser, the Tokenizer forwards it to HtmlTokenizer, where it starts
200//    creating the necessary tree actions based on the input.
201// 3. HtmlTokenizer sends these tree actions to the Tokenizer as soon as it creates them. The Tokenizer
202//    then executes the received actions.
203//
204//    _____________                           _______________
205//   |             |                         |               |
206//   |             |                         |               |
207//   |             |   ToHtmlTokenizerMsg    |               |
208//   |             |------------------------>| HtmlTokenizer |
209//   |             |                         |               |
210//   |  Tokenizer  |     ToTokenizerMsg      |               |
211//   |             |<------------------------|    ________   |
212//   |             |                         |   |        |  |
213//   |             |     ToTokenizerMsg      |   |  Sink  |  |
214//   |             |<------------------------|---|        |  |
215//   |             |                         |   |________|  |
216//   |_____________|                         |_______________|
217//
218#[derive(JSTraceable, MallocSizeOf)]
219#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
220pub(crate) struct Tokenizer {
221    document: Dom<Document>,
222    #[ignore_malloc_size_of = "Defined in std"]
223    #[no_trace]
224    receiver: Receiver<ToTokenizerMsg>,
225    #[ignore_malloc_size_of = "Defined in std"]
226    #[no_trace]
227    html_tokenizer_sender: Sender<ToHtmlTokenizerMsg>,
228    #[ignore_malloc_size_of = "Defined in std"]
229    nodes: RefCell<HashMap<ParseNodeId, Dom<Node>>>,
230    #[no_trace]
231    url: ServoUrl,
232    parsing_algorithm: ParsingAlgorithm,
233    #[conditional_malloc_size_of]
234    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
235}
236
237impl Tokenizer {
238    pub(crate) fn new(
239        document: &Document,
240        url: ServoUrl,
241        fragment_context: Option<super::FragmentContext>,
242    ) -> Self {
243        // Messages from the Tokenizer (main thread) to HtmlTokenizer (parser thread)
244        let (to_html_tokenizer_sender, html_tokenizer_receiver) = unbounded();
245        // Messages from HtmlTokenizer and Sink (parser thread) to Tokenizer (main thread)
246        let (to_tokenizer_sender, tokenizer_receiver) = unbounded();
247
248        let algorithm = match fragment_context {
249            Some(_) => ParsingAlgorithm::Fragment,
250            None => ParsingAlgorithm::Normal,
251        };
252
253        let custom_element_reaction_stack = document.custom_element_reaction_stack();
254        let tokenizer = Tokenizer {
255            document: Dom::from_ref(document),
256            receiver: tokenizer_receiver,
257            html_tokenizer_sender: to_html_tokenizer_sender,
258            nodes: RefCell::new(HashMap::new()),
259            url,
260            parsing_algorithm: algorithm,
261            custom_element_reaction_stack,
262        };
263        tokenizer.insert_node(0, Dom::from_ref(document.upcast()));
264
265        let sink = Sink::new(
266            to_tokenizer_sender.clone(),
267            document.allow_declarative_shadow_roots(),
268        );
269        let mut form_parse_node = None;
270        let mut parser_fragment_context = None;
271        if let Some(fragment_context) = fragment_context {
272            let node = sink.new_parse_node();
273            tokenizer.insert_node(node.id, Dom::from_ref(fragment_context.context_elem));
274            parser_fragment_context =
275                Some((node, fragment_context.context_element_allows_scripting));
276
277            form_parse_node = fragment_context.form_elem.map(|form_elem| {
278                let node = sink.new_parse_node();
279                tokenizer.insert_node(node.id, Dom::from_ref(form_elem));
280                node
281            });
282        };
283
284        // Create new thread for HtmlTokenizer. This is where parser actions
285        // will be generated from the input provided. These parser actions are then passed
286        // onto the main thread to be executed.
287        let scripting_enabled = document.has_browsing_context();
288        thread::Builder::new()
289            .name(format!("Parse:{}", tokenizer.url.debug_compact()))
290            .spawn(move || {
291                run(
292                    sink,
293                    parser_fragment_context,
294                    form_parse_node,
295                    to_tokenizer_sender,
296                    html_tokenizer_receiver,
297                    scripting_enabled,
298                );
299            })
300            .expect("HTML Parser thread spawning failed");
301
302        tokenizer
303    }
304
305    pub(crate) fn feed(
306        &self,
307        input: &BufferQueue,
308        can_gc: CanGc,
309    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
310        let mut send_tendrils = VecDeque::new();
311        while let Some(str) = input.pop_front() {
312            send_tendrils.push_back(SendTendril::from(str));
313        }
314
315        // Send message to parser thread, asking it to start reading from the input.
316        // Parser operation messages will be sent to main thread as they are evaluated.
317        self.html_tokenizer_sender
318            .send(ToHtmlTokenizerMsg::Feed {
319                input: send_tendrils,
320            })
321            .unwrap();
322
323        loop {
324            match self
325                .receiver
326                .recv()
327                .expect("Unexpected channel panic in main thread.")
328            {
329                ToTokenizerMsg::ProcessOperation(parse_op) => {
330                    self.process_operation(parse_op, can_gc)
331                },
332                ToTokenizerMsg::TokenizerResultDone { updated_input } => {
333                    let buffer_queue = create_buffer_queue(updated_input);
334                    input.replace_with(buffer_queue);
335                    return TokenizerResult::Done;
336                },
337                ToTokenizerMsg::TokenizerResultScript {
338                    script,
339                    updated_input,
340                } => {
341                    let buffer_queue = create_buffer_queue(updated_input);
342                    input.replace_with(buffer_queue);
343                    let script = self.get_node(&script.id);
344                    return TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()));
345                },
346                _ => unreachable!(),
347            };
348        }
349    }
350
351    pub(crate) fn end(&self, can_gc: CanGc) {
352        self.html_tokenizer_sender
353            .send(ToHtmlTokenizerMsg::End)
354            .unwrap();
355        loop {
356            match self
357                .receiver
358                .recv()
359                .expect("Unexpected channel panic in main thread.")
360            {
361                ToTokenizerMsg::ProcessOperation(parse_op) => {
362                    self.process_operation(parse_op, can_gc)
363                },
364                ToTokenizerMsg::TokenizerResultDone { updated_input: _ } |
365                ToTokenizerMsg::TokenizerResultScript {
366                    script: _,
367                    updated_input: _,
368                } => continue,
369                ToTokenizerMsg::End => return,
370            };
371        }
372    }
373
374    pub(crate) fn url(&self) -> &ServoUrl {
375        &self.url
376    }
377
378    pub(crate) fn set_plaintext_state(&self) {
379        self.html_tokenizer_sender
380            .send(ToHtmlTokenizerMsg::SetPlainTextState)
381            .unwrap();
382    }
383
384    fn insert_node(&self, id: ParseNodeId, node: Dom<Node>) {
385        assert!(self.nodes.borrow_mut().insert(id, node).is_none());
386    }
387
388    fn get_node<'a>(&'a self, id: &ParseNodeId) -> Ref<'a, Dom<Node>> {
389        Ref::map(self.nodes.borrow(), |nodes| {
390            nodes.get(id).expect("Node not found!")
391        })
392    }
393
394    fn append_before_sibling(&self, sibling: ParseNodeId, node: NodeOrText, can_gc: CanGc) {
395        let node = match node {
396            NodeOrText::Node(n) => {
397                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
398            },
399            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
400        };
401        let sibling = &**self.get_node(&sibling);
402        let parent = &*sibling
403            .GetParentNode()
404            .expect("append_before_sibling called on node without parent");
405
406        super::insert(
407            parent,
408            Some(sibling),
409            node,
410            self.parsing_algorithm,
411            &self.custom_element_reaction_stack,
412            can_gc,
413        );
414    }
415
416    fn append(&self, parent: ParseNodeId, node: NodeOrText, can_gc: CanGc) {
417        let node = match node {
418            NodeOrText::Node(n) => {
419                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
420            },
421            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
422        };
423
424        let parent = &**self.get_node(&parent);
425        super::insert(
426            parent,
427            None,
428            node,
429            self.parsing_algorithm,
430            &self.custom_element_reaction_stack,
431            can_gc,
432        );
433    }
434
435    fn has_parent_node(&self, node: ParseNodeId) -> bool {
436        self.get_node(&node).GetParentNode().is_some()
437    }
438
439    fn same_tree(&self, x: ParseNodeId, y: ParseNodeId) -> bool {
440        let x = self.get_node(&x);
441        let y = self.get_node(&y);
442
443        let x = x.downcast::<Element>().expect("Element node expected");
444        let y = y.downcast::<Element>().expect("Element node expected");
445        x.is_in_same_home_subtree(y)
446    }
447
448    fn process_operation(&self, op: ParseOperation, can_gc: CanGc) {
449        let document = DomRoot::from_ref(&**self.get_node(&0));
450        let document = document
451            .downcast::<Document>()
452            .expect("Document node should be downcasted!");
453        match op {
454            ParseOperation::GetTemplateContents { target, contents } => {
455                let target = DomRoot::from_ref(&**self.get_node(&target));
456                let template = target
457                    .downcast::<HTMLTemplateElement>()
458                    .expect("Tried to extract contents from non-template element while parsing");
459                self.insert_node(contents, Dom::from_ref(template.Content(can_gc).upcast()));
460            },
461            ParseOperation::CreateElement {
462                node,
463                name,
464                attrs,
465                current_line,
466            } => {
467                let attrs = attrs
468                    .into_iter()
469                    .map(|attr| ElementAttribute::new(attr.name, DOMString::from(attr.value)))
470                    .collect();
471                let element = create_element_for_token(
472                    name,
473                    attrs,
474                    &self.document,
475                    ElementCreator::ParserCreated(current_line),
476                    ParsingAlgorithm::Normal,
477                    &self.custom_element_reaction_stack,
478                    can_gc,
479                );
480                self.insert_node(node, Dom::from_ref(element.upcast()));
481            },
482            ParseOperation::CreateComment { text, node } => {
483                let comment = Comment::new(DOMString::from(text), document, None, can_gc);
484                self.insert_node(node, Dom::from_ref(comment.upcast()));
485            },
486            ParseOperation::AppendBeforeSibling { sibling, node } => {
487                self.append_before_sibling(sibling, node, can_gc);
488            },
489            ParseOperation::Append { parent, node } => {
490                self.append(parent, node, can_gc);
491            },
492            ParseOperation::AppendBasedOnParentNode {
493                element,
494                prev_element,
495                node,
496            } => {
497                if self.has_parent_node(element) {
498                    self.append_before_sibling(element, node, can_gc);
499                } else {
500                    self.append(prev_element, node, can_gc);
501                }
502            },
503            ParseOperation::AppendDoctypeToDocument {
504                name,
505                public_id,
506                system_id,
507            } => {
508                let doctype = DocumentType::new(
509                    DOMString::from(name),
510                    Some(DOMString::from(public_id)),
511                    Some(DOMString::from(system_id)),
512                    document,
513                    can_gc,
514                );
515
516                document
517                    .upcast::<Node>()
518                    .AppendChild(doctype.upcast(), can_gc)
519                    .expect("Appending failed");
520            },
521            ParseOperation::AddAttrsIfMissing { target, attrs } => {
522                let node = self.get_node(&target);
523                let elem = node
524                    .downcast::<Element>()
525                    .expect("tried to set attrs on non-Element in HTML parsing");
526                for attr in attrs {
527                    elem.set_attribute_from_parser(
528                        attr.name,
529                        DOMString::from(attr.value),
530                        None,
531                        can_gc,
532                    );
533                }
534            },
535            ParseOperation::RemoveFromParent { target } => {
536                if let Some(ref parent) = self.get_node(&target).GetParentNode() {
537                    parent.RemoveChild(&self.get_node(&target), can_gc).unwrap();
538                }
539            },
540            ParseOperation::MarkScriptAlreadyStarted { node } => {
541                let node = self.get_node(&node);
542                let script = node.downcast::<HTMLScriptElement>();
543                if let Some(script) = script {
544                    script.set_already_started(true)
545                }
546            },
547            ParseOperation::ReparentChildren { parent, new_parent } => {
548                let parent = self.get_node(&parent);
549                let new_parent = self.get_node(&new_parent);
550                while let Some(child) = parent.GetFirstChild() {
551                    new_parent.AppendChild(&child, can_gc).unwrap();
552                }
553            },
554            ParseOperation::AssociateWithForm {
555                target,
556                form,
557                element,
558                prev_element,
559            } => {
560                let tree_node = prev_element.map_or(element, |prev| {
561                    if self.has_parent_node(element) {
562                        element
563                    } else {
564                        prev
565                    }
566                });
567
568                if !self.same_tree(tree_node, form) {
569                    return;
570                }
571                let form = self.get_node(&form);
572                let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
573                    .expect("Owner must be a form element");
574
575                let node = self.get_node(&target);
576                let elem = node.downcast::<Element>();
577                let control = elem.and_then(|e| e.as_maybe_form_control());
578
579                if let Some(control) = control {
580                    control.set_form_owner_from_parser(&form, can_gc);
581                }
582            },
583            ParseOperation::Pop { node } => {
584                vtable_for(&self.get_node(&node)).pop();
585            },
586            ParseOperation::CreatePI { node, target, data } => {
587                let pi = ProcessingInstruction::new(
588                    DOMString::from(target),
589                    DOMString::from(data),
590                    document,
591                    can_gc,
592                );
593                self.insert_node(node, Dom::from_ref(pi.upcast()));
594            },
595            ParseOperation::SetQuirksMode { mode } => {
596                document.set_quirks_mode(mode);
597            },
598            ParseOperation::AttachDeclarativeShadowRoot {
599                location,
600                template,
601                attributes,
602                sender,
603            } => {
604                let location = self.get_node(&location);
605                let template = self.get_node(&template);
606                let attributes: Vec<_> = attributes
607                    .into_iter()
608                    .map(|attribute| HtmlAttribute {
609                        name: attribute.name,
610                        value: StrTendril::from(attribute.value),
611                    })
612                    .collect();
613
614                let did_succeed =
615                    attach_declarative_shadow_inner(&location, &template, &attributes);
616                sender.send(did_succeed).unwrap();
617            },
618        }
619    }
620}
621
622/// Run the parser.
623///
624/// The `fragment_context` argument is `Some` in the fragment case and describes the context
625/// node as well as whether scripting is enabled for the context node. Note that whether or not
626/// scripting is enabled for the context node does not affect whether scripting is enabled for the
627/// parser, that is determined by the `scripting_enabled` argument.
628fn run(
629    sink: Sink,
630    fragment_context: Option<(ParseNode, bool)>,
631    form_parse_node: Option<ParseNode>,
632    sender: Sender<ToTokenizerMsg>,
633    receiver: Receiver<ToHtmlTokenizerMsg>,
634    scripting_enabled: bool,
635) {
636    let options = TreeBuilderOpts {
637        scripting_enabled,
638        ..Default::default()
639    };
640
641    let html_tokenizer = if let Some((context_node, context_scripting_enabled)) = fragment_context {
642        let tree_builder =
643            TreeBuilder::new_for_fragment(sink, context_node, form_parse_node, options);
644
645        let tok_options = TokenizerOpts {
646            initial_state: Some(
647                tree_builder.tokenizer_state_for_context_elem(context_scripting_enabled),
648            ),
649            ..Default::default()
650        };
651
652        HtmlTokenizer::new(tree_builder, tok_options)
653    } else {
654        HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
655    };
656
657    loop {
658        match receiver
659            .recv()
660            .expect("Unexpected channel panic in html parser thread")
661        {
662            ToHtmlTokenizerMsg::Feed { input } => {
663                let input = create_buffer_queue(input);
664                let res = html_tokenizer.feed(&input);
665
666                // Gather changes to 'input' and place them in 'updated_input',
667                // which will be sent to the main thread to update feed method's 'input'
668                let mut updated_input = VecDeque::new();
669                while let Some(st) = input.pop_front() {
670                    updated_input.push_back(SendTendril::from(st));
671                }
672
673                let res = match res {
674                    TokenizerResult::Done => ToTokenizerMsg::TokenizerResultDone { updated_input },
675                    TokenizerResult::Script(script) => ToTokenizerMsg::TokenizerResultScript {
676                        script,
677                        updated_input,
678                    },
679                };
680                sender.send(res).unwrap();
681            },
682            ToHtmlTokenizerMsg::End => {
683                html_tokenizer.end();
684                sender.send(ToTokenizerMsg::End).unwrap();
685                break;
686            },
687            ToHtmlTokenizerMsg::SetPlainTextState => html_tokenizer.set_plaintext_state(),
688        };
689    }
690}
691
692#[derive(Default, JSTraceable, MallocSizeOf)]
693struct ParseNodeData {
694    contents: Option<ParseNode>,
695    is_integration_point: bool,
696}
697
698pub(crate) struct Sink {
699    current_line: Cell<u64>,
700    parse_node_data: RefCell<HashMap<ParseNodeId, ParseNodeData>>,
701    next_parse_node_id: Cell<ParseNodeId>,
702    document_node: ParseNode,
703    sender: Sender<ToTokenizerMsg>,
704    allow_declarative_shadow_roots: bool,
705}
706
707impl Sink {
708    fn new(sender: Sender<ToTokenizerMsg>, allow_declarative_shadow_roots: bool) -> Sink {
709        let sink = Sink {
710            current_line: Cell::new(1),
711            parse_node_data: RefCell::new(HashMap::new()),
712            next_parse_node_id: Cell::new(1),
713            document_node: ParseNode {
714                id: 0,
715                qual_name: None,
716            },
717            sender,
718            allow_declarative_shadow_roots,
719        };
720        let data = ParseNodeData::default();
721        sink.insert_parse_node_data(0, data);
722        sink
723    }
724
725    fn new_parse_node(&self) -> ParseNode {
726        let id = self.next_parse_node_id.get();
727        let data = ParseNodeData::default();
728        self.insert_parse_node_data(id, data);
729        self.next_parse_node_id.set(id + 1);
730        ParseNode {
731            id,
732            qual_name: None,
733        }
734    }
735
736    fn send_op(&self, op: ParseOperation) {
737        self.sender
738            .send(ToTokenizerMsg::ProcessOperation(op))
739            .unwrap();
740    }
741
742    fn insert_parse_node_data(&self, id: ParseNodeId, data: ParseNodeData) {
743        assert!(self.parse_node_data.borrow_mut().insert(id, data).is_none());
744    }
745
746    fn get_parse_node_data<'a>(&'a self, id: &'a ParseNodeId) -> Ref<'a, ParseNodeData> {
747        Ref::map(self.parse_node_data.borrow(), |data| {
748            data.get(id).expect("Parse Node data not found!")
749        })
750    }
751
752    fn get_parse_node_data_mut<'a>(&'a self, id: &'a ParseNodeId) -> RefMut<'a, ParseNodeData> {
753        RefMut::map(self.parse_node_data.borrow_mut(), |data| {
754            data.get_mut(id).expect("Parse Node data not found!")
755        })
756    }
757}
758
759#[cfg_attr(crown, allow(crown::unrooted_must_root))]
760impl TreeSink for Sink {
761    type Output = Self;
762    fn finish(self) -> Self {
763        self
764    }
765
766    type Handle = ParseNode;
767    type ElemName<'a>
768        = ExpandedName<'a>
769    where
770        Self: 'a;
771
772    fn get_document(&self) -> Self::Handle {
773        self.document_node.clone()
774    }
775
776    fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle {
777        if let Some(ref contents) = self.get_parse_node_data(&target.id).contents {
778            return contents.clone();
779        }
780        let node = self.new_parse_node();
781        {
782            let mut data = self.get_parse_node_data_mut(&target.id);
783            data.contents = Some(node.clone());
784        }
785        self.send_op(ParseOperation::GetTemplateContents {
786            target: target.id,
787            contents: node.id,
788        });
789        node
790    }
791
792    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
793        x.id == y.id
794    }
795
796    fn elem_name<'a>(&self, target: &'a Self::Handle) -> ExpandedName<'a> {
797        target
798            .qual_name
799            .as_ref()
800            .expect("Expected qual name of node!")
801            .expanded()
802    }
803
804    fn create_element(
805        &self,
806        name: QualName,
807        html_attrs: Vec<HtmlAttribute>,
808        _flags: ElementFlags,
809    ) -> Self::Handle {
810        let mut node = self.new_parse_node();
811        node.qual_name = Some(name.clone());
812        {
813            let mut node_data = self.get_parse_node_data_mut(&node.id);
814            node_data.is_integration_point = html_attrs.iter().any(|attr| {
815                let attr_value = &String::from(attr.value.clone());
816                (attr.name.local == local_name!("encoding") && attr.name.ns == ns!()) &&
817                    (attr_value.eq_ignore_ascii_case("text/html") ||
818                        attr_value.eq_ignore_ascii_case("application/xhtml+xml"))
819            });
820        }
821        let attrs = html_attrs
822            .into_iter()
823            .map(|attr| Attribute {
824                name: attr.name,
825                value: String::from(attr.value),
826            })
827            .collect();
828
829        self.send_op(ParseOperation::CreateElement {
830            node: node.id,
831            name,
832            attrs,
833            current_line: self.current_line.get(),
834        });
835        node
836    }
837
838    fn create_comment(&self, text: StrTendril) -> Self::Handle {
839        let node = self.new_parse_node();
840        self.send_op(ParseOperation::CreateComment {
841            text: String::from(text),
842            node: node.id,
843        });
844        node
845    }
846
847    fn create_pi(&self, target: StrTendril, data: StrTendril) -> ParseNode {
848        let node = self.new_parse_node();
849        self.send_op(ParseOperation::CreatePI {
850            node: node.id,
851            target: String::from(target),
852            data: String::from(data),
853        });
854        node
855    }
856
857    fn associate_with_form(
858        &self,
859        target: &Self::Handle,
860        form: &Self::Handle,
861        nodes: (&Self::Handle, Option<&Self::Handle>),
862    ) {
863        let (element, prev_element) = nodes;
864        self.send_op(ParseOperation::AssociateWithForm {
865            target: target.id,
866            form: form.id,
867            element: element.id,
868            prev_element: prev_element.map(|p| p.id),
869        });
870    }
871
872    fn append_before_sibling(
873        &self,
874        sibling: &Self::Handle,
875        new_node: HtmlNodeOrText<Self::Handle>,
876    ) {
877        let new_node = match new_node {
878            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
879            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
880        };
881        self.send_op(ParseOperation::AppendBeforeSibling {
882            sibling: sibling.id,
883            node: new_node,
884        });
885    }
886
887    fn append_based_on_parent_node(
888        &self,
889        elem: &Self::Handle,
890        prev_elem: &Self::Handle,
891        child: HtmlNodeOrText<Self::Handle>,
892    ) {
893        let child = match child {
894            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
895            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
896        };
897        self.send_op(ParseOperation::AppendBasedOnParentNode {
898            element: elem.id,
899            prev_element: prev_elem.id,
900            node: child,
901        });
902    }
903
904    fn parse_error(&self, msg: Cow<'static, str>) {
905        debug!("Parse error: {}", msg);
906    }
907
908    fn set_quirks_mode(&self, mode: QuirksMode) {
909        let mode = match mode {
910            QuirksMode::Quirks => ServoQuirksMode::Quirks,
911            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
912            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
913        };
914        self.send_op(ParseOperation::SetQuirksMode { mode });
915    }
916
917    fn append(&self, parent: &Self::Handle, child: HtmlNodeOrText<Self::Handle>) {
918        let child = match child {
919            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
920            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
921        };
922        self.send_op(ParseOperation::Append {
923            parent: parent.id,
924            node: child,
925        });
926    }
927
928    fn append_doctype_to_document(
929        &self,
930        name: StrTendril,
931        public_id: StrTendril,
932        system_id: StrTendril,
933    ) {
934        self.send_op(ParseOperation::AppendDoctypeToDocument {
935            name: String::from(name),
936            public_id: String::from(public_id),
937            system_id: String::from(system_id),
938        });
939    }
940
941    fn add_attrs_if_missing(&self, target: &Self::Handle, html_attrs: Vec<HtmlAttribute>) {
942        let attrs = html_attrs
943            .into_iter()
944            .map(|attr| Attribute {
945                name: attr.name,
946                value: String::from(attr.value),
947            })
948            .collect();
949        self.send_op(ParseOperation::AddAttrsIfMissing {
950            target: target.id,
951            attrs,
952        });
953    }
954
955    fn remove_from_parent(&self, target: &Self::Handle) {
956        self.send_op(ParseOperation::RemoveFromParent { target: target.id });
957    }
958
959    fn mark_script_already_started(&self, node: &Self::Handle) {
960        self.send_op(ParseOperation::MarkScriptAlreadyStarted { node: node.id });
961    }
962
963    fn reparent_children(&self, parent: &Self::Handle, new_parent: &Self::Handle) {
964        self.send_op(ParseOperation::ReparentChildren {
965            parent: parent.id,
966            new_parent: new_parent.id,
967        });
968    }
969
970    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
971    /// Specifically, the `<annotation-xml>` cases.
972    fn is_mathml_annotation_xml_integration_point(&self, handle: &Self::Handle) -> bool {
973        let node_data = self.get_parse_node_data(&handle.id);
974        node_data.is_integration_point
975    }
976
977    fn set_current_line(&self, line_number: u64) {
978        self.current_line.set(line_number);
979    }
980
981    fn pop(&self, node: &Self::Handle) {
982        self.send_op(ParseOperation::Pop { node: node.id });
983    }
984
985    fn allow_declarative_shadow_roots(&self, _intended_parent: &Self::Handle) -> bool {
986        self.allow_declarative_shadow_roots
987    }
988
989    fn attach_declarative_shadow(
990        &self,
991        location: &Self::Handle,
992        template: &Self::Handle,
993        attributes: &[HtmlAttribute],
994    ) -> bool {
995        let attributes = attributes
996            .iter()
997            .map(|attribute| Attribute {
998                name: attribute.name.clone(),
999                value: String::from(attribute.value.clone()),
1000            })
1001            .collect();
1002
1003        // Unfortunately the parser can only proceed after it knows whether attaching the shadow root
1004        // succeeded or failed. Attaching a shadow root can fail for many different reasons,
1005        // and so we need to block until the script thread has processed this operation.
1006        let (sender, receiver) = unbounded();
1007        self.send_op(ParseOperation::AttachDeclarativeShadowRoot {
1008            location: location.id,
1009            template: template.id,
1010            attributes,
1011            sender,
1012        });
1013
1014        receiver.recv().unwrap()
1015    }
1016}