script/dom/servoparser/
async_html.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![cfg_attr(crown, expect(crown::unrooted_must_root))]
6
7use std::borrow::Cow;
8use std::cell::{Cell, Ref, RefCell, RefMut};
9use std::collections::vec_deque::VecDeque;
10use std::rc::Rc;
11use std::thread;
12
13use crossbeam_channel::{Receiver, Sender, unbounded};
14use html5ever::buffer_queue::BufferQueue;
15use html5ever::tendril::fmt::UTF8;
16use html5ever::tendril::{SendTendril, StrTendril, Tendril};
17use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
18use html5ever::tree_builder::{
19    ElementFlags, NodeOrText as HtmlNodeOrText, QuirksMode, TreeBuilder, TreeBuilderOpts, TreeSink,
20};
21use html5ever::{Attribute as HtmlAttribute, ExpandedName, QualName, local_name, ns};
22use markup5ever::TokenizerResult;
23use rustc_hash::FxHashMap;
24use servo_url::ServoUrl;
25use style::context::QuirksMode as ServoQuirksMode;
26
27use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
28use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
29use crate::dom::bindings::inheritance::Castable;
30use crate::dom::bindings::root::{Dom, DomRoot};
31use crate::dom::bindings::str::DOMString;
32use crate::dom::comment::Comment;
33use crate::dom::customelementregistry::CustomElementReactionStack;
34use crate::dom::document::Document;
35use crate::dom::documenttype::DocumentType;
36use crate::dom::element::{Element, ElementCreator};
37use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
38use crate::dom::html::htmlscriptelement::HTMLScriptElement;
39use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
40use crate::dom::node::Node;
41use crate::dom::processinginstruction::ProcessingInstruction;
42use crate::dom::servoparser::{
43    ElementAttribute, ParsingAlgorithm, attach_declarative_shadow_inner, create_element_for_token,
44};
45use crate::dom::virtualmethods::vtable_for;
46use crate::script_runtime::CanGc;
47
/// Identifier that stands in for a DOM node in messages exchanged with the parser thread.
///
/// Id `0` is reserved for the document node.
type ParseNodeId = usize;
49
/// Handle that the tree builder manipulates in place of a real DOM node.
#[derive(Clone, JSTraceable, MallocSizeOf)]
pub(crate) struct ParseNode {
    /// Key under which the matching DOM node is registered by the `Tokenizer`.
    id: ParseNodeId,
    /// Qualified name of the element this handle stands for; `None` for handles that were
    /// not produced by `Sink::create_element`.
    #[no_trace]
    qual_name: Option<QualName>,
}
56
/// Payload of an insertion operation: either an existing parse node or raw text.
#[derive(JSTraceable, MallocSizeOf)]
enum NodeOrText {
    /// A node that was created by an earlier operation.
    Node(ParseNode),
    /// Text content, carried as an owned `String` so it can cross threads.
    Text(String),
}
62
/// Owned, sendable counterpart of an html5ever attribute.
#[derive(JSTraceable, MallocSizeOf)]
struct Attribute {
    /// Qualified attribute name.
    #[no_trace]
    name: QualName,
    /// Attribute value converted from the tokenizer's tendril into a `String`.
    value: String,
}
69
/// Tree-building action produced by the `Sink` on the parser thread and executed by
/// `Tokenizer::process_operation` on the main thread.
#[derive(JSTraceable, MallocSizeOf)]
enum ParseOperation {
    /// Register the content fragment of the template element `target` under id `contents`.
    GetTemplateContents {
        target: ParseNodeId,
        contents: ParseNodeId,
    },

    /// Create an element and register it under id `node`.
    CreateElement {
        node: ParseNodeId,
        #[no_trace]
        name: QualName,
        attrs: Vec<Attribute>,
        /// Line number recorded by the sink when the element was created.
        current_line: u64,
    },

    /// Create a comment node and register it under id `node`.
    CreateComment {
        text: String,
        node: ParseNodeId,
    },
    /// Insert `node` immediately before `sibling`, under `sibling`'s parent.
    AppendBeforeSibling {
        sibling: ParseNodeId,
        node: NodeOrText,
    },
    /// Insert `node` before `element` if `element` has a parent, otherwise append it to
    /// `prev_element`.
    AppendBasedOnParentNode {
        element: ParseNodeId,
        prev_element: ParseNodeId,
        node: NodeOrText,
    },
    /// Append `node` to `parent`.
    Append {
        parent: ParseNodeId,
        node: NodeOrText,
    },

    /// Create a doctype node and append it to the document.
    AppendDoctypeToDocument {
        name: String,
        public_id: String,
        system_id: String,
    },

    /// Apply `attrs` to the element `target` through `set_attribute_from_parser`.
    AddAttrsIfMissing {
        target: ParseNodeId,
        attrs: Vec<Attribute>,
    },
    /// Detach `target` from its parent, if it has one.
    RemoveFromParent {
        target: ParseNodeId,
    },
    /// Flag `node` as already started if it is a script element.
    MarkScriptAlreadyStarted {
        node: ParseNodeId,
    },
    /// Move every child of `parent` to the end of `new_parent`.
    ReparentChildren {
        parent: ParseNodeId,
        new_parent: ParseNodeId,
    },

    /// Make `form` the form owner of `target`, provided the relevant tree node and the
    /// form are in the same home subtree.
    AssociateWithForm {
        target: ParseNodeId,
        form: ParseNodeId,
        element: ParseNodeId,
        prev_element: Option<ParseNodeId>,
    },

    /// Create a processing instruction node and register it under id `node`.
    CreatePI {
        node: ParseNodeId,
        target: String,
        data: String,
    },

    /// Invoke the `pop` virtual method of `node`.
    Pop {
        node: ParseNodeId,
    },

    /// Set the quirks mode of the document.
    SetQuirksMode {
        #[ignore_malloc_size_of = "Defined in style"]
        #[no_trace]
        mode: ServoQuirksMode,
    },

    /// Attempt to attach a declarative shadow root described by `template` and `attributes`
    /// at `location`.
    AttachDeclarativeShadowRoot {
        location: ParseNodeId,
        template: ParseNodeId,
        attributes: Vec<Attribute>,
        /// Used to notify the parser thread whether or not attaching the shadow root succeeded
        #[no_trace]
        sender: Sender<bool>,
    },
}
156
/// Messages sent from the parser thread (HtmlTokenizer and Sink) to the main-thread
/// `Tokenizer`.
#[derive(MallocSizeOf)]
enum ToTokenizerMsg {
    // From HtmlTokenizer
    /// A `Feed` request finished; `updated_input` is whatever the tokenizer left unconsumed.
    TokenizerResultDone {
        updated_input: VecDeque<SendTendril<UTF8>>,
    },
    /// A `Feed` request stopped at `script`; `updated_input` is the unconsumed remainder.
    TokenizerResultScript {
        script: ParseNode,
        updated_input: VecDeque<SendTendril<UTF8>>,
    },
    /// A `Feed` request stopped because the tokenizer reported an encoding indicator.
    /// NOTE(review): unlike the two variants above, this one carries no unconsumed input.
    EncodingIndicator(SendTendril<UTF8>),
    End, // Sent to Tokenizer to signify HtmlTokenizer's end method has returned
    // From Sink
    /// A tree operation to be executed on the main thread.
    ProcessOperation(ParseOperation),
}
172
/// Requests sent from the main-thread `Tokenizer` to the parser thread.
#[derive(MallocSizeOf)]
enum ToHtmlTokenizerMsg {
    /// Tokenize `input`; the parser thread replies with a tokenizer result message.
    Feed { input: VecDeque<SendTendril<UTF8>> },
    /// Finish tokenization; the parser thread replies with `ToTokenizerMsg::End` and exits
    /// its message loop.
    End,
    /// Switch the HTML tokenizer to the plaintext state.
    SetPlainTextState,
}
179
180fn create_buffer_queue(mut buffers: VecDeque<SendTendril<UTF8>>) -> BufferQueue {
181    let buffer_queue = BufferQueue::default();
182    while let Some(st) = buffers.pop_front() {
183        buffer_queue.push_back(StrTendril::from(st));
184    }
185    buffer_queue
186}
187
188// The async HTML Tokenizer consists of two separate types working together: the Tokenizer
189// (defined below), which lives on the main thread, and the HtmlTokenizer, defined in html5ever, which
190// lives on the parser thread.
191// Steps:
192// 1. A call to Tokenizer::new will spin up a new parser thread, creating an HtmlTokenizer instance,
193//    which starts listening for messages from Tokenizer.
194// 2. Upon receiving an input from ServoParser, the Tokenizer forwards it to HtmlTokenizer, where it starts
195//    creating the necessary tree actions based on the input.
196// 3. HtmlTokenizer sends these tree actions to the Tokenizer as soon as it creates them. The Tokenizer
197//    then executes the received actions.
198//
199//    _____________                           _______________
200//   |             |                         |               |
201//   |             |                         |               |
202//   |             |   ToHtmlTokenizerMsg    |               |
203//   |             |------------------------>| HtmlTokenizer |
204//   |             |                         |               |
205//   |  Tokenizer  |     ToTokenizerMsg      |               |
206//   |             |<------------------------|    ________   |
207//   |             |                         |   |        |  |
208//   |             |     ToTokenizerMsg      |   |  Sink  |  |
209//   |             |<------------------------|---|        |  |
210//   |             |                         |   |________|  |
211//   |_____________|                         |_______________|
212//
/// Main-thread half of the async HTML tokenizer: forwards input to the parser thread and
/// applies the tree operations it receives back to the DOM.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Tokenizer {
    /// Document being populated by the parser.
    document: Dom<Document>,
    /// Incoming messages from the parser thread.
    #[ignore_malloc_size_of = "Defined in std"]
    #[no_trace]
    receiver: Receiver<ToTokenizerMsg>,
    /// Outgoing requests to the parser thread.
    #[ignore_malloc_size_of = "Defined in std"]
    #[no_trace]
    html_tokenizer_sender: Sender<ToHtmlTokenizerMsg>,
    //#[ignore_malloc_size_of = "Defined in std"]
    /// DOM nodes registered so far, keyed by the id the parser thread uses for them.
    nodes: RefCell<FxHashMap<ParseNodeId, Dom<Node>>>,
    /// URL passed to `Tokenizer::new`; also used to name the parser thread.
    #[no_trace]
    url: ServoUrl,
    /// `Fragment` when a fragment context was supplied at construction, `Normal` otherwise.
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack obtained from the document at construction.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
    /// Line number carried by the most recent `CreateElement` operation (starts at 1).
    current_line: Cell<u64>,
}
232
233impl Tokenizer {
234    pub(crate) fn new(
235        document: &Document,
236        url: ServoUrl,
237        fragment_context: Option<super::FragmentContext>,
238    ) -> Self {
239        // Messages from the Tokenizer (main thread) to HtmlTokenizer (parser thread)
240        let (to_html_tokenizer_sender, html_tokenizer_receiver) = unbounded();
241        // Messages from HtmlTokenizer and Sink (parser thread) to Tokenizer (main thread)
242        let (to_tokenizer_sender, tokenizer_receiver) = unbounded();
243
244        let algorithm = match fragment_context {
245            Some(_) => ParsingAlgorithm::Fragment,
246            None => ParsingAlgorithm::Normal,
247        };
248
249        let custom_element_reaction_stack = document.custom_element_reaction_stack();
250        let tokenizer = Tokenizer {
251            document: Dom::from_ref(document),
252            receiver: tokenizer_receiver,
253            html_tokenizer_sender: to_html_tokenizer_sender,
254            nodes: RefCell::new(FxHashMap::default()),
255            url,
256            parsing_algorithm: algorithm,
257            custom_element_reaction_stack,
258            current_line: Cell::new(1),
259        };
260        tokenizer.insert_node(0, Dom::from_ref(document.upcast()));
261
262        let sink = Sink::new(
263            to_tokenizer_sender.clone(),
264            document.allow_declarative_shadow_roots(),
265        );
266        let mut form_parse_node = None;
267        let mut parser_fragment_context = None;
268        if let Some(fragment_context) = fragment_context {
269            let node = sink.new_parse_node();
270            tokenizer.insert_node(node.id, Dom::from_ref(fragment_context.context_elem));
271            parser_fragment_context =
272                Some((node, fragment_context.context_element_allows_scripting));
273
274            form_parse_node = fragment_context.form_elem.map(|form_elem| {
275                let node = sink.new_parse_node();
276                tokenizer.insert_node(node.id, Dom::from_ref(form_elem));
277                node
278            });
279        };
280
281        // Create new thread for HtmlTokenizer. This is where parser actions
282        // will be generated from the input provided. These parser actions are then passed
283        // onto the main thread to be executed.
284        let scripting_enabled = document.has_browsing_context();
285        thread::Builder::new()
286            .name(format!("Parse:{}", tokenizer.url.debug_compact()))
287            .spawn(move || {
288                run(
289                    sink,
290                    parser_fragment_context,
291                    form_parse_node,
292                    to_tokenizer_sender,
293                    html_tokenizer_receiver,
294                    scripting_enabled,
295                );
296            })
297            .expect("HTML Parser thread spawning failed");
298
299        tokenizer
300    }
301
302    pub(crate) fn feed(
303        &self,
304        input: &BufferQueue,
305        can_gc: CanGc,
306    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
307        let mut send_tendrils = VecDeque::new();
308        while let Some(str) = input.pop_front() {
309            send_tendrils.push_back(SendTendril::from(str));
310        }
311
312        // Send message to parser thread, asking it to start reading from the input.
313        // Parser operation messages will be sent to main thread as they are evaluated.
314        self.html_tokenizer_sender
315            .send(ToHtmlTokenizerMsg::Feed {
316                input: send_tendrils,
317            })
318            .unwrap();
319
320        loop {
321            match self
322                .receiver
323                .recv()
324                .expect("Unexpected channel panic in main thread.")
325            {
326                ToTokenizerMsg::ProcessOperation(parse_op) => {
327                    self.process_operation(parse_op, can_gc)
328                },
329                ToTokenizerMsg::TokenizerResultDone { updated_input } => {
330                    let buffer_queue = create_buffer_queue(updated_input);
331                    input.replace_with(buffer_queue);
332                    return TokenizerResult::Done;
333                },
334                ToTokenizerMsg::TokenizerResultScript {
335                    script,
336                    updated_input,
337                } => {
338                    let buffer_queue = create_buffer_queue(updated_input);
339                    input.replace_with(buffer_queue);
340                    let script = self.get_node(&script.id);
341                    return TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()));
342                },
343                _ => unreachable!(),
344            };
345        }
346    }
347
348    pub(crate) fn end(&self, can_gc: CanGc) {
349        self.html_tokenizer_sender
350            .send(ToHtmlTokenizerMsg::End)
351            .unwrap();
352        loop {
353            match self
354                .receiver
355                .recv()
356                .expect("Unexpected channel panic in main thread.")
357            {
358                ToTokenizerMsg::ProcessOperation(parse_op) => {
359                    self.process_operation(parse_op, can_gc)
360                },
361                ToTokenizerMsg::TokenizerResultDone { updated_input: _ } |
362                ToTokenizerMsg::TokenizerResultScript {
363                    script: _,
364                    updated_input: _,
365                } |
366                ToTokenizerMsg::EncodingIndicator(_) => continue,
367                ToTokenizerMsg::End => return,
368            };
369        }
370    }
371
    /// Returns the URL this tokenizer was created with.
    pub(crate) fn url(&self) -> &ServoUrl {
        &self.url
    }
375
376    pub(crate) fn set_plaintext_state(&self) {
377        self.html_tokenizer_sender
378            .send(ToHtmlTokenizerMsg::SetPlainTextState)
379            .unwrap();
380    }
381
382    pub(crate) fn get_current_line(&self) -> u32 {
383        self.current_line.get() as u32
384    }
385
    /// Registers `node` under `id`.
    ///
    /// # Panics
    ///
    /// Panics if `id` is already registered.
    fn insert_node(&self, id: ParseNodeId, node: Dom<Node>) {
        assert!(self.nodes.borrow_mut().insert(id, node).is_none());
    }
389
390    fn get_node<'a>(&'a self, id: &ParseNodeId) -> Ref<'a, Dom<Node>> {
391        Ref::map(self.nodes.borrow(), |nodes| {
392            nodes.get(id).expect("Node not found!")
393        })
394    }
395
396    fn append_before_sibling(&self, sibling: ParseNodeId, node: NodeOrText, can_gc: CanGc) {
397        let node = match node {
398            NodeOrText::Node(n) => {
399                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
400            },
401            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
402        };
403        let sibling = &**self.get_node(&sibling);
404        let parent = &*sibling
405            .GetParentNode()
406            .expect("append_before_sibling called on node without parent");
407
408        super::insert(
409            parent,
410            Some(sibling),
411            node,
412            self.parsing_algorithm,
413            &self.custom_element_reaction_stack,
414            can_gc,
415        );
416    }
417
418    fn append(&self, parent: ParseNodeId, node: NodeOrText, can_gc: CanGc) {
419        let node = match node {
420            NodeOrText::Node(n) => {
421                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
422            },
423            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
424        };
425
426        let parent = &**self.get_node(&parent);
427        super::insert(
428            parent,
429            None,
430            node,
431            self.parsing_algorithm,
432            &self.custom_element_reaction_stack,
433            can_gc,
434        );
435    }
436
437    fn has_parent_node(&self, node: ParseNodeId) -> bool {
438        self.get_node(&node).GetParentNode().is_some()
439    }
440
441    fn same_tree(&self, x: ParseNodeId, y: ParseNodeId) -> bool {
442        let x = self.get_node(&x);
443        let y = self.get_node(&y);
444
445        let x = x.downcast::<Element>().expect("Element node expected");
446        let y = y.downcast::<Element>().expect("Element node expected");
447        x.is_in_same_home_subtree(y)
448    }
449
450    fn process_operation(&self, op: ParseOperation, can_gc: CanGc) {
451        let document = DomRoot::from_ref(&**self.get_node(&0));
452        let document = document
453            .downcast::<Document>()
454            .expect("Document node should be downcasted!");
455        match op {
456            ParseOperation::GetTemplateContents { target, contents } => {
457                let target = DomRoot::from_ref(&**self.get_node(&target));
458                let template = target
459                    .downcast::<HTMLTemplateElement>()
460                    .expect("Tried to extract contents from non-template element while parsing");
461                self.insert_node(contents, Dom::from_ref(template.Content(can_gc).upcast()));
462            },
463            ParseOperation::CreateElement {
464                node,
465                name,
466                attrs,
467                current_line,
468            } => {
469                self.current_line.set(current_line);
470                let attrs = attrs
471                    .into_iter()
472                    .map(|attr| ElementAttribute::new(attr.name, DOMString::from(attr.value)))
473                    .collect();
474                let element = create_element_for_token(
475                    name,
476                    attrs,
477                    &self.document,
478                    ElementCreator::ParserCreated(current_line),
479                    ParsingAlgorithm::Normal,
480                    &self.custom_element_reaction_stack,
481                    can_gc,
482                );
483                self.insert_node(node, Dom::from_ref(element.upcast()));
484            },
485            ParseOperation::CreateComment { text, node } => {
486                let comment = Comment::new(DOMString::from(text), document, None, can_gc);
487                self.insert_node(node, Dom::from_ref(comment.upcast()));
488            },
489            ParseOperation::AppendBeforeSibling { sibling, node } => {
490                self.append_before_sibling(sibling, node, can_gc);
491            },
492            ParseOperation::Append { parent, node } => {
493                self.append(parent, node, can_gc);
494            },
495            ParseOperation::AppendBasedOnParentNode {
496                element,
497                prev_element,
498                node,
499            } => {
500                if self.has_parent_node(element) {
501                    self.append_before_sibling(element, node, can_gc);
502                } else {
503                    self.append(prev_element, node, can_gc);
504                }
505            },
506            ParseOperation::AppendDoctypeToDocument {
507                name,
508                public_id,
509                system_id,
510            } => {
511                let doctype = DocumentType::new(
512                    DOMString::from(name),
513                    Some(DOMString::from(public_id)),
514                    Some(DOMString::from(system_id)),
515                    document,
516                    can_gc,
517                );
518
519                document
520                    .upcast::<Node>()
521                    .AppendChild(doctype.upcast(), can_gc)
522                    .expect("Appending failed");
523            },
524            ParseOperation::AddAttrsIfMissing { target, attrs } => {
525                let node = self.get_node(&target);
526                let elem = node
527                    .downcast::<Element>()
528                    .expect("tried to set attrs on non-Element in HTML parsing");
529                for attr in attrs {
530                    elem.set_attribute_from_parser(
531                        attr.name,
532                        DOMString::from(attr.value),
533                        None,
534                        can_gc,
535                    );
536                }
537            },
538            ParseOperation::RemoveFromParent { target } => {
539                if let Some(ref parent) = self.get_node(&target).GetParentNode() {
540                    parent.RemoveChild(&self.get_node(&target), can_gc).unwrap();
541                }
542            },
543            ParseOperation::MarkScriptAlreadyStarted { node } => {
544                let node = self.get_node(&node);
545                let script = node.downcast::<HTMLScriptElement>();
546                if let Some(script) = script {
547                    script.set_already_started(true)
548                }
549            },
550            ParseOperation::ReparentChildren { parent, new_parent } => {
551                let parent = self.get_node(&parent);
552                let new_parent = self.get_node(&new_parent);
553                while let Some(child) = parent.GetFirstChild() {
554                    new_parent.AppendChild(&child, can_gc).unwrap();
555                }
556            },
557            ParseOperation::AssociateWithForm {
558                target,
559                form,
560                element,
561                prev_element,
562            } => {
563                let tree_node = prev_element.map_or(element, |prev| {
564                    if self.has_parent_node(element) {
565                        element
566                    } else {
567                        prev
568                    }
569                });
570
571                if !self.same_tree(tree_node, form) {
572                    return;
573                }
574                let form = self.get_node(&form);
575                let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
576                    .expect("Owner must be a form element");
577
578                let node = self.get_node(&target);
579                let elem = node.downcast::<Element>();
580                let control = elem.and_then(|e| e.as_maybe_form_control());
581
582                if let Some(control) = control {
583                    control.set_form_owner_from_parser(&form, can_gc);
584                }
585            },
586            ParseOperation::Pop { node } => {
587                vtable_for(&self.get_node(&node)).pop();
588            },
589            ParseOperation::CreatePI { node, target, data } => {
590                let pi = ProcessingInstruction::new(
591                    DOMString::from(target),
592                    DOMString::from(data),
593                    document,
594                    can_gc,
595                );
596                self.insert_node(node, Dom::from_ref(pi.upcast()));
597            },
598            ParseOperation::SetQuirksMode { mode } => {
599                document.set_quirks_mode(mode);
600            },
601            ParseOperation::AttachDeclarativeShadowRoot {
602                location,
603                template,
604                attributes,
605                sender,
606            } => {
607                let location = self.get_node(&location);
608                let template = self.get_node(&template);
609                let attributes: Vec<_> = attributes
610                    .into_iter()
611                    .map(|attribute| HtmlAttribute {
612                        name: attribute.name,
613                        value: StrTendril::from(attribute.value),
614                    })
615                    .collect();
616
617                let did_succeed =
618                    attach_declarative_shadow_inner(&location, &template, &attributes);
619                sender.send(did_succeed).unwrap();
620            },
621        }
622    }
623}
624
625/// Run the parser.
626///
627/// The `fragment_context` argument is `Some` in the fragment case and describes the context
628/// node as well as whether scripting is enabled for the context node. Note that whether or not
629/// scripting is enabled for the context node does not affect whether scripting is enabled for the
630/// parser, that is determined by the `scripting_enabled` argument.
631fn run(
632    sink: Sink,
633    fragment_context: Option<(ParseNode, bool)>,
634    form_parse_node: Option<ParseNode>,
635    sender: Sender<ToTokenizerMsg>,
636    receiver: Receiver<ToHtmlTokenizerMsg>,
637    scripting_enabled: bool,
638) {
639    let options = TreeBuilderOpts {
640        scripting_enabled,
641        ..Default::default()
642    };
643
644    let html_tokenizer = if let Some((context_node, context_scripting_enabled)) = fragment_context {
645        let tree_builder =
646            TreeBuilder::new_for_fragment(sink, context_node, form_parse_node, options);
647
648        let tok_options = TokenizerOpts {
649            initial_state: Some(
650                tree_builder.tokenizer_state_for_context_elem(context_scripting_enabled),
651            ),
652            ..Default::default()
653        };
654
655        HtmlTokenizer::new(tree_builder, tok_options)
656    } else {
657        HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
658    };
659
660    loop {
661        match receiver
662            .recv()
663            .expect("Unexpected channel panic in html parser thread")
664        {
665            ToHtmlTokenizerMsg::Feed { input } => {
666                let input = create_buffer_queue(input);
667                let res = html_tokenizer.feed(&input);
668
669                // Gather changes to 'input' and place them in 'updated_input',
670                // which will be sent to the main thread to update feed method's 'input'
671                let mut updated_input = VecDeque::new();
672                while let Some(st) = input.pop_front() {
673                    updated_input.push_back(SendTendril::from(st));
674                }
675
676                let res = match res {
677                    TokenizerResult::Done => ToTokenizerMsg::TokenizerResultDone { updated_input },
678                    TokenizerResult::Script(script) => ToTokenizerMsg::TokenizerResultScript {
679                        script,
680                        updated_input,
681                    },
682                    TokenizerResult::EncodingIndicator(encoding) => {
683                        ToTokenizerMsg::EncodingIndicator(SendTendril::from(encoding))
684                    },
685                };
686                sender.send(res).unwrap();
687            },
688            ToHtmlTokenizerMsg::End => {
689                html_tokenizer.end();
690                sender.send(ToTokenizerMsg::End).unwrap();
691                break;
692            },
693            ToHtmlTokenizerMsg::SetPlainTextState => html_tokenizer.set_plaintext_state(),
694        };
695    }
696}
697
/// Per-node bookkeeping kept by the `Sink` on the parser thread.
#[derive(Default, JSTraceable, MallocSizeOf)]
struct ParseNodeData {
    /// Handle of the template contents, once `get_template_contents` has been called for
    /// this node.
    contents: Option<ParseNode>,
    /// Set by `create_element` when the element carries an `encoding` attribute whose value
    /// is `text/html` or `application/xhtml+xml` (compared ASCII case-insensitively).
    is_integration_point: bool,
}
703
/// `TreeSink` implementation that runs on the parser thread and turns tree-builder
/// callbacks into `ParseOperation` messages for the main thread.
pub(crate) struct Sink {
    /// Line number stamped onto `CreateElement` operations; starts at 1.
    current_line: Cell<u64>,
    /// Bookkeeping for every parse node handed out so far, keyed by id.
    parse_node_data: RefCell<FxHashMap<ParseNodeId, ParseNodeData>>,
    /// Id the next call to `new_parse_node` will hand out; starts at 1 because 0 is the
    /// document.
    next_parse_node_id: Cell<ParseNodeId>,
    /// Handle for the document (id 0).
    document_node: ParseNode,
    /// Channel to the main-thread `Tokenizer`.
    sender: Sender<ToTokenizerMsg>,
    /// Value of `Document::allow_declarative_shadow_roots` captured at construction.
    allow_declarative_shadow_roots: bool,
}
712
713impl Sink {
714    fn new(sender: Sender<ToTokenizerMsg>, allow_declarative_shadow_roots: bool) -> Sink {
715        let sink = Sink {
716            current_line: Cell::new(1),
717            parse_node_data: RefCell::new(FxHashMap::default()),
718            next_parse_node_id: Cell::new(1),
719            document_node: ParseNode {
720                id: 0,
721                qual_name: None,
722            },
723            sender,
724            allow_declarative_shadow_roots,
725        };
726        let data = ParseNodeData::default();
727        sink.insert_parse_node_data(0, data);
728        sink
729    }
730
731    fn new_parse_node(&self) -> ParseNode {
732        let id = self.next_parse_node_id.get();
733        let data = ParseNodeData::default();
734        self.insert_parse_node_data(id, data);
735        self.next_parse_node_id.set(id + 1);
736        ParseNode {
737            id,
738            qual_name: None,
739        }
740    }
741
742    fn send_op(&self, op: ParseOperation) {
743        self.sender
744            .send(ToTokenizerMsg::ProcessOperation(op))
745            .unwrap();
746    }
747
748    fn insert_parse_node_data(&self, id: ParseNodeId, data: ParseNodeData) {
749        assert!(self.parse_node_data.borrow_mut().insert(id, data).is_none());
750    }
751
752    fn get_parse_node_data<'a>(&'a self, id: &'a ParseNodeId) -> Ref<'a, ParseNodeData> {
753        Ref::map(self.parse_node_data.borrow(), |data| {
754            data.get(id).expect("Parse Node data not found!")
755        })
756    }
757
758    fn get_parse_node_data_mut<'a>(&'a self, id: &'a ParseNodeId) -> RefMut<'a, ParseNodeData> {
759        RefMut::map(self.parse_node_data.borrow_mut(), |data| {
760            data.get_mut(id).expect("Parse Node data not found!")
761        })
762    }
763}
764
765impl TreeSink for Sink {
766    type Output = Self;
    /// Returns the sink itself as the parse output.
    fn finish(self) -> Self {
        self
    }
770
771    type Handle = ParseNode;
772    type ElemName<'a>
773        = ExpandedName<'a>
774    where
775        Self: 'a;
776
    /// Returns a handle to the document node.
    fn get_document(&self) -> Self::Handle {
        self.document_node.clone()
    }
780
781    fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle {
782        if let Some(ref contents) = self.get_parse_node_data(&target.id).contents {
783            return contents.clone();
784        }
785        let node = self.new_parse_node();
786        {
787            let mut data = self.get_parse_node_data_mut(&target.id);
788            data.contents = Some(node.clone());
789        }
790        self.send_op(ParseOperation::GetTemplateContents {
791            target: target.id,
792            contents: node.id,
793        });
794        node
795    }
796
    /// Two handles denote the same node exactly when their ids are equal.
    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
        x.id == y.id
    }
800
801    fn elem_name<'a>(&self, target: &'a Self::Handle) -> ExpandedName<'a> {
802        target
803            .qual_name
804            .as_ref()
805            .expect("Expected qual name of node!")
806            .expanded()
807    }
808
809    fn create_element(
810        &self,
811        name: QualName,
812        html_attrs: Vec<HtmlAttribute>,
813        _flags: ElementFlags,
814    ) -> Self::Handle {
815        let mut node = self.new_parse_node();
816        node.qual_name = Some(name.clone());
817        {
818            let mut node_data = self.get_parse_node_data_mut(&node.id);
819            node_data.is_integration_point = html_attrs.iter().any(|attr| {
820                let attr_value = &String::from(attr.value.clone());
821                (attr.name.local == local_name!("encoding") && attr.name.ns == ns!()) &&
822                    (attr_value.eq_ignore_ascii_case("text/html") ||
823                        attr_value.eq_ignore_ascii_case("application/xhtml+xml"))
824            });
825        }
826        let attrs = html_attrs
827            .into_iter()
828            .map(|attr| Attribute {
829                name: attr.name,
830                value: String::from(attr.value),
831            })
832            .collect();
833
834        self.send_op(ParseOperation::CreateElement {
835            node: node.id,
836            name,
837            attrs,
838            current_line: self.current_line.get(),
839        });
840        node
841    }
842
843    fn create_comment(&self, text: StrTendril) -> Self::Handle {
844        let node = self.new_parse_node();
845        self.send_op(ParseOperation::CreateComment {
846            text: String::from(text),
847            node: node.id,
848        });
849        node
850    }
851
852    fn create_pi(&self, target: StrTendril, data: StrTendril) -> ParseNode {
853        let node = self.new_parse_node();
854        self.send_op(ParseOperation::CreatePI {
855            node: node.id,
856            target: String::from(target),
857            data: String::from(data),
858        });
859        node
860    }
861
862    fn associate_with_form(
863        &self,
864        target: &Self::Handle,
865        form: &Self::Handle,
866        nodes: (&Self::Handle, Option<&Self::Handle>),
867    ) {
868        let (element, prev_element) = nodes;
869        self.send_op(ParseOperation::AssociateWithForm {
870            target: target.id,
871            form: form.id,
872            element: element.id,
873            prev_element: prev_element.map(|p| p.id),
874        });
875    }
876
877    fn append_before_sibling(
878        &self,
879        sibling: &Self::Handle,
880        new_node: HtmlNodeOrText<Self::Handle>,
881    ) {
882        let new_node = match new_node {
883            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
884            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
885        };
886        self.send_op(ParseOperation::AppendBeforeSibling {
887            sibling: sibling.id,
888            node: new_node,
889        });
890    }
891
892    fn append_based_on_parent_node(
893        &self,
894        elem: &Self::Handle,
895        prev_elem: &Self::Handle,
896        child: HtmlNodeOrText<Self::Handle>,
897    ) {
898        let child = match child {
899            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
900            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
901        };
902        self.send_op(ParseOperation::AppendBasedOnParentNode {
903            element: elem.id,
904            prev_element: prev_elem.id,
905            node: child,
906        });
907    }
908
909    fn parse_error(&self, msg: Cow<'static, str>) {
910        debug!("Parse error: {}", msg);
911    }
912
913    fn set_quirks_mode(&self, mode: QuirksMode) {
914        let mode = match mode {
915            QuirksMode::Quirks => ServoQuirksMode::Quirks,
916            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
917            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
918        };
919        self.send_op(ParseOperation::SetQuirksMode { mode });
920    }
921
922    fn append(&self, parent: &Self::Handle, child: HtmlNodeOrText<Self::Handle>) {
923        let child = match child {
924            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
925            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
926        };
927        self.send_op(ParseOperation::Append {
928            parent: parent.id,
929            node: child,
930        });
931    }
932
933    fn append_doctype_to_document(
934        &self,
935        name: StrTendril,
936        public_id: StrTendril,
937        system_id: StrTendril,
938    ) {
939        self.send_op(ParseOperation::AppendDoctypeToDocument {
940            name: String::from(name),
941            public_id: String::from(public_id),
942            system_id: String::from(system_id),
943        });
944    }
945
946    fn add_attrs_if_missing(&self, target: &Self::Handle, html_attrs: Vec<HtmlAttribute>) {
947        let attrs = html_attrs
948            .into_iter()
949            .map(|attr| Attribute {
950                name: attr.name,
951                value: String::from(attr.value),
952            })
953            .collect();
954        self.send_op(ParseOperation::AddAttrsIfMissing {
955            target: target.id,
956            attrs,
957        });
958    }
959
960    fn remove_from_parent(&self, target: &Self::Handle) {
961        self.send_op(ParseOperation::RemoveFromParent { target: target.id });
962    }
963
964    fn mark_script_already_started(&self, node: &Self::Handle) {
965        self.send_op(ParseOperation::MarkScriptAlreadyStarted { node: node.id });
966    }
967
968    fn reparent_children(&self, parent: &Self::Handle, new_parent: &Self::Handle) {
969        self.send_op(ParseOperation::ReparentChildren {
970            parent: parent.id,
971            new_parent: new_parent.id,
972        });
973    }
974
975    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
976    /// Specifically, the `<annotation-xml>` cases.
977    fn is_mathml_annotation_xml_integration_point(&self, handle: &Self::Handle) -> bool {
978        let node_data = self.get_parse_node_data(&handle.id);
979        node_data.is_integration_point
980    }
981
982    fn set_current_line(&self, line_number: u64) {
983        self.current_line.set(line_number);
984    }
985
986    fn pop(&self, node: &Self::Handle) {
987        self.send_op(ParseOperation::Pop { node: node.id });
988    }
989
990    fn allow_declarative_shadow_roots(&self, _intended_parent: &Self::Handle) -> bool {
991        self.allow_declarative_shadow_roots
992    }
993
994    fn attach_declarative_shadow(
995        &self,
996        location: &Self::Handle,
997        template: &Self::Handle,
998        attributes: &[HtmlAttribute],
999    ) -> bool {
1000        let attributes = attributes
1001            .iter()
1002            .map(|attribute| Attribute {
1003                name: attribute.name.clone(),
1004                value: String::from(attribute.value.clone()),
1005            })
1006            .collect();
1007
1008        // Unfortunately the parser can only proceed after it knows whether attaching the shadow root
1009        // succeeded or failed. Attaching a shadow root can fail for many different reasons,
1010        // and so we need to block until the script thread has processed this operation.
1011        let (sender, receiver) = unbounded();
1012        self.send_op(ParseOperation::AttachDeclarativeShadowRoot {
1013            location: location.id,
1014            template: template.id,
1015            attributes,
1016            sender,
1017        });
1018
1019        receiver.recv().unwrap()
1020    }
1021}