Skip to main content

script/dom/servoparser/
async_html.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![cfg_attr(crown, expect(crown::unrooted_must_root))]
6
7use std::borrow::Cow;
8use std::cell::{Cell, Ref, RefCell, RefMut};
9use std::collections::vec_deque::VecDeque;
10use std::rc::Rc;
11use std::thread;
12
13use crossbeam_channel::{Receiver, Sender, unbounded};
14use html5ever::buffer_queue::BufferQueue;
15use html5ever::tendril::fmt::UTF8;
16use html5ever::tendril::{SendTendril, StrTendril, Tendril};
17use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
18use html5ever::tree_builder::{
19    ElementFlags, NodeOrText as HtmlNodeOrText, QuirksMode, TreeBuilder, TreeBuilderOpts, TreeSink,
20};
21use html5ever::{Attribute as HtmlAttribute, ExpandedName, QualName, local_name, ns};
22use markup5ever::TokenizerResult;
23use rustc_hash::FxHashMap;
24use servo_url::ServoUrl;
25use style::context::QuirksMode as ServoQuirksMode;
26
27use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
28use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
29use crate::dom::bindings::inheritance::Castable;
30use crate::dom::bindings::root::{Dom, DomRoot};
31use crate::dom::bindings::str::DOMString;
32use crate::dom::comment::Comment;
33use crate::dom::customelementregistry::CustomElementReactionStack;
34use crate::dom::document::Document;
35use crate::dom::documenttype::DocumentType;
36use crate::dom::element::{Element, ElementCreator};
37use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
38use crate::dom::html::htmlscriptelement::HTMLScriptElement;
39use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
40use crate::dom::node::Node;
41use crate::dom::processinginstruction::ProcessingInstruction;
42use crate::dom::servoparser::{
43    ElementAttribute, ParsingAlgorithm, attach_declarative_shadow_inner, create_element_for_token,
44};
45use crate::dom::virtualmethods::vtable_for;
46
/// Numeric identifier for a node being built by the parser. The parser-thread
/// `Sink` hands these out, and the main-thread `Tokenizer` keeps a table from
/// each identifier to the DOM node it stands for.
type ParseNodeId = usize;
48
49#[derive(Clone, Debug, JSTraceable, MallocSizeOf)]
50pub(crate) struct ParseNode {
51    id: ParseNodeId,
52    #[no_trace]
53    qual_name: Option<QualName>,
54}
55
56#[derive(Debug, JSTraceable, MallocSizeOf)]
57enum NodeOrText {
58    Node(ParseNode),
59    Text(String),
60}
61
62#[derive(Debug, JSTraceable, MallocSizeOf)]
63struct Attribute {
64    #[no_trace]
65    name: QualName,
66    value: String,
67}
68
69#[derive(Debug, JSTraceable, MallocSizeOf)]
70enum ParseOperation {
71    GetTemplateContents {
72        target: ParseNodeId,
73        contents: ParseNodeId,
74    },
75    CreateElement {
76        node: ParseNodeId,
77        #[no_trace]
78        name: QualName,
79        attrs: Vec<Attribute>,
80        current_line: u64,
81        had_duplicate_attributes: bool,
82    },
83    CreateComment {
84        text: String,
85        node: ParseNodeId,
86    },
87    AppendBeforeSibling {
88        sibling: ParseNodeId,
89        node: NodeOrText,
90    },
91    AppendBasedOnParentNode {
92        element: ParseNodeId,
93        prev_element: ParseNodeId,
94        node: NodeOrText,
95    },
96    Append {
97        parent: ParseNodeId,
98        node: NodeOrText,
99    },
100    AppendDoctypeToDocument {
101        name: String,
102        public_id: String,
103        system_id: String,
104    },
105    AddAttrsIfMissing {
106        target: ParseNodeId,
107        attrs: Vec<Attribute>,
108    },
109    RemoveFromParent {
110        target: ParseNodeId,
111    },
112    MarkScriptAlreadyStarted {
113        node: ParseNodeId,
114    },
115    ReparentChildren {
116        parent: ParseNodeId,
117        new_parent: ParseNodeId,
118    },
119    AssociateWithForm {
120        target: ParseNodeId,
121        form: ParseNodeId,
122        element: ParseNodeId,
123        prev_element: Option<ParseNodeId>,
124    },
125    CreatePI {
126        node: ParseNodeId,
127        target: String,
128        data: String,
129    },
130    Pop {
131        node: ParseNodeId,
132    },
133    SetQuirksMode {
134        #[ignore_malloc_size_of = "Defined in style"]
135        #[no_trace]
136        mode: ServoQuirksMode,
137    },
138    AttachDeclarativeShadowRoot {
139        location: ParseNodeId,
140        template: ParseNodeId,
141        attributes: Vec<Attribute>,
142        /// Used to notify the parser thread whether or not attaching the shadow root succeeded
143        #[no_trace]
144        sender: Sender<bool>,
145    },
146}
147
148#[derive(MallocSizeOf)]
149enum FromParserThreadMsg {
150    TokenizerResultDone {
151        updated_input: VecDeque<SendTendril<UTF8>>,
152    },
153    TokenizerResultScript {
154        script: ParseNode,
155        updated_input: VecDeque<SendTendril<UTF8>>,
156    },
157    EncodingIndicator {
158        encoding: SendTendril<UTF8>,
159        updated_input: VecDeque<SendTendril<UTF8>>,
160    },
161    /// Sent to main thread to signify that the parser thread's end method has returned.
162    End,
163    ProcessOperation(ParseOperation),
164}
165
166#[derive(MallocSizeOf)]
167enum ToParserThreadMsg {
168    Feed { input: VecDeque<SendTendril<UTF8>> },
169    End,
170    SetPlainTextState,
171}
172
173fn create_buffer_queue(mut buffers: VecDeque<SendTendril<UTF8>>) -> BufferQueue {
174    let buffer_queue = BufferQueue::default();
175    while let Some(st) = buffers.pop_front() {
176        buffer_queue.push_back(StrTendril::from(st));
177    }
178    buffer_queue
179}
180
// The async HTML Tokenizer consists of two separate types of threads working together:
182// the main thread, which communicates with the rest of script, and the parser thread, which
183// feeds input to the tokenizer from html5ever.
184//
185// Steps:
186// 1. A call to Tokenizer::new will spin up a new parser thread, which starts listening for messages from Tokenizer.
187// 2. Upon receiving an input from ServoParser, the tokenizer forwards it to the parser thread, where it starts
188//    creating the necessary tree actions based on the input.
189// 3. The parser thread sends these tree actions to the main thread as soon as it creates them. The main thread
190//    then executes the received actions.
191//
192//    _____________                           _______________
193//   |             |                         |               |
194//   |             |                         |               |
195//   |             |   ToParserThreadMsg     |               |
196//   |             |------------------------>| Parser Thread |
197//   |    Main     |                         |               |
198//   |   Thread    |   FromParserThreadMsg   |               |
199//   |             |<------------------------|    ________   |
200//   |             |                         |   |        |  |
201//   |             |   FromParserThreadMsg   |   |  Sink  |  |
202//   |             |<------------------------|---|        |  |
203//   |             |                         |   |________|  |
204//   |_____________|                         |_______________|
205//
206#[derive(JSTraceable, MallocSizeOf)]
207#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
208pub(crate) struct Tokenizer {
209    document: Dom<Document>,
210    #[no_trace]
211    from_parser_thread_receiver: Receiver<FromParserThreadMsg>,
212    /// Sender from the main thread to the parser thread.
213    #[no_trace]
214    to_parser_thread_sender: Sender<ToParserThreadMsg>,
215    nodes: RefCell<FxHashMap<ParseNodeId, Dom<Node>>>,
216    #[no_trace]
217    url: ServoUrl,
218    parsing_algorithm: ParsingAlgorithm,
219    #[conditional_malloc_size_of]
220    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
221    current_line: Cell<u64>,
222    has_ended: Cell<bool>,
223}
224
225impl Tokenizer {
226    pub(crate) fn new(
227        document: &Document,
228        url: ServoUrl,
229        fragment_context: Option<super::FragmentContext>,
230    ) -> Self {
231        // Messages from the main thread to the parser thread
232        let (to_parser_thread_sender, from_main_thread_receiver) = unbounded();
233        // Messages from the parser thread to the main thread
234        let (to_main_thread_sender, from_parser_thread_receiver) = unbounded();
235
236        let algorithm = match fragment_context {
237            Some(_) => ParsingAlgorithm::Fragment,
238            None => ParsingAlgorithm::Normal,
239        };
240
241        let custom_element_reaction_stack = document.custom_element_reaction_stack();
242        let tokenizer = Tokenizer {
243            document: Dom::from_ref(document),
244            from_parser_thread_receiver,
245            to_parser_thread_sender,
246            nodes: RefCell::new(FxHashMap::default()),
247            url,
248            parsing_algorithm: algorithm,
249            custom_element_reaction_stack,
250            current_line: Cell::new(1),
251            has_ended: Cell::new(false),
252        };
253        tokenizer.insert_node(0, Dom::from_ref(document.upcast()));
254
255        let sink = Sink::new(
256            to_main_thread_sender.clone(),
257            document.allow_declarative_shadow_roots(),
258        );
259        let mut form_parse_node = None;
260        let mut parser_fragment_context = None;
261        if let Some(fragment_context) = fragment_context {
262            let node = sink.new_parse_node();
263            tokenizer.insert_node(node.id, Dom::from_ref(fragment_context.context_elem));
264            parser_fragment_context =
265                Some((node, fragment_context.context_element_allows_scripting));
266
267            form_parse_node = fragment_context.form_elem.map(|form_elem| {
268                let node = sink.new_parse_node();
269                tokenizer.insert_node(node.id, Dom::from_ref(form_elem));
270                node
271            });
272        };
273
274        // Create new thread for parser. This is where parser actions
275        // will be generated from the input provided. These parser actions are then passed
276        // onto the main thread to be executed.
277        let scripting_enabled = document.has_browsing_context();
278        thread::Builder::new()
279            .name(format!("Parse:{}", tokenizer.url.debug_compact()))
280            .spawn(move || {
281                run(
282                    sink,
283                    parser_fragment_context,
284                    form_parse_node,
285                    to_main_thread_sender,
286                    from_main_thread_receiver,
287                    scripting_enabled,
288                );
289            })
290            .expect("HTML Parser thread spawning failed");
291
292        tokenizer
293    }
294
295    pub(crate) fn feed(
296        &self,
297        input: &BufferQueue,
298        cx: &mut js::context::JSContext,
299    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
300        let mut send_tendrils = VecDeque::new();
301        while let Some(str) = input.pop_front() {
302            send_tendrils.push_back(SendTendril::from(str));
303        }
304
305        // Send message to parser thread, asking it to start reading from the input.
306        // Parser operation messages will be sent to main thread as they are evaluated.
307        self.to_parser_thread_sender
308            .send(ToParserThreadMsg::Feed {
309                input: send_tendrils,
310            })
311            .unwrap();
312
313        loop {
314            debug_assert!(!self.has_ended.get());
315
316            match self
317                .from_parser_thread_receiver
318                .recv()
319                .expect("Unexpected channel panic in main thread.")
320            {
321                FromParserThreadMsg::ProcessOperation(parse_op) => {
322                    self.process_operation(parse_op, cx);
323
324                    // The parser might have been aborted during the execution
325                    // of `parse_op`.
326                    if self.has_ended.get() {
327                        return TokenizerResult::Done;
328                    }
329                },
330                FromParserThreadMsg::TokenizerResultDone { updated_input } => {
331                    let buffer_queue = create_buffer_queue(updated_input);
332                    input.replace_with(buffer_queue);
333                    return TokenizerResult::Done;
334                },
335                FromParserThreadMsg::TokenizerResultScript {
336                    script,
337                    updated_input,
338                } => {
339                    let buffer_queue = create_buffer_queue(updated_input);
340                    input.replace_with(buffer_queue);
341                    let script = self.get_node(&script.id);
342                    return TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()));
343                },
344                FromParserThreadMsg::EncodingIndicator { updated_input, .. } => {
345                    // We don't handle encoding indicators yet, so just tell the
346                    // parser thread to continue.
347                    self.to_parser_thread_sender
348                        .send(ToParserThreadMsg::Feed {
349                            input: updated_input,
350                        })
351                        .unwrap();
352                },
353                _ => unreachable!(),
354            };
355        }
356    }
357
358    pub(crate) fn end(&self, cx: &mut js::context::JSContext) {
359        if self.has_ended.replace(true) {
360            return;
361        }
362
363        self.to_parser_thread_sender
364            .send(ToParserThreadMsg::End)
365            .unwrap();
366
367        loop {
368            match self
369                .from_parser_thread_receiver
370                .recv()
371                .expect("Unexpected channel panic in main thread.")
372            {
373                FromParserThreadMsg::ProcessOperation(parse_op) => {
374                    self.process_operation(parse_op, cx);
375                },
376                FromParserThreadMsg::TokenizerResultDone { updated_input: _ } |
377                FromParserThreadMsg::TokenizerResultScript { .. } |
378                FromParserThreadMsg::EncodingIndicator { .. } => continue,
379                FromParserThreadMsg::End => return,
380            };
381        }
382    }
383
384    pub(crate) fn url(&self) -> &ServoUrl {
385        &self.url
386    }
387
388    pub(crate) fn set_plaintext_state(&self) {
389        self.to_parser_thread_sender
390            .send(ToParserThreadMsg::SetPlainTextState)
391            .unwrap();
392    }
393
394    pub(crate) fn get_current_line(&self) -> u32 {
395        self.current_line.get() as u32
396    }
397
398    fn insert_node(&self, id: ParseNodeId, node: Dom<Node>) {
399        assert!(self.nodes.borrow_mut().insert(id, node).is_none());
400    }
401
402    fn get_node<'a>(&'a self, id: &ParseNodeId) -> Ref<'a, Dom<Node>> {
403        Ref::map(self.nodes.borrow(), |nodes| {
404            nodes.get(id).expect("Node not found!")
405        })
406    }
407
408    fn append_before_sibling(
409        &self,
410        cx: &mut js::context::JSContext,
411        sibling: ParseNodeId,
412        node: NodeOrText,
413    ) {
414        let node = match node {
415            NodeOrText::Node(n) => {
416                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
417            },
418            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
419        };
420        let sibling = &**self.get_node(&sibling);
421        let parent = &*sibling
422            .GetParentNode()
423            .expect("append_before_sibling called on node without parent");
424
425        super::insert(
426            cx,
427            parent,
428            Some(sibling),
429            node,
430            self.parsing_algorithm,
431            &self.custom_element_reaction_stack,
432        );
433    }
434
435    fn append(&self, cx: &mut js::context::JSContext, parent: ParseNodeId, node: NodeOrText) {
436        let node = match node {
437            NodeOrText::Node(n) => {
438                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
439            },
440            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
441        };
442
443        let parent = &**self.get_node(&parent);
444        super::insert(
445            cx,
446            parent,
447            None,
448            node,
449            self.parsing_algorithm,
450            &self.custom_element_reaction_stack,
451        );
452    }
453
454    fn has_parent_node(&self, node: ParseNodeId) -> bool {
455        self.get_node(&node).GetParentNode().is_some()
456    }
457
458    fn same_tree(&self, x: ParseNodeId, y: ParseNodeId) -> bool {
459        let x = self.get_node(&x);
460        let y = self.get_node(&y);
461
462        let x = x.downcast::<Element>().expect("Element node expected");
463        let y = y.downcast::<Element>().expect("Element node expected");
464        x.is_in_same_home_subtree(y)
465    }
466
467    fn process_operation(&self, op: ParseOperation, cx: &mut js::context::JSContext) {
468        let document = DomRoot::from_ref(&**self.get_node(&0));
469        let document = document
470            .downcast::<Document>()
471            .expect("Document node should be downcasted!");
472        match op {
473            ParseOperation::GetTemplateContents { target, contents } => {
474                let target = DomRoot::from_ref(&**self.get_node(&target));
475                let template = target
476                    .downcast::<HTMLTemplateElement>()
477                    .expect("Tried to extract contents from non-template element while parsing");
478                self.insert_node(contents, Dom::from_ref(template.Content(cx).upcast()));
479            },
480            ParseOperation::CreateElement {
481                node,
482                name,
483                attrs,
484                current_line,
485                had_duplicate_attributes,
486            } => {
487                self.current_line.set(current_line);
488                let attrs = attrs
489                    .into_iter()
490                    .map(|attr| ElementAttribute::new(attr.name, DOMString::from(attr.value)))
491                    .collect();
492                let element = create_element_for_token(
493                    cx,
494                    name,
495                    attrs,
496                    &self.document,
497                    ElementCreator::ParserCreated(current_line),
498                    ParsingAlgorithm::Normal,
499                    &self.custom_element_reaction_stack,
500                    had_duplicate_attributes,
501                );
502                self.insert_node(node, Dom::from_ref(element.upcast()));
503            },
504            ParseOperation::CreateComment { text, node } => {
505                let comment = Comment::new(cx, DOMString::from(text), document, None);
506                self.insert_node(node, Dom::from_ref(comment.upcast()));
507            },
508            ParseOperation::AppendBeforeSibling { sibling, node } => {
509                self.append_before_sibling(cx, sibling, node);
510            },
511            ParseOperation::Append { parent, node } => {
512                self.append(cx, parent, node);
513            },
514            ParseOperation::AppendBasedOnParentNode {
515                element,
516                prev_element,
517                node,
518            } => {
519                if self.has_parent_node(element) {
520                    self.append_before_sibling(cx, element, node);
521                } else {
522                    self.append(cx, prev_element, node);
523                }
524            },
525            ParseOperation::AppendDoctypeToDocument {
526                name,
527                public_id,
528                system_id,
529            } => {
530                let doctype = DocumentType::new(
531                    cx,
532                    DOMString::from(name),
533                    Some(DOMString::from(public_id)),
534                    Some(DOMString::from(system_id)),
535                    document,
536                );
537
538                document
539                    .upcast::<Node>()
540                    .AppendChild(cx, doctype.upcast())
541                    .expect("Appending failed");
542            },
543            ParseOperation::AddAttrsIfMissing { target, attrs } => {
544                let node = self.get_node(&target);
545                let elem = node
546                    .downcast::<Element>()
547                    .expect("tried to set attrs on non-Element in HTML parsing");
548                for attr in attrs {
549                    elem.set_attribute_from_parser(
550                        cx,
551                        attr.name,
552                        DOMString::from(attr.value),
553                        None,
554                    );
555                }
556            },
557            ParseOperation::RemoveFromParent { target } => {
558                if let Some(ref parent) = self.get_node(&target).GetParentNode() {
559                    parent.RemoveChild(cx, &self.get_node(&target)).unwrap();
560                }
561            },
562            ParseOperation::MarkScriptAlreadyStarted { node } => {
563                let node = self.get_node(&node);
564                let script = node.downcast::<HTMLScriptElement>();
565                if let Some(script) = script {
566                    script.set_already_started(true)
567                }
568            },
569            ParseOperation::ReparentChildren { parent, new_parent } => {
570                let parent = self.get_node(&parent);
571                let new_parent = self.get_node(&new_parent);
572                while let Some(child) = parent.GetFirstChild() {
573                    new_parent.AppendChild(cx, &child).unwrap();
574                }
575            },
576            ParseOperation::AssociateWithForm {
577                target,
578                form,
579                element,
580                prev_element,
581            } => {
582                let tree_node = prev_element.map_or(element, |prev| {
583                    if self.has_parent_node(element) {
584                        element
585                    } else {
586                        prev
587                    }
588                });
589
590                if !self.same_tree(tree_node, form) {
591                    return;
592                }
593                let form = self.get_node(&form);
594                let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
595                    .expect("Owner must be a form element");
596
597                let node = self.get_node(&target);
598                let elem = node.downcast::<Element>();
599                let control = elem.and_then(|e| e.as_maybe_form_control());
600
601                if let Some(control) = control {
602                    control.set_form_owner_from_parser(cx, &form);
603                }
604            },
605            ParseOperation::Pop { node } => {
606                vtable_for(&self.get_node(&node)).pop(cx);
607            },
608            ParseOperation::CreatePI { node, target, data } => {
609                let pi = ProcessingInstruction::new(
610                    cx,
611                    DOMString::from(target),
612                    DOMString::from(data),
613                    document,
614                );
615                self.insert_node(node, Dom::from_ref(pi.upcast()));
616            },
617            ParseOperation::SetQuirksMode { mode } => {
618                document.set_quirks_mode(mode);
619            },
620            ParseOperation::AttachDeclarativeShadowRoot {
621                location,
622                template,
623                attributes,
624                sender,
625            } => {
626                let location = self.get_node(&location);
627                let template = self.get_node(&template);
628                let attributes: Vec<_> = attributes
629                    .into_iter()
630                    .map(|attribute| HtmlAttribute {
631                        name: attribute.name,
632                        value: StrTendril::from(attribute.value),
633                    })
634                    .collect();
635
636                let did_succeed =
637                    attach_declarative_shadow_inner(cx, &location, &template, &attributes);
638                sender.send(did_succeed).unwrap();
639            },
640        }
641    }
642}
643
644/// Run the parser.
645///
646/// The `fragment_context` argument is `Some` in the fragment case and describes the context
647/// node as well as whether scripting is enabled for the context node. Note that whether or not
648/// scripting is enabled for the context node does not affect whether scripting is enabled for the
649/// parser, that is determined by the `scripting_enabled` argument.
650fn run(
651    sink: Sink,
652    fragment_context: Option<(ParseNode, bool)>,
653    form_parse_node: Option<ParseNode>,
654    sender: Sender<FromParserThreadMsg>,
655    receiver: Receiver<ToParserThreadMsg>,
656    scripting_enabled: bool,
657) {
658    let options = TreeBuilderOpts {
659        scripting_enabled,
660        ..Default::default()
661    };
662
663    let html_tokenizer = if let Some((context_node, context_scripting_enabled)) = fragment_context {
664        let tree_builder =
665            TreeBuilder::new_for_fragment(sink, context_node, form_parse_node, options);
666
667        let tok_options = TokenizerOpts {
668            initial_state: Some(
669                tree_builder.tokenizer_state_for_context_elem(context_scripting_enabled),
670            ),
671            ..Default::default()
672        };
673
674        HtmlTokenizer::new(tree_builder, tok_options)
675    } else {
676        HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
677    };
678
679    loop {
680        match receiver
681            .recv()
682            .expect("Unexpected channel panic in html parser thread")
683        {
684            ToParserThreadMsg::Feed { input } => {
685                let input = create_buffer_queue(input);
686                let res = html_tokenizer.feed(&input);
687
688                // Gather changes to 'input' and place them in 'updated_input',
689                // which will be sent to the main thread to update feed method's 'input'
690                let mut updated_input = VecDeque::new();
691                while let Some(st) = input.pop_front() {
692                    updated_input.push_back(SendTendril::from(st));
693                }
694
695                let res = match res {
696                    TokenizerResult::Done => {
697                        FromParserThreadMsg::TokenizerResultDone { updated_input }
698                    },
699                    TokenizerResult::Script(script) => FromParserThreadMsg::TokenizerResultScript {
700                        script,
701                        updated_input,
702                    },
703                    TokenizerResult::EncodingIndicator(encoding) => {
704                        FromParserThreadMsg::EncodingIndicator {
705                            encoding: SendTendril::from(encoding),
706                            updated_input,
707                        }
708                    },
709                };
710                sender.send(res).unwrap();
711            },
712            ToParserThreadMsg::End => {
713                html_tokenizer.end();
714                sender.send(FromParserThreadMsg::End).unwrap();
715                break;
716            },
717            ToParserThreadMsg::SetPlainTextState => html_tokenizer.set_plaintext_state(),
718        };
719    }
720}
721
722#[derive(Default, JSTraceable, MallocSizeOf)]
723struct ParseNodeData {
724    contents: Option<ParseNode>,
725    is_integration_point: bool,
726}
727
728pub(crate) struct Sink {
729    current_line: Cell<u64>,
730    parse_node_data: RefCell<FxHashMap<ParseNodeId, ParseNodeData>>,
731    next_parse_node_id: Cell<ParseNodeId>,
732    document_node: ParseNode,
733    sender: Sender<FromParserThreadMsg>,
734    allow_declarative_shadow_roots: bool,
735}
736
737impl Sink {
738    fn new(sender: Sender<FromParserThreadMsg>, allow_declarative_shadow_roots: bool) -> Sink {
739        let sink = Sink {
740            current_line: Cell::new(1),
741            parse_node_data: RefCell::new(FxHashMap::default()),
742            next_parse_node_id: Cell::new(1),
743            document_node: ParseNode {
744                id: 0,
745                qual_name: None,
746            },
747            sender,
748            allow_declarative_shadow_roots,
749        };
750        let data = ParseNodeData::default();
751        sink.insert_parse_node_data(0, data);
752        sink
753    }
754
755    fn new_parse_node(&self) -> ParseNode {
756        let id = self.next_parse_node_id.get();
757        let data = ParseNodeData::default();
758        self.insert_parse_node_data(id, data);
759        self.next_parse_node_id.set(id + 1);
760        ParseNode {
761            id,
762            qual_name: None,
763        }
764    }
765
766    fn send_op(&self, op: ParseOperation) {
767        self.sender
768            .send(FromParserThreadMsg::ProcessOperation(op))
769            .unwrap();
770    }
771
772    fn insert_parse_node_data(&self, id: ParseNodeId, data: ParseNodeData) {
773        assert!(self.parse_node_data.borrow_mut().insert(id, data).is_none());
774    }
775
776    fn get_parse_node_data<'a>(&'a self, id: &'a ParseNodeId) -> Ref<'a, ParseNodeData> {
777        Ref::map(self.parse_node_data.borrow(), |data| {
778            data.get(id).expect("Parse Node data not found!")
779        })
780    }
781
782    fn get_parse_node_data_mut<'a>(&'a self, id: &'a ParseNodeId) -> RefMut<'a, ParseNodeData> {
783        RefMut::map(self.parse_node_data.borrow_mut(), |data| {
784            data.get_mut(id).expect("Parse Node data not found!")
785        })
786    }
787}
788
789impl TreeSink for Sink {
790    type Output = Self;
791    fn finish(self) -> Self {
792        self
793    }
794
795    type Handle = ParseNode;
796    type ElemName<'a>
797        = ExpandedName<'a>
798    where
799        Self: 'a;
800
801    fn get_document(&self) -> Self::Handle {
802        self.document_node.clone()
803    }
804
805    fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle {
806        if let Some(ref contents) = self.get_parse_node_data(&target.id).contents {
807            return contents.clone();
808        }
809        let node = self.new_parse_node();
810        {
811            let mut data = self.get_parse_node_data_mut(&target.id);
812            data.contents = Some(node.clone());
813        }
814        self.send_op(ParseOperation::GetTemplateContents {
815            target: target.id,
816            contents: node.id,
817        });
818        node
819    }
820
821    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
822        x.id == y.id
823    }
824
825    fn elem_name<'a>(&self, target: &'a Self::Handle) -> ExpandedName<'a> {
826        target
827            .qual_name
828            .as_ref()
829            .expect("Expected qual name of node!")
830            .expanded()
831    }
832
833    fn create_element(
834        &self,
835        name: QualName,
836        html_attrs: Vec<HtmlAttribute>,
837        flags: ElementFlags,
838    ) -> Self::Handle {
839        let mut node = self.new_parse_node();
840        node.qual_name = Some(name.clone());
841        {
842            let mut node_data = self.get_parse_node_data_mut(&node.id);
843            node_data.is_integration_point = html_attrs.iter().any(|attr| {
844                let attr_value = &String::from(attr.value.clone());
845                (attr.name.local == local_name!("encoding") && attr.name.ns == ns!()) &&
846                    (attr_value.eq_ignore_ascii_case("text/html") ||
847                        attr_value.eq_ignore_ascii_case("application/xhtml+xml"))
848            });
849        }
850        let attrs = html_attrs
851            .into_iter()
852            .map(|attr| Attribute {
853                name: attr.name,
854                value: String::from(attr.value),
855            })
856            .collect();
857
858        self.send_op(ParseOperation::CreateElement {
859            node: node.id,
860            name,
861            attrs,
862            current_line: self.current_line.get(),
863            had_duplicate_attributes: flags.had_duplicate_attributes,
864        });
865        node
866    }
867
868    fn create_comment(&self, text: StrTendril) -> Self::Handle {
869        let node = self.new_parse_node();
870        self.send_op(ParseOperation::CreateComment {
871            text: String::from(text),
872            node: node.id,
873        });
874        node
875    }
876
877    fn create_pi(&self, target: StrTendril, data: StrTendril) -> ParseNode {
878        let node = self.new_parse_node();
879        self.send_op(ParseOperation::CreatePI {
880            node: node.id,
881            target: String::from(target),
882            data: String::from(data),
883        });
884        node
885    }
886
887    fn associate_with_form(
888        &self,
889        target: &Self::Handle,
890        form: &Self::Handle,
891        nodes: (&Self::Handle, Option<&Self::Handle>),
892    ) {
893        let (element, prev_element) = nodes;
894        self.send_op(ParseOperation::AssociateWithForm {
895            target: target.id,
896            form: form.id,
897            element: element.id,
898            prev_element: prev_element.map(|p| p.id),
899        });
900    }
901
902    fn append_before_sibling(
903        &self,
904        sibling: &Self::Handle,
905        new_node: HtmlNodeOrText<Self::Handle>,
906    ) {
907        let new_node = match new_node {
908            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
909            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
910        };
911        self.send_op(ParseOperation::AppendBeforeSibling {
912            sibling: sibling.id,
913            node: new_node,
914        });
915    }
916
917    fn append_based_on_parent_node(
918        &self,
919        elem: &Self::Handle,
920        prev_elem: &Self::Handle,
921        child: HtmlNodeOrText<Self::Handle>,
922    ) {
923        let child = match child {
924            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
925            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
926        };
927        self.send_op(ParseOperation::AppendBasedOnParentNode {
928            element: elem.id,
929            prev_element: prev_elem.id,
930            node: child,
931        });
932    }
933
934    fn parse_error(&self, msg: Cow<'static, str>) {
935        debug!("Parse error: {}", msg);
936    }
937
938    fn set_quirks_mode(&self, mode: QuirksMode) {
939        let mode = match mode {
940            QuirksMode::Quirks => ServoQuirksMode::Quirks,
941            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
942            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
943        };
944        self.send_op(ParseOperation::SetQuirksMode { mode });
945    }
946
947    fn append(&self, parent: &Self::Handle, child: HtmlNodeOrText<Self::Handle>) {
948        let child = match child {
949            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
950            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
951        };
952        self.send_op(ParseOperation::Append {
953            parent: parent.id,
954            node: child,
955        });
956    }
957
958    fn append_doctype_to_document(
959        &self,
960        name: StrTendril,
961        public_id: StrTendril,
962        system_id: StrTendril,
963    ) {
964        self.send_op(ParseOperation::AppendDoctypeToDocument {
965            name: String::from(name),
966            public_id: String::from(public_id),
967            system_id: String::from(system_id),
968        });
969    }
970
971    fn add_attrs_if_missing(&self, target: &Self::Handle, html_attrs: Vec<HtmlAttribute>) {
972        let attrs = html_attrs
973            .into_iter()
974            .map(|attr| Attribute {
975                name: attr.name,
976                value: String::from(attr.value),
977            })
978            .collect();
979        self.send_op(ParseOperation::AddAttrsIfMissing {
980            target: target.id,
981            attrs,
982        });
983    }
984
985    fn remove_from_parent(&self, target: &Self::Handle) {
986        self.send_op(ParseOperation::RemoveFromParent { target: target.id });
987    }
988
989    fn mark_script_already_started(&self, node: &Self::Handle) {
990        self.send_op(ParseOperation::MarkScriptAlreadyStarted { node: node.id });
991    }
992
993    fn reparent_children(&self, parent: &Self::Handle, new_parent: &Self::Handle) {
994        self.send_op(ParseOperation::ReparentChildren {
995            parent: parent.id,
996            new_parent: new_parent.id,
997        });
998    }
999
1000    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1001    /// Specifically, the `<annotation-xml>` cases.
1002    fn is_mathml_annotation_xml_integration_point(&self, handle: &Self::Handle) -> bool {
1003        let node_data = self.get_parse_node_data(&handle.id);
1004        node_data.is_integration_point
1005    }
1006
1007    fn set_current_line(&self, line_number: u64) {
1008        self.current_line.set(line_number);
1009    }
1010
1011    fn pop(&self, node: &Self::Handle) {
1012        self.send_op(ParseOperation::Pop { node: node.id });
1013    }
1014
1015    fn allow_declarative_shadow_roots(&self, _intended_parent: &Self::Handle) -> bool {
1016        self.allow_declarative_shadow_roots
1017    }
1018
1019    fn attach_declarative_shadow(
1020        &self,
1021        location: &Self::Handle,
1022        template: &Self::Handle,
1023        attributes: &[HtmlAttribute],
1024    ) -> bool {
1025        let attributes = attributes
1026            .iter()
1027            .map(|attribute| Attribute {
1028                name: attribute.name.clone(),
1029                value: String::from(attribute.value.clone()),
1030            })
1031            .collect();
1032
1033        // Unfortunately the parser can only proceed after it knows whether attaching the shadow root
1034        // succeeded or failed. Attaching a shadow root can fail for many different reasons,
1035        // and so we need to block until the script thread has processed this operation.
1036        let (sender, receiver) = unbounded();
1037        self.send_op(ParseOperation::AttachDeclarativeShadowRoot {
1038            location: location.id,
1039            template: template.id,
1040            attributes,
1041            sender,
1042        });
1043
1044        receiver.recv().unwrap()
1045    }
1046}