script/dom/servoparser/
async_html.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5#![cfg_attr(crown, expect(crown::unrooted_must_root))]
6
7use std::borrow::Cow;
8use std::cell::{Cell, Ref, RefCell, RefMut};
9use std::collections::vec_deque::VecDeque;
10use std::rc::Rc;
11use std::thread;
12
13use crossbeam_channel::{Receiver, Sender, unbounded};
14use html5ever::buffer_queue::BufferQueue;
15use html5ever::tendril::fmt::UTF8;
16use html5ever::tendril::{SendTendril, StrTendril, Tendril};
17use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts};
18use html5ever::tree_builder::{
19    ElementFlags, NodeOrText as HtmlNodeOrText, QuirksMode, TreeBuilder, TreeBuilderOpts, TreeSink,
20};
21use html5ever::{Attribute as HtmlAttribute, ExpandedName, QualName, local_name, ns};
22use markup5ever::TokenizerResult;
23use rustc_hash::FxHashMap;
24use servo_url::ServoUrl;
25use style::context::QuirksMode as ServoQuirksMode;
26
27use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
28use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
29use crate::dom::bindings::inheritance::Castable;
30use crate::dom::bindings::root::{Dom, DomRoot};
31use crate::dom::bindings::str::DOMString;
32use crate::dom::comment::Comment;
33use crate::dom::customelementregistry::CustomElementReactionStack;
34use crate::dom::document::Document;
35use crate::dom::documenttype::DocumentType;
36use crate::dom::element::{Element, ElementCreator};
37use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
38use crate::dom::html::htmlscriptelement::HTMLScriptElement;
39use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
40use crate::dom::node::Node;
41use crate::dom::processinginstruction::ProcessingInstruction;
42use crate::dom::servoparser::{
43    ElementAttribute, ParsingAlgorithm, attach_declarative_shadow_inner, create_element_for_token,
44};
45use crate::dom::virtualmethods::vtable_for;
46use crate::script_runtime::CanGc;
47
/// Identifier the parser thread assigns to every node handle it creates. The main thread
/// uses the same value as the key into `Tokenizer::nodes`; id 0 is the document.
type ParseNodeId = usize;
49
/// Handle the parser thread uses in place of a real DOM node (it is the `TreeSink::Handle`
/// of `Sink`).
#[derive(Clone, Debug, JSTraceable, MallocSizeOf)]
pub(crate) struct ParseNode {
    /// Key under which the main thread stores the corresponding DOM node.
    id: ParseNodeId,
    /// Qualified name of the element. Only `Sink::create_element` fills this in; every other
    /// handle (document, comments, template contents, ...) carries `None`.
    #[no_trace]
    qual_name: Option<QualName>,
}
56
/// Payload of the append-style parse operations: either an already created parse node or
/// a run of text.
#[derive(Debug, JSTraceable, MallocSizeOf)]
enum NodeOrText {
    /// A node previously announced to the main thread, referenced through its handle.
    Node(ParseNode),
    /// Text to insert, carried as an owned `String`.
    Text(String),
}
62
/// Attribute as carried inside a `ParseOperation`: same shape as an html5ever attribute, but
/// with the value held as an owned `String` instead of a tendril.
#[derive(Debug, JSTraceable, MallocSizeOf)]
struct Attribute {
    /// Qualified attribute name.
    #[no_trace]
    name: QualName,
    /// Attribute value.
    value: String,
}
69
/// A single tree-construction action. Operations are wrapped in
/// `FromParserThreadMsg::ProcessOperation` and executed on the main thread by
/// `Tokenizer::process_operation`.
#[derive(Debug, JSTraceable, MallocSizeOf)]
enum ParseOperation {
    /// Register the content of the template element `target` under the id `contents`.
    GetTemplateContents {
        target: ParseNodeId,
        contents: ParseNodeId,
    },
    /// Create an element and register it under `node`. `current_line` is also recorded as
    /// the tokenizer's current line.
    CreateElement {
        node: ParseNodeId,
        #[no_trace]
        name: QualName,
        attrs: Vec<Attribute>,
        current_line: u64,
        had_duplicate_attributes: bool,
    },
    /// Create a comment node with the given text and register it under `node`.
    CreateComment {
        text: String,
        node: ParseNodeId,
    },
    /// Insert `node` into the parent of `sibling`, immediately before `sibling`.
    AppendBeforeSibling {
        sibling: ParseNodeId,
        node: NodeOrText,
    },
    /// Insert `node` before `element` if `element` currently has a parent; otherwise append
    /// it to `prev_element`.
    AppendBasedOnParentNode {
        element: ParseNodeId,
        prev_element: ParseNodeId,
        node: NodeOrText,
    },
    /// Append `node` as the last child of `parent`.
    Append {
        parent: ParseNodeId,
        node: NodeOrText,
    },
    /// Create a doctype node and append it to the document.
    AppendDoctypeToDocument {
        name: String,
        public_id: String,
        system_id: String,
    },
    /// Pass each attribute to `Element::set_attribute_from_parser` on `target`.
    AddAttrsIfMissing {
        target: ParseNodeId,
        attrs: Vec<Attribute>,
    },
    /// Detach `target` from its parent, if it has one.
    RemoveFromParent {
        target: ParseNodeId,
    },
    /// Mark `node` as already started if it is a script element; otherwise do nothing.
    MarkScriptAlreadyStarted {
        node: ParseNodeId,
    },
    /// Move every child of `parent` to the end of `new_parent`.
    ReparentChildren {
        parent: ParseNodeId,
        new_parent: ParseNodeId,
    },
    /// Set `form` as the form owner of `target`, provided that `target` is a form control
    /// and that `form` is in the same home subtree as `element` (or as `prev_element` when
    /// that is given and `element` has no parent).
    AssociateWithForm {
        target: ParseNodeId,
        form: ParseNodeId,
        element: ParseNodeId,
        prev_element: Option<ParseNodeId>,
    },
    /// Create a processing instruction and register it under `node`.
    CreatePI {
        node: ParseNodeId,
        target: String,
        data: String,
    },
    /// Invoke the `pop` virtual method of `node`.
    Pop {
        node: ParseNodeId,
    },
    /// Set the quirks mode of the document.
    SetQuirksMode {
        #[ignore_malloc_size_of = "Defined in style"]
        #[no_trace]
        mode: ServoQuirksMode,
    },
    /// Attach a declarative shadow root for `template` at `location` and report the outcome
    /// through `sender`.
    AttachDeclarativeShadowRoot {
        location: ParseNodeId,
        template: ParseNodeId,
        attributes: Vec<Attribute>,
        /// Used to notify the parser thread whether or not attaching the shadow root succeeded
        #[no_trace]
        sender: Sender<bool>,
    },
}
148
/// Messages sent from the parser thread to the main thread.
#[derive(MallocSizeOf)]
enum FromParserThreadMsg {
    /// The tokenizer finished a `Feed` request with `TokenizerResult::Done`.
    /// `updated_input` holds the buffers left in the input queue after feeding.
    TokenizerResultDone {
        updated_input: VecDeque<SendTendril<UTF8>>,
    },
    /// The tokenizer stopped a `Feed` request at `script`. `updated_input` holds the
    /// buffers left in the input queue after feeding.
    TokenizerResultScript {
        script: ParseNode,
        updated_input: VecDeque<SendTendril<UTF8>>,
    },
    /// The tokenizer stopped a `Feed` request because it found an encoding indicator.
    /// `updated_input` holds the buffers left in the input queue after feeding.
    EncodingIndicator {
        encoding: SendTendril<UTF8>,
        updated_input: VecDeque<SendTendril<UTF8>>,
    },
    /// Sent to main thread to signify that the parser thread's end method has returned.
    End,
    /// A tree operation that the main thread must execute.
    ProcessOperation(ParseOperation),
}
166
/// Messages sent from the main thread to the parser thread.
#[derive(MallocSizeOf)]
enum ToParserThreadMsg {
    /// Feed the given buffers to the tokenizer.
    Feed { input: VecDeque<SendTendril<UTF8>> },
    /// End tokenization; the parser thread replies with `FromParserThreadMsg::End` and then
    /// leaves its message loop.
    End,
    /// Call `set_plaintext_state` on the tokenizer.
    SetPlainTextState,
}
173
174fn create_buffer_queue(mut buffers: VecDeque<SendTendril<UTF8>>) -> BufferQueue {
175    let buffer_queue = BufferQueue::default();
176    while let Some(st) = buffers.pop_front() {
177        buffer_queue.push_back(StrTendril::from(st));
178    }
179    buffer_queue
180}
181
// The async HTML Tokenizer consists of two separate types of threads working together:
183// the main thread, which communicates with the rest of script, and the parser thread, which
184// feeds input to the tokenizer from html5ever.
185//
186// Steps:
187// 1. A call to Tokenizer::new will spin up a new parser thread, which starts listening for messages from Tokenizer.
188// 2. Upon receiving an input from ServoParser, the tokenizer forwards it to the parser thread, where it starts
189//    creating the necessary tree actions based on the input.
190// 3. The parser thread sends these tree actions to the main thread as soon as it creates them. The main thread
191//    then executes the received actions.
192//
193//    _____________                           _______________
194//   |             |                         |               |
195//   |             |                         |               |
196//   |             |   ToParserThreadMsg     |               |
197//   |             |------------------------>| Parser Thread |
198//   |    Main     |                         |               |
199//   |   Thread    |   FromParserThreadMsg   |               |
200//   |             |<------------------------|    ________   |
201//   |             |                         |   |        |  |
202//   |             |   FromParserThreadMsg   |   |  Sink  |  |
203//   |             |<------------------------|---|        |  |
204//   |             |                         |   |________|  |
205//   |_____________|                         |_______________|
206//
/// Main-thread half of the async HTML tokenizer. It forwards input to the parser thread
/// and executes the tree operations that thread sends back.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Tokenizer {
    /// The document that elements are created for.
    document: Dom<Document>,
    /// Receiver for messages coming from the parser thread.
    #[no_trace]
    from_parser_thread_receiver: Receiver<FromParserThreadMsg>,
    /// Sender from the main thread to the parser thread.
    #[no_trace]
    to_parser_thread_sender: Sender<ToParserThreadMsg>,
    /// DOM nodes keyed by the id the parser thread knows them by. Id 0 is the document.
    nodes: RefCell<FxHashMap<ParseNodeId, Dom<Node>>>,
    /// URL returned by `url()`; also used to name the parser thread.
    #[no_trace]
    url: ServoUrl,
    /// `Fragment` when the tokenizer was created with a fragment context, `Normal` otherwise.
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack obtained from the document at construction time.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc<CustomElementReactionStack>,
    /// Line number carried by the most recent `CreateElement` operation; starts at 1.
    current_line: Cell<u64>,
    /// Set once `end()` has been called.
    has_ended: Cell<bool>,
}
225
226impl Tokenizer {
227    pub(crate) fn new(
228        document: &Document,
229        url: ServoUrl,
230        fragment_context: Option<super::FragmentContext>,
231    ) -> Self {
232        // Messages from the main thread to the parser thread
233        let (to_parser_thread_sender, from_main_thread_receiver) = unbounded();
234        // Messages from the parser thread to the main thread
235        let (to_main_thread_sender, from_parser_thread_receiver) = unbounded();
236
237        let algorithm = match fragment_context {
238            Some(_) => ParsingAlgorithm::Fragment,
239            None => ParsingAlgorithm::Normal,
240        };
241
242        let custom_element_reaction_stack = document.custom_element_reaction_stack();
243        let tokenizer = Tokenizer {
244            document: Dom::from_ref(document),
245            from_parser_thread_receiver,
246            to_parser_thread_sender,
247            nodes: RefCell::new(FxHashMap::default()),
248            url,
249            parsing_algorithm: algorithm,
250            custom_element_reaction_stack,
251            current_line: Cell::new(1),
252            has_ended: Cell::new(false),
253        };
254        tokenizer.insert_node(0, Dom::from_ref(document.upcast()));
255
256        let sink = Sink::new(
257            to_main_thread_sender.clone(),
258            document.allow_declarative_shadow_roots(),
259        );
260        let mut form_parse_node = None;
261        let mut parser_fragment_context = None;
262        if let Some(fragment_context) = fragment_context {
263            let node = sink.new_parse_node();
264            tokenizer.insert_node(node.id, Dom::from_ref(fragment_context.context_elem));
265            parser_fragment_context =
266                Some((node, fragment_context.context_element_allows_scripting));
267
268            form_parse_node = fragment_context.form_elem.map(|form_elem| {
269                let node = sink.new_parse_node();
270                tokenizer.insert_node(node.id, Dom::from_ref(form_elem));
271                node
272            });
273        };
274
275        // Create new thread for parser. This is where parser actions
276        // will be generated from the input provided. These parser actions are then passed
277        // onto the main thread to be executed.
278        let scripting_enabled = document.has_browsing_context();
279        thread::Builder::new()
280            .name(format!("Parse:{}", tokenizer.url.debug_compact()))
281            .spawn(move || {
282                run(
283                    sink,
284                    parser_fragment_context,
285                    form_parse_node,
286                    to_main_thread_sender,
287                    from_main_thread_receiver,
288                    scripting_enabled,
289                );
290            })
291            .expect("HTML Parser thread spawning failed");
292
293        tokenizer
294    }
295
296    pub(crate) fn feed(
297        &self,
298        input: &BufferQueue,
299        cx: &mut js::context::JSContext,
300    ) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
301        let mut send_tendrils = VecDeque::new();
302        while let Some(str) = input.pop_front() {
303            send_tendrils.push_back(SendTendril::from(str));
304        }
305
306        // Send message to parser thread, asking it to start reading from the input.
307        // Parser operation messages will be sent to main thread as they are evaluated.
308        self.to_parser_thread_sender
309            .send(ToParserThreadMsg::Feed {
310                input: send_tendrils,
311            })
312            .unwrap();
313
314        loop {
315            debug_assert!(!self.has_ended.get());
316
317            match self
318                .from_parser_thread_receiver
319                .recv()
320                .expect("Unexpected channel panic in main thread.")
321            {
322                FromParserThreadMsg::ProcessOperation(parse_op) => {
323                    self.process_operation(parse_op, cx);
324
325                    // The parser might have been aborted during the execution
326                    // of `parse_op`.
327                    if self.has_ended.get() {
328                        return TokenizerResult::Done;
329                    }
330                },
331                FromParserThreadMsg::TokenizerResultDone { updated_input } => {
332                    let buffer_queue = create_buffer_queue(updated_input);
333                    input.replace_with(buffer_queue);
334                    return TokenizerResult::Done;
335                },
336                FromParserThreadMsg::TokenizerResultScript {
337                    script,
338                    updated_input,
339                } => {
340                    let buffer_queue = create_buffer_queue(updated_input);
341                    input.replace_with(buffer_queue);
342                    let script = self.get_node(&script.id);
343                    return TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap()));
344                },
345                FromParserThreadMsg::EncodingIndicator { updated_input, .. } => {
346                    // We don't handle encoding indicators yet, so just tell the
347                    // parser thread to continue.
348                    self.to_parser_thread_sender
349                        .send(ToParserThreadMsg::Feed {
350                            input: updated_input,
351                        })
352                        .unwrap();
353                },
354                _ => unreachable!(),
355            };
356        }
357    }
358
359    pub(crate) fn end(&self, cx: &mut js::context::JSContext) {
360        if self.has_ended.replace(true) {
361            return;
362        }
363
364        self.to_parser_thread_sender
365            .send(ToParserThreadMsg::End)
366            .unwrap();
367
368        loop {
369            match self
370                .from_parser_thread_receiver
371                .recv()
372                .expect("Unexpected channel panic in main thread.")
373            {
374                FromParserThreadMsg::ProcessOperation(parse_op) => {
375                    self.process_operation(parse_op, cx);
376                },
377                FromParserThreadMsg::TokenizerResultDone { updated_input: _ } |
378                FromParserThreadMsg::TokenizerResultScript { .. } |
379                FromParserThreadMsg::EncodingIndicator { .. } => continue,
380                FromParserThreadMsg::End => return,
381            };
382        }
383    }
384
385    pub(crate) fn url(&self) -> &ServoUrl {
386        &self.url
387    }
388
389    pub(crate) fn set_plaintext_state(&self) {
390        self.to_parser_thread_sender
391            .send(ToParserThreadMsg::SetPlainTextState)
392            .unwrap();
393    }
394
395    pub(crate) fn get_current_line(&self) -> u32 {
396        self.current_line.get() as u32
397    }
398
399    fn insert_node(&self, id: ParseNodeId, node: Dom<Node>) {
400        assert!(self.nodes.borrow_mut().insert(id, node).is_none());
401    }
402
403    fn get_node<'a>(&'a self, id: &ParseNodeId) -> Ref<'a, Dom<Node>> {
404        Ref::map(self.nodes.borrow(), |nodes| {
405            nodes.get(id).expect("Node not found!")
406        })
407    }
408
409    fn append_before_sibling(
410        &self,
411        cx: &mut js::context::JSContext,
412        sibling: ParseNodeId,
413        node: NodeOrText,
414    ) {
415        let node = match node {
416            NodeOrText::Node(n) => {
417                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
418            },
419            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
420        };
421        let sibling = &**self.get_node(&sibling);
422        let parent = &*sibling
423            .GetParentNode()
424            .expect("append_before_sibling called on node without parent");
425
426        super::insert(
427            cx,
428            parent,
429            Some(sibling),
430            node,
431            self.parsing_algorithm,
432            &self.custom_element_reaction_stack,
433        );
434    }
435
436    fn append(&self, cx: &mut js::context::JSContext, parent: ParseNodeId, node: NodeOrText) {
437        let node = match node {
438            NodeOrText::Node(n) => {
439                HtmlNodeOrText::AppendNode(Dom::from_ref(&**self.get_node(&n.id)))
440            },
441            NodeOrText::Text(text) => HtmlNodeOrText::AppendText(Tendril::from(text)),
442        };
443
444        let parent = &**self.get_node(&parent);
445        super::insert(
446            cx,
447            parent,
448            None,
449            node,
450            self.parsing_algorithm,
451            &self.custom_element_reaction_stack,
452        );
453    }
454
455    fn has_parent_node(&self, node: ParseNodeId) -> bool {
456        self.get_node(&node).GetParentNode().is_some()
457    }
458
459    fn same_tree(&self, x: ParseNodeId, y: ParseNodeId) -> bool {
460        let x = self.get_node(&x);
461        let y = self.get_node(&y);
462
463        let x = x.downcast::<Element>().expect("Element node expected");
464        let y = y.downcast::<Element>().expect("Element node expected");
465        x.is_in_same_home_subtree(y)
466    }
467
468    fn process_operation(&self, op: ParseOperation, cx: &mut js::context::JSContext) {
469        let document = DomRoot::from_ref(&**self.get_node(&0));
470        let document = document
471            .downcast::<Document>()
472            .expect("Document node should be downcasted!");
473        match op {
474            ParseOperation::GetTemplateContents { target, contents } => {
475                let target = DomRoot::from_ref(&**self.get_node(&target));
476                let template = target
477                    .downcast::<HTMLTemplateElement>()
478                    .expect("Tried to extract contents from non-template element while parsing");
479                self.insert_node(
480                    contents,
481                    Dom::from_ref(template.Content(CanGc::from_cx(cx)).upcast()),
482                );
483            },
484            ParseOperation::CreateElement {
485                node,
486                name,
487                attrs,
488                current_line,
489                had_duplicate_attributes,
490            } => {
491                self.current_line.set(current_line);
492                let attrs = attrs
493                    .into_iter()
494                    .map(|attr| ElementAttribute::new(attr.name, DOMString::from(attr.value)))
495                    .collect();
496                let element = create_element_for_token(
497                    name,
498                    attrs,
499                    &self.document,
500                    ElementCreator::ParserCreated(current_line),
501                    ParsingAlgorithm::Normal,
502                    &self.custom_element_reaction_stack,
503                    had_duplicate_attributes,
504                    cx,
505                );
506                self.insert_node(node, Dom::from_ref(element.upcast()));
507            },
508            ParseOperation::CreateComment { text, node } => {
509                let comment =
510                    Comment::new(DOMString::from(text), document, None, CanGc::from_cx(cx));
511                self.insert_node(node, Dom::from_ref(comment.upcast()));
512            },
513            ParseOperation::AppendBeforeSibling { sibling, node } => {
514                self.append_before_sibling(cx, sibling, node);
515            },
516            ParseOperation::Append { parent, node } => {
517                self.append(cx, parent, node);
518            },
519            ParseOperation::AppendBasedOnParentNode {
520                element,
521                prev_element,
522                node,
523            } => {
524                if self.has_parent_node(element) {
525                    self.append_before_sibling(cx, element, node);
526                } else {
527                    self.append(cx, prev_element, node);
528                }
529            },
530            ParseOperation::AppendDoctypeToDocument {
531                name,
532                public_id,
533                system_id,
534            } => {
535                let doctype = DocumentType::new(
536                    DOMString::from(name),
537                    Some(DOMString::from(public_id)),
538                    Some(DOMString::from(system_id)),
539                    document,
540                    CanGc::from_cx(cx),
541                );
542
543                document
544                    .upcast::<Node>()
545                    .AppendChild(cx, doctype.upcast())
546                    .expect("Appending failed");
547            },
548            ParseOperation::AddAttrsIfMissing { target, attrs } => {
549                let node = self.get_node(&target);
550                let elem = node
551                    .downcast::<Element>()
552                    .expect("tried to set attrs on non-Element in HTML parsing");
553                for attr in attrs {
554                    elem.set_attribute_from_parser(
555                        attr.name,
556                        DOMString::from(attr.value),
557                        None,
558                        CanGc::from_cx(cx),
559                    );
560                }
561            },
562            ParseOperation::RemoveFromParent { target } => {
563                if let Some(ref parent) = self.get_node(&target).GetParentNode() {
564                    parent.RemoveChild(cx, &self.get_node(&target)).unwrap();
565                }
566            },
567            ParseOperation::MarkScriptAlreadyStarted { node } => {
568                let node = self.get_node(&node);
569                let script = node.downcast::<HTMLScriptElement>();
570                if let Some(script) = script {
571                    script.set_already_started(true)
572                }
573            },
574            ParseOperation::ReparentChildren { parent, new_parent } => {
575                let parent = self.get_node(&parent);
576                let new_parent = self.get_node(&new_parent);
577                while let Some(child) = parent.GetFirstChild() {
578                    new_parent.AppendChild(cx, &child).unwrap();
579                }
580            },
581            ParseOperation::AssociateWithForm {
582                target,
583                form,
584                element,
585                prev_element,
586            } => {
587                let tree_node = prev_element.map_or(element, |prev| {
588                    if self.has_parent_node(element) {
589                        element
590                    } else {
591                        prev
592                    }
593                });
594
595                if !self.same_tree(tree_node, form) {
596                    return;
597                }
598                let form = self.get_node(&form);
599                let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
600                    .expect("Owner must be a form element");
601
602                let node = self.get_node(&target);
603                let elem = node.downcast::<Element>();
604                let control = elem.and_then(|e| e.as_maybe_form_control());
605
606                if let Some(control) = control {
607                    control.set_form_owner_from_parser(&form, CanGc::from_cx(cx));
608                }
609            },
610            ParseOperation::Pop { node } => {
611                vtable_for(&self.get_node(&node)).pop();
612            },
613            ParseOperation::CreatePI { node, target, data } => {
614                let pi = ProcessingInstruction::new(
615                    DOMString::from(target),
616                    DOMString::from(data),
617                    document,
618                    CanGc::from_cx(cx),
619                );
620                self.insert_node(node, Dom::from_ref(pi.upcast()));
621            },
622            ParseOperation::SetQuirksMode { mode } => {
623                document.set_quirks_mode(mode);
624            },
625            ParseOperation::AttachDeclarativeShadowRoot {
626                location,
627                template,
628                attributes,
629                sender,
630            } => {
631                let location = self.get_node(&location);
632                let template = self.get_node(&template);
633                let attributes: Vec<_> = attributes
634                    .into_iter()
635                    .map(|attribute| HtmlAttribute {
636                        name: attribute.name,
637                        value: StrTendril::from(attribute.value),
638                    })
639                    .collect();
640
641                let did_succeed =
642                    attach_declarative_shadow_inner(cx, &location, &template, &attributes);
643                sender.send(did_succeed).unwrap();
644            },
645        }
646    }
647}
648
649/// Run the parser.
650///
651/// The `fragment_context` argument is `Some` in the fragment case and describes the context
652/// node as well as whether scripting is enabled for the context node. Note that whether or not
653/// scripting is enabled for the context node does not affect whether scripting is enabled for the
654/// parser, that is determined by the `scripting_enabled` argument.
655fn run(
656    sink: Sink,
657    fragment_context: Option<(ParseNode, bool)>,
658    form_parse_node: Option<ParseNode>,
659    sender: Sender<FromParserThreadMsg>,
660    receiver: Receiver<ToParserThreadMsg>,
661    scripting_enabled: bool,
662) {
663    let options = TreeBuilderOpts {
664        scripting_enabled,
665        ..Default::default()
666    };
667
668    let html_tokenizer = if let Some((context_node, context_scripting_enabled)) = fragment_context {
669        let tree_builder =
670            TreeBuilder::new_for_fragment(sink, context_node, form_parse_node, options);
671
672        let tok_options = TokenizerOpts {
673            initial_state: Some(
674                tree_builder.tokenizer_state_for_context_elem(context_scripting_enabled),
675            ),
676            ..Default::default()
677        };
678
679        HtmlTokenizer::new(tree_builder, tok_options)
680    } else {
681        HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default())
682    };
683
684    loop {
685        match receiver
686            .recv()
687            .expect("Unexpected channel panic in html parser thread")
688        {
689            ToParserThreadMsg::Feed { input } => {
690                let input = create_buffer_queue(input);
691                let res = html_tokenizer.feed(&input);
692
693                // Gather changes to 'input' and place them in 'updated_input',
694                // which will be sent to the main thread to update feed method's 'input'
695                let mut updated_input = VecDeque::new();
696                while let Some(st) = input.pop_front() {
697                    updated_input.push_back(SendTendril::from(st));
698                }
699
700                let res = match res {
701                    TokenizerResult::Done => {
702                        FromParserThreadMsg::TokenizerResultDone { updated_input }
703                    },
704                    TokenizerResult::Script(script) => FromParserThreadMsg::TokenizerResultScript {
705                        script,
706                        updated_input,
707                    },
708                    TokenizerResult::EncodingIndicator(encoding) => {
709                        FromParserThreadMsg::EncodingIndicator {
710                            encoding: SendTendril::from(encoding),
711                            updated_input,
712                        }
713                    },
714                };
715                sender.send(res).unwrap();
716            },
717            ToParserThreadMsg::End => {
718                html_tokenizer.end();
719                sender.send(FromParserThreadMsg::End).unwrap();
720                break;
721            },
722            ToParserThreadMsg::SetPlainTextState => html_tokenizer.set_plaintext_state(),
723        };
724    }
725}
726
/// Per-node bookkeeping that `Sink` keeps on the parser thread.
#[derive(Default, JSTraceable, MallocSizeOf)]
struct ParseNodeData {
    /// Handle of the node's template contents, filled in the first time
    /// `get_template_contents` is called for the node.
    contents: Option<ParseNode>,
    /// Set by `create_element` when the element has an un-namespaced `encoding` attribute
    /// whose value is `text/html` or `application/xhtml+xml` (compared ASCII
    /// case-insensitively).
    is_integration_point: bool,
}
732
/// `TreeSink` implementation handed to the html5ever tree builder. Rather than touching the
/// DOM, it allocates `ParseNode` handles and sends `ParseOperation`s through `sender`.
pub(crate) struct Sink {
    /// Line number attached to `CreateElement` operations; starts at 1.
    current_line: Cell<u64>,
    /// Bookkeeping for every handle allocated so far, keyed by parse node id.
    parse_node_data: RefCell<FxHashMap<ParseNodeId, ParseNodeData>>,
    /// Id that the next call to `new_parse_node` will hand out; starts at 1.
    next_parse_node_id: Cell<ParseNodeId>,
    /// Handle of the document (id 0).
    document_node: ParseNode,
    /// Channel on which parse operations are sent.
    sender: Sender<FromParserThreadMsg>,
    /// Value supplied to `Sink::new`; `Tokenizer::new` passes the document's
    /// `allow_declarative_shadow_roots()` here.
    allow_declarative_shadow_roots: bool,
}
741
742impl Sink {
743    fn new(sender: Sender<FromParserThreadMsg>, allow_declarative_shadow_roots: bool) -> Sink {
744        let sink = Sink {
745            current_line: Cell::new(1),
746            parse_node_data: RefCell::new(FxHashMap::default()),
747            next_parse_node_id: Cell::new(1),
748            document_node: ParseNode {
749                id: 0,
750                qual_name: None,
751            },
752            sender,
753            allow_declarative_shadow_roots,
754        };
755        let data = ParseNodeData::default();
756        sink.insert_parse_node_data(0, data);
757        sink
758    }
759
760    fn new_parse_node(&self) -> ParseNode {
761        let id = self.next_parse_node_id.get();
762        let data = ParseNodeData::default();
763        self.insert_parse_node_data(id, data);
764        self.next_parse_node_id.set(id + 1);
765        ParseNode {
766            id,
767            qual_name: None,
768        }
769    }
770
771    fn send_op(&self, op: ParseOperation) {
772        self.sender
773            .send(FromParserThreadMsg::ProcessOperation(op))
774            .unwrap();
775    }
776
777    fn insert_parse_node_data(&self, id: ParseNodeId, data: ParseNodeData) {
778        assert!(self.parse_node_data.borrow_mut().insert(id, data).is_none());
779    }
780
781    fn get_parse_node_data<'a>(&'a self, id: &'a ParseNodeId) -> Ref<'a, ParseNodeData> {
782        Ref::map(self.parse_node_data.borrow(), |data| {
783            data.get(id).expect("Parse Node data not found!")
784        })
785    }
786
787    fn get_parse_node_data_mut<'a>(&'a self, id: &'a ParseNodeId) -> RefMut<'a, ParseNodeData> {
788        RefMut::map(self.parse_node_data.borrow_mut(), |data| {
789            data.get_mut(id).expect("Parse Node data not found!")
790        })
791    }
792}
793
794impl TreeSink for Sink {
    /// The sink is its own output.
    type Output = Self;
    /// Returns the sink unchanged.
    fn finish(self) -> Self {
        self
    }

    /// Nodes are referred to through `ParseNode` handles.
    type Handle = ParseNode;
    /// Element names are exposed as borrowed expanded names.
    type ElemName<'a>
        = ExpandedName<'a>
    where
        Self: 'a;
805
806    fn get_document(&self) -> Self::Handle {
807        self.document_node.clone()
808    }
809
810    fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle {
811        if let Some(ref contents) = self.get_parse_node_data(&target.id).contents {
812            return contents.clone();
813        }
814        let node = self.new_parse_node();
815        {
816            let mut data = self.get_parse_node_data_mut(&target.id);
817            data.contents = Some(node.clone());
818        }
819        self.send_op(ParseOperation::GetTemplateContents {
820            target: target.id,
821            contents: node.id,
822        });
823        node
824    }
825
826    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
827        x.id == y.id
828    }
829
830    fn elem_name<'a>(&self, target: &'a Self::Handle) -> ExpandedName<'a> {
831        target
832            .qual_name
833            .as_ref()
834            .expect("Expected qual name of node!")
835            .expanded()
836    }
837
838    fn create_element(
839        &self,
840        name: QualName,
841        html_attrs: Vec<HtmlAttribute>,
842        flags: ElementFlags,
843    ) -> Self::Handle {
844        let mut node = self.new_parse_node();
845        node.qual_name = Some(name.clone());
846        {
847            let mut node_data = self.get_parse_node_data_mut(&node.id);
848            node_data.is_integration_point = html_attrs.iter().any(|attr| {
849                let attr_value = &String::from(attr.value.clone());
850                (attr.name.local == local_name!("encoding") && attr.name.ns == ns!()) &&
851                    (attr_value.eq_ignore_ascii_case("text/html") ||
852                        attr_value.eq_ignore_ascii_case("application/xhtml+xml"))
853            });
854        }
855        let attrs = html_attrs
856            .into_iter()
857            .map(|attr| Attribute {
858                name: attr.name,
859                value: String::from(attr.value),
860            })
861            .collect();
862
863        self.send_op(ParseOperation::CreateElement {
864            node: node.id,
865            name,
866            attrs,
867            current_line: self.current_line.get(),
868            had_duplicate_attributes: flags.had_duplicate_attributes,
869        });
870        node
871    }
872
873    fn create_comment(&self, text: StrTendril) -> Self::Handle {
874        let node = self.new_parse_node();
875        self.send_op(ParseOperation::CreateComment {
876            text: String::from(text),
877            node: node.id,
878        });
879        node
880    }
881
882    fn create_pi(&self, target: StrTendril, data: StrTendril) -> ParseNode {
883        let node = self.new_parse_node();
884        self.send_op(ParseOperation::CreatePI {
885            node: node.id,
886            target: String::from(target),
887            data: String::from(data),
888        });
889        node
890    }
891
892    fn associate_with_form(
893        &self,
894        target: &Self::Handle,
895        form: &Self::Handle,
896        nodes: (&Self::Handle, Option<&Self::Handle>),
897    ) {
898        let (element, prev_element) = nodes;
899        self.send_op(ParseOperation::AssociateWithForm {
900            target: target.id,
901            form: form.id,
902            element: element.id,
903            prev_element: prev_element.map(|p| p.id),
904        });
905    }
906
907    fn append_before_sibling(
908        &self,
909        sibling: &Self::Handle,
910        new_node: HtmlNodeOrText<Self::Handle>,
911    ) {
912        let new_node = match new_node {
913            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
914            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
915        };
916        self.send_op(ParseOperation::AppendBeforeSibling {
917            sibling: sibling.id,
918            node: new_node,
919        });
920    }
921
922    fn append_based_on_parent_node(
923        &self,
924        elem: &Self::Handle,
925        prev_elem: &Self::Handle,
926        child: HtmlNodeOrText<Self::Handle>,
927    ) {
928        let child = match child {
929            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
930            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
931        };
932        self.send_op(ParseOperation::AppendBasedOnParentNode {
933            element: elem.id,
934            prev_element: prev_elem.id,
935            node: child,
936        });
937    }
938
939    fn parse_error(&self, msg: Cow<'static, str>) {
940        debug!("Parse error: {}", msg);
941    }
942
943    fn set_quirks_mode(&self, mode: QuirksMode) {
944        let mode = match mode {
945            QuirksMode::Quirks => ServoQuirksMode::Quirks,
946            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
947            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
948        };
949        self.send_op(ParseOperation::SetQuirksMode { mode });
950    }
951
952    fn append(&self, parent: &Self::Handle, child: HtmlNodeOrText<Self::Handle>) {
953        let child = match child {
954            HtmlNodeOrText::AppendNode(node) => NodeOrText::Node(node),
955            HtmlNodeOrText::AppendText(text) => NodeOrText::Text(String::from(text)),
956        };
957        self.send_op(ParseOperation::Append {
958            parent: parent.id,
959            node: child,
960        });
961    }
962
963    fn append_doctype_to_document(
964        &self,
965        name: StrTendril,
966        public_id: StrTendril,
967        system_id: StrTendril,
968    ) {
969        self.send_op(ParseOperation::AppendDoctypeToDocument {
970            name: String::from(name),
971            public_id: String::from(public_id),
972            system_id: String::from(system_id),
973        });
974    }
975
976    fn add_attrs_if_missing(&self, target: &Self::Handle, html_attrs: Vec<HtmlAttribute>) {
977        let attrs = html_attrs
978            .into_iter()
979            .map(|attr| Attribute {
980                name: attr.name,
981                value: String::from(attr.value),
982            })
983            .collect();
984        self.send_op(ParseOperation::AddAttrsIfMissing {
985            target: target.id,
986            attrs,
987        });
988    }
989
990    fn remove_from_parent(&self, target: &Self::Handle) {
991        self.send_op(ParseOperation::RemoveFromParent { target: target.id });
992    }
993
994    fn mark_script_already_started(&self, node: &Self::Handle) {
995        self.send_op(ParseOperation::MarkScriptAlreadyStarted { node: node.id });
996    }
997
998    fn reparent_children(&self, parent: &Self::Handle, new_parent: &Self::Handle) {
999        self.send_op(ParseOperation::ReparentChildren {
1000            parent: parent.id,
1001            new_parent: new_parent.id,
1002        });
1003    }
1004
1005    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
1006    /// Specifically, the `<annotation-xml>` cases.
1007    fn is_mathml_annotation_xml_integration_point(&self, handle: &Self::Handle) -> bool {
1008        let node_data = self.get_parse_node_data(&handle.id);
1009        node_data.is_integration_point
1010    }
1011
1012    fn set_current_line(&self, line_number: u64) {
1013        self.current_line.set(line_number);
1014    }
1015
1016    fn pop(&self, node: &Self::Handle) {
1017        self.send_op(ParseOperation::Pop { node: node.id });
1018    }
1019
1020    fn allow_declarative_shadow_roots(&self, _intended_parent: &Self::Handle) -> bool {
1021        self.allow_declarative_shadow_roots
1022    }
1023
1024    fn attach_declarative_shadow(
1025        &self,
1026        location: &Self::Handle,
1027        template: &Self::Handle,
1028        attributes: &[HtmlAttribute],
1029    ) -> bool {
1030        let attributes = attributes
1031            .iter()
1032            .map(|attribute| Attribute {
1033                name: attribute.name.clone(),
1034                value: String::from(attribute.value.clone()),
1035            })
1036            .collect();
1037
1038        // Unfortunately the parser can only proceed after it knows whether attaching the shadow root
1039        // succeeded or failed. Attaching a shadow root can fail for many different reasons,
1040        // and so we need to block until the script thread has processed this operation.
1041        let (sender, receiver) = unbounded();
1042        self.send_op(ParseOperation::AttachDeclarativeShadowRoot {
1043            location: location.id,
1044            template: template.id,
1045            attributes,
1046            sender,
1047        });
1048
1049        receiver.recv().unwrap()
1050    }
1051}