html5ever/tree_builder/
mod.rs

1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! The HTML5 tree builder.
11
12pub use crate::interface::{create_element, ElemName, ElementFlags, Tracer, TreeSink};
13pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText};
14pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
15
16use self::types::*;
17
18use crate::tendril::StrTendril;
19use crate::{ExpandedName, LocalName, Namespace, QualName};
20
21use crate::tokenizer;
22use crate::tokenizer::states as tok_state;
23use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult};
24
25use std::borrow::Cow::{self, Borrowed};
26use std::cell::{Cell, Ref, RefCell};
27use std::collections::VecDeque;
28use std::iter::{Enumerate, Rev};
29use std::{fmt, slice};
30
31use crate::tokenizer::states::RawKind;
32use crate::tree_builder::tag_sets::*;
33use crate::util::str::to_escaped_string;
34use log::{debug, log_enabled, warn, Level};
35use markup5ever::{expanded_name, local_name, namespace_prefix, ns};
36
37#[macro_use]
38mod tag_sets;
39
40mod data;
41mod rules;
42mod types;
43
/// Tree builder options, with an impl for Default.
///
/// The type is `Copy`, so it can be passed around and stored by value.
#[derive(Copy, Clone)]
pub struct TreeBuilderOpts {
    /// Report all parse errors described in the spec, at some
    /// performance penalty? Default: false
    pub exact_errors: bool,

    /// Is scripting enabled? Default: true
    ///
    /// This affects how `<noscript>` elements are parsed:
    ///   - If scripting **is** enabled then the contents of a `<noscript>` element are parsed as a single text node
    ///   - If scripting is **not** enabled then the contents of a `<noscript>` element are parsed as a normal tree of nodes
    pub scripting_enabled: bool,

    /// Is this document being parsed from the `srcdoc` attribute of an `<iframe>` element?
    /// Default: false
    ///
    /// This affects heuristics that infer `QuirksMode` from `<!DOCTYPE>`.
    pub iframe_srcdoc: bool,

    /// Should we drop the DOCTYPE (if any) from the tree? Default: false
    pub drop_doctype: bool,

    /// Initial TreeBuilder quirks mode. Default: NoQuirks
    pub quirks_mode: QuirksMode,
}
69
70impl Default for TreeBuilderOpts {
71    fn default() -> TreeBuilderOpts {
72        TreeBuilderOpts {
73            exact_errors: false,
74            scripting_enabled: true,
75            iframe_srcdoc: false,
76            drop_doctype: false,
77            quirks_mode: NoQuirks,
78        }
79    }
80}
81
/// The HTML tree builder.
///
/// Receives tokens through its `TokenSink` implementation and reports tree modifications
/// to `sink`. All mutable parser state is kept in `Cell`/`RefCell` fields so that the
/// methods can operate through `&self`.
pub struct TreeBuilder<Handle, Sink> {
    /// Options controlling the behavior of the tree builder.
    opts: TreeBuilderOpts,

    /// Consumer of tree modifications.
    pub sink: Sink,

    /// Insertion mode.
    mode: Cell<InsertionMode>,

    /// Original insertion mode, used by Text and InTableText modes.
    orig_mode: Cell<Option<InsertionMode>>,

    /// Stack of template insertion modes.
    template_modes: RefCell<Vec<InsertionMode>>,

    /// Pending table character tokens.
    pending_table_text: RefCell<Vec<(SplitStatus, StrTendril)>>,

    /// Quirks mode as set by the parser.
    /// FIXME: can scripts etc. change this?
    quirks_mode: Cell<QuirksMode>,

    /// The document node, which is created by the sink.
    doc_handle: Handle,

    /// Stack of open elements, most recently added at end.
    open_elems: RefCell<Vec<Handle>>,

    /// List of active formatting elements.
    active_formatting: RefCell<Vec<FormatEntry<Handle>>>,

    //§ the-element-pointers
    /// Head element pointer.
    head_elem: RefCell<Option<Handle>>,

    /// Form element pointer.
    form_elem: RefCell<Option<Handle>>,

    /// Frameset-ok flag.
    frameset_ok: Cell<bool>,

    /// Ignore a following U+000A LINE FEED?
    ///
    /// Read and reset at the start of every `process_token` call, so it only ever
    /// applies to the very next token.
    ignore_lf: Cell<bool>,

    /// Is foster parenting enabled?
    foster_parenting: Cell<bool>,

    /// The context element for the fragment parsing algorithm.
    ///
    /// `None` when parsing a whole document; see `is_fragment`.
    context_elem: RefCell<Option<Handle>>,

    /// Line number that `process_token` compares each incoming token's line against to
    /// decide whether the sink must be told about a new line. Starts at 1.
    current_line: Cell<u64>,
    // WARNING: If you add new fields that contain Handles, you
    // must add them to trace_handles() below to preserve memory
    // safety!
    //
    // FIXME: Auto-generate the trace hooks like Servo does.
}
142
143impl<Handle, Sink> TreeBuilder<Handle, Sink>
144where
145    Handle: Clone,
146    Sink: TreeSink<Handle = Handle>,
147{
148    /// Create a new tree builder which sends tree modifications to a particular `TreeSink`.
149    ///
150    /// The tree builder is also a `TokenSink`.
151    pub fn new(sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
152        let doc_handle = sink.get_document();
153        TreeBuilder {
154            opts,
155            sink,
156            mode: Cell::new(InsertionMode::Initial),
157            orig_mode: Cell::new(None),
158            template_modes: Default::default(),
159            pending_table_text: Default::default(),
160            quirks_mode: Cell::new(opts.quirks_mode),
161            doc_handle,
162            open_elems: Default::default(),
163            active_formatting: Default::default(),
164            head_elem: Default::default(),
165            form_elem: Default::default(),
166            frameset_ok: Cell::new(true),
167            ignore_lf: Default::default(),
168            foster_parenting: Default::default(),
169            context_elem: Default::default(),
170            current_line: Cell::new(1),
171        }
172    }
173
174    /// Create a new tree builder which sends tree modifications to a particular `TreeSink`.
175    /// This is for parsing fragments.
176    ///
177    /// The tree builder is also a `TokenSink`.
178    pub fn new_for_fragment(
179        sink: Sink,
180        context_elem: Handle,
181        form_elem: Option<Handle>,
182        opts: TreeBuilderOpts,
183    ) -> TreeBuilder<Handle, Sink> {
184        let doc_handle = sink.get_document();
185        let context_is_template =
186            sink.elem_name(&context_elem).expanded() == expanded_name!(html "template");
187        let template_modes = if context_is_template {
188            RefCell::new(vec![InsertionMode::InTemplate])
189        } else {
190            RefCell::new(vec![])
191        };
192
193        let tb = TreeBuilder {
194            opts,
195            sink,
196            mode: Cell::new(InsertionMode::Initial),
197            orig_mode: Cell::new(None),
198            template_modes,
199            pending_table_text: Default::default(),
200            quirks_mode: Cell::new(opts.quirks_mode),
201            doc_handle,
202            open_elems: Default::default(),
203            active_formatting: Default::default(),
204            head_elem: Default::default(),
205            form_elem: RefCell::new(form_elem),
206            frameset_ok: Cell::new(true),
207            ignore_lf: Default::default(),
208            foster_parenting: Default::default(),
209            context_elem: RefCell::new(Some(context_elem)),
210            current_line: Cell::new(1),
211        };
212
213        // https://html.spec.whatwg.org/multipage/#parsing-html-fragments
214        // 5. Let root be a new html element with no attributes.
215        // 6. Append the element root to the Document node created above.
216        // 7. Set up the parser's stack of open elements so that it contains just the single element root.
217        tb.create_root(vec![]);
218        // 10. Reset the parser's insertion mode appropriately.
219        let old_insertion_mode = tb.reset_insertion_mode();
220        tb.mode.set(old_insertion_mode);
221
222        tb
223    }
224
225    // https://html.spec.whatwg.org/multipage/#concept-frag-parse-context
226    // Step 4. Set the state of the HTML parser's tokenization stage as follows:
227    pub fn tokenizer_state_for_context_elem(
228        &self,
229        context_element_allows_scripting: bool,
230    ) -> tok_state::State {
231        let context_elem = self.context_elem.borrow();
232        let elem = context_elem.as_ref().expect("no context element");
233        let elem_name = self.sink.elem_name(elem);
234        let name = match elem_name.expanded() {
235            ExpandedName {
236                ns: &ns!(html),
237                local,
238            } => local,
239            _ => return tok_state::Data,
240        };
241        match *name {
242            local_name!("title") | local_name!("textarea") => tok_state::RawData(tok_state::Rcdata),
243
244            local_name!("style")
245            | local_name!("xmp")
246            | local_name!("iframe")
247            | local_name!("noembed")
248            | local_name!("noframes") => tok_state::RawData(tok_state::Rawtext),
249
250            local_name!("script") => tok_state::RawData(tok_state::ScriptData),
251
252            local_name!("noscript") => {
253                if context_element_allows_scripting {
254                    tok_state::RawData(tok_state::Rawtext)
255                } else {
256                    tok_state::Data
257                }
258            },
259
260            local_name!("plaintext") => tok_state::Plaintext,
261
262            _ => tok_state::Data,
263        }
264    }
265
266    /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's
267    /// internal state. This is intended to support garbage-collected DOMs.
268    pub fn trace_handles(&self, tracer: &dyn Tracer<Handle = Handle>) {
269        tracer.trace_handle(&self.doc_handle);
270        for e in &*self.open_elems.borrow() {
271            tracer.trace_handle(e);
272        }
273
274        for e in &*self.active_formatting.borrow() {
275            if let FormatEntry::Element(handle, _) = e {
276                tracer.trace_handle(handle);
277            }
278        }
279
280        if let Some(head_elem) = self.head_elem.borrow().as_ref() {
281            tracer.trace_handle(head_elem);
282        }
283
284        if let Some(form_elem) = self.form_elem.borrow().as_ref() {
285            tracer.trace_handle(form_elem);
286        }
287
288        if let Some(context_elem) = self.context_elem.borrow().as_ref() {
289            tracer.trace_handle(context_elem);
290        }
291    }
292
293    #[allow(dead_code)]
294    fn dump_state(&self, label: String) {
295        println!("dump_state on {label}");
296        print!("    open_elems:");
297        for node in self.open_elems.borrow().iter() {
298            let name = self.sink.elem_name(node);
299            match *name.ns() {
300                ns!(html) => print!(" {}", name.local_name()),
301                _ => panic!(),
302            }
303        }
304        println!();
305        print!("    active_formatting:");
306        for entry in self.active_formatting.borrow().iter() {
307            match entry {
308                &FormatEntry::Marker => print!(" Marker"),
309                FormatEntry::Element(h, _) => {
310                    let name = self.sink.elem_name(h);
311                    match *name.ns() {
312                        ns!(html) => print!(" {}", name.local_name()),
313                        _ => panic!(),
314                    }
315                },
316            }
317        }
318        println!();
319    }
320
321    fn debug_step(&self, mode: InsertionMode, token: &Token) {
322        if log_enabled!(Level::Debug) {
323            debug!(
324                "processing {} in insertion mode {:?}",
325                to_escaped_string(token),
326                mode
327            );
328        }
329    }
330
331    fn process_to_completion(&self, mut token: Token) -> TokenSinkResult<Handle> {
332        // Queue of additional tokens yet to be processed.
333        // This stays empty in the common case where we don't split whitespace.
334        let mut more_tokens = VecDeque::new();
335
336        loop {
337            let should_have_acknowledged_self_closing_flag = matches!(
338                token,
339                Token::Tag(Tag {
340                    self_closing: true,
341                    kind: StartTag,
342                    ..
343                })
344            );
345            let result = if self.is_foreign(&token) {
346                self.step_foreign(token)
347            } else {
348                let mode = self.mode.get();
349                self.step(mode, token)
350            };
351            match result {
352                ProcessResult::Done => {
353                    if should_have_acknowledged_self_closing_flag {
354                        self.sink
355                            .parse_error(Borrowed("Unacknowledged self-closing tag"));
356                    }
357                    let Some(new_token) = more_tokens.pop_front() else {
358                        return tokenizer::TokenSinkResult::Continue;
359                    };
360                    token = new_token;
361                },
362                ProcessResult::DoneAckSelfClosing => {
363                    let Some(new_token) = more_tokens.pop_front() else {
364                        return tokenizer::TokenSinkResult::Continue;
365                    };
366                    token = new_token;
367                },
368                ProcessResult::Reprocess(m, t) => {
369                    self.mode.set(m);
370                    token = t;
371                },
372                ProcessResult::ReprocessForeign(t) => {
373                    token = t;
374                },
375                ProcessResult::SplitWhitespace(mut buf) => {
376                    let p = buf.pop_front_char_run(|c| c.is_ascii_whitespace());
377                    let Some((first, is_ws)) = p else {
378                        return tokenizer::TokenSinkResult::Continue;
379                    };
380                    let status = if is_ws {
381                        SplitStatus::Whitespace
382                    } else {
383                        SplitStatus::NotWhitespace
384                    };
385                    token = Token::Characters(status, first);
386
387                    if buf.len32() > 0 {
388                        more_tokens.push_back(Token::Characters(SplitStatus::NotSplit, buf));
389                    }
390                },
391                ProcessResult::Script(node) => {
392                    assert!(more_tokens.is_empty());
393                    return tokenizer::TokenSinkResult::Script(node);
394                },
395                ProcessResult::ToPlaintext => {
396                    assert!(more_tokens.is_empty());
397                    return tokenizer::TokenSinkResult::Plaintext;
398                },
399                ProcessResult::ToRawData(k) => {
400                    assert!(more_tokens.is_empty());
401                    return tokenizer::TokenSinkResult::RawData(k);
402                },
403                ProcessResult::EncodingIndicator(encoding) => {
404                    return tokenizer::TokenSinkResult::EncodingIndicator(encoding)
405                },
406            }
407        }
408    }
409
410    /// Are we parsing a HTML fragment?
411    pub fn is_fragment(&self) -> bool {
412        self.context_elem.borrow().is_some()
413    }
414
415    /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node
416    fn appropriate_place_for_insertion(
417        &self,
418        override_target: Option<Handle>,
419    ) -> InsertionPoint<Handle> {
420        use self::tag_sets::*;
421
422        declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr");
423        let target = override_target.unwrap_or_else(|| self.current_node().clone());
424        if !(self.foster_parenting.get() && self.elem_in(&target, foster_target)) {
425            if self.html_elem_named(&target, local_name!("template")) {
426                // No foster parenting (inside template).
427                let contents = self.sink.get_template_contents(&target);
428                return InsertionPoint::LastChild(contents);
429            } else {
430                // No foster parenting (the common case).
431                return InsertionPoint::LastChild(target);
432            }
433        }
434
435        // Foster parenting
436        let open_elems = self.open_elems.borrow();
437        let mut iter = open_elems.iter().rev().peekable();
438        while let Some(elem) = iter.next() {
439            if self.html_elem_named(elem, local_name!("template")) {
440                let contents = self.sink.get_template_contents(elem);
441                return InsertionPoint::LastChild(contents);
442            } else if self.html_elem_named(elem, local_name!("table")) {
443                return InsertionPoint::TableFosterParenting {
444                    element: elem.clone(),
445                    prev_element: (*iter.peek().unwrap()).clone(),
446                };
447            }
448        }
449        let html_elem = self.html_elem();
450        InsertionPoint::LastChild(html_elem.clone())
451    }
452
453    fn insert_at(&self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>) {
454        match insertion_point {
455            InsertionPoint::LastChild(parent) => self.sink.append(&parent, child),
456            InsertionPoint::BeforeSibling(sibling) => {
457                self.sink.append_before_sibling(&sibling, child)
458            },
459            InsertionPoint::TableFosterParenting {
460                element,
461                prev_element,
462            } => self
463                .sink
464                .append_based_on_parent_node(&element, &prev_element, child),
465        }
466    }
467}
468
469impl<Handle, Sink> TokenSink for TreeBuilder<Handle, Sink>
470where
471    Handle: Clone,
472    Sink: TreeSink<Handle = Handle>,
473{
474    type Handle = Handle;
475
476    fn process_token(&self, token: tokenizer::Token, line_number: u64) -> TokenSinkResult<Handle> {
477        if line_number != self.current_line.get() {
478            self.sink.set_current_line(line_number);
479        }
480        let ignore_lf = self.ignore_lf.take();
481
482        // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type.
483        let token = match token {
484            tokenizer::ParseError(e) => {
485                self.sink.parse_error(e);
486                return tokenizer::TokenSinkResult::Continue;
487            },
488
489            tokenizer::DoctypeToken(dt) => {
490                if self.mode.get() == InsertionMode::Initial {
491                    let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc);
492                    if err {
493                        self.sink.parse_error(if self.opts.exact_errors {
494                            Cow::from(format!("Bad DOCTYPE: {dt:?}"))
495                        } else {
496                            Cow::from("Bad DOCTYPE")
497                        });
498                    }
499                    let Doctype {
500                        name,
501                        public_id,
502                        system_id,
503                        force_quirks: _,
504                    } = dt;
505                    if !self.opts.drop_doctype {
506                        self.sink.append_doctype_to_document(
507                            name.unwrap_or(StrTendril::new()),
508                            public_id.unwrap_or(StrTendril::new()),
509                            system_id.unwrap_or(StrTendril::new()),
510                        );
511                    }
512                    self.set_quirks_mode(quirk);
513
514                    self.mode.set(InsertionMode::BeforeHtml);
515                    return tokenizer::TokenSinkResult::Continue;
516                } else {
517                    self.sink.parse_error(if self.opts.exact_errors {
518                        Cow::from(format!("DOCTYPE in insertion mode {:?}", self.mode.get()))
519                    } else {
520                        Cow::from("DOCTYPE in body")
521                    });
522                    return tokenizer::TokenSinkResult::Continue;
523                }
524            },
525
526            tokenizer::TagToken(x) => Token::Tag(x),
527            tokenizer::CommentToken(x) => Token::Comment(x),
528            tokenizer::NullCharacterToken => Token::NullCharacter,
529            tokenizer::EOFToken => Token::Eof,
530
531            tokenizer::CharacterTokens(mut x) => {
532                if ignore_lf && x.starts_with("\n") {
533                    x.pop_front(1);
534                }
535                if x.is_empty() {
536                    return tokenizer::TokenSinkResult::Continue;
537                }
538                Token::Characters(SplitStatus::NotSplit, x)
539            },
540        };
541
542        self.process_to_completion(token)
543    }
544
545    fn end(&self) {
546        for elem in self.open_elems.borrow_mut().drain(..).rev() {
547            self.sink.pop(&elem);
548        }
549    }
550
551    fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
552        !self.open_elems.borrow().is_empty()
553            && *self.sink.elem_name(&self.adjusted_current_node()).ns() != ns!(html)
554    }
555}
556
/// Returns the first (bottom-most) entry of a stack of open elements.
///
/// # Panics
///
/// Panics if `open_elems` is empty.
pub fn html_elem<Handle>(open_elems: &[Handle]) -> &Handle {
    const ROOT_INDEX: usize = 0;
    &open_elems[ROOT_INDEX]
}
560
561struct ActiveFormattingView<'a, Handle: 'a> {
562    data: Ref<'a, Vec<FormatEntry<Handle>>>,
563}
564
565impl<'a, Handle: 'a> ActiveFormattingView<'a, Handle> {
566    fn iter(&'a self) -> impl Iterator<Item = (usize, &'a Handle, &'a Tag)> + 'a {
567        ActiveFormattingIter {
568            iter: self.data.iter().enumerate().rev(),
569        }
570    }
571}
572
573pub struct ActiveFormattingIter<'a, Handle: 'a> {
574    iter: Rev<Enumerate<slice::Iter<'a, FormatEntry<Handle>>>>,
575}
576
577impl<'a, Handle> Iterator for ActiveFormattingIter<'a, Handle> {
578    type Item = (usize, &'a Handle, &'a Tag);
579    fn next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)> {
580        match self.iter.next() {
581            None | Some((_, &FormatEntry::Marker)) => None,
582            Some((i, FormatEntry::Element(h, t))) => Some((i, h, t)),
583        }
584    }
585}
586
/// Two-state flag passed to tree-builder helpers.
// NOTE(review): presumably selects whether a newly inserted element is also pushed onto
// the stack of open elements; the call sites are outside this part of the file — confirm.
pub enum PushFlag {
    Push,
    NoPush,
}
591
/// Position marker in the list of active formatting elements, used by the adoption
/// agency algorithm to remember where the replacement formatting entry belongs.
enum Bookmark<Handle> {
    /// Initial state: refers to the formatting element itself.
    Replace(Handle),
    /// Set once an inner-loop node has been recreated while the last node was still
    /// the furthest block; refers to that recreated node.
    InsertAfter(Handle),
}
596
// Builds a `QualName` value from literal tokens.
//
// Two forms are accepted:
//   - `qualname!("", local)`: no prefix and the empty namespace (`ns!()`).
//   - `qualname!(prefix ns local)`: the given prefix, namespace and local name.
//
// Note that only the first form separates its arguments with a comma.
macro_rules! qualname {
    // Unprefixed name in the empty namespace.
    ("", $local:tt) => {
        QualName {
            prefix: None,
            ns: ns!(),
            local: local_name!($local),
        }
    };
    // Prefixed name in an explicit namespace.
    ($prefix: tt $ns:tt $local:tt) => {
        QualName {
            prefix: Some(namespace_prefix!($prefix)),
            ns: ns!($ns),
            local: local_name!($local),
        }
    };
}
613
614#[doc(hidden)]
615impl<Handle, Sink> TreeBuilder<Handle, Sink>
616where
617    Handle: Clone,
618    Sink: TreeSink<Handle = Handle>,
619{
620    fn unexpected<T: fmt::Debug>(&self, _thing: &T) -> ProcessResult<Handle> {
621        self.sink.parse_error(if self.opts.exact_errors {
622            Cow::from(format!(
623                "Unexpected token {} in insertion mode {:?}",
624                to_escaped_string(_thing),
625                self.mode.get()
626            ))
627        } else {
628            Cow::from("Unexpected token")
629        });
630        ProcessResult::Done
631    }
632
    /// Asserts that `node` satisfies `html_elem_named` for `name`.
    ///
    /// # Panics
    ///
    /// Panics when the check fails.
    fn assert_named(&self, node: &Handle, name: LocalName) {
        assert!(self.html_elem_named(node, name));
    }
636
637    /// Iterate over the active formatting elements (with index in the list) from the end
638    /// to the last marker, or the beginning if there are no markers.
639    fn active_formatting_end_to_marker(&self) -> ActiveFormattingView<'_, Handle> {
640        ActiveFormattingView {
641            data: self.active_formatting.borrow(),
642        }
643    }
644
645    fn position_in_active_formatting(&self, element: &Handle) -> Option<usize> {
646        self.active_formatting
647            .borrow()
648            .iter()
649            .position(|n| match n {
650                FormatEntry::Marker => false,
651                FormatEntry::Element(ref handle, _) => self.sink.same_node(handle, element),
652            })
653    }
654
655    fn set_quirks_mode(&self, mode: QuirksMode) {
656        self.quirks_mode.set(mode);
657        self.sink.set_quirks_mode(mode);
658    }
659
    /// Result used by rules that stop parsing. No state is changed here; the step is
    /// simply reported as done.
    fn stop_parsing(&self) -> ProcessResult<Handle> {
        ProcessResult::Done
    }
663
664    //§ parsing-elements-that-contain-only-text
665    // Switch to `Text` insertion mode, save the old mode, and
666    // switch the tokenizer to a raw-data state.
667    // The latter only takes effect after the current / next
668    // `process_token` of a start tag returns!
669    fn to_raw_text_mode(&self, k: RawKind) -> ProcessResult<Handle> {
670        self.orig_mode.set(Some(self.mode.get()));
671        self.mode.set(InsertionMode::Text);
672        ProcessResult::ToRawData(k)
673    }
674
    // The generic raw text / RCDATA parsing algorithm: insert an element for `tag`,
    // then switch to raw-text handling with the raw-data kind `k`.
    fn parse_raw_data(&self, tag: Tag, k: RawKind) -> ProcessResult<Handle> {
        // The element has to be inserted before the insertion mode changes.
        self.insert_element_for(tag);
        self.to_raw_text_mode(k)
    }
680    //§ END
681
682    fn current_node(&self) -> Ref<'_, Handle> {
683        Ref::map(self.open_elems.borrow(), |elems| {
684            elems.last().expect("no current element")
685        })
686    }
687
688    fn adjusted_current_node(&self) -> Ref<'_, Handle> {
689        if self.open_elems.borrow().len() == 1 {
690            let context_elem = self.context_elem.borrow();
691            let ctx = Ref::filter_map(context_elem, |e| e.as_ref());
692            if let Ok(ctx) = ctx {
693                return ctx;
694            }
695        }
696        self.current_node()
697    }
698
699    fn current_node_in<TagSet>(&self, set: TagSet) -> bool
700    where
701        TagSet: Fn(ExpandedName) -> bool,
702    {
703        set(self.sink.elem_name(&self.current_node()).expanded())
704    }
705
706    // Insert at the "appropriate place for inserting a node".
707    fn insert_appropriately(&self, child: NodeOrText<Handle>, override_target: Option<Handle>) {
708        let insertion_point = self.appropriate_place_for_insertion(override_target);
709        self.insert_at(insertion_point, child);
710    }
711
712    fn adoption_agency(&self, subject: LocalName) {
713        // 1.
714        if self.current_node_named(subject.clone())
715            && self
716                .position_in_active_formatting(&self.current_node())
717                .is_none()
718        {
719            self.pop();
720            return;
721        }
722
723        // 2. 3. 4.
724        for _ in 0..8 {
725            // 5.
726            // We clone the Handle and Tag so they don't cause an immutable borrow of self.
727            let maybe_fmt_entry = self
728                .active_formatting_end_to_marker()
729                .iter()
730                .find(|&(_, _, tag)| tag.name == subject)
731                .map(|(i, h, t)| (i, h.clone(), t.clone()));
732
733            let Some((fmt_elem_index, fmt_elem, fmt_elem_tag)) = maybe_fmt_entry else {
734                return self.process_end_tag_in_body(Tag {
735                    kind: EndTag,
736                    name: subject,
737                    self_closing: false,
738                    attrs: vec![],
739                });
740            };
741
742            let Some(fmt_elem_stack_index) = self
743                .open_elems
744                .borrow()
745                .iter()
746                .rposition(|n| self.sink.same_node(n, &fmt_elem))
747            else {
748                self.sink
749                    .parse_error(Borrowed("Formatting element not open"));
750                self.active_formatting.borrow_mut().remove(fmt_elem_index);
751                return;
752            };
753
754            // 7.
755            if !self.in_scope(default_scope, |n| self.sink.same_node(&n, &fmt_elem)) {
756                self.sink
757                    .parse_error(Borrowed("Formatting element not in scope"));
758                return;
759            }
760
761            // 8.
762            if !self.sink.same_node(&self.current_node(), &fmt_elem) {
763                self.sink
764                    .parse_error(Borrowed("Formatting element not current node"));
765            }
766
767            // 9.
768            let maybe_furthest_block = self
769                .open_elems
770                .borrow()
771                .iter()
772                .enumerate()
773                .skip(fmt_elem_stack_index)
774                .find(|&(_, open_element)| self.elem_in(open_element, special_tag))
775                .map(|(i, h)| (i, h.clone()));
776
777            let Some((furthest_block_index, furthest_block)) = maybe_furthest_block else {
778                // 10.
779                self.open_elems.borrow_mut().truncate(fmt_elem_stack_index);
780                self.active_formatting.borrow_mut().remove(fmt_elem_index);
781                return;
782            };
783
784            // 11.
785            let common_ancestor = self.open_elems.borrow()[fmt_elem_stack_index - 1].clone();
786
787            // 12.
788            let mut bookmark = Bookmark::Replace(fmt_elem.clone());
789
790            // 13.
791            let mut node;
792            let mut node_index = furthest_block_index;
793            let mut last_node = furthest_block.clone();
794
795            // 13.1.
796            let mut inner_counter = 0;
797            loop {
798                // 13.2.
799                inner_counter += 1;
800
801                // 13.3.
802                node_index -= 1;
803                node = self.open_elems.borrow()[node_index].clone();
804
805                // 13.4.
806                if self.sink.same_node(&node, &fmt_elem) {
807                    break;
808                }
809
810                // 13.5.
811                if inner_counter > 3 {
812                    self.position_in_active_formatting(&node)
813                        .map(|position| self.active_formatting.borrow_mut().remove(position));
814                    self.open_elems.borrow_mut().remove(node_index);
815                    continue;
816                }
817
818                let Some(node_formatting_index) = self.position_in_active_formatting(&node) else {
819                    // 13.6.
820                    self.open_elems.borrow_mut().remove(node_index);
821                    continue;
822                };
823
824                // 13.7.
825                let tag = match self.active_formatting.borrow()[node_formatting_index] {
826                    FormatEntry::Element(ref h, ref t) => {
827                        assert!(self.sink.same_node(h, &node));
828                        t.clone()
829                    },
830                    FormatEntry::Marker => panic!("Found marker during adoption agency"),
831                };
832                // FIXME: Is there a way to avoid cloning the attributes twice here (once on their
833                // own, once as part of t.clone() above)?
834                let new_element = create_element(
835                    &self.sink,
836                    QualName::new(None, ns!(html), tag.name.clone()),
837                    tag.attrs.clone(),
838                );
839                self.open_elems.borrow_mut()[node_index] = new_element.clone();
840                self.active_formatting.borrow_mut()[node_formatting_index] =
841                    FormatEntry::Element(new_element.clone(), tag);
842                node = new_element;
843
844                // 13.8.
845                if self.sink.same_node(&last_node, &furthest_block) {
846                    bookmark = Bookmark::InsertAfter(node.clone());
847                }
848
849                // 13.9.
850                self.sink.remove_from_parent(&last_node);
851                self.sink.append(&node, AppendNode(last_node.clone()));
852
853                // 13.10.
854                last_node = node.clone();
855
856                // 13.11.
857            }
858
859            // 14.
860            self.sink.remove_from_parent(&last_node);
861            self.insert_appropriately(AppendNode(last_node.clone()), Some(common_ancestor));
862
863            // 15.
864            // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
865            // once as part of t.clone() above)?
866            let new_element = create_element(
867                &self.sink,
868                QualName::new(None, ns!(html), fmt_elem_tag.name.clone()),
869                fmt_elem_tag.attrs.clone(),
870            );
871            let new_entry = FormatEntry::Element(new_element.clone(), fmt_elem_tag);
872
873            // 16.
874            self.sink.reparent_children(&furthest_block, &new_element);
875
876            // 17.
877            self.sink
878                .append(&furthest_block, AppendNode(new_element.clone()));
879
880            // 18.
881            // FIXME: We could probably get rid of the position_in_active_formatting() calls here
882            // if we had a more clever Bookmark representation.
883            match bookmark {
884                Bookmark::Replace(to_replace) => {
885                    let index = self
886                        .position_in_active_formatting(&to_replace)
887                        .expect("bookmark not found in active formatting elements");
888                    self.active_formatting.borrow_mut()[index] = new_entry;
889                },
890                Bookmark::InsertAfter(previous) => {
891                    let index = self
892                        .position_in_active_formatting(&previous)
893                        .expect("bookmark not found in active formatting elements")
894                        + 1;
895                    self.active_formatting.borrow_mut().insert(index, new_entry);
896                    let old_index = self
897                        .position_in_active_formatting(&fmt_elem)
898                        .expect("formatting element not found in active formatting elements");
899                    self.active_formatting.borrow_mut().remove(old_index);
900                },
901            }
902
903            // 19.
904            self.remove_from_stack(&fmt_elem);
905            let new_furthest_block_index = self
906                .open_elems
907                .borrow()
908                .iter()
909                .position(|n| self.sink.same_node(n, &furthest_block))
910                .expect("furthest block missing from open element stack");
911            self.open_elems
912                .borrow_mut()
913                .insert(new_furthest_block_index + 1, new_element);
914
915            // 20.
916        }
917    }
918
919    fn push(&self, elem: &Handle) {
920        self.open_elems.borrow_mut().push(elem.clone());
921    }
922
923    fn pop(&self) -> Handle {
924        let elem = self
925            .open_elems
926            .borrow_mut()
927            .pop()
928            .expect("no current element");
929
930        self.sink.pop(&elem);
931        elem
932    }
933
934    fn remove_from_stack(&self, elem: &Handle) {
935        let position = self
936            .open_elems
937            .borrow()
938            .iter()
939            .rposition(|x| self.sink.same_node(elem, x));
940        if let Some(position) = position {
941            self.open_elems.borrow_mut().remove(position);
942            self.sink.pop(elem);
943        }
944    }
945
946    fn is_marker_or_open(&self, entry: &FormatEntry<Handle>) -> bool {
947        match *entry {
948            FormatEntry::Marker => true,
949            FormatEntry::Element(ref node, _) => self
950                .open_elems
951                .borrow()
952                .iter()
953                .rev()
954                .any(|n| self.sink.same_node(n, node)),
955        }
956    }
957
958    /// <https://html.spec.whatwg.org/#reconstruct-the-active-formatting-elements>
959    fn reconstruct_active_formatting_elements(&self) {
960        {
961            let active_formatting = self.active_formatting.borrow();
962
963            // Step 1. If there are no entries in the list of active formatting elements,
964            // then there is nothing to reconstruct; stop this algorithm.
965            let Some(last) = active_formatting.last() else {
966                return;
967            };
968
969            // Step 2. If the last (most recently added) entry in the list of active formatting elements is a marker,
970            // or if it is an element that is in the stack of open elements, then there is nothing to reconstruct;
971            // stop this algorithm.
972            if self.is_marker_or_open(last) {
973                return;
974            }
975        }
976
977        // Step 3. Let entry be the last (most recently added) element in the list of active formatting elements.
978        // NOTE: We track the index of the element instead
979        let mut entry_index = self.active_formatting.borrow().len() - 1;
980        loop {
981            // Step 4. Rewind: If there are no entries before entry in the list of active formatting elements,
982            // then jump to the step labeled create.
983            if entry_index == 0 {
984                break;
985            }
986
987            // Step 5. Let entry be the entry one earlier than entry in the list of active formatting elements.
988            entry_index -= 1;
989
990            // Step 6. If entry is neither a marker nor an element that is also in the stack of open elements,
991            // go to the step labeled rewind.
992            // Step 7. Advance: Let entry be the element one later than entry in the list
993            // of active formatting elements.
994            if self.is_marker_or_open(&self.active_formatting.borrow()[entry_index]) {
995                entry_index += 1;
996                break;
997            }
998        }
999
1000        loop {
1001            // Step 8. Create: Insert an HTML element for the token for which the element entry was created,
1002            // to obtain new element.
1003            let tag = match self.active_formatting.borrow()[entry_index] {
1004                FormatEntry::Element(_, ref t) => t.clone(),
1005                FormatEntry::Marker => {
1006                    panic!("Found marker during formatting element reconstruction")
1007                },
1008            };
1009
1010            // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own,
1011            // once as part of t.clone() above)?
1012            let new_element = self.insert_element(
1013                PushFlag::Push,
1014                ns!(html),
1015                tag.name.clone(),
1016                tag.attrs.clone(),
1017            );
1018
1019            // Step 9. Replace the entry for entry in the list with an entry for new element.
1020            self.active_formatting.borrow_mut()[entry_index] =
1021                FormatEntry::Element(new_element, tag);
1022
1023            // Step 10. If the entry for new element in the list of active formatting elements is
1024            // not the last entry in the list, return to the step labeled advance.
1025            if entry_index == self.active_formatting.borrow().len() - 1 {
1026                break;
1027            }
1028            entry_index += 1;
1029        }
1030    }
1031
1032    /// Get the first element on the stack, which will be the <html> element.
1033    fn html_elem(&self) -> Ref<'_, Handle> {
1034        Ref::map(self.open_elems.borrow(), |elems| &elems[0])
1035    }
1036
1037    /// Get the second element on the stack, if it's a HTML body element.
1038    fn body_elem(&self) -> Option<Ref<'_, Handle>> {
1039        if self.open_elems.borrow().len() <= 1 {
1040            return None;
1041        }
1042
1043        let node = Ref::map(self.open_elems.borrow(), |elems| &elems[1]);
1044        if self.html_elem_named(&node, local_name!("body")) {
1045            Some(node)
1046        } else {
1047            None
1048        }
1049    }
1050
1051    /// Signal an error depending on the state of the stack of open elements at
1052    /// the end of the body.
1053    fn check_body_end(&self) {
1054        declare_tag_set!(body_end_ok =
1055            "dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th"
1056            "thead" "tr" "body" "html");
1057
1058        for elem in self.open_elems.borrow().iter() {
1059            let error = {
1060                let elem_name = self.sink.elem_name(elem);
1061                let name = elem_name.expanded();
1062                if body_end_ok(name) {
1063                    continue;
1064                }
1065
1066                if self.opts.exact_errors {
1067                    Cow::from(format!("Unexpected open tag {name:?} at end of body"))
1068                } else {
1069                    Cow::from("Unexpected open tag at end of body")
1070                }
1071            };
1072            self.sink.parse_error(error);
1073            // FIXME: Do we keep checking after finding one bad tag?
1074            // The spec suggests not.
1075            return;
1076        }
1077    }
1078
1079    fn in_scope<TagSet, Pred>(&self, scope: TagSet, pred: Pred) -> bool
1080    where
1081        TagSet: Fn(ExpandedName) -> bool,
1082        Pred: Fn(Handle) -> bool,
1083    {
1084        for node in self.open_elems.borrow().iter().rev() {
1085            if pred(node.clone()) {
1086                return true;
1087            }
1088            if scope(self.sink.elem_name(node).expanded()) {
1089                return false;
1090            }
1091        }
1092
1093        // supposed to be impossible, because <html> is always in scope
1094
1095        false
1096    }
1097
1098    fn elem_in<TagSet>(&self, elem: &Handle, set: TagSet) -> bool
1099    where
1100        TagSet: Fn(ExpandedName) -> bool,
1101    {
1102        set(self.sink.elem_name(elem).expanded())
1103    }
1104
1105    fn html_elem_named(&self, elem: &Handle, name: LocalName) -> bool {
1106        let elem_name = self.sink.elem_name(elem);
1107        *elem_name.ns() == ns!(html) && *elem_name.local_name() == name
1108    }
1109
1110    fn in_html_elem_named(&self, name: LocalName) -> bool {
1111        self.open_elems
1112            .borrow()
1113            .iter()
1114            .any(|elem| self.html_elem_named(elem, name.clone()))
1115    }
1116
1117    fn current_node_named(&self, name: LocalName) -> bool {
1118        self.html_elem_named(&self.current_node(), name)
1119    }
1120
1121    fn in_scope_named<TagSet>(&self, scope: TagSet, name: LocalName) -> bool
1122    where
1123        TagSet: Fn(ExpandedName) -> bool,
1124    {
1125        self.in_scope(scope, |elem| self.html_elem_named(&elem, name.clone()))
1126    }
1127
1128    /// <https://html.spec.whatwg.org/#generate-implied-end-tags>
1129    fn generate_implied_end_tags<TagSet>(&self, set: TagSet)
1130    where
1131        TagSet: Fn(ExpandedName) -> bool,
1132    {
1133        loop {
1134            {
1135                let open_elems = self.open_elems.borrow();
1136                let Some(elem) = open_elems.last() else {
1137                    return;
1138                };
1139                let elem_name = self.sink.elem_name(elem);
1140                if !set(elem_name.expanded()) {
1141                    return;
1142                }
1143            }
1144            self.pop();
1145        }
1146    }
1147
1148    fn generate_implied_end_except(&self, except: LocalName) {
1149        self.generate_implied_end_tags(|p| {
1150            if *p.ns == ns!(html) && *p.local == except {
1151                false
1152            } else {
1153                cursory_implied_end(p)
1154            }
1155        });
1156    }
1157    //§ END
1158
1159    // Pop elements until the current element is in the set.
1160    fn pop_until_current<TagSet>(&self, tag_set: TagSet)
1161    where
1162        TagSet: Fn(ExpandedName) -> bool,
1163    {
1164        while !self.current_node_in(&tag_set) {
1165            self.open_elems.borrow_mut().pop();
1166        }
1167    }
1168
1169    // Pop elements until an element from the set has been popped.  Returns the
1170    // number of elements popped.
1171    fn pop_until<P>(&self, pred: P) -> usize
1172    where
1173        P: Fn(ExpandedName) -> bool,
1174    {
1175        let mut n = 0;
1176        loop {
1177            n += 1;
1178            match self.open_elems.borrow_mut().pop() {
1179                None => break,
1180                Some(elem) => {
1181                    if pred(self.sink.elem_name(&elem).expanded()) {
1182                        break;
1183                    }
1184                },
1185            }
1186        }
1187        n
1188    }
1189
1190    /// Pop element until an element with the given name has been popped.
1191    fn pop_until_named(&self, name: LocalName) -> usize {
1192        self.pop_until(|p| *p.ns == ns!(html) && *p.local == name)
1193    }
1194
1195    /// Pop elements until one with the specified name has been popped.
1196    /// Signal an error if it was not the first one.
1197    fn expect_to_close(&self, name: LocalName) {
1198        if self.pop_until_named(name.clone()) != 1 {
1199            self.sink.parse_error(if self.opts.exact_errors {
1200                Cow::from(format!("Unexpected open element while closing {name:?}"))
1201            } else {
1202                Cow::from("Unexpected open element")
1203            });
1204        }
1205    }
1206
1207    fn close_p_element(&self) {
1208        declare_tag_set!(implied = [cursory_implied_end] - "p");
1209        self.generate_implied_end_tags(implied);
1210        self.expect_to_close(local_name!("p"));
1211    }
1212
1213    fn close_p_element_in_button_scope(&self) {
1214        if self.in_scope_named(button_scope, local_name!("p")) {
1215            self.close_p_element();
1216        }
1217    }
1218
1219    // Check <input> tags for type=hidden
1220    fn is_type_hidden(&self, tag: &Tag) -> bool {
1221        match tag
1222            .attrs
1223            .iter()
1224            .find(|&at| at.name.expanded() == expanded_name!("", "type"))
1225        {
1226            None => false,
1227            Some(at) => at.value.eq_ignore_ascii_case("hidden"),
1228        }
1229    }
1230
1231    fn foster_parent_in_body(&self, token: Token) -> ProcessResult<Handle> {
1232        warn!("foster parenting not implemented");
1233        self.foster_parenting.set(true);
1234        let res = self.step(InsertionMode::InBody, token);
1235        // FIXME: what if res is Reprocess?
1236        self.foster_parenting.set(false);
1237        res
1238    }
1239
1240    fn process_chars_in_table(&self, token: Token) -> ProcessResult<Handle> {
1241        declare_tag_set!(table_outer = "table" "tbody" "tfoot" "thead" "tr");
1242        if self.current_node_in(table_outer) {
1243            assert!(self.pending_table_text.borrow().is_empty());
1244            self.orig_mode.set(Some(self.mode.get()));
1245            ProcessResult::Reprocess(InsertionMode::InTableText, token)
1246        } else {
1247            self.sink.parse_error(if self.opts.exact_errors {
1248                Cow::from(format!(
1249                    "Unexpected characters {} in table",
1250                    to_escaped_string(&token)
1251                ))
1252            } else {
1253                Cow::from("Unexpected characters in table")
1254            });
1255            self.foster_parent_in_body(token)
1256        }
1257    }
1258
1259    // https://html.spec.whatwg.org/multipage/#reset-the-insertion-mode-appropriately
1260    fn reset_insertion_mode(&self) -> InsertionMode {
1261        let open_elems = self.open_elems.borrow();
1262        for (i, mut node) in open_elems.iter().enumerate().rev() {
1263            let last = i == 0usize;
1264            let context_elem = self.context_elem.borrow();
1265            if let (true, Some(ctx)) = (last, context_elem.as_ref()) {
1266                node = ctx;
1267            }
1268            let elem_name = self.sink.elem_name(node);
1269            let name = match elem_name.expanded() {
1270                ExpandedName {
1271                    ns: &ns!(html),
1272                    local,
1273                } => local,
1274                _ => continue,
1275            };
1276            match *name {
1277                local_name!("td") | local_name!("th") => {
1278                    if !last {
1279                        return InsertionMode::InCell;
1280                    }
1281                },
1282                local_name!("tr") => return InsertionMode::InRow,
1283                local_name!("tbody") | local_name!("thead") | local_name!("tfoot") => {
1284                    return InsertionMode::InTableBody;
1285                },
1286                local_name!("caption") => return InsertionMode::InCaption,
1287                local_name!("colgroup") => return InsertionMode::InColumnGroup,
1288                local_name!("table") => return InsertionMode::InTable,
1289                local_name!("template") => return *self.template_modes.borrow().last().unwrap(),
1290                local_name!("head") => {
1291                    if !last {
1292                        return InsertionMode::InHead;
1293                    }
1294                },
1295                local_name!("body") => return InsertionMode::InBody,
1296                local_name!("frameset") => return InsertionMode::InFrameset,
1297                local_name!("html") => match *self.head_elem.borrow() {
1298                    None => return InsertionMode::BeforeHead,
1299                    Some(_) => return InsertionMode::AfterHead,
1300                },
1301
1302                _ => (),
1303            }
1304        }
1305        InsertionMode::InBody
1306    }
1307
1308    fn close_the_cell(&self) {
1309        self.generate_implied_end_tags(cursory_implied_end);
1310        if self.pop_until(td_th) != 1 {
1311            self.sink
1312                .parse_error(Borrowed("expected to close <td> or <th> with cell"));
1313        }
1314        self.clear_active_formatting_to_marker();
1315    }
1316
1317    fn append_text(&self, text: StrTendril) -> ProcessResult<Handle> {
1318        self.insert_appropriately(AppendText(text), None);
1319        ProcessResult::Done
1320    }
1321
1322    fn append_comment(&self, text: StrTendril) -> ProcessResult<Handle> {
1323        let comment = self.sink.create_comment(text);
1324        self.insert_appropriately(AppendNode(comment), None);
1325        ProcessResult::Done
1326    }
1327
1328    fn append_comment_to_doc(&self, text: StrTendril) -> ProcessResult<Handle> {
1329        let comment = self.sink.create_comment(text);
1330        self.sink.append(&self.doc_handle, AppendNode(comment));
1331        ProcessResult::Done
1332    }
1333
1334    fn append_comment_to_html(&self, text: StrTendril) -> ProcessResult<Handle> {
1335        let open_elems = self.open_elems.borrow();
1336        let target = html_elem(&open_elems);
1337        let comment = self.sink.create_comment(text);
1338        self.sink.append(target, AppendNode(comment));
1339        ProcessResult::Done
1340    }
1341
1342    //§ creating-and-inserting-nodes
1343    fn create_root(&self, attrs: Vec<Attribute>) {
1344        let elem = create_element(
1345            &self.sink,
1346            QualName::new(None, ns!(html), local_name!("html")),
1347            attrs,
1348        );
1349        self.push(&elem);
1350        self.sink.append(&self.doc_handle, AppendNode(elem));
1351        // FIXME: application cache selection algorithm
1352    }
1353
1354    /// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
1355    fn insert_element(
1356        &self,
1357        push: PushFlag,
1358        ns: Namespace,
1359        name: LocalName,
1360        attrs: Vec<Attribute>,
1361    ) -> Handle {
1362        declare_tag_set!(form_associatable =
1363            "button" "fieldset" "input" "object"
1364            "output" "select" "textarea" "img");
1365
1366        declare_tag_set!(listed = [form_associatable] - "img");
1367
1368        // Step 7.
1369        let qname = QualName::new(None, ns, name);
1370        let elem = create_element(&self.sink, qname.clone(), attrs.clone());
1371
1372        let insertion_point = self.appropriate_place_for_insertion(None);
1373        let (node1, node2) = match insertion_point {
1374            InsertionPoint::LastChild(ref p) | InsertionPoint::BeforeSibling(ref p) => {
1375                (p.clone(), None)
1376            },
1377            InsertionPoint::TableFosterParenting {
1378                ref element,
1379                ref prev_element,
1380            } => (element.clone(), Some(prev_element.clone())),
1381        };
1382
1383        // Step 12.
1384        if form_associatable(qname.expanded())
1385            && self.form_elem.borrow().is_some()
1386            && !self.in_html_elem_named(local_name!("template"))
1387            && !(listed(qname.expanded())
1388                && attrs
1389                    .iter()
1390                    .any(|a| a.name.expanded() == expanded_name!("", "form")))
1391        {
1392            let form = self.form_elem.borrow().as_ref().unwrap().clone();
1393            self.sink
1394                .associate_with_form(&elem, &form, (&node1, node2.as_ref()));
1395        }
1396
1397        self.insert_at(insertion_point, AppendNode(elem.clone()));
1398
1399        match push {
1400            PushFlag::Push => self.push(&elem),
1401            PushFlag::NoPush => (),
1402        }
1403        // FIXME: Remove from the stack if we can't append?
1404        elem
1405    }
1406
1407    fn insert_element_for(&self, tag: Tag) -> Handle {
1408        self.insert_element(PushFlag::Push, ns!(html), tag.name, tag.attrs)
1409    }
1410
1411    fn insert_and_pop_element_for(&self, tag: Tag) -> Handle {
1412        self.insert_element(PushFlag::NoPush, ns!(html), tag.name, tag.attrs)
1413    }
1414
1415    fn insert_phantom(&self, name: LocalName) -> Handle {
1416        self.insert_element(PushFlag::Push, ns!(html), name, vec![])
1417    }
1418
1419    /// <https://html.spec.whatwg.org/multipage/parsing.html#insert-an-element-at-the-adjusted-insertion-location>
1420    fn insert_foreign_element(
1421        &self,
1422        tag: Tag,
1423        ns: Namespace,
1424        only_add_to_element_stack: bool,
1425    ) -> Handle {
1426        let adjusted_insertion_location = self.appropriate_place_for_insertion(None);
1427        let qname = QualName::new(None, ns, tag.name);
1428        let elem = create_element(&self.sink, qname.clone(), tag.attrs.clone());
1429
1430        if !only_add_to_element_stack {
1431            self.insert_at(adjusted_insertion_location, AppendNode(elem.clone()));
1432        }
1433
1434        self.push(&elem);
1435
1436        elem
1437    }
1438    //§ END
1439
1440    /// <https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead>
1441    ///
1442    /// A start tag whose tag name is "template"
1443    fn should_attach_declarative_shadow(&self, tag: &Tag) -> bool {
1444        let adjusted_insertion_location = self.appropriate_place_for_insertion(None);
1445
1446        let (intended_parent, _node2) = match adjusted_insertion_location {
1447            InsertionPoint::LastChild(ref p) | InsertionPoint::BeforeSibling(ref p) => {
1448                (p.clone(), None)
1449            },
1450            InsertionPoint::TableFosterParenting {
1451                ref element,
1452                ref prev_element,
1453            } => (element.clone(), Some(prev_element.clone())),
1454        };
1455
1456        // template start tag's shadowrootmode is not in the none state
1457        let is_shadow_root_mode = tag.attrs.iter().any(|attr| {
1458            attr.name.local == local_name!("shadowrootmode")
1459                && (attr.value.as_ref() == "open" || attr.value.as_ref() == "closed")
1460        });
1461
1462        // Check if intended_parent's document allows declarative shadow roots
1463        let allow_declarative_shadow_roots =
1464            self.sink.allow_declarative_shadow_roots(&intended_parent);
1465
1466        // the adjusted current node is not the topmost element in the stack of open elements
1467        let adjusted_current_node_not_topmost = match self.open_elems.borrow().first() {
1468            // The stack grows downwards; the topmost node on the stack is the first one added to the stack
1469            // The current node is the bottommost node in this stack of open elements.
1470            //
1471            // (1) The adjusted current node is the context element if the parser was created as part of the HTML fragment parsing algorithm
1472            // and the stack of open elements has only one element in it (fragment case);
1473            // (2) otherwise, the adjusted current node is the current node (the bottomost node)
1474            //
1475            // => adjusted current node != topmost element in the stack when the stack size > 1
1476            Some(_) => self.open_elems.borrow().len() > 1,
1477            None => true,
1478        };
1479
1480        is_shadow_root_mode && allow_declarative_shadow_roots && adjusted_current_node_not_topmost
1481    }
1482
1483    /// <https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead>
1484    ///
1485    /// A start tag whose tag name is "template"
1486    fn attach_declarative_shadow(
1487        &self,
1488        tag: &Tag,
1489        shadow_host: &Handle,
1490        template: &Handle,
1491    ) -> bool {
1492        self.sink
1493            .attach_declarative_shadow(shadow_host, template, &tag.attrs)
1494    }
1495
1496    fn create_formatting_element_for(&self, tag: Tag) -> Handle {
1497        // FIXME: This really wants unit tests.
1498        let mut first_match = None;
1499        let mut matches = 0usize;
1500        for (i, _, old_tag) in self.active_formatting_end_to_marker().iter() {
1501            if tag.equiv_modulo_attr_order(old_tag) {
1502                first_match = Some(i);
1503                matches += 1;
1504            }
1505        }
1506
1507        if matches >= 3 {
1508            self.active_formatting
1509                .borrow_mut()
1510                .remove(first_match.expect("matches with no index"));
1511        }
1512
1513        let elem = self.insert_element(
1514            PushFlag::Push,
1515            ns!(html),
1516            tag.name.clone(),
1517            tag.attrs.clone(),
1518        );
1519        self.active_formatting
1520            .borrow_mut()
1521            .push(FormatEntry::Element(elem.clone(), tag));
1522        elem
1523    }
1524
1525    fn clear_active_formatting_to_marker(&self) {
1526        loop {
1527            match self.active_formatting.borrow_mut().pop() {
1528                None | Some(FormatEntry::Marker) => break,
1529                _ => (),
1530            }
1531        }
1532    }
1533
1534    fn process_end_tag_in_body(&self, tag: Tag) {
1535        // Look back for a matching open element.
1536        let mut match_idx = None;
1537        for (i, elem) in self.open_elems.borrow().iter().enumerate().rev() {
1538            if self.html_elem_named(elem, tag.name.clone()) {
1539                match_idx = Some(i);
1540                break;
1541            }
1542
1543            if self.elem_in(elem, special_tag) {
1544                self.sink
1545                    .parse_error(Borrowed("Found special tag while closing generic tag"));
1546                return;
1547            }
1548        }
1549
1550        let Some(match_idx) = match_idx else {
1551            // I believe this is impossible, because the root
1552            // <html> element is in special_tag.
1553            self.unexpected(&tag);
1554            return;
1555        };
1556
1557        self.generate_implied_end_except(tag.name.clone());
1558
1559        if match_idx != self.open_elems.borrow().len() - 1 {
1560            // mis-nested tags
1561            self.unexpected(&tag);
1562        }
1563        self.open_elems.borrow_mut().truncate(match_idx);
1564    }
1565
1566    fn handle_misnested_a_tags(&self, tag: &Tag) {
1567        let Some(node) = self
1568            .active_formatting_end_to_marker()
1569            .iter()
1570            .find(|&(_, n, _)| self.html_elem_named(n, local_name!("a")))
1571            .map(|(_, n, _)| n.clone())
1572        else {
1573            return;
1574        };
1575
1576        self.unexpected(tag);
1577        self.adoption_agency(local_name!("a"));
1578        self.position_in_active_formatting(&node)
1579            .map(|index| self.active_formatting.borrow_mut().remove(index));
1580        self.remove_from_stack(&node);
1581    }
1582
1583    //§ tree-construction
1584    fn is_foreign(&self, token: &Token) -> bool {
1585        if let Token::Eof = *token {
1586            return false;
1587        }
1588
1589        if self.open_elems.borrow().is_empty() {
1590            return false;
1591        }
1592
1593        let current = self.adjusted_current_node();
1594        let elem_name = self.sink.elem_name(&current);
1595        let name = elem_name.expanded();
1596        if let ns!(html) = *name.ns {
1597            return false;
1598        }
1599
1600        if mathml_text_integration_point(name) {
1601            match *token {
1602                Token::Characters(..) | Token::NullCharacter => return false,
1603                Token::Tag(Tag {
1604                    kind: StartTag,
1605                    ref name,
1606                    ..
1607                }) if !matches!(*name, local_name!("mglyph") | local_name!("malignmark")) => {
1608                    return false;
1609                },
1610                _ => (),
1611            }
1612        }
1613
1614        if svg_html_integration_point(name) {
1615            match *token {
1616                Token::Characters(..) | Token::NullCharacter => return false,
1617                Token::Tag(Tag { kind: StartTag, .. }) => return false,
1618                _ => (),
1619            }
1620        }
1621
1622        if let expanded_name!(mathml "annotation-xml") = name {
1623            match *token {
1624                Token::Tag(Tag {
1625                    kind: StartTag,
1626                    name: local_name!("svg"),
1627                    ..
1628                }) => return false,
1629                Token::Characters(..)
1630                | Token::NullCharacter
1631                | Token::Tag(Tag { kind: StartTag, .. }) => {
1632                    return !self
1633                        .sink
1634                        .is_mathml_annotation_xml_integration_point(&self.adjusted_current_node());
1635                },
1636                _ => {},
1637            };
1638        }
1639
1640        true
1641    }
1642    //§ END
1643
1644    fn enter_foreign(&self, mut tag: Tag, ns: Namespace) -> ProcessResult<Handle> {
1645        match ns {
1646            ns!(mathml) => self.adjust_mathml_attributes(&mut tag),
1647            ns!(svg) => self.adjust_svg_attributes(&mut tag),
1648            _ => (),
1649        }
1650        self.adjust_foreign_attributes(&mut tag);
1651
1652        if tag.self_closing {
1653            self.insert_element(PushFlag::NoPush, ns, tag.name, tag.attrs);
1654            ProcessResult::DoneAckSelfClosing
1655        } else {
1656            self.insert_element(PushFlag::Push, ns, tag.name, tag.attrs);
1657            ProcessResult::Done
1658        }
1659    }
1660
1661    fn adjust_svg_tag_name(&self, tag: &mut Tag) {
1662        let Tag { ref mut name, .. } = *tag;
1663        match *name {
1664            local_name!("altglyph") => *name = local_name!("altGlyph"),
1665            local_name!("altglyphdef") => *name = local_name!("altGlyphDef"),
1666            local_name!("altglyphitem") => *name = local_name!("altGlyphItem"),
1667            local_name!("animatecolor") => *name = local_name!("animateColor"),
1668            local_name!("animatemotion") => *name = local_name!("animateMotion"),
1669            local_name!("animatetransform") => *name = local_name!("animateTransform"),
1670            local_name!("clippath") => *name = local_name!("clipPath"),
1671            local_name!("feblend") => *name = local_name!("feBlend"),
1672            local_name!("fecolormatrix") => *name = local_name!("feColorMatrix"),
1673            local_name!("fecomponenttransfer") => *name = local_name!("feComponentTransfer"),
1674            local_name!("fecomposite") => *name = local_name!("feComposite"),
1675            local_name!("feconvolvematrix") => *name = local_name!("feConvolveMatrix"),
1676            local_name!("fediffuselighting") => *name = local_name!("feDiffuseLighting"),
1677            local_name!("fedisplacementmap") => *name = local_name!("feDisplacementMap"),
1678            local_name!("fedistantlight") => *name = local_name!("feDistantLight"),
1679            local_name!("fedropshadow") => *name = local_name!("feDropShadow"),
1680            local_name!("feflood") => *name = local_name!("feFlood"),
1681            local_name!("fefunca") => *name = local_name!("feFuncA"),
1682            local_name!("fefuncb") => *name = local_name!("feFuncB"),
1683            local_name!("fefuncg") => *name = local_name!("feFuncG"),
1684            local_name!("fefuncr") => *name = local_name!("feFuncR"),
1685            local_name!("fegaussianblur") => *name = local_name!("feGaussianBlur"),
1686            local_name!("feimage") => *name = local_name!("feImage"),
1687            local_name!("femerge") => *name = local_name!("feMerge"),
1688            local_name!("femergenode") => *name = local_name!("feMergeNode"),
1689            local_name!("femorphology") => *name = local_name!("feMorphology"),
1690            local_name!("feoffset") => *name = local_name!("feOffset"),
1691            local_name!("fepointlight") => *name = local_name!("fePointLight"),
1692            local_name!("fespecularlighting") => *name = local_name!("feSpecularLighting"),
1693            local_name!("fespotlight") => *name = local_name!("feSpotLight"),
1694            local_name!("fetile") => *name = local_name!("feTile"),
1695            local_name!("feturbulence") => *name = local_name!("feTurbulence"),
1696            local_name!("foreignobject") => *name = local_name!("foreignObject"),
1697            local_name!("glyphref") => *name = local_name!("glyphRef"),
1698            local_name!("lineargradient") => *name = local_name!("linearGradient"),
1699            local_name!("radialgradient") => *name = local_name!("radialGradient"),
1700            local_name!("textpath") => *name = local_name!("textPath"),
1701            _ => (),
1702        }
1703    }
1704
1705    fn adjust_attributes<F>(&self, tag: &mut Tag, mut map: F)
1706    where
1707        F: FnMut(LocalName) -> Option<QualName>,
1708    {
1709        for &mut Attribute { ref mut name, .. } in &mut tag.attrs {
1710            if let Some(replacement) = map(name.local.clone()) {
1711                *name = replacement;
1712            }
1713        }
1714    }
1715
1716    fn adjust_svg_attributes(&self, tag: &mut Tag) {
1717        self.adjust_attributes(tag, |k| match k {
1718            local_name!("attributename") => Some(qualname!("", "attributeName")),
1719            local_name!("attributetype") => Some(qualname!("", "attributeType")),
1720            local_name!("basefrequency") => Some(qualname!("", "baseFrequency")),
1721            local_name!("baseprofile") => Some(qualname!("", "baseProfile")),
1722            local_name!("calcmode") => Some(qualname!("", "calcMode")),
1723            local_name!("clippathunits") => Some(qualname!("", "clipPathUnits")),
1724            local_name!("diffuseconstant") => Some(qualname!("", "diffuseConstant")),
1725            local_name!("edgemode") => Some(qualname!("", "edgeMode")),
1726            local_name!("filterunits") => Some(qualname!("", "filterUnits")),
1727            local_name!("glyphref") => Some(qualname!("", "glyphRef")),
1728            local_name!("gradienttransform") => Some(qualname!("", "gradientTransform")),
1729            local_name!("gradientunits") => Some(qualname!("", "gradientUnits")),
1730            local_name!("kernelmatrix") => Some(qualname!("", "kernelMatrix")),
1731            local_name!("kernelunitlength") => Some(qualname!("", "kernelUnitLength")),
1732            local_name!("keypoints") => Some(qualname!("", "keyPoints")),
1733            local_name!("keysplines") => Some(qualname!("", "keySplines")),
1734            local_name!("keytimes") => Some(qualname!("", "keyTimes")),
1735            local_name!("lengthadjust") => Some(qualname!("", "lengthAdjust")),
1736            local_name!("limitingconeangle") => Some(qualname!("", "limitingConeAngle")),
1737            local_name!("markerheight") => Some(qualname!("", "markerHeight")),
1738            local_name!("markerunits") => Some(qualname!("", "markerUnits")),
1739            local_name!("markerwidth") => Some(qualname!("", "markerWidth")),
1740            local_name!("maskcontentunits") => Some(qualname!("", "maskContentUnits")),
1741            local_name!("maskunits") => Some(qualname!("", "maskUnits")),
1742            local_name!("numoctaves") => Some(qualname!("", "numOctaves")),
1743            local_name!("pathlength") => Some(qualname!("", "pathLength")),
1744            local_name!("patterncontentunits") => Some(qualname!("", "patternContentUnits")),
1745            local_name!("patterntransform") => Some(qualname!("", "patternTransform")),
1746            local_name!("patternunits") => Some(qualname!("", "patternUnits")),
1747            local_name!("pointsatx") => Some(qualname!("", "pointsAtX")),
1748            local_name!("pointsaty") => Some(qualname!("", "pointsAtY")),
1749            local_name!("pointsatz") => Some(qualname!("", "pointsAtZ")),
1750            local_name!("preservealpha") => Some(qualname!("", "preserveAlpha")),
1751            local_name!("preserveaspectratio") => Some(qualname!("", "preserveAspectRatio")),
1752            local_name!("primitiveunits") => Some(qualname!("", "primitiveUnits")),
1753            local_name!("refx") => Some(qualname!("", "refX")),
1754            local_name!("refy") => Some(qualname!("", "refY")),
1755            local_name!("repeatcount") => Some(qualname!("", "repeatCount")),
1756            local_name!("repeatdur") => Some(qualname!("", "repeatDur")),
1757            local_name!("requiredextensions") => Some(qualname!("", "requiredExtensions")),
1758            local_name!("requiredfeatures") => Some(qualname!("", "requiredFeatures")),
1759            local_name!("specularconstant") => Some(qualname!("", "specularConstant")),
1760            local_name!("specularexponent") => Some(qualname!("", "specularExponent")),
1761            local_name!("spreadmethod") => Some(qualname!("", "spreadMethod")),
1762            local_name!("startoffset") => Some(qualname!("", "startOffset")),
1763            local_name!("stddeviation") => Some(qualname!("", "stdDeviation")),
1764            local_name!("stitchtiles") => Some(qualname!("", "stitchTiles")),
1765            local_name!("surfacescale") => Some(qualname!("", "surfaceScale")),
1766            local_name!("systemlanguage") => Some(qualname!("", "systemLanguage")),
1767            local_name!("tablevalues") => Some(qualname!("", "tableValues")),
1768            local_name!("targetx") => Some(qualname!("", "targetX")),
1769            local_name!("targety") => Some(qualname!("", "targetY")),
1770            local_name!("textlength") => Some(qualname!("", "textLength")),
1771            local_name!("viewbox") => Some(qualname!("", "viewBox")),
1772            local_name!("viewtarget") => Some(qualname!("", "viewTarget")),
1773            local_name!("xchannelselector") => Some(qualname!("", "xChannelSelector")),
1774            local_name!("ychannelselector") => Some(qualname!("", "yChannelSelector")),
1775            local_name!("zoomandpan") => Some(qualname!("", "zoomAndPan")),
1776            _ => None,
1777        });
1778    }
1779
1780    fn adjust_mathml_attributes(&self, tag: &mut Tag) {
1781        self.adjust_attributes(tag, |k| match k {
1782            local_name!("definitionurl") => Some(qualname!("", "definitionURL")),
1783            _ => None,
1784        });
1785    }
1786
1787    fn adjust_foreign_attributes(&self, tag: &mut Tag) {
1788        self.adjust_attributes(tag, |k| match k {
1789            local_name!("xlink:actuate") => Some(qualname!("xlink" xlink "actuate")),
1790            local_name!("xlink:arcrole") => Some(qualname!("xlink" xlink "arcrole")),
1791            local_name!("xlink:href") => Some(qualname!("xlink" xlink "href")),
1792            local_name!("xlink:role") => Some(qualname!("xlink" xlink "role")),
1793            local_name!("xlink:show") => Some(qualname!("xlink" xlink "show")),
1794            local_name!("xlink:title") => Some(qualname!("xlink" xlink "title")),
1795            local_name!("xlink:type") => Some(qualname!("xlink" xlink "type")),
1796            local_name!("xml:lang") => Some(qualname!("xml" xml "lang")),
1797            local_name!("xml:space") => Some(qualname!("xml" xml "space")),
1798            local_name!("xmlns") => Some(qualname!("" xmlns "xmlns")),
1799            local_name!("xmlns:xlink") => Some(qualname!("xmlns" xmlns "xlink")),
1800            _ => None,
1801        });
1802    }
1803
1804    fn foreign_start_tag(&self, mut tag: Tag) -> ProcessResult<Handle> {
1805        let current_ns = self
1806            .sink
1807            .elem_name(&self.adjusted_current_node())
1808            .ns()
1809            .clone();
1810        match current_ns {
1811            ns!(mathml) => self.adjust_mathml_attributes(&mut tag),
1812            ns!(svg) => {
1813                self.adjust_svg_tag_name(&mut tag);
1814                self.adjust_svg_attributes(&mut tag);
1815            },
1816            _ => (),
1817        }
1818        self.adjust_foreign_attributes(&mut tag);
1819        if tag.self_closing {
1820            // FIXME(#118): <script /> in SVG
1821            self.insert_element(PushFlag::NoPush, current_ns, tag.name, tag.attrs);
1822            ProcessResult::DoneAckSelfClosing
1823        } else {
1824            self.insert_element(PushFlag::Push, current_ns, tag.name, tag.attrs);
1825            ProcessResult::Done
1826        }
1827    }
1828
1829    fn unexpected_start_tag_in_foreign_content(&self, tag: Tag) -> ProcessResult<Handle> {
1830        self.unexpected(&tag);
1831        while !self.current_node_in(|n| {
1832            *n.ns == ns!(html) || mathml_text_integration_point(n) || svg_html_integration_point(n)
1833        }) {
1834            self.pop();
1835        }
1836        self.step(self.mode.get(), Token::Tag(tag))
1837    }
1838}