// xml5ever/tokenizer/interface.rs

// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
9
10use std::borrow::Cow;
11
12use crate::tendril::StrTendril;
13use crate::tokenizer::ProcessResult;
14use crate::{Attribute, QualName};
15
16pub use self::TagKind::{EmptyTag, EndTag, ShortTag, StartTag};
17
/// The kind of tag that was encountered.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub enum TagKind {
    /// Beginning of a tag, e.g. `<a>`.
    StartTag,
    /// End of a tag, e.g. `</a>`.
    EndTag,
    /// Empty tag, e.g. `<a/>`.
    EmptyTag,
    /// Short tag, e.g. `</>`.
    ShortTag,
}
30
31/// XML 5 Tag Token
32#[derive(PartialEq, Eq, Debug, Clone)]
33pub struct Tag {
34    /// Token kind denotes which type of token was encountered.
35    /// E.g. if parser parsed `</a>` the token kind would be `EndTag`.
36    pub kind: TagKind,
37    /// Qualified name of the tag.
38    pub name: QualName,
39    /// List of attributes attached to this tag.
40    /// Only valid in start and empty tag.
41    pub attrs: Vec<Attribute>,
42}
43
44impl Tag {
45    /// Sorts attributes in a tag.
46    pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool {
47        if (self.kind != other.kind) || (self.name != other.name) {
48            return false;
49        }
50
51        let mut self_attrs = self.attrs.clone();
52        let mut other_attrs = other.attrs.clone();
53        self_attrs.sort();
54        other_attrs.sort();
55
56        self_attrs == other_attrs
57    }
58}
59
60/// A `DOCTYPE` token.
61/// Doctype token in XML5 is rather limited for reasons, such as:
62/// security and simplicity. XML5 only supports declaring DTD with
63/// name, public identifier and system identifier
64#[derive(PartialEq, Eq, Clone, Debug, Default)]
65pub struct Doctype {
66    /// Name of DOCTYPE declared
67    pub name: Option<StrTendril>,
68    /// Public identifier of this DOCTYPE.
69    pub public_id: Option<StrTendril>,
70    /// System identifier of this DOCTYPE.
71    pub system_id: Option<StrTendril>,
72}
73
74/// A ProcessingInstruction token.
75#[derive(PartialEq, Eq, Clone, Debug)]
76pub struct Pi {
77    /// What is the name of processing instruction.
78    pub target: StrTendril,
79
80    /// Text of processing instruction.
81    pub data: StrTendril,
82}
83
84/// Describes tokens encountered during parsing of input.
85#[derive(PartialEq, Eq, Debug)]
86pub enum Token {
87    /// Doctype token
88    Doctype(Doctype),
89    /// Token tag founds. This token applies to all
90    /// possible kinds of tags (like start, end, empty tag, etc.).
91    Tag(Tag),
92    /// Processing Instruction token
93    ProcessingInstruction(Pi),
94    /// Comment token.
95    Comment(StrTendril),
96    /// Token that represents a series of characters.
97    Characters(StrTendril),
98    /// End of File found.
99    EndOfFile,
100    /// NullCharacter encountered.
101    NullCharacter,
102    /// Error happened
103    ParseError(Cow<'static, str>),
104}
105
106/// Types which can receive tokens from the tokenizer.
107pub trait TokenSink {
108    /// Handle to a DOM script element
109    type Handle;
110
111    /// Process a token.
112    fn process_token(&self, token: Token) -> ProcessResult<Self::Handle>;
113
114    /// Signal to the sink that parsing has ended.
115    fn end(&self) {}
116}