xml5ever/tokenizer/
states.rs

1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! Tokenizer states.
11
/// Selects which identifier of a [Document Type Declaration] (DTD) is meant: the public one
/// or the system one.
///
/// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DoctypeKind {
    /// Refers to the public identifier.
    Public,
    /// Refers to the system identifier.
    System,
}
22
23/// Specifies the different states a XML tokenizer will assume during parsing.
24#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
25pub enum XmlState {
26    /// The initial state of the parser.
27    ///
28    /// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the html parser,
29    /// except null codepoints do not cause errors.
30    Data,
31    /// Indicates that the parser has found a `<` character and will try to parse a tag.
32    TagState,
33    /// Indicates that the parser has consumed the `/` of a closing tag, like `</foo>`.
34    EndTagState,
35    /// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of `</foo>`.
36    EndTagName,
37    /// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow.
38    EndTagNameAfter,
39    /// Indicates that the parser has started parsing a [processing instruction] (PI).
40    ///
41    /// This state is reached after the initial `?` character has been consumed.
42    ///
43    /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
44    Pi,
45    /// Indicates that the parser is currently parsing the target of a [processing instruction].
46    ///
47    /// For example, the target of `<?xml-stylesheet type="text/xsl" href="style.xsl"?>` is `xml-stylesheet`.
48    ///
49    /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
50    PiTarget,
51    /// Indicates that the parser has finished parsing the target of a [processing instruction].
52    ///
53    /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
54    PiTargetAfter,
55    /// Indicates that the parser is currently parsing the data of a [processing instruction].
56    ///
57    /// The "data" refers to everything between the target and the closing `?` character.
58    ///
59    /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
60    PiData,
61    /// Indicates that the parser has parsed the closing `?` of a [processing instruction].
62    ///
63    /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
64    PiAfter,
65    /// Indicates that the parser has parsed the initial `!` of a markup declaration.
66    ///
67    /// Examples of such declarations include `<!ENTITY chap1 SYSTEM "chap1.xml">` or `<!-- Comment -->`.
68    MarkupDecl,
69    /// Indicates that the parser has parsed the start of a comment (`<!--`).
70    CommentStart,
71    /// Indicates that the parser has parsed the start of a comment and a `-` directly after it.
72    CommentStartDash,
73    /// Indicates that the parser is currently parsing the data within a comment.
74    Comment,
75    /// Indicates that the parser has parsed a `<` character within a comment.
76    CommentLessThan,
77    /// Indicates that the parser has parsed `<!` within a comment.
78    CommentLessThanBang,
79    /// Indicates that the parser has parsed `<!-` within a comment.
80    CommentLessThanBangDash,
81    /// Indicates that the parser has parsed `<!--` within a comment.
82    CommentLessThanBangDashDash,
83    /// Indicates that the parser has parsed two `-` characters within a comment which may or may not
84    /// be the beginning of the comment end (`-->`).
85    CommentEnd,
86    /// Indicates that the parser has parsed a `-` character within a comment which may or may not
87    /// be the beginning of the comment end (`-->`).
88    CommentEndDash,
89    /// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the
90    /// end of the comment. Comments in XML can be closed with `--!>`.
91    CommentEndBang,
92    /// Indicates that the parser has parsed the beginning of a CDATA section (`<![CDATA[`).
93    Cdata,
94    /// Indicates that the parser has parsed a `]` character within a CDATA section, which may be part of
95    /// the end of the section (`]]>`).
96    CdataBracket,
97    /// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of
98    /// the end of the section (`]]>`).
99    CdataEnd,
100    /// Indicates that the parser is currently parsing the name of a tag, such as `foo` in `<foo>`.
101    TagName,
102    /// Indicates that the parser has parsed the `/` of a self-closing tag, such as `<foo/>`.
103    TagEmpty,
104    /// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or
105    /// a `>`.
106    TagAttrNameBefore,
107    /// Indicates that the parser is currently parsing the name of an attribute within a tag, such as
108    /// `bar` in `<foo bar=baz>`.
109    TagAttrName,
110    /// Indicates that the parser has finished parsing the name of an attribute.
111    TagAttrNameAfter,
112    /// Indicates that the parser is about to parse the value of an attribute.
113    TagAttrValueBefore,
114    /// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in
115    /// `<foo bar=baz>`.
116    ///
117    /// Includes information about how the value is quoted, because the quotes before and after the attribute
118    /// value need to match.
119    TagAttrValue(AttrValueKind),
120    /// Indicates that the parser has parsed the beginning of a document type definition (`<!DOCTYPE`).
121    Doctype,
122    /// Indicates that the parser expects to parse the name of the document type definition next.
123    BeforeDoctypeName,
124    /// Indicates that the parser is currently parsing the name of a document type definition, such as
125    /// `html` in `<!DOCTYPE html>`.
126    DoctypeName,
127    /// Indicates that the parser has finished parsing the name of the document type definition and now optionally
128    /// expects either a public or a system identifier.
129    AfterDoctypeName,
130    /// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`).
131    AfterDoctypeKeyword(DoctypeKind),
132    /// Indicates that the parser is about to parse the value of a public or system identifier within
133    /// a document type definition, such as `foo` in
134    /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
135    BeforeDoctypeIdentifier(DoctypeKind),
136    /// Indicates that the parser is currently parsing the value of a public or system identifier
137    /// that is surrounded by double quotes , such as `foo` in
138    /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
139    DoctypeIdentifierDoubleQuoted(DoctypeKind),
140    /// Indicates that the parser is currently parsing the value of a public or system identifier
141    /// that is surrounded by single quotes , such as `foo` in
142    /// `<!DOCTYPE html PUBLIC 'foo' 'bar'>`.
143    DoctypeIdentifierSingleQuoted(DoctypeKind),
144    /// Indicates that the parser has finished parsing either a public or system identifier within a
145    /// document type definition.
146    AfterDoctypeIdentifier(DoctypeKind),
147    /// Indicates that the parser has finished parsing a public identifier and now expects
148    /// a system identifier.
149    BetweenDoctypePublicAndSystemIdentifiers,
150    /// Indicates that the parser is currently parsing an ill-formed document type defintion, such as
151    /// `<!DOCTYPE html what-is-this>`.
152    BogusDoctype,
153    /// Indicates that the parser is currently parsing an ill-formed comment, such as
154    /// `<? this is not what a comment should look like! >`.
155    BogusComment,
156}
157
/// Specifies how an attribute value is quoted, if at all.
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
pub enum AttrValueKind {
    /// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`.
    Unquoted,
    /// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`.
    SingleQuoted,
    /// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`.
    DoubleQuoted,
}
167}