xml5ever/tokenizer/states.rs
1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! Tokenizer states.
11
/// Selects one of the two external identifiers a [Document Type Declaration] (DTD) can carry.
///
/// The identifier kind is announced by the `PUBLIC` or `SYSTEM` keyword inside the declaration.
///
/// [Document Type Declaration]: https://en.wikipedia.org/wiki/Document_type_declaration
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
pub enum DoctypeKind {
    /// The public identifier (introduced by the `PUBLIC` keyword).
    Public,
    /// The system identifier (introduced by the `SYSTEM` keyword).
    System,
}
22
23/// Specifies the different states a XML tokenizer will assume during parsing.
24#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
25pub enum XmlState {
26 /// The initial state of the parser.
27 ///
28 /// It is equivalent to the [`Data`](https://html.spec.whatwg.org/#data-state) state of the html parser,
29 /// except null codepoints do not cause errors.
30 Data,
31 /// Indicates that the parser has found a `<` character and will try to parse a tag.
32 TagState,
33 /// Indicates that the parser has consumed the `/` of a closing tag, like `</foo>`.
34 EndTagState,
35 /// Indicates that the parser is currently parsing the name of a closing tag, like the `foo` of `</foo>`.
36 EndTagName,
37 /// Indicates that the parser has finished parsing the name of a closing tag and expects a `>` to follow.
38 EndTagNameAfter,
39 /// Indicates that the parser has started parsing a [processing instruction] (PI).
40 ///
41 /// This state is reached after the initial `?` character has been consumed.
42 ///
43 /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
44 Pi,
45 /// Indicates that the parser is currently parsing the target of a [processing instruction].
46 ///
47 /// For example, the target of `<?xml-stylesheet type="text/xsl" href="style.xsl"?>` is `xml-stylesheet`.
48 ///
49 /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
50 PiTarget,
51 /// Indicates that the parser has finished parsing the target of a [processing instruction].
52 ///
53 /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
54 PiTargetAfter,
55 /// Indicates that the parser is currently parsing the data of a [processing instruction].
56 ///
57 /// The "data" refers to everything between the target and the closing `?` character.
58 ///
59 /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
60 PiData,
61 /// Indicates that the parser has parsed the closing `?` of a [processing instruction].
62 ///
63 /// [processing instruction]: https://en.wikipedia.org/wiki/Processing_Instruction
64 PiAfter,
65 /// Indicates that the parser has parsed the initial `!` of a markup declaration.
66 ///
67 /// Examples of such declarations include `<!ENTITY chap1 SYSTEM "chap1.xml">` or `<!-- Comment -->`.
68 MarkupDecl,
69 /// Indicates that the parser has parsed the start of a comment (`<!--`).
70 CommentStart,
71 /// Indicates that the parser has parsed the start of a comment and a `-` directly after it.
72 CommentStartDash,
73 /// Indicates that the parser is currently parsing the data within a comment.
74 Comment,
75 /// Indicates that the parser has parsed a `<` character within a comment.
76 CommentLessThan,
77 /// Indicates that the parser has parsed `<!` within a comment.
78 CommentLessThanBang,
79 /// Indicates that the parser has parsed `<!-` within a comment.
80 CommentLessThanBangDash,
81 /// Indicates that the parser has parsed `<!--` within a comment.
82 CommentLessThanBangDashDash,
83 /// Indicates that the parser has parsed two `-` characters within a comment which may or may not
84 /// be the beginning of the comment end (`-->`).
85 CommentEnd,
86 /// Indicates that the parser has parsed a `-` character within a comment which may or may not
87 /// be the beginning of the comment end (`-->`).
88 CommentEndDash,
89 /// Indicates that the parser has parsed `--!` within a comment which may or may not be part of the
90 /// end of the comment. Comments in XML can be closed with `--!>`.
91 CommentEndBang,
92 /// Indicates that the parser has parsed the beginning of a CDATA section (`<![CDATA[`).
93 Cdata,
94 /// Indicates that the parser has parsed a `]` character within a CDATA section, which may be part of
95 /// the end of the section (`]]>`).
96 CdataBracket,
97 /// Indicates that the parser has parsed two `]` characters within a CDATA section, which may be part of
98 /// the end of the section (`]]>`).
99 CdataEnd,
100 /// Indicates that the parser is currently parsing the name of a tag, such as `foo` in `<foo>`.
101 TagName,
102 /// Indicates that the parser has parsed the `/` of a self-closing tag, such as `<foo/>`.
103 TagEmpty,
104 /// Indicates that the parser has finished parsing the name of a tag and is now expecting either attributes or
105 /// a `>`.
106 TagAttrNameBefore,
107 /// Indicates that the parser is currently parsing the name of an attribute within a tag, such as
108 /// `bar` in `<foo bar=baz>`.
109 TagAttrName,
110 /// Indicates that the parser has finished parsing the name of an attribute.
111 TagAttrNameAfter,
112 /// Indicates that the parser is about to parse the value of an attribute.
113 TagAttrValueBefore,
114 /// Indicates that the parser is currently parsing the value of an attribute, such as `baz` in
115 /// `<foo bar=baz>`.
116 ///
117 /// Includes information about how the value is quoted, because the quotes before and after the attribute
118 /// value need to match.
119 TagAttrValue(AttrValueKind),
120 /// Indicates that the parser has parsed the beginning of a document type definition (`<!DOCTYPE`).
121 Doctype,
122 /// Indicates that the parser expects to parse the name of the document type definition next.
123 BeforeDoctypeName,
124 /// Indicates that the parser is currently parsing the name of a document type definition, such as
125 /// `html` in `<!DOCTYPE html>`.
126 DoctypeName,
127 /// Indicates that the parser has finished parsing the name of the document type definition and now optionally
128 /// expects either a public or a system identifier.
129 AfterDoctypeName,
130 /// Indicates that the parser has parsed a keyword for either a public or system identifier (`PUBLIC` or `SYSTEM`).
131 AfterDoctypeKeyword(DoctypeKind),
132 /// Indicates that the parser is about to parse the value of a public or system identifier within
133 /// a document type definition, such as `foo` in
134 /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
135 BeforeDoctypeIdentifier(DoctypeKind),
136 /// Indicates that the parser is currently parsing the value of a public or system identifier
137 /// that is surrounded by double quotes , such as `foo` in
138 /// `<!DOCTYPE html PUBLIC "foo" "bar">`.
139 DoctypeIdentifierDoubleQuoted(DoctypeKind),
140 /// Indicates that the parser is currently parsing the value of a public or system identifier
141 /// that is surrounded by single quotes , such as `foo` in
142 /// `<!DOCTYPE html PUBLIC 'foo' 'bar'>`.
143 DoctypeIdentifierSingleQuoted(DoctypeKind),
144 /// Indicates that the parser has finished parsing either a public or system identifier within a
145 /// document type definition.
146 AfterDoctypeIdentifier(DoctypeKind),
147 /// Indicates that the parser has finished parsing a public identifier and now expects
148 /// a system identifier.
149 BetweenDoctypePublicAndSystemIdentifiers,
150 /// Indicates that the parser is currently parsing an ill-formed document type defintion, such as
151 /// `<!DOCTYPE html what-is-this>`.
152 BogusDoctype,
153 /// Indicates that the parser is currently parsing an ill-formed comment, such as
154 /// `<? this is not what a comment should look like! >`.
155 BogusComment,
156}
157
/// Specifies how an attribute value is quoted, if at all.
///
/// The tokenizer tracks this while reading a value because the closing delimiter has to match
/// the opening one.
#[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, Debug)]
pub enum AttrValueKind {
    /// An attribute value that is not surrounded by quotes, like `bar` in `foo=bar`.
    Unquoted,
    /// An attribute value that is surrounded by single quotes, like `bar` in `foo='bar'`.
    SingleQuoted,
    /// An attribute value that is surrounded by double quotes, like `bar` in `foo="bar"`.
    DoubleQuoted,
}