// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::borrow::Cow;
use crate::tendril::StrTendril;
use crate::{Attribute, QualName};
pub use self::TagKind::{EmptyTag, EndTag, ShortTag, StartTag};
pub use self::Token::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
pub use self::Token::{CommentToken, DoctypeToken, PIToken, TagToken};
use super::states;
/// The kind of tag that was encountered.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
pub enum TagKind {
/// Start tag (e.g. `<a>`).
StartTag,
/// End tag (e.g. `</a>`).
EndTag,
/// Empty tag (e.g. `<a/>`).
EmptyTag,
/// Short tag (e.g. `</>`).
ShortTag,
}
/// An XML5 tag token.
///
/// The derived `PartialEq` compares `attrs` element by element, so two tags
/// whose attributes are listed in a different order are not `==`. Use
/// `equiv_modulo_attr_order` for an order-insensitive comparison.
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct Tag {
/// Which kind of tag was encountered.
/// E.g. if the parser parsed `</a>`, the kind would be `EndTag`.
pub kind: TagKind,
/// Qualified name of the tag.
pub name: QualName,
/// List of attributes attached to this tag.
/// Only valid in start and empty tags.
pub attrs: Vec<Attribute>,
}
impl Tag {
    /// Returns `true` if `self` and `other` are the same tag, ignoring the
    /// order in which their attributes are listed.
    ///
    /// Two tags are considered equivalent when they have the same kind, the
    /// same qualified name, and the same attributes (with the same
    /// multiplicity). Neither tag is modified: the comparison sorts private
    /// copies of the two attribute lists.
    pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool {
        if self.kind != other.kind || self.name != other.name {
            return false;
        }
        // Attribute lists of different lengths can never match, so bail out
        // before paying for two clones and two sorts.
        if self.attrs.len() != other.attrs.len() {
            return false;
        }

        let mut self_attrs = self.attrs.clone();
        let mut other_attrs = other.attrs.clone();
        self_attrs.sort();
        other_attrs.sort();
        self_attrs == other_attrs
    }
}
/// A `DOCTYPE` token.
///
/// The doctype token in XML5 is deliberately limited, for reasons such as
/// security and simplicity: XML5 only supports declaring a DTD with a
/// name, a public identifier and a system identifier.
///
/// All three fields are optional; the derived `Default` leaves each of
/// them as `None`.
#[derive(PartialEq, Eq, Clone, Debug, Default)]
pub struct Doctype {
/// Name of the declared DOCTYPE, if any.
pub name: Option<StrTendril>,
/// Public identifier of this DOCTYPE, if any.
pub public_id: Option<StrTendril>,
/// System identifier of this DOCTYPE, if any.
pub system_id: Option<StrTendril>,
}
/// A processing instruction token.
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct Pi {
/// Target (name) of the processing instruction.
pub target: StrTendril,
/// Text (data) of the processing instruction.
pub data: StrTendril,
}
/// The tokens that can be encountered while parsing input.
#[derive(PartialEq, Eq, Debug)]
pub enum Token {
/// A `DOCTYPE` token.
DoctypeToken(Doctype),
/// A tag token. This variant covers all possible kinds of tags
/// (start, end, empty, short); see the `kind` field of `Tag`.
TagToken(Tag),
/// A processing instruction token.
PIToken(Pi),
/// A comment token, carrying the comment text.
CommentToken(StrTendril),
/// A token that represents a series of characters.
CharacterTokens(StrTendril),
/// End of file was reached.
EOFToken,
/// A null character was encountered.
NullCharacterToken,
/// An error occurred; carries the accompanying text.
ParseError(Cow<'static, str>),
}
/// Types which can receive tokens from the tokenizer.
///
/// Every method takes `&self`, so an implementation that needs to update
/// its own state while handling tokens cannot do so through `&mut self`.
pub trait TokenSink {
/// Process a token. The token is passed by value, so the sink takes
/// ownership of it.
fn process_token(&self, token: Token);
/// Signal to the sink that parsing has ended.
/// The default implementation does nothing.
fn end(&self) {}
/// The tokenizer will call this after emitting any start tag.
/// This allows the tree builder to change the tokenizer's state.
/// By default no state changes occur: the default implementation
/// returns `None`.
fn query_state_change(&self) -> Option<states::XmlState> {
None
}
}