pub struct Tokenizer<Sink> {
opts: TokenizerOpts,
pub sink: Sink,
state: State,
at_eof: bool,
char_ref_tokenizer: Option<Box<CharRefTokenizer>>,
current_char: char,
reconsume: bool,
ignore_lf: bool,
discard_bom: bool,
current_tag_kind: TagKind,
current_tag_name: StrTendril,
current_tag_self_closing: bool,
current_tag_attrs: Vec<Attribute>,
current_attr_name: StrTendril,
current_attr_value: StrTendril,
current_comment: StrTendril,
current_doctype: Doctype,
last_start_tag_name: Option<LocalName>,
temp_buf: StrTendril,
state_profile: BTreeMap<State, u64>,
time_in_sink: u64,
current_line: u64,
}
The HTML tokenizer.
Fields
opts: TokenizerOpts
Options controlling the behavior of the tokenizer.
sink: Sink
Destination for tokens we emit.
state: State
The abstract machine state as described in the spec.
at_eof: bool
Are we at the end of the file, once buffers have been processed completely? This affects whether we will wait for lookahead or not.
char_ref_tokenizer: Option<Box<CharRefTokenizer>>
Tokenizer for character references, if we’re tokenizing one at the moment.
current_char: char
Current input character. Just consumed, may reconsume.
reconsume: bool
Should we reconsume the current input character?
ignore_lf: bool
Did we just consume \r, translating it to \n? In that case we need to ignore the next character if it’s \n.
discard_bom: bool
Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the beginning of the stream.
current_tag_kind: TagKind
Current tag kind.
current_tag_name: StrTendril
Current tag name.
current_tag_self_closing: bool
Current tag is self-closing?
current_tag_attrs: Vec<Attribute>
Current tag attributes.
current_attr_name: StrTendril
Current attribute name.
current_attr_value: StrTendril
Current attribute value.
current_comment: StrTendril
Current comment.
current_doctype: Doctype
Current doctype token.
last_start_tag_name: Option<LocalName>
Last start tag name, for use in checking “appropriate end tag”.
temp_buf: StrTendril
The “temporary buffer” mentioned in the spec.
state_profile: BTreeMap<State, u64>
Record of how many ns we spent in each state, if profiling is enabled.
time_in_sink: u64
Record of how many ns we spent in the token sink.
current_line: u64
Track current line
Implementations§
impl<Sink: TokenSink> Tokenizer<Sink>
pub fn new(sink: Sink, opts: TokenizerOpts) -> Tokenizer<Sink>
Create a new tokenizer which feeds tokens to a particular TokenSink.
pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle>
Feed an input string into the tokenizer.
pub fn set_plaintext_state(&mut self)
fn process_token(&mut self, token: Token) -> TokenSinkResult<Sink::Handle>
fn process_token_and_continue(&mut self, token: Token)
fn get_preprocessed_char(&mut self, c: char, input: &mut BufferQueue) -> Option<char>
fn get_char(&mut self, input: &mut BufferQueue) -> Option<char>
fn pop_except_from(&mut self, input: &mut BufferQueue, set: SmallCharSet) -> Option<SetResult>
fn eat(&mut self, input: &mut BufferQueue, pat: &str, eq: fn(_: &u8, _: &u8) -> bool) -> Option<bool>
fn run(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle>
Run the state machine for as long as we can.