pub struct Tokenizer<Sink> {
    opts: TokenizerOpts,
    pub sink: Sink,
    state: State,
    at_eof: bool,
    char_ref_tokenizer: Option<Box<CharRefTokenizer>>,
    current_char: char,
    reconsume: bool,
    ignore_lf: bool,
    discard_bom: bool,
    current_tag_kind: TagKind,
    current_tag_name: StrTendril,
    current_tag_self_closing: bool,
    current_tag_attrs: Vec<Attribute>,
    current_attr_name: StrTendril,
    current_attr_value: StrTendril,
    current_comment: StrTendril,
    current_doctype: Doctype,
    last_start_tag_name: Option<LocalName>,
    temp_buf: StrTendril,
    state_profile: BTreeMap<State, u64>,
    time_in_sink: u64,
    current_line: u64,
}
Expand description

The HTML tokenizer.

Fields§

§opts: TokenizerOpts

Options controlling the behavior of the tokenizer.

§sink: Sink

Destination for tokens we emit.

§state: State

The abstract machine state as described in the spec.

§at_eof: bool

Are we at the end of the file, once buffers have been processed completely? This affects whether we will wait for lookahead or not.

§char_ref_tokenizer: Option<Box<CharRefTokenizer>>

Tokenizer for character references, if we’re tokenizing one at the moment.

§current_char: char

Current input character. Just consumed, may reconsume.

§reconsume: bool

Should we reconsume the current input character?

§ignore_lf: bool

Did we just consume \r, translating it to \n? In that case we need to ignore the next character if it’s \n.

§discard_bom: bool

Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the beginning of the stream.

§current_tag_kind: TagKind

Current tag kind.

§current_tag_name: StrTendril

Current tag name.

§current_tag_self_closing: bool

Current tag is self-closing?

§current_tag_attrs: Vec<Attribute>

Current tag attributes.

§current_attr_name: StrTendril

Current attribute name.

§current_attr_value: StrTendril

Current attribute value.

§current_comment: StrTendril

Current comment.

§current_doctype: Doctype

Current doctype token.

§last_start_tag_name: Option<LocalName>

Last start tag name, for use in checking “appropriate end tag”.

§temp_buf: StrTendril

The “temporary buffer” mentioned in the spec.

§state_profile: BTreeMap<State, u64>

Record of how many nanoseconds we spent in each state, if profiling is enabled.

§time_in_sink: u64

Record of how many nanoseconds we spent in the token sink.

§current_line: u64

Tracks the current line number.

Implementations§

source§

impl<Sink: TokenSink> Tokenizer<Sink>

source

pub fn new(sink: Sink, opts: TokenizerOpts) -> Tokenizer<Sink>

Create a new tokenizer which feeds tokens to a particular TokenSink.

source

pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle>

Feed an input string into the tokenizer.

source

pub fn set_plaintext_state(&mut self)

source

fn process_token(&mut self, token: Token) -> TokenSinkResult<Sink::Handle>

source

fn process_token_and_continue(&mut self, token: Token)

source

fn get_preprocessed_char( &mut self, c: char, input: &mut BufferQueue ) -> Option<char>

source

fn get_char(&mut self, input: &mut BufferQueue) -> Option<char>

source

fn pop_except_from( &mut self, input: &mut BufferQueue, set: SmallCharSet ) -> Option<SetResult>

source

fn eat( &mut self, input: &mut BufferQueue, pat: &str, eq: fn(_: &u8, _: &u8) -> bool ) -> Option<bool>

source

fn run(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle>

Run the state machine for as long as we can.

source

fn bad_char_error(&mut self)

source

fn bad_eof_error(&mut self)

source

fn emit_char(&mut self, c: char)

source

fn emit_chars(&mut self, b: StrTendril)

source

fn emit_current_tag(&mut self) -> ProcessResult<Sink::Handle>

source

fn emit_temp_buf(&mut self)

source

fn clear_temp_buf(&mut self)

source

fn emit_current_comment(&mut self)

source

fn discard_tag(&mut self)

source

fn create_tag(&mut self, kind: TagKind, c: char)

source

fn have_appropriate_end_tag(&self) -> bool

source

fn create_attribute(&mut self, c: char)

source

fn finish_attribute(&mut self)

source

fn emit_current_doctype(&mut self)

source

fn doctype_id(&mut self, kind: DoctypeIdKind) -> &mut Option<StrTendril>

source

fn clear_doctype_id(&mut self, kind: DoctypeIdKind)

source

fn consume_char_ref(&mut self, addnl_allowed: Option<char>)

source

fn emit_eof(&mut self)

source

fn peek(&mut self, input: &BufferQueue) -> Option<char>

source

fn discard_char(&mut self, input: &mut BufferQueue)

source

fn emit_error(&mut self, error: Cow<'static, str>)

source§

impl<Sink: TokenSink> Tokenizer<Sink>

source

fn step(&mut self, input: &mut BufferQueue) -> ProcessResult<Sink::Handle>

source

fn step_char_ref_tokenizer( &mut self, input: &mut BufferQueue ) -> ProcessResult<Sink::Handle>

source

fn process_char_ref(&mut self, char_ref: CharRef)

source

pub fn end(&mut self)

Indicate that we have reached the end of the input.

source

fn dump_profile(&self)

source

fn eof_step(&mut self) -> ProcessResult<Sink::Handle>

Auto Trait Implementations§

§

impl<Sink> !RefUnwindSafe for Tokenizer<Sink>

§

impl<Sink> !Send for Tokenizer<Sink>

§

impl<Sink> !Sync for Tokenizer<Sink>

§

impl<Sink> Unpin for Tokenizer<Sink> where Sink: Unpin,

§

impl<Sink> UnwindSafe for Tokenizer<Sink> where Sink: UnwindSafe,

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.