Struct html5ever::tokenizer::Tokenizer

source ·
pub struct Tokenizer<Sink> {
    opts: TokenizerOpts,
    pub sink: Sink,
    state: Cell<State>,
    at_eof: Cell<bool>,
    char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>,
    current_char: Cell<char>,
    reconsume: Cell<bool>,
    ignore_lf: Cell<bool>,
    discard_bom: Cell<bool>,
    current_tag_kind: Cell<TagKind>,
    current_tag_name: RefCell<StrTendril>,
    current_tag_self_closing: Cell<bool>,
    current_tag_attrs: RefCell<Vec<Attribute>>,
    current_attr_name: RefCell<StrTendril>,
    current_attr_value: RefCell<StrTendril>,
    current_comment: RefCell<StrTendril>,
    current_doctype: RefCell<Doctype>,
    last_start_tag_name: RefCell<Option<LocalName>>,
    temp_buf: RefCell<StrTendril>,
    state_profile: RefCell<BTreeMap<State, u64>>,
    time_in_sink: Cell<u64>,
    current_line: Cell<u64>,
}
Expand description

The HTML tokenizer.

Fields§

§opts: TokenizerOpts

Options controlling the behavior of the tokenizer.

§sink: Sink

Destination for tokens we emit.

§state: Cell<State>

The abstract machine state as described in the spec.

§at_eof: Cell<bool>

Are we at the end of the file, once buffers have been processed completely? This affects whether we will wait for lookahead or not.

§char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>

Tokenizer for character references, if we’re tokenizing one at the moment.

§current_char: Cell<char>

Current input character. Just consumed, may reconsume.

§reconsume: Cell<bool>

Should we reconsume the current input character?

§ignore_lf: Cell<bool>

Did we just consume \r, translating it to \n? In that case we need to ignore the next character if it’s \n.

§discard_bom: Cell<bool>

Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the beginning of the stream.

§current_tag_kind: Cell<TagKind>

Current tag kind.

§current_tag_name: RefCell<StrTendril>

Current tag name.

§current_tag_self_closing: Cell<bool>

Current tag is self-closing?

§current_tag_attrs: RefCell<Vec<Attribute>>

Current tag attributes.

§current_attr_name: RefCell<StrTendril>

Current attribute name.

§current_attr_value: RefCell<StrTendril>

Current attribute value.

§current_comment: RefCell<StrTendril>

Current comment.

§current_doctype: RefCell<Doctype>

Current doctype token.

§last_start_tag_name: RefCell<Option<LocalName>>

Last start tag name, for use in checking “appropriate end tag”.

§temp_buf: RefCell<StrTendril>

The “temporary buffer” mentioned in the spec.

§state_profile: RefCell<BTreeMap<State, u64>>

Record of how many ns we spent in each state, if profiling is enabled.

§time_in_sink: Cell<u64>

Record of how many ns we spent in the token sink.

§current_line: Cell<u64>

Tracks the current line.

Implementations§

source§

impl<Sink: TokenSink> Tokenizer<Sink>

source

pub fn new(sink: Sink, opts: TokenizerOpts) -> Tokenizer<Sink>

Create a new tokenizer which feeds tokens to a particular TokenSink.

source

pub fn feed(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle>

Feed input from a BufferQueue into the tokenizer.

source

pub fn set_plaintext_state(&self)

source

fn process_token(&self, token: Token) -> TokenSinkResult<Sink::Handle>

source

fn process_token_and_continue(&self, token: Token)

source

fn get_preprocessed_char(&self, c: char, input: &BufferQueue) -> Option<char>

source

fn get_char(&self, input: &BufferQueue) -> Option<char>

source

fn pop_except_from(&self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult>

source

fn eat(&self, input: &BufferQueue, pat: &str, eq: fn(_: &u8, _: &u8) -> bool) -> Option<bool>

source

fn run(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle>

Run the state machine for as long as we can.

source

fn bad_char_error(&self)

source

fn bad_eof_error(&self)

source

fn emit_char(&self, c: char)

source

fn emit_chars(&self, b: StrTendril)

source

fn emit_current_tag(&self) -> ProcessResult<Sink::Handle>

source

fn emit_temp_buf(&self)

source

fn clear_temp_buf(&self)

source

fn emit_current_comment(&self)

source

fn discard_tag(&self)

source

fn create_tag(&self, kind: TagKind, c: char)

source

fn have_appropriate_end_tag(&self) -> bool

source

fn create_attribute(&self, c: char)

source

fn finish_attribute(&self)

source

fn emit_current_doctype(&self)

source

fn doctype_id(&self, kind: DoctypeIdKind) -> RefMut<'_, Option<StrTendril>>

source

fn clear_doctype_id(&self, kind: DoctypeIdKind)

source

fn consume_char_ref(&self)

source

fn emit_eof(&self)

source

fn peek(&self, input: &BufferQueue) -> Option<char>

source

fn discard_char(&self, input: &BufferQueue)

source

fn emit_error(&self, error: Cow<'static, str>)

source§

impl<Sink: TokenSink> Tokenizer<Sink>

source

fn step(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle>

source

fn step_char_ref_tokenizer(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle>

source

fn process_char_ref(&self, char_ref: CharRef)

source

pub fn end(&self)

Indicate that we have reached the end of the input.

source

fn dump_profile(&self)

source

fn eof_step(&self) -> ProcessResult<Sink::Handle>

Auto Trait Implementations§

§

impl<Sink> !Freeze for Tokenizer<Sink>

§

impl<Sink> !RefUnwindSafe for Tokenizer<Sink>

§

impl<Sink> !Send for Tokenizer<Sink>

§

impl<Sink> !Sync for Tokenizer<Sink>

§

impl<Sink> Unpin for Tokenizer<Sink>
where Sink: Unpin,

§

impl<Sink> UnwindSafe for Tokenizer<Sink>
where Sink: UnwindSafe,

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self.
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value.
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value.
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.