pub struct Tokenizer<Sink> {Show 22 fields
opts: TokenizerOpts,
pub sink: Sink,
state: Cell<State>,
at_eof: Cell<bool>,
char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>,
current_char: Cell<char>,
reconsume: Cell<bool>,
ignore_lf: Cell<bool>,
discard_bom: Cell<bool>,
current_tag_kind: Cell<TagKind>,
current_tag_name: RefCell<StrTendril>,
current_tag_self_closing: Cell<bool>,
current_tag_attrs: RefCell<Vec<Attribute>>,
current_attr_name: RefCell<StrTendril>,
current_attr_value: RefCell<StrTendril>,
current_comment: RefCell<StrTendril>,
current_doctype: RefCell<Doctype>,
last_start_tag_name: RefCell<Option<LocalName>>,
temp_buf: RefCell<StrTendril>,
state_profile: RefCell<BTreeMap<State, u64>>,
time_in_sink: Cell<u64>,
current_line: Cell<u64>,
}
Description
The HTML tokenizer.
Fields
§opts: TokenizerOpts
Options controlling the behavior of the tokenizer.
sink: Sink
Destination for tokens we emit.
state: Cell<State>
The abstract machine state as described in the spec.
at_eof: Cell<bool>
Are we at the end of the file, once buffers have been processed completely? This affects whether we will wait for lookahead or not.
char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>
Tokenizer for character references, if we’re tokenizing one at the moment.
current_char: Cell<char>
Current input character. Just consumed, may reconsume.
reconsume: Cell<bool>
Should we reconsume the current input character?
ignore_lf: Cell<bool>
Did we just consume \r, translating it to \n? In that case we need to ignore the next character if it’s \n.
discard_bom: Cell<bool>
Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the beginning of the stream.
current_tag_kind: Cell<TagKind>
Current tag kind.
current_tag_name: RefCell<StrTendril>
Current tag name.
current_tag_self_closing: Cell<bool>
Current tag is self-closing?
current_tag_attrs: RefCell<Vec<Attribute>>
Current tag attributes.
current_attr_name: RefCell<StrTendril>
Current attribute name.
current_attr_value: RefCell<StrTendril>
Current attribute value.
current_comment: RefCell<StrTendril>
Current comment.
current_doctype: RefCell<Doctype>
Current doctype token.
last_start_tag_name: RefCell<Option<LocalName>>
Last start tag name, for use in checking “appropriate end tag”.
temp_buf: RefCell<StrTendril>
The “temporary buffer” mentioned in the spec.
state_profile: RefCell<BTreeMap<State, u64>>
Record of how many ns we spent in each state, if profiling is enabled.
time_in_sink: Cell<u64>
Record of how many ns we spent in the token sink.
current_line: Cell<u64>
Track the current line number of the input.
Implementations§
impl<Sink: TokenSink> Tokenizer<Sink>
pub fn new(sink: Sink, opts: TokenizerOpts) -> Tokenizer<Sink>
Create a new tokenizer which feeds tokens to a particular TokenSink.
pub fn feed(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle>
Feed an input string into the tokenizer.
pub fn set_plaintext_state(&self)
fn process_token(&self, token: Token) -> TokenSinkResult<Sink::Handle>
fn process_token_and_continue(&self, token: Token)
fn get_preprocessed_char(&self, c: char, input: &BufferQueue) -> Option<char>
fn get_char(&self, input: &BufferQueue) -> Option<char>
fn pop_except_from(&self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult>
fn eat(&self, input: &BufferQueue, pat: &str, eq: fn(_: &u8, _: &u8) -> bool) -> Option<bool>
fn run(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle>
Run the state machine for as long as we can.