Struct xml5ever::tokenizer::XmlTokenizer

source ·
pub struct XmlTokenizer<Sink> {
    // 21 fields
    opts: XmlTokenizerOpts,
    pub sink: Sink,
    state: Cell<XmlState>,
    at_eof: Cell<bool>,
    char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>,
    current_char: Cell<char>,
    reconsume: Cell<bool>,
    ignore_lf: Cell<bool>,
    discard_bom: Cell<bool>,
    temp_buf: RefCell<StrTendril>,
    current_tag_kind: Cell<TagKind>,
    current_tag_name: RefCell<StrTendril>,
    current_tag_attrs: RefCell<Vec<Attribute>>,
    current_attr_name: RefCell<StrTendril>,
    current_attr_value: RefCell<StrTendril>,
    current_doctype: RefCell<Doctype>,
    current_comment: RefCell<StrTendril>,
    current_pi_target: RefCell<StrTendril>,
    current_pi_data: RefCell<StrTendril>,
    state_profile: RefCell<BTreeMap<XmlState, u64>>,
    time_in_sink: Cell<u64>,
}
Expand description

The XML tokenizer.

Fields§

§opts: XmlTokenizerOpts

Options controlling the behavior of the tokenizer.

§sink: Sink

Destination for tokens we emit.

§state: Cell<XmlState>

The abstract machine state as described in the spec.

§at_eof: Cell<bool>

Are we at the end of the file, once buffers have been processed completely? This affects whether we will wait for lookahead or not.

§char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>

Tokenizer for character references, if we’re tokenizing one at the moment.

§current_char: Cell<char>

Current input character. Just consumed, may reconsume.

§reconsume: Cell<bool>

Should we reconsume the current input character?

§ignore_lf: Cell<bool>

Did we just consume \r, translating it to \n? In that case we need to ignore the next character if it’s \n.

§discard_bom: Cell<bool>

Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the beginning of the stream.

§temp_buf: RefCell<StrTendril>

Temporary buffer.

§current_tag_kind: Cell<TagKind>

Current tag kind.

§current_tag_name: RefCell<StrTendril>

Current tag name.

§current_tag_attrs: RefCell<Vec<Attribute>>

Current tag attributes.

§current_attr_name: RefCell<StrTendril>

Current attribute name.

§current_attr_value: RefCell<StrTendril>

Current attribute value.

§current_doctype: RefCell<Doctype>

Current doctype.

§current_comment: RefCell<StrTendril>

Current comment.

§current_pi_target: RefCell<StrTendril>

Current processing instruction target.

§current_pi_data: RefCell<StrTendril>

Current processing instruction value.

§state_profile: RefCell<BTreeMap<XmlState, u64>>

Record of how many nanoseconds we spent in each state, if profiling is enabled.

§time_in_sink: Cell<u64>

Record of how many nanoseconds we spent in the token sink.

Implementations§

source§

impl<Sink: TokenSink> XmlTokenizer<Sink>

source

pub fn new(sink: Sink, opts: XmlTokenizerOpts) -> XmlTokenizer<Sink>

Create a new tokenizer which feeds tokens to a particular TokenSink.

source

pub fn feed(&self, input: &BufferQueue)

Feed an input string into the tokenizer.

source

fn process_token(&self, token: Token)

source

fn get_preprocessed_char(&self, c: char, input: &BufferQueue) -> Option<char>

source

fn bad_eof_error(&self)

source

fn pop_except_from( &self, input: &BufferQueue, set: SmallCharSet, ) -> Option<SetResult>

source

fn eat(&self, input: &BufferQueue, pat: &str) -> Option<bool>

source

pub fn run(&self, input: &BufferQueue)

Run the state machine for as long as we can.

source

fn get_char(&self, input: &BufferQueue) -> Option<char>

source

fn bad_char_error(&self)

source

fn discard_tag(&self)

source

fn create_tag(&self, kind: TagKind, c: char)

source

fn create_pi(&self, c: char)

source

fn emit_char(&self, c: char)

source

fn emit_short_tag(&self)

source

fn emit_empty_tag(&self)

source

fn set_empty_tag(&self)

source

fn emit_start_tag(&self)

source

fn emit_current_tag(&self)

source

fn emit_chars(&self, b: StrTendril)

source

fn emit_pi(&self)

source

fn consume_char_ref(&self, addnl_allowed: Option<char>)

source

fn emit_eof(&self)

source

fn emit_error(&self, error: Cow<'static, str>)

source

fn emit_current_comment(&self)

source

fn emit_current_doctype(&self)

source

fn doctype_id(&self, kind: DoctypeKind) -> RefMut<'_, Option<StrTendril>>

source

fn clear_doctype_id(&self, kind: DoctypeKind)

source

fn peek(&self, input: &BufferQueue) -> Option<char>

source

fn discard_char(&self, input: &BufferQueue)

source

fn unconsume(&self, input: &BufferQueue, buf: StrTendril)

source§

impl<Sink: TokenSink> XmlTokenizer<Sink>

source

fn step(&self, input: &BufferQueue) -> bool

source

pub fn end(&self)

Indicate that we have reached the end of the input.

source

fn dump_profile(&self)

source

fn eof_step(&self) -> bool

source

fn process_char_ref(&self, char_ref: CharRef)

source

fn step_char_ref_tokenizer(&self, input: &BufferQueue) -> bool

source

fn finish_attribute(&self)

source

fn create_attribute(&self, c: char)

Auto Trait Implementations§

§

impl<Sink> !Freeze for XmlTokenizer<Sink>

§

impl<Sink> !RefUnwindSafe for XmlTokenizer<Sink>

§

impl<Sink> !Send for XmlTokenizer<Sink>

§

impl<Sink> !Sync for XmlTokenizer<Sink>

§

impl<Sink> Unpin for XmlTokenizer<Sink>
where Sink: Unpin,

§

impl<Sink> UnwindSafe for XmlTokenizer<Sink>
where Sink: UnwindSafe,

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.