pub(crate) mod machine;
use crate::offset::{Offset, Position};
use crate::reader::{IntoReader, Reader};
use crate::Emitter;
use machine::ControlToken;
#[cfg(feature = "integration-tests")]
pub use machine::State as InternalState;
/// Front-end over the internal tokenizer state machine.
///
/// Type parameters, as constrained by the `impl` blocks in this file:
/// * `R` – the reader the machine consumes input from,
/// * `O` – the offset type used for positions,
/// * `E` – the emitter that receives what the machine produces.
pub struct Tokenizer<R, O, E> {
    /// The state machine; it owns both the reader and the emitter.
    machine: machine::Machine<R, O, E>,
    /// Becomes `true` once the machine has reported end-of-file, after which
    /// the iterator only drains the emitter and then stops.
    eof: bool,
}
impl<R, O, E> Tokenizer<R, O, E>
where
    R: Reader + Position<O>,
    O: Offset,
    E: Emitter<O>,
{
    /// Builds a tokenizer that reads from `reader` and reports to `emitter`.
    ///
    /// `reader` is first converted via [`IntoReader::into_reader`]; the
    /// resulting reader and the emitter are handed to a new state machine.
    /// The end-of-file flag starts out cleared.
    pub fn new<'a>(reader: impl IntoReader<'a, Reader = R>, emitter: E) -> Self {
        let source = reader.into_reader();
        Self {
            machine: machine::Machine::new(source, emitter),
            eof: false,
        }
    }

    /// Forwards the caller's CDATA decision to the state machine.
    ///
    /// The iterator implementation yields [`Event::CdataOpen`] when the
    /// machine asks for this decision. The flag is passed on unchanged to
    /// `machine::handle_cdata_open`.
    ///
    /// NOTE(review): judging by the parameter name, the flag should be `true`
    /// when an adjusted current node exists and is not in the HTML namespace
    /// — confirm against the tree-construction caller.
    pub fn handle_cdata_open(
        &mut self,
        adjusted_current_node_present_and_not_in_html_namespace: bool,
    ) {
        let node_outside_html_namespace = adjusted_current_node_present_and_not_in_html_namespace;
        machine::handle_cdata_open(&mut self.machine, node_outside_html_namespace);
    }

    /// Returns a mutable reference to the emitter owned by the state machine.
    pub fn emitter_mut(&mut self) -> &mut E {
        let Self { machine, .. } = self;
        &mut machine.emitter
    }
}
/// An item produced by iterating over a [`Tokenizer`].
#[derive(Debug, Clone)]
pub enum Event<T> {
    /// A token taken from the emitter.
    Token(T),
    /// The state machine reported a CDATA-open control token and the
    /// iterator handed control back to the caller.
    CdataOpen,
}
/// The tokenizer states that callers may select via `Tokenizer::set_state`.
///
/// Every variant converts into the identically named variant of the internal
/// `machine::State` through the `From` implementation in this module.
///
/// The enum is field-less, so it implements the common value traits
/// (`Clone`, `Copy`, `PartialEq`, `Eq`, `Hash`) in addition to `Debug`. This
/// lets a chosen state be stored, compared and passed to `set_state`
/// repeatedly without rebuilding it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum State {
    /// Converts into `machine::State::Data`.
    Data,
    /// Converts into `machine::State::Plaintext`.
    Plaintext,
    /// Converts into `machine::State::Rcdata`.
    Rcdata,
    /// Converts into `machine::State::Rawtext`.
    Rawtext,
    /// Converts into `machine::State::ScriptData`.
    ScriptData,
}
/// Maps each public [`State`] onto the internal machine state of the same name.
impl From<State> for machine::State {
    fn from(state: State) -> Self {
        // One-to-one mapping; the match has no wildcard arm, so a new `State`
        // variant will not compile until it is mapped here.
        let internal = match state {
            State::Data => Self::Data,
            State::Plaintext => Self::Plaintext,
            State::Rcdata => Self::Rcdata,
            State::Rawtext => Self::Rawtext,
            State::ScriptData => Self::ScriptData,
        };
        internal
    }
}
impl<R, O, E> Tokenizer<R, O, E> {
#[cfg(feature = "integration-tests")]
pub fn set_internal_state(&mut self, state: InternalState) {
self.machine.state = state;
}
pub fn set_state(&mut self, state: State) {
self.machine.state = state.into();
}
}
/// Drives the state machine and yields what the emitter produces.
///
/// Each item is either an [`Event`] or an error returned by
/// `machine::consume` (typed as the reader's error).
impl<O, R, E> Iterator for Tokenizer<R, O, E>
where
    O: Offset,
    R: Reader + Position<O>,
    E: Emitter<O> + Iterator,
{
    type Item = Result<Event<E::Item>, R::Error>;

    /// Returns the next event.
    ///
    /// Tokens already available from the emitter are always handed out first.
    /// Only when the emitter has nothing to offer is the machine stepped
    /// again. Once the machine has reported end-of-file and the emitter is
    /// drained, `None` is returned.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Hand out anything the emitter already has before stepping the machine.
            let buffered = self.machine.emitter.next().map(Event::Token);
            if let Some(event) = buffered {
                return Some(Ok(event));
            }

            // The emitter is empty and the input is exhausted: iteration is over.
            if self.eof {
                return None;
            }

            let control = match machine::consume(&mut self.machine) {
                Ok(control) => control,
                Err(error) => return Some(Err(error)),
            };

            match control {
                // Nothing to report; go around and poll the emitter again.
                ControlToken::Continue => {}
                ControlToken::CdataOpen => return Some(Ok(Event::CdataOpen)),
                ControlToken::Eof => {
                    // Record EOF first, then notify the emitter of the final
                    // reader position; the next loop turn drains what it emits.
                    self.eof = true;
                    let position = self.machine.reader_position();
                    self.machine.emitter.emit_eof(position);
                }
            }
        }
    }
}
impl<R, O, E> Tokenizer<R, O, E> {
    /// Sets the machine's `naively_switch_state` flag to `true`.
    ///
    /// NOTE(review): the effect of the flag lives in the `machine` module and
    /// is not visible here — presumably the machine then switches states by
    /// itself instead of relying on the caller; confirm there.
    pub(crate) fn enable_naive_state_switching(&mut self) {
        let machine = &mut self.machine;
        machine.naively_switch_state = true;
    }

    /// Replaces the machine's stored last start tag name with `last_start_tag`.
    ///
    /// The existing buffer is cleared and refilled rather than replaced.
    /// Only compiled when the `integration-tests` feature is enabled.
    #[cfg(feature = "integration-tests")]
    pub fn set_last_start_tag(&mut self, last_start_tag: &str) {
        let name = &mut self.machine.last_start_tag_name;
        name.clear();
        name.push_str(last_start_tag);
    }
}