use std::convert::Infallible;
use crate::char_validator::CharValidator;
use crate::machine;
use crate::machine_helper::MachineHelper;
use crate::read_helper::ReadHelper;
use crate::utils::ControlToken;
use crate::{DefaultEmitter, Emitter, Readable, Reader};
#[cfg(feature = "integration-tests")]
use crate::State;
/// Drives the state machine in [`machine`] over input pulled from a [`Reader`],
/// collecting the resulting tokens in an [`Emitter`].
///
/// The emitter type defaults to [`DefaultEmitter`]. Tokens are obtained by
/// iterating over the tokenizer.
#[derive(Debug)]
pub struct Tokenizer<R, E = DefaultEmitter>
where
    R: Reader,
    E: Emitter,
{
    /// Set once the state machine has reported end of input; after that the
    /// machine is no longer stepped.
    eof: bool,
    /// Character validation state; flushed into the emitter at end of input.
    pub(crate) validator: CharValidator,
    /// Receives emitted tokens and hands them back out via `pop_token`.
    pub(crate) emitter: E,
    /// Wrapper around the underlying reader.
    pub(crate) reader: ReadHelper<R>,
    /// Holds the state machine's current state, among other bookkeeping.
    pub(crate) machine_helper: MachineHelper,
}
impl<R: Reader> Tokenizer<R> {
    /// Creates a tokenizer over `input` that uses a default-constructed
    /// [`DefaultEmitter`].
    ///
    /// `input` may be any type implementing [`Readable`] whose reader type is `R`.
    pub fn new<'a, S>(input: S) -> Self
    where
        S: Readable<'a, Reader = R>,
    {
        let emitter = DefaultEmitter::default();
        Self::new_with_emitter(input, emitter)
    }
}
impl<R, E> Tokenizer<R, E>
where
    R: Reader,
    E: Emitter,
{
    /// Creates a tokenizer over `input` that reports tokens through the
    /// supplied `emitter`.
    ///
    /// The input is converted with `Readable::to_reader` and wrapped in a
    /// [`ReadHelper`]; the validator and machine helper start from their
    /// `Default` values, and the end-of-input flag starts out cleared.
    pub fn new_with_emitter<'a, S>(input: S, emitter: E) -> Self
    where
        S: Readable<'a, Reader = R>,
    {
        let validator = CharValidator::default();
        let reader = ReadHelper::new(input.to_reader());
        let machine_helper = MachineHelper::default();

        Self {
            eof: false,
            validator,
            emitter,
            reader,
            machine_helper,
        }
    }

    /// Overwrites the state stored in the machine helper with `state`
    /// (converted via `Into`).
    ///
    /// Only compiled when the `integration-tests` feature is enabled.
    #[cfg(feature = "integration-tests")]
    pub fn set_state(&mut self, state: State) {
        let converted = state.into();
        self.machine_helper.state = converted;
    }

    /// Passes `last_start_tag`, viewed as bytes, on to the emitter's
    /// `set_last_start_tag`.
    ///
    /// Only compiled when the `integration-tests` feature is enabled.
    #[cfg(feature = "integration-tests")]
    pub fn set_last_start_tag(&mut self, last_start_tag: Option<&str>) {
        let tag_bytes = last_start_tag.map(|tag| tag.as_bytes());
        self.emitter.set_last_start_tag(tag_bytes);
    }
}
impl<R, E> Iterator for Tokenizer<R, E>
where
    R: Reader,
    E: Emitter,
{
    type Item = Result<E::Token, R::Error>;

    /// Produces the next token, stepping the state machine as needed.
    ///
    /// Tokens already queued in the emitter are returned first. Otherwise the
    /// machine is stepped until a token becomes available. An error returned
    /// by `machine::consume` is yielded as `Some(Err(_))`. Once end of input
    /// has been seen and the emitter has no more tokens, `None` is returned.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Drain whatever the emitter has queued before doing more work.
            if let Some(token) = self.emitter.pop_token() {
                return Some(Ok(token));
            }

            // Nothing queued and the input is exhausted: iteration is over.
            if self.eof {
                return None;
            }

            match machine::consume(self) {
                Err(error) => return Some(Err(error)),
                Ok(ControlToken::Continue) => {}
                Ok(ControlToken::Eof) => {
                    // Flush any pending character error before the emitter
                    // is told about end of input; the loop then drains
                    // whatever tokens this produced.
                    self.validator.flush_character_error(&mut self.emitter);
                    self.eof = true;
                    self.emitter.emit_eof();
                }
            }
        }
    }
}
#[derive(Debug)]
pub struct InfallibleTokenizer<R: Reader<Error = Infallible>, E: Emitter>(Tokenizer<R, E>);
impl<R: Reader<Error = Infallible>, E: Emitter> Tokenizer<R, E> {
pub fn infallible(self) -> InfallibleTokenizer<R, E> {
InfallibleTokenizer(self)
}
}
impl<R: Reader<Error = Infallible>, E: Emitter> Iterator for InfallibleTokenizer<R, E> {
type Item = E::Token;
fn next(&mut self) -> Option<Self::Item> {
match self.0.next()? {
Ok(token) => Some(token),
Err(e) => match e {},
}
}
}