use crate::{datum_error, DatumChar, DatumCharClass, DatumOffset, DatumPipe, DatumResult};
/// Kind of a token produced by the tokenizer.
///
/// The kind is delivered via a `Token` action once the token is complete.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DatumTokenType {
    /// A string; completed when the closing string-class character is seen.
    String,
    /// An identifier-like token that starts with a content character,
    /// or a sign character standing on its own.
    Symbol,
    /// An identifier-like token introduced by a special-ID-class character.
    SpecialID,
    /// An identifier-like token that starts with a digit, or with a sign
    /// followed by at least one more identifier character.
    Numeric,
    /// A list-start character.
    ListStart,
    /// A list-end character.
    ListEnd,
}
/// Internal state of the tokenizer between two input characters.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DatumTokenizerState {
    /// Not inside any token; the next character decides what begins.
    Start,
    /// Inside a line comment; input is discarded until a newline or end of input.
    LineComment,
    /// Inside a string. The offset is where the string was opened and is
    /// reported with the finished `String` token.
    String(DatumOffset),
    /// Inside an identifier-like token. Fields, in order:
    /// the offset where the token began, the token type to report if the
    /// token ends right now, and the token type to report once at least one
    /// further identifier character has been consumed.
    PotentialIdentifier(DatumOffset, DatumTokenType, DatumTokenType),
}
/// Output of the tokenizer, handed to the sink closure one action at a time.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DatumTokenizerAction {
    /// One character belonging to the contents of the token being read.
    Push(char),
    /// The current token is complete and has the given type. Any contents
    /// were delivered by the `Push` actions preceding this one.
    Token(DatumTokenType),
}
/// Tokenizer state machine: consumes classified characters and produces
/// [`DatumTokenizerAction`]s.
///
/// Field 0 is the current state. Field 1 is a flag that `Default` sets to
/// `false`.
// NOTE(review): field 1 is never read or written by the code visible in this
// part of the file -- confirm whether it is still needed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DatumTokenizer(DatumTokenizerState, bool);
impl Default for DatumTokenizer {
fn default() -> Self {
Self(DatumTokenizerState::Start, false)
}
}
impl DatumPipe for DatumTokenizer {
    type Input = DatumChar;
    type Output = DatumTokenizerAction;

    /// Advances the tokenizer by one input step.
    ///
    /// * `at` - offset associated with `chr`; used for `Push` actions and for
    ///   the end-of-input error.
    /// * `chr` - the next character, or `None` at end of input.
    /// * `f` - sink that receives each produced action with its offset.
    ///   `Token` actions for strings and identifier-like tokens carry the
    ///   offset at which that token started.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `f`, and returns an `Interrupted`
    /// error when the input ends inside a string. On error the stored state
    /// is left as it was before the call.
    fn feed<F: FnMut(DatumOffset, DatumTokenizerAction) -> DatumResult<()>>(
        &mut self,
        at: DatumOffset,
        chr: Option<DatumChar>,
        f: &mut F,
    ) -> DatumResult<()> {
        let next_state = match (self.0, chr) {
            // Between tokens: end of input is a no-op, anything else starts fresh.
            (DatumTokenizerState::Start, None) => DatumTokenizerState::Start,
            (DatumTokenizerState::Start, Some(c)) => Self::start_feed(f, at, c)?,

            // Comments run to the end of the line (or of the input).
            (DatumTokenizerState::LineComment, None) => DatumTokenizerState::Start,
            (DatumTokenizerState::LineComment, Some(c)) => match c.class() {
                DatumCharClass::Newline => DatumTokenizerState::Start,
                _ => DatumTokenizerState::LineComment,
            },

            // A string must be closed before the input ends.
            (DatumTokenizerState::String(_), None) => {
                return Err(datum_error!(Interrupted, at, "token1: mid-string eof"));
            }
            (DatumTokenizerState::String(opened_at), Some(c)) => match c.class() {
                DatumCharClass::String => {
                    f(
                        opened_at,
                        DatumTokenizerAction::Token(DatumTokenType::String),
                    )?;
                    DatumTokenizerState::Start
                }
                _ => {
                    f(at, DatumTokenizerAction::Push(c.char()))?;
                    DatumTokenizerState::String(opened_at)
                }
            },

            // End of input terminates the pending identifier-like token.
            (DatumTokenizerState::PotentialIdentifier(began_at, if_ended_now, _), None) => {
                f(began_at, DatumTokenizerAction::Token(if_ended_now))?;
                DatumTokenizerState::Start
            }
            (
                DatumTokenizerState::PotentialIdentifier(began_at, if_ended_now, if_extended),
                Some(c),
            ) => {
                if c.potential_identifier() {
                    // The token grew, so from here on it reports the expanded type.
                    f(at, DatumTokenizerAction::Push(c.char()))?;
                    DatumTokenizerState::PotentialIdentifier(began_at, if_extended, if_extended)
                } else {
                    // The token ended; the terminating character is then
                    // processed as if it arrived in the `Start` state.
                    f(began_at, DatumTokenizerAction::Token(if_ended_now))?;
                    Self::start_feed(f, at, c)?
                }
            }
        };
        // Only commit the transition once every fallible step has succeeded.
        self.0 = next_state;
        Ok(())
    }
}
impl DatumTokenizer {
    /// Handles a character that arrives while no token is in progress.
    ///
    /// Emits at most one action to `f` (always at offset `at`) and returns
    /// the state the tokenizer should move to.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `f`.
    fn start_feed<F: FnMut(DatumOffset, DatumTokenizerAction) -> DatumResult<()>>(
        f: &mut F,
        at: DatumOffset,
        chr: DatumChar,
    ) -> DatumResult<DatumTokenizerState> {
        // Decide, per character class, what (if anything) to emit and where to go next.
        let (emit, next_state) = match chr.class() {
            // Separators produce nothing.
            DatumCharClass::Whitespace | DatumCharClass::Newline => {
                (None, DatumTokenizerState::Start)
            }
            DatumCharClass::LineComment => (None, DatumTokenizerState::LineComment),
            // The opening string character is not part of the contents.
            DatumCharClass::String => (None, DatumTokenizerState::String(at)),
            // List delimiters are complete single-character tokens.
            DatumCharClass::ListStart => (
                Some(DatumTokenizerAction::Token(DatumTokenType::ListStart)),
                DatumTokenizerState::Start,
            ),
            DatumCharClass::ListEnd => (
                Some(DatumTokenizerAction::Token(DatumTokenType::ListEnd)),
                DatumTokenizerState::Start,
            ),
            // The special-ID introducer itself is not pushed.
            DatumCharClass::SpecialID => (
                None,
                DatumTokenizerState::PotentialIdentifier(
                    at,
                    DatumTokenType::SpecialID,
                    DatumTokenType::SpecialID,
                ),
            ),
            DatumCharClass::Content => (
                Some(DatumTokenizerAction::Push(chr.char())),
                DatumTokenizerState::PotentialIdentifier(
                    at,
                    DatumTokenType::Symbol,
                    DatumTokenType::Symbol,
                ),
            ),
            // A lone sign is a symbol; a sign followed by more identifier
            // characters is reported as numeric.
            DatumCharClass::Sign => (
                Some(DatumTokenizerAction::Push(chr.char())),
                DatumTokenizerState::PotentialIdentifier(
                    at,
                    DatumTokenType::Symbol,
                    DatumTokenType::Numeric,
                ),
            ),
            DatumCharClass::Digit => (
                Some(DatumTokenizerAction::Push(chr.char())),
                DatumTokenizerState::PotentialIdentifier(
                    at,
                    DatumTokenType::Numeric,
                    DatumTokenType::Numeric,
                ),
            ),
        };
        if let Some(action) = emit {
            f(at, action)?;
        }
        Ok(next_state)
    }
}