use std::str::CharIndices;
use crate::{Token, TokenStream, Tokenizer};
/// Tokenizer that cuts text into maximal runs of alphanumeric characters.
///
/// Every character for which `char::is_alphanumeric` is false acts as a
/// separator and never appears in a token.
#[derive(Clone, Default)]
pub struct SimpleTokenizer {
/// Token storage owned by the tokenizer and lent mutably to each stream
/// created by `token_stream`, so the same `Token` is reused across calls.
token: Token,
}
/// Stream of tokens over a single input string, created by
/// `SimpleTokenizer::token_stream`.
pub struct SimpleTokenStream<'a> {
/// The complete input; token text is sliced out of it by byte offset.
text: &'a str,
/// Cursor over `text` yielding `(byte offset, char)` pairs. It only moves
/// forward: characters already scanned are never revisited.
chars: CharIndices<'a>,
/// Token borrowed from the tokenizer; overwritten in place on each `advance`.
token: &'a mut Token,
}
impl Tokenizer for SimpleTokenizer {
    type TokenStream<'a> = SimpleTokenStream<'a>;

    /// Starts tokenizing `text`.
    ///
    /// The tokenizer's own `Token` is reset and then lent to the returned
    /// stream, which is why the stream borrows `self` mutably for as long as
    /// it lives.
    fn token_stream<'a>(&'a mut self, text: &'a str) -> Self::TokenStream<'a> {
        let token = &mut self.token;
        // NOTE(review): `reset` presumably restores the token's initial
        // state (offsets, position, text) — confirm in `Token::reset`.
        token.reset();
        let chars = text.char_indices();
        SimpleTokenStream { text, chars, token }
    }
}
impl SimpleTokenStream<'_> {
    /// Consumes characters from the cursor until the current token ends and
    /// returns the byte offset of that end.
    ///
    /// The end is the offset of the first non-alphanumeric character that is
    /// encountered, or `self.text.len()` when the input runs out first. The
    /// terminating character itself is consumed as well, so the next scan
    /// resumes right after it.
    fn search_token_end(&mut self) -> usize {
        // `find` takes the iterator by `&mut`, so the shared cursor advances
        // in place and stops immediately after the first delimiter.
        self.chars
            .find(|(_, c)| !c.is_alphanumeric())
            .map_or(self.text.len(), |(offset, _)| offset)
    }
}
impl TokenStream for SimpleTokenStream<'_> {
    /// Moves the stream to the next run of alphanumeric characters.
    ///
    /// Returns `true` when a token was found; its text and its byte offsets
    /// into the input are then stored in the borrowed token. Returns `false`
    /// once the input holds no further alphanumeric character.
    ///
    /// The token text is cleared and the position is incremented on every
    /// call, including the one that returns `false`.
    fn advance(&mut self) -> bool {
        self.token.text.clear();
        // Wrapping add cannot panic on overflow.
        // NOTE(review): presumably `Token::reset` leaves `position` at the
        // maximum value so that the first token lands on 0 — confirm.
        self.token.position = self.token.position.wrapping_add(1);

        // Skip separators up to the first alphanumeric character, if any.
        let start = match self.chars.find(|(_, c)| c.is_alphanumeric()) {
            Some((offset, _)) => offset,
            None => return false,
        };
        let end = self.search_token_end();

        let token = &mut *self.token;
        token.offset_from = start;
        token.offset_to = end;
        token.text.push_str(&self.text[start..end]);
        true
    }

    /// Shared access to the current token.
    fn token(&self) -> &Token {
        &*self.token
    }

    /// Mutable access to the current token.
    fn token_mut(&mut self) -> &mut Token {
        &mut *self.token
    }
}