use std::mem;
/// Multi-fragment tokenizer: holds one `InternalTokenizer` per input
/// fragment and stitches their token streams together, reporting a
/// `Break` token at each fragment boundary.
pub(crate) struct Tokenizer<'a> {
    /// One sub-tokenizer per fragment, in input order.
    internal: Vec<InternalTokenizer<'a>>,
    /// Index into `internal` of the fragment currently being tokenized.
    active: usize,
    /// Set when `active` was just advanced past an exhausted fragment;
    /// while set, `peek`/`next_token` report a `Break` token.
    switched: bool,
}
/// Tokenizer over a single input fragment.
struct InternalTokenizer<'a> {
    /// Remaining (unconsumed) input; shrinks from the front as tokens are produced.
    input: &'a str,
    /// One-token lookahead buffer filled by `populate`.
    next: Option<Token>,
    /// Fragment starting offset plus the fragment's original length;
    /// subtracting the remaining `input.len()` yields the current position.
    orig_len: usize,
    /// How many times `Eof` has been produced; asserted to stay below 2.
    returned_eof: usize,
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct Token {
    /// What was lexed (keyword, punctuation, or a plain character).
    pub(crate) kind: TokenKind,
    /// Where in the original input the token came from.
    pub(crate) span: TokenSpan,
}

impl Token {
    /// The token's source location.
    #[must_use]
    pub fn span(&self) -> TokenSpan {
        let Token { span, .. } = *self;
        span
    }

    /// The token's classification.
    #[must_use]
    pub fn kind(&self) -> TokenKind {
        let Token { kind, .. } = *self;
        kind
    }
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct TokenSpan {
    /// Offset of the span's first position in the original input.
    pub(crate) start: usize,
    /// Offset of the span's end position in the original input.
    pub(crate) stop: usize,
}

impl TokenSpan {
    /// Start offset of the span.
    #[must_use]
    pub fn start(&self) -> usize {
        let TokenSpan { start, .. } = *self;
        start
    }

    /// End offset of the span.
    #[must_use]
    pub fn stop(&self) -> usize {
        let TokenSpan { stop, .. } = *self;
        stop
    }
}
impl std::fmt::Display for Token {
    /// A token displays as its kind alone; the span is not shown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let kind = self.kind;
        write!(f, "{kind}")
    }
}
/// The kinds of token this tokenizer produces.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub enum TokenKind {
    /// The `AND` keyword.
    And,
    /// The `OR` keyword.
    Or,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// `:`
    Colon,
    /// Any other single character.
    Char(char),
    /// End of a fragment's input.
    Eof,
    /// Boundary between two input fragments.
    Break,
}

impl std::fmt::Display for TokenKind {
    /// Formats as `Token::<Variant>`, e.g. `Token::And` or `Token::Char('x')`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Char` carries a payload and needs explicit formatting.
        if let Self::Char(payload) = self {
            return write!(f, "Token::Char({payload:?})");
        }
        // Every payload-free variant reuses its derived `Debug` name,
        // forwarding `f` so any formatter flags apply exactly as before.
        f.write_str("Token::")?;
        std::fmt::Debug::fmt(self, f)
    }
}
impl<'a> Tokenizer<'a> {
    /// Builds a tokenizer over pre-split input fragments.
    ///
    /// Each fragment's starting offset is the accumulated length of the
    /// previous fragments plus one position per separator, so spans refer
    /// to locations in the original, unsplit input.
    /// NOTE(review): `previous + index` assumes each separator removed by
    /// the caller's split was exactly one byte — confirm with the caller.
    ///
    /// Returns `None` when `input` yields no fragments at all.
    pub(crate) fn from_slice<T>(input: T) -> Option<Self>
    where
        // Generalized from `T: Iterator` (which was immediately passed
        // through `.into_iter()` anyway); every `Iterator` is also an
        // `IntoIterator`, so all existing callers still compile.
        T: IntoIterator<Item = &'a str>,
    {
        let mut internal = vec![];
        let mut previous = 0;
        for (index, split) in input.into_iter().enumerate() {
            internal.push(InternalTokenizer::new(split, previous + index));
            previous += split.len();
        }
        if internal.is_empty() {
            None
        } else {
            Some(Self {
                internal,
                active: 0,
                switched: false,
            })
        }
    }

    /// Returns the upcoming token without consuming it.
    ///
    /// While a fragment switch is pending, this keeps reporting the
    /// `Break` token until it is consumed via [`Self::next_token`].
    pub(crate) fn peek(&mut self) -> Token {
        self.check_valid();
        if self.switched {
            self.return_break_token()
        } else {
            self.active_tokenizer().peek()
        }
    }

    /// Consumes and returns the next token.
    ///
    /// Emits a single `Break` token at each fragment boundary before
    /// continuing with the next fragment's tokens.
    pub(crate) fn next_token(&mut self) -> Token {
        self.check_valid();
        if self.switched {
            let output = self.return_break_token();
            // The boundary has now been reported; resume normal tokenization.
            self.switched = false;
            return output;
        }
        self.active_tokenizer().next_token()
    }

    /// Builds the zero-width `Break` token marking a fragment boundary.
    ///
    /// Positioned one past the end of the fragment that was just exhausted.
    /// Only called after `check_valid` advanced `active`, so `active - 1`
    /// is always a valid index.
    fn return_break_token(&mut self) -> Token {
        let previous_location = self
            .internal
            .get(self.active - 1)
            .expect("All indexes are valid")
            .get_location();
        Token {
            kind: TokenKind::Break,
            span: TokenSpan {
                start: previous_location + 1,
                stop: previous_location + 1,
            },
        }
    }

    /// Advances `active` to the next fragment when the current one has
    /// reached EOF (and is not the last), flagging `switched` so a `Break`
    /// token gets emitted first.
    fn check_valid(&mut self) {
        let final_internal_index = self.internal.len() - 1;
        let current_index = self.active;
        let next_token = self
            .internal
            .get_mut(current_index)
            .expect("All indexes are valid")
            .peek();
        if next_token.kind == TokenKind::Eof && current_index != final_internal_index {
            self.active += 1;
            self.switched = true;
        }
    }

    /// Returns the tokenizer for the fragment currently being consumed.
    fn active_tokenizer(&mut self) -> &mut InternalTokenizer<'a> {
        self.internal
            .get_mut(self.active)
            .expect("This should always be a valid index.")
    }
}
impl<'a> InternalTokenizer<'a> {
    /// Creates a tokenizer for one input fragment.
    ///
    /// `previous` is the fragment's starting offset in the original input;
    /// it is folded into `orig_len` so `get_location` yields absolute
    /// positions.
    fn new(input: &'a str, previous: usize) -> Self {
        Self {
            orig_len: input.len() + previous,
            input,
            next: None,
            returned_eof: 0,
        }
    }

    /// Absolute position of the next unconsumed byte.
    fn get_location(&self) -> usize {
        self.orig_len - self.input.len()
    }

    /// Consumes and returns the next token.
    fn next_token(&mut self) -> Token {
        self.populate();
        mem::take(&mut self.next).expect("`self.next` should be some.")
    }

    /// Returns the next token without consuming it.
    fn peek(&mut self) -> Token {
        if self.next.is_none() {
            self.populate();
        }
        self.next.expect("Is some")
    }

    /// Scans the next token from the remaining input.
    ///
    /// # Panics
    /// Panics if called again after `Eof` has already been handed out once;
    /// that indicates a driver bug.
    fn actual_next_token(&mut self) -> Token {
        if self.input.is_empty() {
            self.returned_eof += 1;
            assert!(
                (self.returned_eof < 2),
                "BUG: Tried to drain this tokenizer over EOF for {} times.",
                self.returned_eof
            );
            return Token {
                kind: TokenKind::Eof,
                span: TokenSpan {
                    start: self.get_location(),
                    stop: self.get_location(),
                },
            };
        }
        // `size` is in bytes, matching `chomp`'s byte-slice semantics.
        let (token, size) = match self.next() {
            'A' => self.tokenize_and(),
            'O' => self.tokenize_or(),
            '(' => (TokenKind::ParenOpen, 1),
            ')' => (TokenKind::ParenClose, 1),
            ':' => (TokenKind::Colon, 1),
            other => (TokenKind::Char(other), other.len_utf8()),
        };
        let current_location = self.get_location();
        self.chomp(size);
        Token {
            kind: token,
            span: TokenSpan {
                start: current_location,
                stop: self.get_location(),
            },
        }
    }

    /// Fills the lookahead buffer if it is empty.
    fn populate(&mut self) {
        if self.next.is_none() {
            let next = self.actual_next_token();
            self.next = Some(next);
        }
    }

    /// Drops `number` bytes from the front of the remaining input.
    fn chomp(&mut self, number: usize) {
        self.input = &self.input[number..];
    }

    /// First character of the remaining input. Only called on non-empty input.
    fn next(&self) -> char {
        self.input.chars().next().expect("Is not empty")
    }

    /// Recognizes the `AND` keyword, falling back to a plain `Char('A')`.
    ///
    /// BUGFIX: the previous implementation sliced `&input[0..3]`, which
    /// panicked when fewer than 3 bytes remained (e.g. input ending in
    /// `"AN"`) or when byte 3 fell inside a multi-byte character (e.g.
    /// `"A€"`). `starts_with` never panics.
    fn tokenize_and(&self) -> (TokenKind, usize) {
        if self.input.starts_with("AND") {
            (TokenKind::And, 3)
        } else {
            (TokenKind::Char(self.next()), 1)
        }
    }

    /// Recognizes the `OR` keyword, falling back to a plain `Char('O')`.
    ///
    /// BUGFIX: same out-of-bounds / char-boundary panic as `tokenize_and`
    /// (previously sliced `&input[0..2]` unconditionally).
    fn tokenize_or(&self) -> (TokenKind, usize) {
        if self.input.starts_with("OR") {
            (TokenKind::Or, 2)
        } else {
            (TokenKind::Char(self.next()), 1)
        }
    }
}