use std::str::Chars;
/// A lexical token produced by [`Tokenizer`].
///
/// Within one input line the tokenizer emits a [`Token::Level`], then
/// optionally a [`Token::Pointer`], then a [`Token::Tag`] or
/// [`Token::CustomTag`], then optionally a [`Token::LineValue`].
///
/// `Eq` is derived alongside `PartialEq` because every payload (`u8`, `String`)
/// has a total equality.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
    /// The number found at the start of a line.
    Level(u8),
    /// A word that follows a level or a pointer and starts with neither `@`
    /// nor `_`. (A word directly after a pointer is always reported as `Tag`,
    /// whatever its first character.)
    Tag(String),
    /// The rest of the line after a tag or custom tag, up to the line ending.
    LineValue(String),
    /// A word starting with `@` that directly follows the level.
    Pointer(String),
    /// A word starting with `_` that directly follows the level.
    CustomTag(String),
    /// End of input has been reached.
    EOF,
    /// Placeholder held by a new tokenizer before the first token is read.
    None,
}
/// Character-level tokenizer that turns a `Chars` stream into [`Token`]s,
/// one token at a time.
///
/// The most recently produced token is exposed through `current_token`;
/// callers advance with `next_token` / `take_token`.
pub struct Tokenizer<'a> {
/// The token most recently produced by `next_token` ([`Token::None`] before
/// the first call).
pub current_token: Token,
/// The character currently being examined; `'\0'` once the input is exhausted.
current_char: char,
/// The remaining, not yet examined input.
chars: Chars<'a>,
/// Number of the line currently being tokenized (1-based; `0` before the
/// first token has been read).
pub line: u32,
}
impl<'a> Tokenizer<'a> {
    /// Creates a tokenizer over `chars`, positioned before the first line.
    ///
    /// The current character starts out as a synthetic `'\n'` so that the first
    /// call to [`Tokenizer::next_token`] treats the beginning of the input like
    /// the beginning of any other line and reads a level number. Until then
    /// `current_token` is [`Token::None`] and `line` is `0`.
    #[must_use]
    pub fn new(chars: Chars<'a>) -> Tokenizer<'a> {
        Tokenizer {
            current_char: '\n',
            current_token: Token::None,
            chars,
            line: 0,
        }
    }

    /// Returns `true` once the current token is [`Token::EOF`].
    #[must_use]
    pub fn done(&self) -> bool {
        self.current_token == Token::EOF
    }

    /// Advances to the next token and stores it in `current_token`.
    ///
    /// * At the end of the input the token becomes [`Token::EOF`] (and stays
    ///   `EOF` on further calls). Trailing newlines or whitespace before the
    ///   end of the input are ignored.
    /// * At a line break the next line's number is read as [`Token::Level`]
    ///   and `line` is advanced. Blank and whitespace-only lines are skipped
    ///   (they still count towards `line`).
    /// * Otherwise the kind of token depends on the previous one: after a
    ///   level comes a pointer (`@…`), custom tag (`_…`) or tag; after a
    ///   pointer comes a tag; after a tag or custom tag comes the line value.
    ///
    /// # Panics
    ///
    /// Panics if a line does not start with a number that fits in a `u8`, or
    /// if more input follows on a line after a token that cannot be followed
    /// by anything (for example text after a line value).
    pub fn next_token(&mut self) {
        // Skips spaces, tabs, a byte order mark and the '\r' of a "\r\n" pair;
        // only '\n', the end of input or real content stops the scan.
        self.skip_whitespace();
        if self.current_char == '\0' {
            self.current_token = Token::EOF;
            return;
        }

        if self.current_char == '\n' {
            // Each pass consumes one line break. Looping (instead of parsing a
            // number right away) lets blank lines and a trailing newline pass
            // without tripping the level parser.
            loop {
                self.next_char();
                self.skip_whitespace();
                if self.current_char == '\0' {
                    self.current_token = Token::EOF;
                    return;
                }
                // Count the line before parsing so that a panic raised by
                // `extract_number` names the offending line.
                self.line += 1;
                if self.current_char != '\n' {
                    break;
                }
            }
            self.current_token = Token::Level(self.extract_number());
            return;
        }

        let first = self.current_char;
        self.current_token = match self.current_token {
            Token::Level(_) => match first {
                '@' => Token::Pointer(self.extract_word()),
                '_' => Token::CustomTag(self.extract_word()),
                _ => Token::Tag(self.extract_word()),
            },
            Token::Pointer(_) => Token::Tag(self.extract_word()),
            Token::Tag(_) | Token::CustomTag(_) => Token::LineValue(self.extract_value()),
            Token::LineValue(_) | Token::EOF | Token::None => panic!(
                "line {}: Tokenization error! {:?}",
                self.line, self.current_token
            ),
        };
    }

    /// Returns the current token and advances to the next one.
    pub fn take_token(&mut self) -> Token {
        // The token has to be cloned rather than moved out: `next_token`
        // inspects the current token to decide what kind of token comes next.
        let taken = self.current_token.clone();
        self.next_token();
        taken
    }

    /// Moves to the next input character; `'\0'` marks the end of the input.
    ///
    /// Because `'\0'` is the end marker, a NUL character inside the input is
    /// indistinguishable from the end of the input.
    fn next_char(&mut self) {
        self.current_char = self.chars.next().unwrap_or('\0');
    }

    /// Reads the run of ASCII digits at the current position as a level.
    ///
    /// The caller (`next_token`) has already skipped leading whitespace.
    ///
    /// # Panics
    ///
    /// Panics, naming the current line, if there are no digits at the current
    /// position or the number does not fit in a `u8`.
    fn extract_number(&mut self) -> u8 {
        let mut digits = String::new();
        while self.current_char.is_ascii_digit() {
            digits.push(self.current_char);
            self.next_char();
        }
        match digits.parse::<u8>() {
            Ok(level) => level,
            Err(err) => panic!(
                "line {}: invalid level number {:?}: {}",
                self.line, digits, err
            ),
        }
    }

    /// Reads characters up to (not including) the next whitespace character
    /// or the end of the input.
    fn extract_word(&mut self) -> String {
        let mut word = String::new();
        while !self.current_char.is_whitespace() && self.current_char != '\0' {
            word.push(self.current_char);
            self.next_char();
        }
        word
    }

    /// Reads the rest of the line, up to (not including) the line ending.
    ///
    /// The end-of-input marker `'\0'` also terminates the value; without that
    /// check a value on the last line of an input that has no trailing newline
    /// would be extended forever.
    fn extract_value(&mut self) -> String {
        let mut value = String::new();
        while !matches!(self.current_char, '\n' | '\r' | '\0') {
            value.push(self.current_char);
            self.next_char();
        }
        value
    }

    /// Skips whitespace on the current line without crossing a `'\n'`.
    fn skip_whitespace(&mut self) {
        while self.is_nonnewline_whitespace() {
            self.next_char();
        }
    }

    /// Whether the current character is skippable: any whitespace except
    /// `'\n'`, or U+FEFF (the byte order mark, which `char::is_whitespace`
    /// does not report as whitespace).
    fn is_nonnewline_whitespace(&self) -> bool {
        let c = self.current_char;
        c != '\n' && (c.is_whitespace() || c == '\u{FEFF}')
    }

    /// Returns a `"line N:"` prefix for diagnostics.
    #[must_use]
    pub fn debug(&self) -> String {
        format!("line {}:", self.line)
    }

    /// Advances past the current token and returns the value that follows it
    /// on the same line.
    ///
    /// If a [`Token::LineValue`] follows, it is returned and the tokenizer
    /// advances once more, past the value. If the line has no value (the next
    /// token is a [`Token::Level`] or [`Token::EOF`]), an empty string is
    /// returned and that token stays current.
    ///
    /// # Panics
    ///
    /// Panics if the next token is anything other than a line value, a level
    /// or the end of the input.
    pub fn take_line_value(&mut self) -> String {
        self.next_token();
        match &self.current_token {
            Token::LineValue(val) => {
                let value = val.clone();
                self.next_token();
                value
            }
            // No value on this line: leave the level / end marker in place.
            Token::Level(_) | Token::EOF => String::new(),
            _ => panic!(
                "{} Expected LineValue, found {:?}",
                self.debug(),
                self.current_token
            ),
        }
    }

    /// Reads the current line's value together with its `CONT` / `CONC`
    /// continuation lines.
    ///
    /// `level` is the level of the line that owns the text. Following lines
    /// with a greater level are consumed: a `CONT` line appends a newline and
    /// its value, a `CONC` line appends its value directly. Reading stops at
    /// the first line whose level is less than or equal to `level` (that
    /// [`Token::Level`] stays current) or at the end of the input.
    ///
    /// # Panics
    ///
    /// Panics if a nested line carries anything other than a `CONT` or `CONC`
    /// tag, and under the same conditions as [`Tokenizer::take_line_value`].
    pub fn take_continued_text(&mut self, level: u8) -> String {
        let mut value = self.take_line_value();
        loop {
            // Stop at the end of the input or at a line that is not nested
            // below the line owning the text.
            let finished = match self.current_token {
                Token::EOF => true,
                Token::Level(cur_level) => cur_level <= level,
                _ => false,
            };
            if finished {
                break;
            }
            match &self.current_token {
                Token::Tag(tag) => match tag.as_str() {
                    "CONT" => {
                        value.push('\n');
                        value.push_str(&self.take_line_value());
                    }
                    "CONC" => value.push_str(&self.take_line_value()),
                    _ => panic!("{} Unhandled Continuation Tag: {}", self.debug(), tag),
                },
                // A deeper level: step onto the tag that follows it.
                Token::Level(_) => self.next_token(),
                _ => panic!(
                    "{} Unhandled Continuation Token: {:?}",
                    self.debug(),
                    self.current_token
                ),
            }
        }
        value
    }
}