use crate::Argument;
use crate::Type;
use crate::TYPE_START;
use crate::TYPE_STOP;
use std::fmt;
use std::slice::Iter;
/// Lexeme representing a `false` boolean argument value.
const BOOLEAN_FALSE_AS_STR: &str = "f";
/// Lexeme representing a `true` boolean argument value.
const BOOLEAN_TRUE_AS_STR: &str = "t";
/// Opening delimiter of a string argument (typographic left double quote).
const STRING_START: char = '“';
/// Closing delimiter of a string argument (typographic right double quote).
const STRING_STOP: char = '”';
/// Lexical tokens produced by `Tokenize`. Each variant carries a character
/// index (from `chars().enumerate()`, not a byte offset); for value-carrying
/// tokens the handlers record the index of the lexeme's last character.
#[derive(Debug, PartialEq)]
enum Token {
// A boolean argument value, scanned from the single characters 't'/'f'.
BooleanValue(bool, usize),
// A lexical error message; the tokenizer emits these in-band and continues.
Error(String, usize),
// A string argument value (the contents between STRING_START and STRING_STOP).
StringValue(String, usize),
// End-of-type delimiter (TYPE_STOP).
TypeEnd(usize),
// A (possibly '/'-qualified) type name.
TypeName(String, usize),
// Start-of-type delimiter (TYPE_START).
TypeStart(usize),
}
impl Token {
fn new_boolean_value(s: String, i: usize) -> Token {
match s.as_str() {
BOOLEAN_FALSE_AS_STR => Token::BooleanValue(false, i),
BOOLEAN_TRUE_AS_STR => Token::BooleanValue(true, i),
_ => Token::Error(
format!(
"Illegal lexeme '{}' for Token::BooleanValue at character index {}.",
s, i
),
i,
),
}
}
}
/// An ordered list of tokens produced by `Tokenize`.
#[derive(Debug, PartialEq)]
pub struct TokenList {
// Tokens in the order they were scanned.
list: Vec<Token>,
// NOTE(review): `index` is set to 0 at construction and never read or
// advanced anywhere in this file — looks like a leftover cursor. It does
// participate in the derived `PartialEq`, so confirm before removing.
index: usize,
}
impl TokenList {
fn new() -> TokenList {
TokenList {
list: Vec::new(),
index: 0,
}
}
fn append(&mut self, token: Token) {
self.list.push(token)
}
fn iter(&self) -> Iter<Token> {
self.list.iter()
}
}
impl From<Vec<Token>> for TokenList {
fn from(list: Vec<Token>) -> Self {
TokenList { list, index: 0 }
}
}
impl fmt::Display for TokenList {
    /// Writes each token's `Debug` representation followed by a single
    /// space (including a trailing space after the last token).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Write straight into the formatter instead of accumulating an
        // intermediate String first — same output, no extra allocation.
        for t in self.list.iter() {
            write!(f, "{:?} ", t)?;
        }
        Ok(())
    }
}
/// Scanner modes. `Tokenize` keeps these on a stack so that when a nested
/// construct finishes, scanning resumes in the enclosing context.
enum Mode {
// Skipping whitespace before a top-level TYPE_START.
Start,
// Accumulating a type name.
TypeName,
// Scanning a type's argument list.
Arguments,
// Completing a single-character boolean argument.
BooleanValue,
// Accumulating a quoted string argument.
StringValue,
}
/// A mode-stack based lexer for the bracketed type notation
/// (e.g. `[mathematics/boolean_algebra/Value t]`).
pub struct Tokenize<'a> {
// The text being scanned.
input: &'a str,
// Innermost mode on top; dispatch assumes this is never empty.
mode_stack: Vec<Mode>,
// Scratch buffer for the lexeme currently being accumulated.
lexeme: String,
}
impl<'a> Tokenize<'a> {
/// Creates a tokenizer over `input`, starting in `Mode::Start`.
pub fn new(input: &str) -> Tokenize {
Tokenize {
input,
mode_stack: vec![Mode::Start],
// Reused scratch buffer; 64 bytes covers typical lexemes without growth.
lexeme: String::with_capacity(64),
}
}
/// Scans the entire input and returns the resulting token list.
///
/// All positions stored in tokens are character indices (from
/// `chars().enumerate()`), not byte offsets. Lexical errors are emitted
/// in-band as `Token::Error` entries and scanning continues.
pub fn execute(&mut self) -> TokenList {
let mut tokens = TokenList::new();
let mut enumerated_chars = self.input.chars().enumerate();
while let Some((i, c)) = enumerated_chars.next() {
// Dispatch each character to the handler for the innermost mode.
match self.mode_stack.last() {
Some(Mode::Start) => self.start(c, i, &mut tokens),
Some(Mode::TypeName) => self.type_name(c, i, &mut tokens),
Some(Mode::Arguments) => self.type_parameters(c, i, &mut tokens),
Some(Mode::BooleanValue) => self.boolean_value(c, i, &mut tokens),
Some(Mode::StringValue) => self.string_value(c, i, &mut tokens),
None => unreachable!("Tokenize should never allow an empty mode stack."),
}
}
// NOTE(review): input ending mid-lexeme (e.g. an unterminated string or
// name) silently drops the pending lexeme with no Error token — confirm
// this is intended; the parser will report the truncation instead.
tokens
}
/// `Mode::Start`: skip whitespace until the type-start delimiter.
fn start(&mut self, c: char, i: usize, tokens: &mut TokenList) {
match c {
TYPE_START => {
tokens.append(Token::TypeStart(i));
self.mode_stack.push(Mode::TypeName);
}
// Whitespace between top-level types is ignored.
' ' | '\t' | '\n' | '\r' => (),
_ => tokens.append(Token::Error(
format!(
"Illegal character '{}', while scanning for type start '{}' at character {}.",
c, TYPE_START, i
),
i,
)),
}
}
/// `Mode::TypeName`: accumulate a type name until a delimiter ends it.
/// The emitted `Token::TypeName` records `i - 1` — the index of the
/// name's last character, since the current char is the delimiter.
/// (Safe from underflow: this mode is only entered after a TYPE_START,
/// so `i >= 1` here.)
fn type_name(&mut self, c: char, i: usize, tokens: &mut TokenList) {
match c {
// Legal name characters; '/' separates namespace segments.
'A'..='Z' | 'a'..='z' | '/' | '_' => self.lexeme.push(c),
// Whitespace ends the name; arguments may follow.
' ' | '\t' | '\n' | '\r' => {
// split_off(0) empties the scratch buffer and returns its contents.
tokens.append(Token::TypeName(self.lexeme.split_off(0), i - 1));
self.mode_stack.pop();
self.mode_stack.push(Mode::Arguments);
}
// The type closes immediately after its name (no arguments).
TYPE_STOP => {
tokens.append(Token::TypeName(self.lexeme.split_off(0), i - 1));
tokens.append(Token::TypeEnd(i));
self.mode_stack.pop();
}
// A nested type starts directly after the name: this level switches
// to Arguments, then the nested type's name is scanned.
TYPE_START => {
tokens.append(Token::TypeName(self.lexeme.split_off(0), i - 1));
tokens.append(Token::TypeStart(i));
self.mode_stack.pop();
self.mode_stack.push(Mode::Arguments);
self.mode_stack.push(Mode::TypeName);
}
_ => tokens.append(Token::Error(
format!(
"Illegal character '{}', while scanning type name '{}' at character {}.",
c, self.lexeme, i
),
i,
)),
}
}
/// `Mode::Arguments`: scan a type's argument list until TYPE_STOP.
fn type_parameters(&mut self, c: char, i: usize, tokens: &mut TokenList) {
match c {
TYPE_STOP => {
tokens.append(Token::TypeEnd(i));
self.mode_stack.pop();
}
// A nested type argument.
TYPE_START => {
tokens.append(Token::TypeStart(i));
self.mode_stack.push(Mode::TypeName);
}
// A string argument; contents are gathered until STRING_STOP.
STRING_START => {
self.mode_stack.push(Mode::StringValue);
}
// Booleans are exactly the single characters 'f' and 't'.
'f' | 't' => {
self.lexeme.push(c);
self.mode_stack.push(Mode::BooleanValue);
}
// Whitespace separates arguments.
' ' | '\t' | '\n' | '\r' => (),
_ => tokens.append(Token::Error(
format!(
"Illegal character '{}', while scanning type parameters at character {}.",
c, i
),
i,
)),
}
}
/// `Mode::BooleanValue`: the boolean lexeme ('f'/'t') is already in
/// `self.lexeme`; a following delimiter completes it. The token records
/// `i - 1`, the index of the boolean character itself.
fn boolean_value(&mut self, c: char, i: usize, tokens: &mut TokenList) {
match c {
' ' | '\t' | '\n' | '\r' => {
tokens.append(Token::new_boolean_value(self.lexeme.split_off(0), i - 1));
self.mode_stack.pop();
}
// The boolean was the last argument: close the type too, popping
// both the BooleanValue and the enclosing Arguments mode.
TYPE_STOP => {
tokens.append(Token::new_boolean_value(self.lexeme.split_off(0), i - 1));
tokens.append(Token::TypeEnd(i));
self.mode_stack.pop();
self.mode_stack.pop();
}
_ => tokens.append(Token::Error(
format!(
"Illegal character '{}', while scanning boolean value at character {}.",
c, i
),
i,
)),
}
}
/// `Mode::StringValue`: accumulate every character (whitespace and type
/// delimiters included) until the closing quote.
fn string_value(&mut self, c: char, i: usize, tokens: &mut TokenList) {
match c {
STRING_STOP => {
tokens.append(Token::StringValue(self.lexeme.split_off(0), i - 1));
self.mode_stack.pop();
}
_ => self.lexeme.push(c),
}
}
}
#[cfg(test)]
mod tokenize {
    use super::*;
    /// End-to-end check: one type with a single boolean argument lexes into
    /// start / name / value / end tokens carrying the right char positions.
    #[test]
    fn smoke() {
        let expected = TokenList::from(vec![
            Token::TypeStart(0),
            Token::TypeName("mathematics/boolean_algebra/Value".to_string(), 33),
            Token::BooleanValue(true, 35),
            Token::TypeEnd(36),
        ]);
        let actual = Tokenize::new("[mathematics/boolean_algebra/Value t]").execute();
        assert_eq!(actual, expected);
    }
}
/// Parses the bracketed type notation into a `Type`, resolving bare
/// (unqualified) type names against a default type space.
pub struct Parse<'a> {
// Source text to be tokenized and parsed.
input: &'a str,
// Namespace prefix prepended to bare type names; updated to the namespace
// (up to and including the last '/') of each qualified name parsed.
// Presumably the initial value also ends with '/' — confirm at call sites.
default_type_space: String,
}
impl<'a> Parse<'a> {
    /// Creates a parser over `input`. Bare type names are prefixed with
    /// `default_type_space`.
    pub fn new(input: &'a str, default_type_space: &'a str) -> Parse<'a> {
        Parse {
            input,
            default_type_space: String::from(default_type_space),
        }
    }
    /// Tokenizes the input and parses it into a `Type`.
    ///
    /// Failures are reported in-band: the returned `Type` is the error
    /// value produced while parsing (e.g. a `Type::ErrorMessage`).
    pub fn execute(&mut self) -> Type {
        let token_list = Tokenize::new(self.input).execute();
        let mut token_iter = token_list.iter();
        if let Err(t) = self.parse_type_start(&mut token_iter) {
            return t;
        }
        match self.parse_type_body(&mut token_iter) {
            Ok(t) => t,
            Err(t) => t,
        }
    }
    /// Consumes exactly one `Token::TypeStart`; any other token — or an
    /// exhausted token stream — is an error.
    fn parse_type_start(&self, token_iter: &mut Iter<Token>) -> Result<(), Type> {
        match token_iter.next() {
            Some(Token::TypeStart(_)) => Ok(()),
            Some(token) => Err(Type::ErrorMessage(format!(
                "Expected a Token::TypeStart, but got a {:?} instead.",
                token,
            ))),
            None => Err(Type::ErrorMessage(
                "Expected a Token::TypeStart, but got no token at all.".to_string(),
            )),
        }
    }
    /// Parses everything after a `Token::TypeStart`: the type name, its
    /// arguments, and the closing `Token::TypeEnd`.
    fn parse_type_body(&mut self, token_iter: &mut Iter<Token>) -> Result<Type, Type> {
        let qname = self.parse_type_name(token_iter)?;
        let args = self.parse_type_arguments(token_iter)?;
        Type::new(&qname, args)
    }
    /// Consumes a `Token::TypeName` and returns the fully qualified name.
    ///
    /// A name containing '/' is already qualified; its namespace portion
    /// (up to and including the last '/') becomes the new default type
    /// space. A bare name is prefixed with the current default type space.
    fn parse_type_name(&mut self, token_iter: &mut Iter<Token>) -> Result<String, Type> {
        match token_iter.next() {
            Some(Token::TypeName(name, _)) => match name.rfind('/') {
                Some(j) => {
                    // Remember the namespace so sibling arguments may use
                    // bare names relative to the last qualified one.
                    self.default_type_space = name[0..=j].to_string();
                    Ok(name.to_string())
                }
                None => {
                    let mut qname = self.default_type_space.clone();
                    qname.push_str(name);
                    Ok(qname)
                }
            },
            Some(token) => Err(Type::ErrorMessage(format!(
                "Expected a Token::TypeName, but got a {:?} instead.",
                token
            ))),
            None => Err(Type::ErrorMessage(
                "Expected a Token::TypeName, but got no token at all.".to_string(),
            )),
        }
    }
    /// Collects argument values until the matching `Token::TypeEnd`;
    /// nested `Token::TypeStart`s recurse through `parse_type_body`.
    fn parse_type_arguments(
        &mut self,
        token_iter: &mut Iter<Token>,
    ) -> Result<Vec<Argument>, Type> {
        let mut args: Vec<Argument> = Vec::new();
        loop {
            match token_iter.next() {
                Some(Token::TypeEnd(_)) => break,
                Some(Token::BooleanValue(b, _)) => args.push(Argument::Bool(*b)),
                Some(Token::StringValue(s, _)) => args.push(Argument::String(s.to_string())),
                // The tokenizer only emits a TypeName immediately after a
                // TypeStart, and parse_type_body consumes it there, so one
                // appearing mid-arguments is a broken invariant, not input.
                // (Fixed: was `unreachable!(format!(...))`, which passes a
                // non-literal to the panic machinery and is rejected under
                // the 2021 edition; the macro formats its own args.)
                Some(Token::TypeName(s, i)) => unreachable!(
                    "Illegal Token::TypeName({},{}) in type arguments.",
                    s, i
                ),
                Some(Token::TypeStart(_)) => {
                    args.push(Argument::Type(self.parse_type_body(token_iter)?))
                }
                Some(Token::Error(s, i)) => {
                    return Err(Type::ErrorMessage(format!(
                        "Token error at character {}: {}",
                        i, s
                    )));
                }
                None => {
                    // Typo fixed in the message: "which" -> "while".
                    return Err(Type::ErrorMessage(String::from(
                        "Ran out of tokens while parsing type arguments.",
                    )))
                }
            }
        }
        Ok(args)
    }
}