use xdl_core::{XdlResult, XdlError};
use nom::{
branch::alt,
bytes::complete::{tag, take_while, take_while1},
character::complete::{char, digit1, multispace0, multispace1, none_of},
combinator::{map, map_res, opt, recognize, value},
multi::{many0, many_till},
sequence::{delimited, pair, preceded, terminated},
IResult,
};
/// A [`Token`] paired with a source position.
///
/// NOTE(review): nothing in the visible code constructs this type, so whether
/// `line` / `column` are 0- or 1-based is not established here — confirm
/// against the code that produces spans.
#[derive(Debug, Clone, PartialEq)]
pub struct TokenSpan {
    /// The lexed token.
    pub token: Token,
    /// Line of the token in the source text.
    pub line: usize,
    /// Column of the token in the source text.
    pub column: usize,
}
/// Lexical tokens of the language.
///
/// Keyword and word-operator variants carry no payload; literals,
/// identifiers, system variables, labels and comments carry their text/value.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // --- Literals ---
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal.
    Float(f64),
    /// String literal (contents only, quotes removed).
    String(String),

    // --- Control-flow keywords ---
    If,
    Then,
    Else,
    Endif,
    For,
    Endfor,
    Foreach,
    While,
    Endwhile,
    Repeat,
    Until,
    Break,
    Continue,

    // --- Routine definition keywords ---
    Function,
    Endfunction,
    Procedure,
    Pro,
    Endpro,
    Return,
    Common,
    CompileOpt,

    // --- Block / selection keywords ---
    Begin,
    End,
    Case,
    Of,
    Endcase,
    Switch,
    Endswitch,

    // --- Arithmetic operators ---
    Plus,
    Minus,
    Multiply,
    Divide,
    Modulo,
    Power,
    MatrixMultiply,

    // --- Assignment operators ---
    Assign,
    PlusAssign,
    MinusAssign,
    MultiplyAssign,
    DivideAssign,

    // --- Relational operators ---
    Equal,
    NotEqual,
    Less,
    Greater,
    LessEqual,
    GreaterEqual,

    // --- Logical operators ---
    And,
    Or,
    Not,
    Xor,

    // --- Bitwise operators ---
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,

    // --- Delimiters and punctuation ---
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    LeftBrace,
    RightBrace,
    Comma,
    Semicolon,
    Colon,
    Dot,
    Arrow,

    // --- Names and miscellaneous ---
    /// A non-keyword word, with its original spelling.
    Identifier(String),
    /// A `!name` system variable (name only, without the `!`).
    SystemVariable(String),
    /// A label name.
    Label(String),
    /// Comment text.
    Comment(String),
    /// Line break.
    Newline,
    /// End-of-input marker.
    EOF,
}
/// Result type shared by every token parser in this file: a `nom` `IResult`
/// over `&str` input, yielding the unconsumed remainder and a value of type `T`.
type ParseResult<T> = IResult<&str, T>;
/// Returns `true` when `c` may appear inside an identifier: an underscore or
/// any Unicode alphanumeric character.
fn is_identifier_char(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
/// Returns `true` when `c` may begin an identifier: an underscore or any
/// Unicode alphabetic character (digits are excluded).
fn is_identifier_start(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphabetic(),
    }
}
/// Parses a run of one or more ASCII digits into [`Token::Integer`].
///
/// The parser fails (recoverably) when there is no leading digit or when the
/// digits do not fit in an `i64`.
fn parse_integer(input: &str) -> ParseResult<Token> {
    // Conversion applied to the matched digits; an overflow surfaces as a
    // `map_res` parse error rather than a panic.
    fn to_integer(digits: &str) -> Result<Token, std::num::ParseIntError> {
        digits.parse::<i64>().map(Token::Integer)
    }

    map_res(digit1, to_integer)(input)
}
fn parse_float(input: &str) -> ParseResult<Token> {
map_res(
recognize(pair(
digit1,
pair(char('.'), opt(digit1))
)),
|s: &str| s.parse::<f64>().map(Token::Float)
)(input)
}
fn parse_number(input: &str) -> ParseResult<Token> {
alt((parse_float, parse_integer))(input)
}
fn parse_string(input: &str) -> ParseResult<Token> {
alt((
delimited(
char('"'),
map(many0(none_of("\"")), |chars| {
Token::String(chars.into_iter().collect())
}),
char('"')
),
delimited(
char('\''),
map(many0(none_of("'")), |chars| {
Token::String(chars.into_iter().collect())
}),
char('\'')
)
))(input)
}
/// Parses a word and classifies it as a reserved word or an identifier.
///
/// A word is one or more identifier-start characters followed by any number
/// of identifier characters. Reserved words are matched case-insensitively
/// (the word is upper-cased before lookup); anything else becomes
/// [`Token::Identifier`] carrying the original spelling.
fn parse_identifier_or_keyword(input: &str) -> ParseResult<Token> {
    // Looks up an already upper-cased word in the reserved-word table.
    fn reserved_word(upper: &str) -> Option<Token> {
        let token = match upper {
            // Control flow
            "IF" => Token::If,
            "THEN" => Token::Then,
            "ELSE" => Token::Else,
            "ENDIF" => Token::Endif,
            "FOR" => Token::For,
            "ENDFOR" => Token::Endfor,
            "FOREACH" => Token::Foreach,
            "WHILE" => Token::While,
            "ENDWHILE" => Token::Endwhile,
            "REPEAT" => Token::Repeat,
            "UNTIL" => Token::Until,
            "BREAK" => Token::Break,
            "CONTINUE" => Token::Continue,
            // Routine definitions; both spellings map to `Token::Pro`.
            "FUNCTION" => Token::Function,
            "ENDFUNCTION" => Token::Endfunction,
            "PROCEDURE" | "PRO" => Token::Pro,
            "ENDPRO" => Token::Endpro,
            "RETURN" => Token::Return,
            "COMMON" => Token::Common,
            "COMPILE_OPT" => Token::CompileOpt,
            // Blocks and selection
            "BEGIN" => Token::Begin,
            "END" => Token::End,
            "CASE" => Token::Case,
            "OF" => Token::Of,
            "ENDCASE" => Token::Endcase,
            "SWITCH" => Token::Switch,
            "ENDSWITCH" => Token::Endswitch,
            // Word operators
            "MOD" => Token::Modulo,
            "EQ" => Token::Equal,
            "NE" => Token::NotEqual,
            "LT" => Token::Less,
            "GT" => Token::Greater,
            "LE" => Token::LessEqual,
            "GE" => Token::GreaterEqual,
            "AND" => Token::And,
            "OR" => Token::Or,
            "NOT" => Token::Not,
            "XOR" => Token::Xor,
            _ => return None,
        };
        Some(token)
    }

    let (rest, word) = recognize(pair(
        take_while1(is_identifier_start),
        take_while(is_identifier_char),
    ))(input)?;

    let token = reserved_word(&word.to_uppercase())
        .unwrap_or_else(|| Token::Identifier(word.to_string()));

    Ok((rest, token))
}
/// Parses a system variable: `!` followed by one or more identifier
/// characters.
///
/// The resulting [`Token::SystemVariable`] holds the name upper-cased and
/// without the leading `!`. The name is matched with `is_identifier_char`
/// only, so it may begin with a digit.
fn parse_system_variable(input: &str) -> ParseResult<Token> {
    let (rest, name) = preceded(char('!'), take_while1(is_identifier_char))(input)?;
    Ok((rest, Token::SystemVariable(name.to_uppercase())))
}
/// Parses a `;` comment running to the end of the line.
///
/// The resulting [`Token::Comment`] holds everything after the `;` up to, but
/// not including, the next `\n` (or the end of input). The newline itself is
/// left unconsumed.
fn parse_comment(input: &str) -> ParseResult<Token> {
    let (after_marker, _) = char(';')(input)?;
    let (rest, text) = take_while(|c| c != '\n')(after_marker)?;
    Ok((rest, Token::Comment(text.to_string())))
}
fn parse_operator(input: &str) -> ParseResult<Token> {
alt((
value(Token::PlusAssign, tag("+=")),
value(Token::MinusAssign, tag("-=")),
value(Token::MultiplyAssign, tag("*=")),
value(Token::DivideAssign, tag("/=")),
value(Token::Arrow, tag("->"))),
value(Token::MatrixMultiply, char('#')),
value(Token::Power, char('^')),
value(Token::Plus, char('+')),
value(Token::Minus, char('-')),
value(Token::Multiply, char('*')),
value(Token::Divide, char('/')),
value(Token::Assign, char('=')),
))(input)
}
fn parse_delimiter(input: &str) -> ParseResult<Token> {
alt((
value(Token::LeftParen, char('(')),
value(Token::RightParen, char(')')),
value(Token::LeftBracket, char('[')),
value(Token::RightBracket, char(']')),
value(Token::LeftBrace, char('{')),
value(Token::RightBrace, char('}')),
value(Token::Comma, char(',')),
value(Token::Semicolon, char(';')),
value(Token::Colon, char(':')),
value(Token::Dot, char('.')),
))(input)
}
fn parse_token(input: &str) -> ParseResult<Token> {
preceded(
multispace0,
alt((
parse_comment,
parse_string,
parse_number,
parse_system_variable,
parse_identifier_or_keyword,
parse_operator,
parse_delimiter,
value(Token::Newline, char('\n')),
))
)(input)
}
/// Converts source text into a flat list of tokens terminated by
/// [`Token::EOF`].
///
/// Comments are recognised but dropped from the output. Input that no token
/// parser accepts is skipped one character at a time, so lexing always runs
/// to the end of the text.
///
/// # Errors
///
/// The current implementation never returns `Err`; unrecognised characters
/// are silently discarded rather than reported.
pub fn tokenize(input: &str) -> XdlResult<Vec<Token>> {
    let mut remaining = input;
    let mut tokens = Vec::new();

    while !remaining.is_empty() {
        match parse_token(remaining) {
            // Comments carry no meaning for later stages.
            Ok((rest, Token::Comment(_))) => remaining = rest,
            Ok((rest, token)) => {
                tokens.push(token);
                remaining = rest;
            }
            Err(_) => {
                // Skip exactly one character. Slicing at byte offset 1 would
                // panic on a multi-byte UTF-8 character (e.g. `€` or a
                // non-breaking space), so advance by the character's encoded
                // length instead. `remaining` is non-empty inside the loop.
                let skip = remaining.chars().next().map_or(0, char::len_utf8);
                remaining = &remaining[skip..];
            }
        }
    }

    tokens.push(Token::EOF);
    Ok(tokens)
}