use nom::{
branch::alt,
bytes::complete::{tag, take_while, take_while1},
character::complete::{char, digit1, none_of},
combinator::{map, map_res, opt, recognize, value},
multi::many0,
sequence::{delimited, pair, preceded},
IResult,
};
/// Consumes zero or more leading spaces, tabs and carriage returns.
///
/// Line feeds are not consumed here, so they remain available to the
/// caller. Returns the unconsumed input together with the skipped slice;
/// this parser never fails, it simply matches the empty string when there
/// is nothing to skip.
fn ws0(input: &str) -> IResult<&str, &str> {
    let is_inline_space = |c: char| matches!(c, ' ' | '\t' | '\r');
    take_while(is_inline_space)(input)
}
use xdl_core::XdlResult;
/// A [`Token`] paired with a line and column.
///
/// NOTE(review): nothing in the visible part of this file constructs a
/// `TokenSpan`, so whether `line`/`column` are 0- or 1-based (and whether
/// `column` counts bytes or characters) cannot be determined from here.
#[derive(Debug, Clone, PartialEq)]
pub struct TokenSpan {
/// The token itself.
pub token: Token,
/// Line associated with the token.
pub line: usize,
/// Column associated with the token.
pub column: usize,
}
/// A lexical token.
///
/// Keyword and word-operator variants are matched case-insensitively by
/// `parse_identifier_or_keyword`; the remaining variants come from the
/// dedicated literal, operator and delimiter parsers in this file.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
// ---- Literals ----
/// Integer literal parsed from a run of decimal digits.
Integer(i64),
/// Floating-point literal: digits, a `.`, then optional digits.
Float(f64),
/// Text between a pair of double or single quotes, stored without the
/// quotes. No escape sequences are interpreted.
String(String),
// ---- Keywords ----
/// `IF`
If,
/// `THEN`
Then,
/// `ELSE`
Else,
/// `ENDIF`
Endif,
/// `FOR`
For,
/// `ENDFOR`
Endfor,
/// `FOREACH`
Foreach,
/// `WHILE`
While,
/// `ENDWHILE`
Endwhile,
/// `REPEAT`
Repeat,
/// `UNTIL`
Until,
/// `BREAK`
Break,
/// `CONTINUE`
Continue,
/// `FUNCTION`
Function,
/// `ENDFUNCTION`
Endfunction,
/// Not emitted by the keyword parser visible in this file: both
/// `PROCEDURE` and `PRO` are mapped to [`Token::Pro`].
Procedure,
/// `PRO` or `PROCEDURE`
Pro,
/// `ENDPRO`
Endpro,
/// `RETURN`
Return,
/// `GOTO`
Goto,
/// `COMMON`
Common,
/// `COMPILE_OPT`
CompileOpt,
/// `BEGIN`
Begin,
/// `END`
End,
/// `CASE`
Case,
/// `OF`
Of,
/// `ENDCASE`
Endcase,
/// `SWITCH`
Switch,
/// `ENDSWITCH`
Endswitch,
// ---- Arithmetic operators ----
// `+ - * /` map to the first four; `Modulo` comes from the word `MOD`,
// `Power` from `^` and `MatrixMultiply` from `#`.
Plus, Minus, Multiply, Divide, Modulo, Power, MatrixMultiply,
// ---- Assignment: `=`, `+=`, `-=`, `*=`, `/=` ----
Assign, PlusAssign, MinusAssign, MultiplyAssign, DivideAssign,
// ---- Comparisons, produced from the words EQ, NE, LT, GT, LE, GE ----
Equal, NotEqual, Less, Greater, LessEqual, GreaterEqual,
// ---- Logical operators, produced from the words AND, OR, NOT, XOR ----
And, Or, Not, Xor,
// ---- Bitwise operators ----
// NOTE(review): no parser visible in this file produces these variants.
BitwiseAnd, BitwiseOr, BitwiseXor, BitwiseNot,
// ---- Delimiters and punctuation; `Arrow` is `->`, `DoubleColon` is `::` ----
LeftParen, RightParen, LeftBracket, RightBracket, LeftBrace, RightBrace, Comma, Semicolon, Colon, DoubleColon, Dot, Arrow, QuestionMark,
/// Any word that is not a keyword or word operator, with its original
/// spelling preserved.
Identifier(String),
// `SystemVariable` holds the upper-cased name following a `!`;
// `Label` holds the name preceding a single `:`;
// `Comment` holds the text after `;` up to (not including) the newline;
// `Newline` stands for a single `\n`.
SystemVariable(String), Label(String), Comment(String), Newline,
/// End-of-input marker appended by `tokenize`.
EOF,
}
/// Result type shared by the token parsers in this file: a nom `IResult`
/// over `&str` input that yields a `T` on success.
type ParseResult<'a, T> = IResult<&'a str, T>;
/// Returns `true` if `c` may appear inside an identifier: an underscore or
/// any Unicode alphanumeric character.
fn is_identifier_char(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
/// Returns `true` if `c` may begin an identifier: an underscore or any
/// Unicode alphabetic character (digits are excluded).
fn is_identifier_start(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphabetic(),
    }
}
/// Parses a run of one or more decimal digits into [`Token::Integer`].
///
/// Fails when the input does not start with a digit or when the digits do
/// not fit in an `i64`.
fn parse_integer(input: &str) -> ParseResult<'_, Token> {
    let to_token = |digits: &str| digits.parse::<i64>().map(Token::Integer);
    map_res(digit1, to_token)(input)
}
/// Parses a floating-point literal into [`Token::Float`].
///
/// The accepted shape is `<digits> '.' [<digits>]`: the integer part and
/// the dot are mandatory, the fractional digits are optional.
fn parse_float(input: &str) -> ParseResult<'_, Token> {
    // Recognise the whole literal as one slice so it can be handed to
    // `str::parse` in a single step.
    let fraction = pair(char('.'), opt(digit1));
    let literal = recognize(pair(digit1, fraction));
    map_res(literal, |text: &str| text.parse::<f64>().map(Token::Float))(input)
}
/// Parses a numeric literal, yielding [`Token::Float`] or
/// [`Token::Integer`].
///
/// The float parser is tried first; the integer parser is only consulted
/// when the float parser does not match.
fn parse_number(input: &str) -> ParseResult<'_, Token> {
    let mut number = alt((parse_float, parse_integer));
    number(input)
}
/// Parses a string literal delimited by either double or single quotes.
///
/// The body is every character up to the next matching quote; no escape
/// sequences are interpreted, so a string cannot contain its own delimiter.
/// The resulting [`Token::String`] holds the body without the quotes.
fn parse_string(input: &str) -> ParseResult<'_, Token> {
    // Shared by both quote styles: gather the body characters into a token.
    let into_token = |body: Vec<char>| Token::String(body.into_iter().collect());

    let double_quoted = delimited(
        char('"'),
        map(many0(none_of("\"")), into_token),
        char('"'),
    );
    let single_quoted = delimited(
        char('\''),
        map(many0(none_of("'")), into_token),
        char('\''),
    );

    alt((double_quoted, single_quoted))(input)
}
fn parse_label(input: &str) -> ParseResult<'_, Token> {
let (remaining, name) = recognize(pair(
take_while1(is_identifier_start),
take_while(is_identifier_char),
))(input)?;
if remaining.starts_with(':') && !remaining.starts_with("::") {
let is_keyword = matches!(
name.to_uppercase().as_str(),
"IF" | "THEN" | "ELSE" | "ENDIF" | "FOR" | "ENDFOR" | "FOREACH" | "WHILE"
| "ENDWHILE" | "REPEAT" | "UNTIL" | "BREAK" | "CONTINUE" | "FUNCTION"
| "ENDFUNCTION" | "PROCEDURE" | "PRO" | "ENDPRO" | "RETURN" | "GOTO"
| "COMMON" | "COMPILE_OPT" | "BEGIN" | "END" | "CASE" | "OF" | "ENDCASE"
| "SWITCH" | "ENDSWITCH" | "MOD" | "EQ" | "NE" | "LT" | "GT" | "LE"
| "GE" | "AND" | "OR" | "NOT" | "XOR"
);
if is_keyword {
Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)))
} else {
let remaining = &remaining[1..];
Ok((remaining, Token::Label(name.to_string())))
}
} else {
Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)))
}
}
/// Parses a word and classifies it as a keyword, a word operator or a
/// plain identifier.
///
/// Keywords and word operators (`MOD`, `EQ`, `AND`, ...) are matched
/// case-insensitively. `PROCEDURE` and `PRO` both yield [`Token::Pro`].
/// Any other word becomes [`Token::Identifier`] with its original spelling.
///
/// The comparison uses ASCII-only upper-casing. Full Unicode upper-casing
/// maps characters such as `ı` (dotless i) or `ſ` (long s) onto ASCII
/// letters, which would turn a non-ASCII identifier like `ıf` into the
/// keyword `IF`.
fn parse_identifier_or_keyword(input: &str) -> ParseResult<'_, Token> {
    let (input, name) = recognize(pair(
        take_while1(is_identifier_start),
        take_while(is_identifier_char),
    ))(input)?;

    let token = match name.to_ascii_uppercase().as_str() {
        "IF" => Token::If,
        "THEN" => Token::Then,
        "ELSE" => Token::Else,
        "ENDIF" => Token::Endif,
        "FOR" => Token::For,
        "ENDFOR" => Token::Endfor,
        "FOREACH" => Token::Foreach,
        "WHILE" => Token::While,
        "ENDWHILE" => Token::Endwhile,
        "REPEAT" => Token::Repeat,
        "UNTIL" => Token::Until,
        "BREAK" => Token::Break,
        "CONTINUE" => Token::Continue,
        "FUNCTION" => Token::Function,
        "ENDFUNCTION" => Token::Endfunction,
        "PROCEDURE" | "PRO" => Token::Pro,
        "ENDPRO" => Token::Endpro,
        "RETURN" => Token::Return,
        "GOTO" => Token::Goto,
        "COMMON" => Token::Common,
        "COMPILE_OPT" => Token::CompileOpt,
        "BEGIN" => Token::Begin,
        "END" => Token::End,
        "CASE" => Token::Case,
        "OF" => Token::Of,
        "ENDCASE" => Token::Endcase,
        "SWITCH" => Token::Switch,
        "ENDSWITCH" => Token::Endswitch,
        // Word operators.
        "MOD" => Token::Modulo,
        "EQ" => Token::Equal,
        "NE" => Token::NotEqual,
        "LT" => Token::Less,
        "GT" => Token::Greater,
        "LE" => Token::LessEqual,
        "GE" => Token::GreaterEqual,
        "AND" => Token::And,
        "OR" => Token::Or,
        "NOT" => Token::Not,
        "XOR" => Token::Xor,
        _ => Token::Identifier(name.to_string()),
    };
    Ok((input, token))
}
/// Parses a system variable reference such as `!PI`.
///
/// A `!` must be followed by at least one identifier character. The
/// resulting [`Token::SystemVariable`] holds the name upper-cased and
/// without the leading `!`.
fn parse_system_variable(input: &str) -> ParseResult<'_, Token> {
    let (after_bang, _) = char('!')(input)?;
    let (rest, name) = take_while1(is_identifier_char)(after_bang)?;
    Ok((rest, Token::SystemVariable(name.to_uppercase())))
}
/// Parses a `;` comment running to the end of the line.
///
/// The resulting [`Token::Comment`] holds everything after the `;` up to,
/// but not including, the next `\n` (or the end of input). The newline
/// itself is left unconsumed.
fn parse_comment(input: &str) -> ParseResult<'_, Token> {
    let (after_marker, _) = char(';')(input)?;
    let (rest, body) = take_while(|c: char| c != '\n')(after_marker)?;
    Ok((rest, Token::Comment(body.to_string())))
}
/// Parses a symbolic operator.
///
/// Two-character operators are tried before the single-character ones so
/// that, for example, `+=` is not split into `+` followed by `=`.
fn parse_operator(input: &str) -> ParseResult<'_, Token> {
    let compound = alt((
        value(Token::PlusAssign, tag("+=")),
        value(Token::MinusAssign, tag("-=")),
        value(Token::MultiplyAssign, tag("*=")),
        value(Token::DivideAssign, tag("/=")),
        value(Token::Arrow, tag("->")),
    ));
    let single = alt((
        value(Token::MatrixMultiply, char('#')),
        value(Token::Power, char('^')),
        value(Token::Plus, char('+')),
        value(Token::Minus, char('-')),
        value(Token::Multiply, char('*')),
        value(Token::Divide, char('/')),
        value(Token::Assign, char('=')),
        value(Token::QuestionMark, char('?')),
    ));
    alt((compound, single))(input)
}
/// Parses a bracket or punctuation token.
///
/// `::` is tried before `:` so that a double colon is not split into two
/// single colons.
fn parse_delimiter(input: &str) -> ParseResult<'_, Token> {
    let bracket = alt((
        value(Token::LeftParen, char('(')),
        value(Token::RightParen, char(')')),
        value(Token::LeftBracket, char('[')),
        value(Token::RightBracket, char(']')),
        value(Token::LeftBrace, char('{')),
        value(Token::RightBrace, char('}')),
    ));
    let punctuation = alt((
        value(Token::Comma, char(',')),
        value(Token::Semicolon, char(';')),
        value(Token::DoubleColon, tag("::")),
        value(Token::Colon, char(':')),
        value(Token::Dot, char('.')),
    ));
    alt((bracket, punctuation))(input)
}
/// Parses a single token after skipping leading inline whitespace.
///
/// Alternatives are tried in a fixed order: comment, string, number,
/// system variable, label, identifier/keyword, operator, delimiter and
/// finally a bare `\n` as [`Token::Newline`]. Because comments are tried
/// first, a `;` is always consumed as the start of a comment.
fn parse_token(input: &str) -> ParseResult<'_, Token> {
    let comment_or_literal = alt((
        parse_comment,
        parse_string,
        parse_number,
        parse_system_variable,
    ));
    // Labels must be tried before plain identifiers, otherwise `name:`
    // would be consumed as an identifier followed by a colon.
    let word = alt((parse_label, parse_identifier_or_keyword));
    let symbol = alt((
        parse_operator,
        parse_delimiter,
        value(Token::Newline, char('\n')),
    ));

    preceded(ws0, alt((comment_or_literal, word, symbol)))(input)
}
/// Splits `input` into a flat list of tokens terminated by [`Token::EOF`].
///
/// Behaviour worth knowing:
///
/// * A `$` followed only by spaces, tabs or carriage returns and then a
///   newline (or the end of input) acts as a line continuation: the `$`,
///   the trailing whitespace and the newline are all dropped. Any other
///   `$` is dropped on its own.
/// * Comments are recognised but not included in the output.
/// * Text that no token parser accepts is skipped one character at a time;
///   this function currently always returns `Ok`.
pub fn tokenize(input: &str) -> XdlResult<Vec<Token>> {
    let mut remaining = input;
    let mut tokens = Vec::new();

    while !remaining.is_empty() {
        if let Some(after_dollar) = remaining.strip_prefix('$') {
            let trimmed = after_dollar.trim_start_matches([' ', '\t', '\r']);
            remaining = if trimmed.is_empty() {
                // `$` was the last meaningful thing in the input.
                trimmed
            } else if let Some(next_line) = trimmed.strip_prefix('\n') {
                // Line continuation: swallow the newline as well.
                next_line
            } else {
                // Stray `$` in the middle of a line: drop just the `$`.
                after_dollar
            };
            continue;
        }

        match parse_token(remaining) {
            Ok((rest, Token::Comment(_))) => remaining = rest,
            Ok((rest, token)) => {
                tokens.push(token);
                remaining = rest;
            }
            Err(_) => {
                // Skip exactly one character. Slicing at byte offset 1
                // would panic when the offending character is multi-byte
                // (e.g. `€`), so advance by a whole `char` instead.
                let mut chars = remaining.chars();
                chars.next();
                remaining = chars.as_str();
            }
        }
    }

    tokens.push(Token::EOF);
    Ok(tokens)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    /// A plain assignment yields identifier, `=`, integer, EOF.
    #[test]
    fn test_tokenize_simple() {
        let expected = vec![ident("x"), Token::Assign, Token::Integer(42), Token::EOF];
        let actual = tokenize("x = 42").unwrap();
        assert_eq!(actual, expected);
    }

    /// A double-quoted string keeps its inner text, including the comma.
    #[test]
    fn test_tokenize_string() {
        let source = r#"print, "Hello, World!""#;
        let expected = vec![
            ident("print"),
            Token::Comma,
            Token::String("Hello, World!".to_string()),
            Token::EOF,
        ];
        let actual = tokenize(source).unwrap();
        assert_eq!(actual, expected);
    }

    /// Lower-case keywords and word operators are recognised.
    #[test]
    fn test_tokenize_keywords() {
        let expected = vec![
            Token::If,
            ident("x"),
            Token::Equal,
            Token::Integer(42),
            Token::Then,
            Token::EOF,
        ];
        let actual = tokenize("if x eq 42 then").unwrap();
        assert_eq!(actual, expected);
    }

    /// `!PI` becomes a system variable named `PI`.
    #[test]
    fn test_tokenize_system_variable() {
        let expected = vec![Token::SystemVariable("PI".to_string()), Token::EOF];
        let actual = tokenize("!PI").unwrap();
        assert_eq!(actual, expected);
    }
}