1use nom::{
4 branch::alt,
5 bytes::complete::{tag, take_while, take_while1},
6 character::complete::{char, digit1, none_of},
7 combinator::{map, map_res, opt, recognize, value},
8 multi::many0,
9 sequence::{delimited, pair, preceded},
10 IResult,
11};
12
13fn ws0(input: &str) -> IResult<&str, &str> {
16 take_while(|c: char| c == ' ' || c == '\t' || c == '\r')(input)
17}
18use xdl_core::XdlResult;
19
20#[derive(Debug, Clone, PartialEq)]
21pub struct TokenSpan {
22 pub token: Token,
23 pub line: usize,
24 pub column: usize,
25}
26
/// Every kind of token the lexer can hand to the parser.
///
/// Some variants (for example `Procedure`, the comparison symbols' bitwise relatives and
/// the brace tokens' neighbours) are declared for the parser's benefit; see the individual
/// `parse_*` functions for what the lexer actually emits.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // ----- Literals -----
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal.
    Float(f64),
    /// String literal, without its surrounding quotes.
    String(String),

    // ----- Control-flow keywords -----
    If,
    Then,
    Else,
    Endif,
    For,
    Endfor,
    Foreach,
    While,
    Endwhile,
    Repeat,
    Until,
    Break,
    Continue,

    // ----- Routine keywords -----
    Function,
    Endfunction,
    /// Declared alongside `Pro`; the keyword table in this file maps both `PROCEDURE`
    /// and `PRO` to `Pro`.
    Procedure,
    Pro,
    Endpro,
    Return,
    Goto,

    // ----- Declaration and block keywords -----
    Common,
    CompileOpt,
    Begin,
    End,
    Case,
    Of,
    Endcase,
    Switch,
    Endswitch,

    // ----- Arithmetic operators -----
    Plus,
    Minus,
    Multiply,
    Divide,
    /// Emitted for the `MOD` keyword.
    Modulo,
    /// Emitted for `^`.
    Power,
    /// Emitted for `#`.
    MatrixMultiply,

    // ----- Assignment operators -----
    Assign,
    PlusAssign,
    MinusAssign,
    MultiplyAssign,
    DivideAssign,

    // ----- Comparison operators (emitted for `EQ`, `NE`, `LT`, `GT`, `LE`, `GE`) -----
    Equal,
    NotEqual,
    Less,
    Greater,
    LessEqual,
    GreaterEqual,

    // ----- Logical operators (emitted for `AND`, `OR`, `NOT`, `XOR`) -----
    And,
    Or,
    Not,
    Xor,

    // ----- Bitwise operators -----
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    BitwiseNot,

    // ----- Delimiters and punctuation -----
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    LeftBrace,
    RightBrace,
    Comma,
    Semicolon,
    Colon,
    DoubleColon,
    Dot,
    Arrow,
    QuestionMark,

    // ----- Names and miscellany -----
    /// A name that is not a reserved word, with its original spelling.
    Identifier(String),
    /// A `!name` system variable; the name is stored upper-cased and without the `!`.
    SystemVariable(String),
    /// A `name:` label, stored without the trailing colon.
    Label(String),
    /// Text of a `;` comment, without the leading `;`.
    Comment(String),
    /// A line feed.
    Newline,
    /// End-of-input marker appended by `tokenize`.
    EOF,
}
124
125type ParseResult<'a, T> = IResult<&'a str, T>;
126
/// Returns `true` when `c` may appear inside an identifier: an underscore or any
/// character for which `char::is_alphanumeric` holds.
fn is_identifier_char(c: char) -> bool {
    matches!(c, '_') || c.is_alphanumeric()
}
131
/// Returns `true` when `c` may begin an identifier: an underscore or any character
/// for which `char::is_alphabetic` holds (digits are excluded).
fn is_identifier_start(c: char) -> bool {
    matches!(c, '_') || c.is_alphabetic()
}
135
136fn parse_integer(input: &str) -> ParseResult<'_, Token> {
138 map_res(digit1, |s: &str| s.parse::<i64>().map(Token::Integer))(input)
139}
140
141fn parse_float(input: &str) -> ParseResult<'_, Token> {
143 map_res(
144 recognize(pair(digit1, pair(char('.'), opt(digit1)))),
145 |s: &str| s.parse::<f64>().map(Token::Float),
146 )(input)
147}
148
149fn parse_number(input: &str) -> ParseResult<'_, Token> {
151 alt((parse_float, parse_integer))(input)
152}
153
154fn parse_string(input: &str) -> ParseResult<'_, Token> {
156 alt((
157 delimited(
159 char('"'),
160 map(many0(none_of("\"")), |chars| {
161 Token::String(chars.into_iter().collect())
162 }),
163 char('"'),
164 ),
165 delimited(
167 char('\''),
168 map(many0(none_of("'")), |chars| {
169 Token::String(chars.into_iter().collect())
170 }),
171 char('\''),
172 ),
173 ))(input)
174}
175
176fn parse_label(input: &str) -> ParseResult<'_, Token> {
178 let (remaining, name) = recognize(pair(
179 take_while1(is_identifier_start),
180 take_while(is_identifier_char),
181 ))(input)?;
182
183 if remaining.starts_with(':') && !remaining.starts_with("::") {
185 let is_keyword = matches!(
187 name.to_uppercase().as_str(),
188 "IF" | "THEN" | "ELSE" | "ENDIF" | "FOR" | "ENDFOR" | "FOREACH" | "WHILE"
189 | "ENDWHILE" | "REPEAT" | "UNTIL" | "BREAK" | "CONTINUE" | "FUNCTION"
190 | "ENDFUNCTION" | "PROCEDURE" | "PRO" | "ENDPRO" | "RETURN" | "GOTO"
191 | "COMMON" | "COMPILE_OPT" | "BEGIN" | "END" | "CASE" | "OF" | "ENDCASE"
192 | "SWITCH" | "ENDSWITCH" | "MOD" | "EQ" | "NE" | "LT" | "GT" | "LE"
193 | "GE" | "AND" | "OR" | "NOT" | "XOR"
194 );
195
196 if is_keyword {
197 Err(nom::Err::Error(nom::error::Error::new(
199 input,
200 nom::error::ErrorKind::Tag,
201 )))
202 } else {
203 let remaining = &remaining[1..];
205 Ok((remaining, Token::Label(name.to_string())))
206 }
207 } else {
208 Err(nom::Err::Error(nom::error::Error::new(
210 input,
211 nom::error::ErrorKind::Tag,
212 )))
213 }
214}
215
216fn parse_identifier_or_keyword(input: &str) -> ParseResult<'_, Token> {
218 let (input, name) = recognize(pair(
219 take_while1(is_identifier_start),
220 take_while(is_identifier_char),
221 ))(input)?;
222
223 let token = match name.to_uppercase().as_str() {
224 "IF" => Token::If,
226 "THEN" => Token::Then,
227 "ELSE" => Token::Else,
228 "ENDIF" => Token::Endif,
229 "FOR" => Token::For,
230 "ENDFOR" => Token::Endfor,
231 "FOREACH" => Token::Foreach,
232 "WHILE" => Token::While,
233 "ENDWHILE" => Token::Endwhile,
234 "REPEAT" => Token::Repeat,
235 "UNTIL" => Token::Until,
236 "BREAK" => Token::Break,
237 "CONTINUE" => Token::Continue,
238
239 "FUNCTION" => Token::Function,
241 "ENDFUNCTION" => Token::Endfunction,
242 "PROCEDURE" | "PRO" => Token::Pro,
243 "ENDPRO" => Token::Endpro,
244 "RETURN" => Token::Return,
245 "GOTO" => Token::Goto,
246
247 "COMMON" => Token::Common,
249 "COMPILE_OPT" => Token::CompileOpt,
250 "BEGIN" => Token::Begin,
251 "END" => Token::End,
252 "CASE" => Token::Case,
253 "OF" => Token::Of,
254 "ENDCASE" => Token::Endcase,
255 "SWITCH" => Token::Switch,
256 "ENDSWITCH" => Token::Endswitch,
257
258 "MOD" => Token::Modulo,
260 "EQ" => Token::Equal,
261 "NE" => Token::NotEqual,
262 "LT" => Token::Less,
263 "GT" => Token::Greater,
264 "LE" => Token::LessEqual,
265 "GE" => Token::GreaterEqual,
266 "AND" => Token::And,
267 "OR" => Token::Or,
268 "NOT" => Token::Not,
269 "XOR" => Token::Xor,
270
271 _ => Token::Identifier(name.to_string()),
273 };
274
275 Ok((input, token))
276}
277
278fn parse_system_variable(input: &str) -> ParseResult<'_, Token> {
280 preceded(
281 char('!'),
282 map(take_while1(is_identifier_char), |s: &str| {
283 Token::SystemVariable(s.to_uppercase())
284 }),
285 )(input)
286}
287
288fn parse_comment(input: &str) -> ParseResult<'_, Token> {
290 preceded(
291 char(';'),
292 map(take_while(|c| c != '\n'), |s: &str| {
293 Token::Comment(s.to_string())
294 }),
295 )(input)
296}
297
298fn parse_operator(input: &str) -> ParseResult<'_, Token> {
300 alt((
301 value(Token::PlusAssign, tag("+=")),
302 value(Token::MinusAssign, tag("-=")),
303 value(Token::MultiplyAssign, tag("*=")),
304 value(Token::DivideAssign, tag("/=")),
305 value(Token::Arrow, tag("->")),
306 value(Token::MatrixMultiply, char('#')),
307 value(Token::Power, char('^')),
308 value(Token::Plus, char('+')),
309 value(Token::Minus, char('-')),
310 value(Token::Multiply, char('*')),
311 value(Token::Divide, char('/')),
312 value(Token::Assign, char('=')),
313 value(Token::QuestionMark, char('?')),
314 ))(input)
315}
316
317fn parse_delimiter(input: &str) -> ParseResult<'_, Token> {
319 alt((
320 value(Token::LeftParen, char('(')),
321 value(Token::RightParen, char(')')),
322 value(Token::LeftBracket, char('[')),
323 value(Token::RightBracket, char(']')),
324 value(Token::LeftBrace, char('{')),
325 value(Token::RightBrace, char('}')),
326 value(Token::Comma, char(',')),
327 value(Token::Semicolon, char(';')),
328 value(Token::DoubleColon, tag("::")),
329 value(Token::Colon, char(':')),
330 value(Token::Dot, char('.')),
331 ))(input)
332}
333
334fn parse_token(input: &str) -> ParseResult<'_, Token> {
336 preceded(
337 ws0, alt((
339 parse_comment,
340 parse_string,
341 parse_number,
342 parse_system_variable,
343 parse_label, parse_identifier_or_keyword,
345 parse_operator,
346 parse_delimiter,
347 value(Token::Newline, char('\n')),
348 )),
349 )(input)
350}
351
352pub fn tokenize(input: &str) -> XdlResult<Vec<Token>> {
354 let mut remaining = input;
355 let mut tokens = Vec::new();
356
357 while !remaining.is_empty() {
358 if remaining.starts_with('$') {
360 let after_dollar = &remaining[1..];
361 let trimmed = after_dollar.trim_start_matches([' ', '\t', '\r']);
363 if trimmed.is_empty() || trimmed.starts_with('\n') {
365 if let Some(stripped) = trimmed.strip_prefix('\n') {
366 remaining = stripped;
367 } else {
368 remaining = trimmed;
369 }
370 continue;
371 }
372 remaining = after_dollar;
374 continue;
375 }
376
377 match parse_token(remaining) {
378 Ok((rest, token)) => {
379 match token {
381 Token::Comment(_) => {} _ => tokens.push(token),
383 }
384 remaining = rest;
385 }
386 Err(_) => {
387 remaining = &remaining[1..];
389 }
390 }
391 }
392
393 tokens.push(Token::EOF);
394 Ok(tokens)
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the lexer on `source`, panicking if it reports an error.
    fn lex(source: &str) -> Vec<Token> {
        tokenize(source).unwrap()
    }

    /// Shorthand for an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    /// A plain assignment yields identifier, `=`, integer, then the end marker.
    #[test]
    fn test_tokenize_simple() {
        let expected = vec![ident("x"), Token::Assign, Token::Integer(42), Token::EOF];
        assert_eq!(lex("x = 42"), expected);
    }

    /// A double-quoted literal keeps its inner text, commas and all.
    #[test]
    fn test_tokenize_string() {
        let expected = vec![
            ident("print"),
            Token::Comma,
            Token::String("Hello, World!".to_string()),
            Token::EOF,
        ];
        assert_eq!(lex(r#"print, "Hello, World!""#), expected);
    }

    /// Lower-case keywords and word operators are recognised case-insensitively.
    #[test]
    fn test_tokenize_keywords() {
        let expected = vec![
            Token::If,
            ident("x"),
            Token::Equal,
            Token::Integer(42),
            Token::Then,
            Token::EOF,
        ];
        assert_eq!(lex("if x eq 42 then"), expected);
    }

    /// `!PI` becomes a system-variable token without the leading `!`.
    #[test]
    fn test_tokenize_system_variable() {
        let expected = vec![Token::SystemVariable("PI".to_string()), Token::EOF];
        assert_eq!(lex("!PI"), expected);
    }
}