bean_script/
lexer.rs

1use std::{cell::RefCell, mem};
2
3use crate::pat_check;
4
/// Single-character punctuation that `chunk` always emits as its own chunk.
///
/// `.` is the one exception: `chunk` keeps it attached when the text collected
/// so far parses as a number, so that literals such as `1.5` stay in one piece.
const SYMBOLS: [char; 7] = [':', '(', ')', '{', '}', ',', '.'];
6
/// Lexing state tracked by `chunk` while it walks the source one character at
/// a time.
///
/// The common traits are derived so the state can be copied, compared and
/// debug-printed; the enum is fieldless, so all of them are free.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Context {
    /// Ordinary code: whitespace, symbols and comment/string/name openers are
    /// recognised here.
    Program,
    /// Inside a `"`-delimited string literal; ends at an unescaped `"`.
    String,
    /// Inside a `<`…`>` name; ends at the first `>`.
    Name,
    /// Inside a `//` comment; ends at the next line break.
    LineComment,
    /// Inside a `/* … */` comment; ends at the closing `*/`.
    BlockComment,
}
14
15fn chunk(code: String) -> Vec<String> {
16    if code.contains('\r') {
17        println!("\x1b[33;1mwarn\x1b[0m: file contains CRLF line endings, which are not supported.")
18    }
19
20    let mut chunks: Vec<String> = Vec::new();
21    let current_chunk = RefCell::from(String::new());
22    let mut context = Context::Program;
23    let chars: Vec<char> = code.chars().collect();
24
25    let mut split = || {
26        if current_chunk.borrow().len() > 0 {
27            chunks.push(mem::replace(&mut current_chunk.borrow_mut(), String::new()));
28        }
29    };
30
31    let append = |char: &char| current_chunk.borrow_mut().push_str(&char.to_string());
32
33    for (i, char) in code.chars().enumerate() {
34        match context {
35            Context::Program => {
36                if char == ' ' || char == '\t' {
37                    split();
38                } else if char == '\n' {
39                    split();
40                    append(&char);
41                    split();
42                } else if char == '/' && chars[i + 1] == '/' {
43                    split();
44                    context = Context::LineComment;
45                } else if char == '/' && chars[i + 1] == '*' {
46                    split();
47                    context = Context::BlockComment;
48                } else if char == '"' {
49                    split();
50                    append(&char);
51                    context = Context::String;
52                } else if SYMBOLS.contains(&char)
53                    && !(char == '.' && RefCell::borrow(&current_chunk).parse::<f64>().is_ok())
54                {
55                    split();
56                    append(&char);
57                    split();
58                } else if char == '<' {
59                    split();
60                    append(&char);
61                    context = Context::Name;
62                } else {
63                    append(&char);
64                }
65            }
66            Context::String => {
67                if char == '"' && chars[i - 1] != '\\' {
68                    append(&char);
69                    split();
70                    context = Context::Program;
71                } else {
72                    append(&char);
73                }
74            }
75            Context::Name => {
76                if char == '>' {
77                    append(&char);
78                    split();
79                    context = Context::Program;
80                } else {
81                    append(&char);
82                }
83            }
84            Context::LineComment => {
85                if char == '\n' {
86                    split();
87                    append(&char);
88                    split();
89                    context = Context::Program;
90                }
91            }
92            Context::BlockComment => {
93                if chars[i - 1] == '*' && char == '/' {
94                    context = Context::Program;
95                } else if char == '\n' {
96                    split();
97                    append(&char);
98                    split();
99                }
100            }
101        }
102    }
103
104    split();
105    return chunks;
106}
107
/// A lexical token produced by [`tokenize`].
///
/// `Clone` and `PartialEq` are derived so token streams can be duplicated and
/// compared directly (for example with `assert_eq!`); `Eq` is not available
/// because of the `f64` payload of [`Token::Number`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Any chunk that matches no other rule; also a numeric chunk that
    /// directly follows an [`Token::Accessor`].
    FnName(String),
    /// `:`
    FnBody,
    /// `,`
    ArgSeparator,
    /// `(`
    ArgOpen,
    /// `)`
    ArgClose,
    /// `{`
    ScopeOpen,
    /// `}`
    ScopeClose,
    /// `.`
    Accessor,

    /// `true` or `false`.
    Boolean(bool),
    /// A chunk that parses as an `f64`.
    Number(f64),
    /// The text between the quotes of a `"…"` chunk.
    String(String),
    /// The text between the brackets of a `<…>` chunk.
    Name(String),
    /// `none`
    None,

    /// A line break in the source.
    LineBreak,
    /// End of input; always the last token returned by [`tokenize`].
    EOF,
}
128
129pub fn tokenize(code: String) -> Vec<Token> {
130    let chunks = chunk(code);
131    let mut tokens: Vec<Token> = Vec::new();
132
133    for chunk in chunks {
134        tokens.push(if chunk == "\n" {
135            Token::LineBreak
136        } else if let Ok(n) = chunk.parse::<f64>() {
137            if tokens
138                .last()
139                .is_some_and(|x| pat_check!(Token::Accessor = x))
140            {
141                Token::FnName(chunk)
142            } else {
143                Token::Number(n)
144            }
145        } else if chunk.starts_with('"') && chunk.ends_with('"') {
146            Token::String(String::from(chunk.trim_matches('"')))
147        } else if chunk == "true" || chunk == "false" {
148            Token::Boolean(chunk == "true")
149        } else if chunk == "none" {
150            Token::None
151        } else if chunk.starts_with('<') && chunk.ends_with('>') {
152            Token::Name(String::from(chunk.trim_matches(['<', '>'])))
153        } else if chunk == ":" {
154            Token::FnBody
155        } else if chunk == "," {
156            Token::ArgSeparator
157        } else if chunk == "(" {
158            Token::ArgOpen
159        } else if chunk == ")" {
160            Token::ArgClose
161        } else if chunk == "{" {
162            Token::ScopeOpen
163        } else if chunk == "}" {
164            Token::ScopeClose
165        } else if chunk == "." {
166            Token::Accessor
167        } else {
168            Token::FnName(chunk)
169        });
170    }
171
172    while let Some(Token::LineBreak) = tokens.last() {
173        tokens.pop();
174    }
175    tokens.push(Token::EOF);
176
177    return tokens;
178}