1use std::{cell::RefCell, mem};
2
3use crate::pat_check;
4
/// Punctuation characters the chunker emits as single-character chunks.
///
/// The one exception is a `.` that directly follows a numeric chunk, which the
/// chunker keeps as a decimal point instead.
const SYMBOLS: [char; 7] = [
    ':', // becomes Token::FnBody
    '(', // becomes Token::ArgOpen
    ')', // becomes Token::ArgClose
    '{', // becomes Token::ScopeOpen
    '}', // becomes Token::ScopeClose
    ',', // becomes Token::ArgSeparator
    '.', // becomes Token::Accessor
];
6
/// Scanner state of the chunker: which kind of lexical region the current
/// character belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Context {
    /// Ordinary program text; whitespace and symbols delimit chunks here.
    Program,
    /// Inside a `"`-delimited string literal.
    String,
    /// Inside a `<`...`>` delimited name.
    Name,
    /// Inside a `//` comment, which lasts until the next newline.
    LineComment,
    /// Inside a `/*` comment, which lasts until the closing `*/`.
    BlockComment,
}
14
15fn chunk(code: String) -> Vec<String> {
16 if code.contains('\r') {
17 println!("\x1b[33;1mwarn\x1b[0m: file contains CRLF line endings, which are not supported.")
18 }
19
20 let mut chunks: Vec<String> = Vec::new();
21 let current_chunk = RefCell::from(String::new());
22 let mut context = Context::Program;
23 let chars: Vec<char> = code.chars().collect();
24
25 let mut split = || {
26 if current_chunk.borrow().len() > 0 {
27 chunks.push(mem::replace(&mut current_chunk.borrow_mut(), String::new()));
28 }
29 };
30
31 let append = |char: &char| current_chunk.borrow_mut().push_str(&char.to_string());
32
33 for (i, char) in code.chars().enumerate() {
34 match context {
35 Context::Program => {
36 if char == ' ' || char == '\t' {
37 split();
38 } else if char == '\n' {
39 split();
40 append(&char);
41 split();
42 } else if char == '/' && chars[i + 1] == '/' {
43 split();
44 context = Context::LineComment;
45 } else if char == '/' && chars[i + 1] == '*' {
46 split();
47 context = Context::BlockComment;
48 } else if char == '"' {
49 split();
50 append(&char);
51 context = Context::String;
52 } else if SYMBOLS.contains(&char)
53 && !(char == '.' && RefCell::borrow(¤t_chunk).parse::<f64>().is_ok())
54 {
55 split();
56 append(&char);
57 split();
58 } else if char == '<' {
59 split();
60 append(&char);
61 context = Context::Name;
62 } else {
63 append(&char);
64 }
65 }
66 Context::String => {
67 if char == '"' && chars[i - 1] != '\\' {
68 append(&char);
69 split();
70 context = Context::Program;
71 } else {
72 append(&char);
73 }
74 }
75 Context::Name => {
76 if char == '>' {
77 append(&char);
78 split();
79 context = Context::Program;
80 } else {
81 append(&char);
82 }
83 }
84 Context::LineComment => {
85 if char == '\n' {
86 split();
87 append(&char);
88 split();
89 context = Context::Program;
90 }
91 }
92 Context::BlockComment => {
93 if chars[i - 1] == '*' && char == '/' {
94 context = Context::Program;
95 } else if char == '\n' {
96 split();
97 append(&char);
98 split();
99 }
100 }
101 }
102 }
103
104 split();
105 return chunks;
106}
107
/// A lexical token produced by `tokenize`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// A bare word: any chunk not recognised as something else, or a numeric
    /// chunk that directly follows an [`Token::Accessor`].
    FnName(String),
    /// The `:` symbol.
    FnBody,
    /// The `,` symbol.
    ArgSeparator,
    /// The `(` symbol.
    ArgOpen,
    /// The `)` symbol.
    ArgClose,
    /// The `{` symbol.
    ScopeOpen,
    /// The `}` symbol.
    ScopeClose,
    /// The `.` symbol.
    Accessor,

    /// The literal `true` or `false`.
    Boolean(bool),
    /// A chunk that parses as an `f64`.
    Number(f64),
    /// The contents of a `"`-delimited string, without the surrounding quotes.
    String(String),
    /// The contents of a `<`...`>` delimited name, without the angle brackets.
    Name(String),
    /// The literal `none`.
    None,

    /// A newline in the source.
    LineBreak,
    /// End of input; always the final token of a stream.
    EOF,
}
128
129pub fn tokenize(code: String) -> Vec<Token> {
130 let chunks = chunk(code);
131 let mut tokens: Vec<Token> = Vec::new();
132
133 for chunk in chunks {
134 tokens.push(if chunk == "\n" {
135 Token::LineBreak
136 } else if let Ok(n) = chunk.parse::<f64>() {
137 if tokens
138 .last()
139 .is_some_and(|x| pat_check!(Token::Accessor = x))
140 {
141 Token::FnName(chunk)
142 } else {
143 Token::Number(n)
144 }
145 } else if chunk.starts_with('"') && chunk.ends_with('"') {
146 Token::String(String::from(chunk.trim_matches('"')))
147 } else if chunk == "true" || chunk == "false" {
148 Token::Boolean(chunk == "true")
149 } else if chunk == "none" {
150 Token::None
151 } else if chunk.starts_with('<') && chunk.ends_with('>') {
152 Token::Name(String::from(chunk.trim_matches(['<', '>'])))
153 } else if chunk == ":" {
154 Token::FnBody
155 } else if chunk == "," {
156 Token::ArgSeparator
157 } else if chunk == "(" {
158 Token::ArgOpen
159 } else if chunk == ")" {
160 Token::ArgClose
161 } else if chunk == "{" {
162 Token::ScopeOpen
163 } else if chunk == "}" {
164 Token::ScopeClose
165 } else if chunk == "." {
166 Token::Accessor
167 } else {
168 Token::FnName(chunk)
169 });
170 }
171
172 while let Some(Token::LineBreak) = tokens.last() {
173 tokens.pop();
174 }
175 tokens.push(Token::EOF);
176
177 return tokens;
178}