1use std::cell::RefCell;
2
3use self::types::*;
4
5pub mod types;
6
7#[derive(Debug, Clone, PartialOrd, PartialEq)]
8pub struct Token<'a> {
9 pub span: Span<'a>,
10 pub token_type: TokenType,
11}
12
/// A slice of the tokenized input together with the byte offsets it was taken from.
///
/// The derived comparisons are lexicographic over `text`, then `start`, then `end`, so the
/// field order is significant.
#[derive(Debug, Clone, PartialOrd, PartialEq)]
pub struct Span<'a> {
    /// The covered text, borrowed from the input string.
    pub text: &'a str,
    /// Byte offset of the first covered byte.
    pub start: usize,
    /// Byte offset one past the last covered byte.
    pub end: usize,
}
19
20impl<'a> Span<'a> {
21 pub fn len(&self) -> usize {
22 self.end - self.start
23 }
24
25 pub fn is_empty(&self) -> bool {
26 self.len() == 0
27 }
28}
29
/// Read position used while tokenizing.
///
/// The position is a byte offset into the text being tokenized. It lives in a `RefCell` so the
/// parsing methods can advance it through a shared `&Cursor`.
#[derive(Debug)]
pub struct Cursor {
    /// Current byte offset into the input.
    pos: RefCell<usize>,
}
34
35pub fn tokenize(text: &str) -> Vec<Token> {
36 let cursor = Cursor {
37 pos: RefCell::new(0),
38 };
39 let mut token_vec = Vec::new();
40
41 while !cursor.eos(text) {
42 cursor.skip_whitespace(text);
43 token_vec.push(parse_token(&cursor, text))
44 }
45
46 token_vec
47}
48
49impl Cursor {
50 fn get_pos(&self) -> usize {
51 *self.pos.borrow()
52 }
53
54 fn set_pos(&self, pos: usize) -> usize {
55 *self.pos.borrow_mut() = pos;
56 self.get_pos()
57 }
58
59 fn skip_whitespace(&self, text: &str) {
60 while self.parse(text, &[" "]).is_some() {}
61 }
62
63 fn parse_number<'a>(&self, text: &'a str) -> Option<Span<'a>> {
68 if let Some(_number) = self.peek(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
69 {
70 let start = self.get_pos();
71
72 while let Some(_number) =
73 self.parse(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
74 {}
75
76 if let Some(_decimal_point) = self.parse(text, &["."]) {
77 while let Some(_number) =
78 self.parse(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
79 {
80 }
81
82 Some(Span {
83 start,
84 end: self.get_pos(),
85 text: &text[start..self.get_pos()],
86 })
87 } else {
88 Some(Span {
89 start,
90 end: self.get_pos(),
91 text: &text[start..self.get_pos()],
92 })
93 }
94 } else if let (Some(_decimal_point), Some(_number)) = (
95 self.peek(text, &["."]),
96 self.peek(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]),
97 ) {
98 let _decimal_point = self.parse(text, &["."]).unwrap();
99 let start = self.get_pos();
100
101 while let Some(_number) =
102 self.parse(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
103 {}
104
105 let end = self.get_pos();
106
107 Some(Span {
108 start,
109 end,
110 text: &text[start..end],
111 })
112 } else {
113 None
114 }
115 }
116
117 fn parse_text<'a>(&self, text: &'a str) -> Option<Span<'a>> {
118 let start = self.get_pos();
119 if let Some(_double_quotes) = self.peek(text, &["\""]) {
120 if let Some(next_double_quotes) = self.find_next(text, &["\""], 1) {
121 self.set_pos(next_double_quotes + 1);
122 Some(Span {
123 start: start + 1,
124 end: next_double_quotes,
125 text: &text[start + 1..next_double_quotes],
126 })
127 } else {
128 None
129 }
130 } else {
131 None
132 }
133 }
134
135 fn parse_symbol<'a>(&self, text: &'a str) -> Option<Span<'a>> {
136 let start = self.get_pos();
137 if let Some(c) = text[self.get_pos()..].chars().next() {
138 self.set_pos(self.get_pos() + c.len_utf8());
139
140 Some(Span {
141 start,
142 end: self.get_pos(),
143 text: &text[start..self.get_pos()],
144 })
145 } else {
146 None
147 }
148 }
149
150 fn find_next(&self, text: &str, patterns: &[&str], offset: usize) -> Option<usize> {
151 text.char_indices()
152 .skip(self.get_pos() + offset)
153 .map(|(offset, _)| (offset, &text[offset..]))
154 .find(|(_offset, substr)| patterns.iter().any(|pattern| substr.starts_with(*pattern)))
155 .map(|(offset, _)| offset)
156 }
157
158 fn peek<'a>(&self, text: &'a str, patterns: &[&str]) -> Option<Span<'a>> {
159 self.peek_n(text, patterns, 0)
160 }
161
162 fn peek_n<'a>(&self, text: &'a str, patterns: &[&str], offset: usize) -> Option<Span<'a>> {
163 patterns
164 .iter()
165 .find(|pattern| text[self.get_pos() + offset..].starts_with(*pattern))
166 .map(|pattern| Span {
167 start: self.get_pos() + offset,
168 end: self.get_pos() + offset + pattern.len(),
169 text: &text[self.get_pos() + offset..self.get_pos() + offset + pattern.len()],
170 })
171 }
172
173 fn parse<'a>(&self, text: &'a str, patterns: &[&str]) -> Option<Span<'a>> {
174 let pattern = patterns
175 .iter()
176 .find(|pattern| text[self.get_pos()..].starts_with(*pattern))
177 .map(|pattern| {
178 (
179 self.get_pos() + pattern.len(),
180 Span {
181 start: self.get_pos(),
182 end: self.get_pos() + pattern.len(),
183 text: &text[self.get_pos()..self.get_pos() + pattern.len()],
184 },
185 )
186 });
187
188 if pattern.is_some() {
189 self.set_pos(pattern.as_ref().unwrap().0);
190 }
191
192 pattern.map(|tuple| tuple.1)
193 }
194
195 fn parse_pattern<'a, T: Clone>(
196 &self,
197 text: &'a str,
198 patterns: &[(&[&str], T)],
199 ) -> Option<(Span<'a>, T)> {
200 let token = patterns.iter().find_map(|patterns| {
201 patterns
202 .0
203 .iter()
204 .find(|pattern| text[self.get_pos()..].starts_with(*pattern))
205 .map(|pattern| {
206 (
207 Span {
208 start: self.get_pos(),
209 end: self.get_pos() + pattern.len(),
210 text: &text[self.get_pos()..self.get_pos() + pattern.len()],
211 },
212 patterns.1.clone(),
213 )
214 })
215 });
216
217 if token.is_some() {
218 self.set_pos(token.as_ref().unwrap().0.end);
219 }
220
221 token
222 }
223
224 fn eos(&self, text: &str) -> bool {
225 self.get_pos() >= text.len()
226 }
227}
228
229fn parse_token<'a>(cursor: &Cursor, text: &'a str) -> Token<'a> {
230 if let Some(span) = cursor.parse(text, &["/"]) {
231 Token {
232 span,
233 token_type: TokenType::Division,
234 }
235 } else if let Some(span) = cursor.parse(text, &["_"]) {
236 Token {
237 span,
238 token_type: TokenType::Underscorce,
239 }
240 } else if let Some(span) = cursor.parse(text, &["^"]) {
241 Token {
242 span,
243 token_type: TokenType::Hat,
244 }
245 } else if let Some(span) = cursor.parse_number(text) {
246 Token {
247 span,
248 token_type: TokenType::Number,
249 }
250 } else if let Some(span) = cursor.parse_text(text) {
251 Token {
252 span,
253 token_type: TokenType::Text,
254 }
255 } else if let Some((span, token_type)) = cursor.parse_pattern(text, UNARY_OPERATORS) {
256 Token { span, token_type }
257 } else if let Some((span, token_type)) = cursor.parse_pattern(text, BINARY_OPERATORS) {
258 Token { span, token_type }
259 } else if let Some(arrow) = cursor.parse_pattern(text, ARROWS) {
260 Token {
261 span: arrow.0,
262 token_type: arrow.1,
263 }
264 } else if let Some(operation) = cursor.parse_pattern(text, OPERATION) {
265 Token {
266 span: operation.0,
267 token_type: operation.1,
268 }
269 } else if let Some(greek) = cursor.parse_pattern(text, GREEK) {
270 Token {
271 span: greek.0,
272 token_type: greek.1,
273 }
274 } else if let Some(misc) = cursor.parse_pattern(text, MISC) {
275 Token {
276 span: misc.0,
277 token_type: misc.1,
278 }
279 } else if let Some(relational) = cursor.parse_pattern(text, RELATIONAL) {
280 Token {
281 span: relational.0,
282 token_type: relational.1,
283 }
284 } else if let Some(logical) = cursor.parse_pattern(text, LOGICAL) {
285 Token {
286 span: logical.0,
287 token_type: logical.1,
288 }
289 } else if let Some(function) = cursor.parse_pattern(text, FUNCTION) {
290 Token {
291 span: function.0,
292 token_type: function.1,
293 }
294 } else if let Some(l_brace) = cursor.parse_pattern(text, LBRACES) {
295 Token {
296 span: l_brace.0,
297 token_type: l_brace.1,
298 }
299 } else if let Some(l_brace) = cursor.parse_pattern(text, RBRACES) {
300 Token {
301 span: l_brace.0,
302 token_type: l_brace.1,
303 }
304 } else if let Some(span) = cursor.parse_symbol(text) {
305 Token {
306 span,
307 token_type: TokenType::Symbol,
308 }
309 } else {
310 Token {
311 span: Span {
312 text: "",
313 start: 0,
314 end: 0,
315 },
316 token_type: TokenType::None,
317 }
318 }
319}