polymath_rs/tokens/
mod.rs

1use std::cell::RefCell;
2
3use self::types::*;
4
5pub mod types;
6
/// A single lexical token produced by [`tokenize`].
///
/// Pairs the piece of input the token was read from with the category the
/// tokenizer assigned to it.
#[derive(Debug, Clone, PartialOrd, PartialEq)]
pub struct Token<'a> {
    /// The slice of the input covered by this token, with its byte offsets.
    pub span: Span<'a>,
    /// The category assigned to this token by the tokenizer.
    pub token_type: TokenType,
}
12
/// A slice of the tokenizer input together with the byte offsets it covers.
///
/// The tokenizer builds spans so that `text` is the input sliced at
/// `start..end`; the fields are public, so callers may construct spans for
/// which that does not hold.
#[derive(Debug, Clone, PartialOrd, PartialEq)]
pub struct Span<'a> {
    /// The text covered by the span.
    pub text: &'a str,
    /// Byte offset of the first byte of the span.
    pub start: usize,
    /// Byte offset one past the last byte of the span.
    pub end: usize,
}

impl<'a> Span<'a> {
    /// Returns the number of bytes between `start` and `end`.
    ///
    /// An inverted span (`end < start`) reports a length of zero instead of
    /// overflowing: the subtraction would otherwise panic in debug builds and
    /// wrap around to a huge value in release builds.
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the span covers no bytes.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
29
/// Read position of the tokenizer within the input text.
///
/// The position lives in a `RefCell` so that the parsing methods can advance
/// it while only borrowing the cursor immutably (`&self`).
#[derive(Debug)]
pub struct Cursor {
    // Byte offset into the text being tokenized; `tokenize` starts it at 0.
    pos: RefCell<usize>,
}
34
35pub fn tokenize(text: &str) -> Vec<Token> {
36    let cursor = Cursor {
37        pos: RefCell::new(0),
38    };
39    let mut token_vec = Vec::new();
40
41    while !cursor.eos(text) {
42        cursor.skip_whitespace(text);
43        token_vec.push(parse_token(&cursor, text))
44    }
45
46    token_vec
47}
48
49impl Cursor {
50    fn get_pos(&self) -> usize {
51        *self.pos.borrow()
52    }
53
54    fn set_pos(&self, pos: usize) -> usize {
55        *self.pos.borrow_mut() = pos;
56        self.get_pos()
57    }
58
59    fn skip_whitespace(&self, text: &str) {
60        while self.parse(text, &[" "]).is_some() {}
61    }
62
63    // fn parse<P: TokenParser>(&self, token_parser: P) {
64    //     todo!();
65    // }
66
67    fn parse_number<'a>(&self, text: &'a str) -> Option<Span<'a>> {
68        if let Some(_number) = self.peek(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
69        {
70            let start = self.get_pos();
71
72            while let Some(_number) =
73                self.parse(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
74            {}
75
76            if let Some(_decimal_point) = self.parse(text, &["."]) {
77                while let Some(_number) =
78                    self.parse(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
79                {
80                }
81
82                Some(Span {
83                    start,
84                    end: self.get_pos(),
85                    text: &text[start..self.get_pos()],
86                })
87            } else {
88                Some(Span {
89                    start,
90                    end: self.get_pos(),
91                    text: &text[start..self.get_pos()],
92                })
93            }
94        } else if let (Some(_decimal_point), Some(_number)) = (
95            self.peek(text, &["."]),
96            self.peek(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]),
97        ) {
98            let _decimal_point = self.parse(text, &["."]).unwrap();
99            let start = self.get_pos();
100
101            while let Some(_number) =
102                self.parse(text, &["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
103            {}
104
105            let end = self.get_pos();
106
107            Some(Span {
108                start,
109                end,
110                text: &text[start..end],
111            })
112        } else {
113            None
114        }
115    }
116
117    fn parse_text<'a>(&self, text: &'a str) -> Option<Span<'a>> {
118        let start = self.get_pos();
119        if let Some(_double_quotes) = self.peek(text, &["\""]) {
120            if let Some(next_double_quotes) = self.find_next(text, &["\""], 1) {
121                self.set_pos(next_double_quotes + 1);
122                Some(Span {
123                    start: start + 1,
124                    end: next_double_quotes,
125                    text: &text[start + 1..next_double_quotes],
126                })
127            } else {
128                None
129            }
130        } else {
131            None
132        }
133    }
134
135    fn parse_symbol<'a>(&self, text: &'a str) -> Option<Span<'a>> {
136        let start = self.get_pos();
137        if let Some(c) = text[self.get_pos()..].chars().next() {
138            self.set_pos(self.get_pos() + c.len_utf8());
139
140            Some(Span {
141                start,
142                end: self.get_pos(),
143                text: &text[start..self.get_pos()],
144            })
145        } else {
146            None
147        }
148    }
149
150    fn find_next(&self, text: &str, patterns: &[&str], offset: usize) -> Option<usize> {
151        text.char_indices()
152            .skip(self.get_pos() + offset)
153            .map(|(offset, _)| (offset, &text[offset..]))
154            .find(|(_offset, substr)| patterns.iter().any(|pattern| substr.starts_with(*pattern)))
155            .map(|(offset, _)| offset)
156    }
157
158    fn peek<'a>(&self, text: &'a str, patterns: &[&str]) -> Option<Span<'a>> {
159        self.peek_n(text, patterns, 0)
160    }
161
162    fn peek_n<'a>(&self, text: &'a str, patterns: &[&str], offset: usize) -> Option<Span<'a>> {
163        patterns
164            .iter()
165            .find(|pattern| text[self.get_pos() + offset..].starts_with(*pattern))
166            .map(|pattern| Span {
167                start: self.get_pos() + offset,
168                end: self.get_pos() + offset + pattern.len(),
169                text: &text[self.get_pos() + offset..self.get_pos() + offset + pattern.len()],
170            })
171    }
172
173    fn parse<'a>(&self, text: &'a str, patterns: &[&str]) -> Option<Span<'a>> {
174        let pattern = patterns
175            .iter()
176            .find(|pattern| text[self.get_pos()..].starts_with(*pattern))
177            .map(|pattern| {
178                (
179                    self.get_pos() + pattern.len(),
180                    Span {
181                        start: self.get_pos(),
182                        end: self.get_pos() + pattern.len(),
183                        text: &text[self.get_pos()..self.get_pos() + pattern.len()],
184                    },
185                )
186            });
187
188        if pattern.is_some() {
189            self.set_pos(pattern.as_ref().unwrap().0);
190        }
191
192        pattern.map(|tuple| tuple.1)
193    }
194
195    fn parse_pattern<'a, T: Clone>(
196        &self,
197        text: &'a str,
198        patterns: &[(&[&str], T)],
199    ) -> Option<(Span<'a>, T)> {
200        let token = patterns.iter().find_map(|patterns| {
201            patterns
202                .0
203                .iter()
204                .find(|pattern| text[self.get_pos()..].starts_with(*pattern))
205                .map(|pattern| {
206                    (
207                        Span {
208                            start: self.get_pos(),
209                            end: self.get_pos() + pattern.len(),
210                            text: &text[self.get_pos()..self.get_pos() + pattern.len()],
211                        },
212                        patterns.1.clone(),
213                    )
214                })
215        });
216
217        if token.is_some() {
218            self.set_pos(token.as_ref().unwrap().0.end);
219        }
220
221        token
222    }
223
224    fn eos(&self, text: &str) -> bool {
225        self.get_pos() >= text.len()
226    }
227}
228
229fn parse_token<'a>(cursor: &Cursor, text: &'a str) -> Token<'a> {
230    if let Some(span) = cursor.parse(text, &["/"]) {
231        Token {
232            span,
233            token_type: TokenType::Division,
234        }
235    } else if let Some(span) = cursor.parse(text, &["_"]) {
236        Token {
237            span,
238            token_type: TokenType::Underscorce,
239        }
240    } else if let Some(span) = cursor.parse(text, &["^"]) {
241        Token {
242            span,
243            token_type: TokenType::Hat,
244        }
245    } else if let Some(span) = cursor.parse_number(text) {
246        Token {
247            span,
248            token_type: TokenType::Number,
249        }
250    } else if let Some(span) = cursor.parse_text(text) {
251        Token {
252            span,
253            token_type: TokenType::Text,
254        }
255    } else if let Some((span, token_type)) = cursor.parse_pattern(text, UNARY_OPERATORS) {
256        Token { span, token_type }
257    } else if let Some((span, token_type)) = cursor.parse_pattern(text, BINARY_OPERATORS) {
258        Token { span, token_type }
259    } else if let Some(arrow) = cursor.parse_pattern(text, ARROWS) {
260        Token {
261            span: arrow.0,
262            token_type: arrow.1,
263        }
264    } else if let Some(operation) = cursor.parse_pattern(text, OPERATION) {
265        Token {
266            span: operation.0,
267            token_type: operation.1,
268        }
269    } else if let Some(greek) = cursor.parse_pattern(text, GREEK) {
270        Token {
271            span: greek.0,
272            token_type: greek.1,
273        }
274    } else if let Some(misc) = cursor.parse_pattern(text, MISC) {
275        Token {
276            span: misc.0,
277            token_type: misc.1,
278        }
279    } else if let Some(relational) = cursor.parse_pattern(text, RELATIONAL) {
280        Token {
281            span: relational.0,
282            token_type: relational.1,
283        }
284    } else if let Some(logical) = cursor.parse_pattern(text, LOGICAL) {
285        Token {
286            span: logical.0,
287            token_type: logical.1,
288        }
289    } else if let Some(function) = cursor.parse_pattern(text, FUNCTION) {
290        Token {
291            span: function.0,
292            token_type: function.1,
293        }
294    } else if let Some(l_brace) = cursor.parse_pattern(text, LBRACES) {
295        Token {
296            span: l_brace.0,
297            token_type: l_brace.1,
298        }
299    } else if let Some(l_brace) = cursor.parse_pattern(text, RBRACES) {
300        Token {
301            span: l_brace.0,
302            token_type: l_brace.1,
303        }
304    } else if let Some(span) = cursor.parse_symbol(text) {
305        Token {
306            span,
307            token_type: TokenType::Symbol,
308        }
309    } else {
310        Token {
311            span: Span {
312                text: "",
313                start: 0,
314                end: 0,
315            },
316            token_type: TokenType::None,
317        }
318    }
319}