// zen_parser/lexer/lexer.rs

1use std::cell::RefCell;
2use std::rc::Rc;
3
4use crate::is_token_type;
5use crate::lexer::cursor::{Cursor, CursorItem};
6use crate::lexer::error::LexerError;
7use crate::lexer::error::LexerError::{UnexpectedEof, UnmatchedSymbol};
8use crate::lexer::token::{Token, TokenKind};
9
10type TokenSlice<'a> = Rc<RefCell<Vec<Token<'a>>>>;
11
12type VoidResult = Result<(), LexerError>;
13
14#[derive(Debug)]
15pub struct Lexer<'a> {
16    tokens: TokenSlice<'a>,
17}
18
19impl<'a> Default for Lexer<'a> {
20    fn default() -> Self {
21        Lexer::new()
22    }
23}
24
25impl<'a> Lexer<'a> {
26    pub fn new() -> Self {
27        Self {
28            tokens: Rc::new(RefCell::new(Vec::new())),
29        }
30    }
31
32    pub fn tokenize(&self, source: &'a str) -> Result<TokenSlice<'a>, LexerError> {
33        self.tokens.borrow_mut().clear();
34        Scanner::new(source, self.tokens.clone()).scan()?;
35        Ok(self.tokens.clone())
36    }
37}
38
39struct Scanner<'a> {
40    cursor: Cursor<'a>,
41    tokens: TokenSlice<'a>,
42    source: &'a str,
43}
44
45impl<'a> Scanner<'a> {
46    pub fn new(source: &'a str, tokens: TokenSlice<'a>) -> Self {
47        Self {
48            cursor: Cursor::from(source),
49            source,
50            tokens,
51        }
52    }
53
54    pub fn scan(&self) -> VoidResult {
55        while let Some((i, s)) = self.cursor.peek() {
56            match s {
57                ' ' => {
58                    self.cursor.next();
59                    Ok(())
60                }
61                _ if is_token_type!(s, "quote") => self.string(),
62                _ if is_token_type!(s, "digit") => self.number(),
63                _ if is_token_type!(s, "bracket") => self.bracket(),
64                _ if is_token_type!(s, "cmp_operator") => self.operator(),
65                _ if is_token_type!(s, "operator") => self.simple_operator(),
66                '.' => self.dot(),
67                _ if is_token_type!(s, "alpha") => self.identifier(),
68
69                _ => Err(UnmatchedSymbol {
70                    symbol: s,
71                    position: i,
72                }),
73            }?;
74        }
75
76        Ok(())
77    }
78
79    fn next(&self) -> Result<CursorItem, LexerError> {
80        self.cursor.next().ok_or_else(|| {
81            let (a, b) = self.cursor.peek_back().unwrap_or((0, ' '));
82
83            UnexpectedEof {
84                symbol: b,
85                position: a,
86            }
87        })
88    }
89
90    fn push(&self, token: Token<'a>) {
91        self.tokens.borrow_mut().push(token);
92    }
93
94    fn string(&self) -> VoidResult {
95        let (start, opener) = self.next()?;
96        let end: usize;
97
98        loop {
99            let (e, c) = self.next()?;
100            if c == opener {
101                end = e;
102                break;
103            }
104        }
105
106        self.push(Token {
107            kind: TokenKind::String,
108            span: (start, end),
109            value: &self.source[start + 1..end],
110        });
111
112        Ok(())
113    }
114
115    fn number(&self) -> VoidResult {
116        let (start, _) = self.next()?;
117        let mut end = start;
118        let mut fractal = false;
119
120        while let Some((e, c)) = self
121            .cursor
122            .next_if(|c| is_token_type!(c, "digit") || c == '_' || c == '.')
123        {
124            if fractal && c == '.' {
125                self.cursor.back();
126                break;
127            }
128
129            if c == '.' {
130                if let Some((_, p)) = self.cursor.peek() {
131                    if p == '.' {
132                        self.cursor.back();
133                        break;
134                    }
135
136                    fractal = true
137                }
138            }
139
140            end = e;
141        }
142
143        self.push(Token {
144            kind: TokenKind::Number,
145            span: (start, end + 1),
146            value: &self.source[start..=end],
147        });
148
149        Ok(())
150    }
151
152    fn bracket(&self) -> VoidResult {
153        let (start, _) = self.next()?;
154
155        self.push(Token {
156            kind: TokenKind::Bracket,
157            span: (start, start + 1),
158            value: &self.source[start..=start],
159        });
160
161        Ok(())
162    }
163
164    fn dot(&self) -> VoidResult {
165        let (start, _) = self.next()?;
166        let mut end = start;
167
168        if self.cursor.next_if(|c| c == '.').is_some() {
169            end += 1;
170        }
171
172        self.push(Token {
173            kind: TokenKind::Operator,
174            span: (start, end + 1),
175            value: &self.source[start..=end],
176        });
177
178        Ok(())
179    }
180
181    fn operator(&self) -> VoidResult {
182        let (start, _) = self.next()?;
183        let mut end = start;
184
185        if self.cursor.next_if(|c| c == '=').is_some() {
186            end += 1;
187        }
188
189        self.push(Token {
190            kind: TokenKind::Operator,
191            span: (start, end + 1),
192            value: &self.source[start..=end],
193        });
194
195        Ok(())
196    }
197
198    fn simple_operator(&self) -> VoidResult {
199        let (start, _) = self.next()?;
200
201        self.push(Token {
202            kind: TokenKind::Operator,
203            span: (start, start + 1),
204            value: &self.source[start..=start],
205        });
206
207        Ok(())
208    }
209
210    fn not(&self, start: usize) -> VoidResult {
211        if self.cursor.next_if_is(" in ") {
212            let end = self.cursor.position();
213
214            self.push(Token {
215                kind: TokenKind::Operator,
216                span: (start, end - 1),
217                value: "not in",
218            })
219        } else {
220            let end = self.cursor.position();
221
222            self.push(Token {
223                kind: TokenKind::Operator,
224                span: (start, end),
225                value: "not",
226            })
227        }
228
229        Ok(())
230    }
231
232    fn identifier(&self) -> VoidResult {
233        let (start, _) = self.next()?;
234        let mut end = start;
235
236        while let Some((e, _)) = self.cursor.next_if(|c| is_token_type!(c, "alphanumeric")) {
237            end = e;
238        }
239
240        let value = &self.source[start..=end];
241        match value {
242            "and" | "or" | "in" => self.push(Token {
243                kind: TokenKind::Operator,
244                span: (start, end + 1),
245                value,
246            }),
247            "not" => self.not(start)?,
248            _ => self.push(Token {
249                kind: TokenKind::Identifier,
250                span: (start, end + 1),
251                value,
252            }),
253        }
254
255        Ok(())
256    }
257}