1use std::cell::RefCell;
2use std::rc::Rc;
3
4use crate::is_token_type;
5use crate::lexer::cursor::{Cursor, CursorItem};
6use crate::lexer::error::LexerError;
7use crate::lexer::error::LexerError::{UnexpectedEof, UnmatchedSymbol};
8use crate::lexer::token::{Token, TokenKind};
9
10type TokenSlice<'a> = Rc<RefCell<Vec<Token<'a>>>>;
11
12type VoidResult = Result<(), LexerError>;
13
14#[derive(Debug)]
15pub struct Lexer<'a> {
16 tokens: TokenSlice<'a>,
17}
18
19impl<'a> Default for Lexer<'a> {
20 fn default() -> Self {
21 Lexer::new()
22 }
23}
24
25impl<'a> Lexer<'a> {
26 pub fn new() -> Self {
27 Self {
28 tokens: Rc::new(RefCell::new(Vec::new())),
29 }
30 }
31
32 pub fn tokenize(&self, source: &'a str) -> Result<TokenSlice<'a>, LexerError> {
33 self.tokens.borrow_mut().clear();
34 Scanner::new(source, self.tokens.clone()).scan()?;
35 Ok(self.tokens.clone())
36 }
37}
38
39struct Scanner<'a> {
40 cursor: Cursor<'a>,
41 tokens: TokenSlice<'a>,
42 source: &'a str,
43}
44
45impl<'a> Scanner<'a> {
46 pub fn new(source: &'a str, tokens: TokenSlice<'a>) -> Self {
47 Self {
48 cursor: Cursor::from(source),
49 source,
50 tokens,
51 }
52 }
53
54 pub fn scan(&self) -> VoidResult {
55 while let Some((i, s)) = self.cursor.peek() {
56 match s {
57 ' ' => {
58 self.cursor.next();
59 Ok(())
60 }
61 _ if is_token_type!(s, "quote") => self.string(),
62 _ if is_token_type!(s, "digit") => self.number(),
63 _ if is_token_type!(s, "bracket") => self.bracket(),
64 _ if is_token_type!(s, "cmp_operator") => self.operator(),
65 _ if is_token_type!(s, "operator") => self.simple_operator(),
66 '.' => self.dot(),
67 _ if is_token_type!(s, "alpha") => self.identifier(),
68
69 _ => Err(UnmatchedSymbol {
70 symbol: s,
71 position: i,
72 }),
73 }?;
74 }
75
76 Ok(())
77 }
78
79 fn next(&self) -> Result<CursorItem, LexerError> {
80 self.cursor.next().ok_or_else(|| {
81 let (a, b) = self.cursor.peek_back().unwrap_or((0, ' '));
82
83 UnexpectedEof {
84 symbol: b,
85 position: a,
86 }
87 })
88 }
89
90 fn push(&self, token: Token<'a>) {
91 self.tokens.borrow_mut().push(token);
92 }
93
94 fn string(&self) -> VoidResult {
95 let (start, opener) = self.next()?;
96 let end: usize;
97
98 loop {
99 let (e, c) = self.next()?;
100 if c == opener {
101 end = e;
102 break;
103 }
104 }
105
106 self.push(Token {
107 kind: TokenKind::String,
108 span: (start, end),
109 value: &self.source[start + 1..end],
110 });
111
112 Ok(())
113 }
114
115 fn number(&self) -> VoidResult {
116 let (start, _) = self.next()?;
117 let mut end = start;
118 let mut fractal = false;
119
120 while let Some((e, c)) = self
121 .cursor
122 .next_if(|c| is_token_type!(c, "digit") || c == '_' || c == '.')
123 {
124 if fractal && c == '.' {
125 self.cursor.back();
126 break;
127 }
128
129 if c == '.' {
130 if let Some((_, p)) = self.cursor.peek() {
131 if p == '.' {
132 self.cursor.back();
133 break;
134 }
135
136 fractal = true
137 }
138 }
139
140 end = e;
141 }
142
143 self.push(Token {
144 kind: TokenKind::Number,
145 span: (start, end + 1),
146 value: &self.source[start..=end],
147 });
148
149 Ok(())
150 }
151
152 fn bracket(&self) -> VoidResult {
153 let (start, _) = self.next()?;
154
155 self.push(Token {
156 kind: TokenKind::Bracket,
157 span: (start, start + 1),
158 value: &self.source[start..=start],
159 });
160
161 Ok(())
162 }
163
164 fn dot(&self) -> VoidResult {
165 let (start, _) = self.next()?;
166 let mut end = start;
167
168 if self.cursor.next_if(|c| c == '.').is_some() {
169 end += 1;
170 }
171
172 self.push(Token {
173 kind: TokenKind::Operator,
174 span: (start, end + 1),
175 value: &self.source[start..=end],
176 });
177
178 Ok(())
179 }
180
181 fn operator(&self) -> VoidResult {
182 let (start, _) = self.next()?;
183 let mut end = start;
184
185 if self.cursor.next_if(|c| c == '=').is_some() {
186 end += 1;
187 }
188
189 self.push(Token {
190 kind: TokenKind::Operator,
191 span: (start, end + 1),
192 value: &self.source[start..=end],
193 });
194
195 Ok(())
196 }
197
198 fn simple_operator(&self) -> VoidResult {
199 let (start, _) = self.next()?;
200
201 self.push(Token {
202 kind: TokenKind::Operator,
203 span: (start, start + 1),
204 value: &self.source[start..=start],
205 });
206
207 Ok(())
208 }
209
210 fn not(&self, start: usize) -> VoidResult {
211 if self.cursor.next_if_is(" in ") {
212 let end = self.cursor.position();
213
214 self.push(Token {
215 kind: TokenKind::Operator,
216 span: (start, end - 1),
217 value: "not in",
218 })
219 } else {
220 let end = self.cursor.position();
221
222 self.push(Token {
223 kind: TokenKind::Operator,
224 span: (start, end),
225 value: "not",
226 })
227 }
228
229 Ok(())
230 }
231
232 fn identifier(&self) -> VoidResult {
233 let (start, _) = self.next()?;
234 let mut end = start;
235
236 while let Some((e, _)) = self.cursor.next_if(|c| is_token_type!(c, "alphanumeric")) {
237 end = e;
238 }
239
240 let value = &self.source[start..=end];
241 match value {
242 "and" | "or" | "in" => self.push(Token {
243 kind: TokenKind::Operator,
244 span: (start, end + 1),
245 value,
246 }),
247 "not" => self.not(start)?,
248 _ => self.push(Token {
249 kind: TokenKind::Identifier,
250 span: (start, end + 1),
251 value,
252 }),
253 }
254
255 Ok(())
256 }
257}