nodedb_sql/parser/array_stmt/lexer.rs
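
//! Lexer for the array-statement parser: converts SQL source text into a
//! flat sequence of [`Token`]s.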
use crate::error::SqlError;

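/// The kinds of token produced by [`tokenize`].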
#[derive(Debug, Clone, PartialEq)]
pub enum Tok {
    Ident(String),
    Int(i64),
    Float(f64),
    Str(String),
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    DotDot,
    Eq,
    Null,
}

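/// A token together with the byte offset at which it starts in the source.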
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    pub tok: Tok,
    pub pos: usize,
}

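/// Splits `src` into tokens, skipping whitespace and `--` line comments.
///
/// Returns a [`SqlError::Parse`] for unterminated string literals, malformed
/// or out-of-range numeric literals, and bytes that cannot start a token.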
pub fn tokenize(src: &str) -> Result<Vec<Token>, SqlError> {
    let bytes = src.as_bytes();
    let mut out = Vec::with_capacity(src.len() / 4);
    let mut i = 0;
    while i < bytes.len() {
        let b = bytes[i];
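        // Whitespace separates tokens but produces none.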
        if b.is_ascii_whitespace() {
            i += 1;
            continue;
        }
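        // A "--" line comment runs to the end of the line.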
        if b == b'-' && i + 1 < bytes.len() && bytes[i + 1] == b'-' {
            while i < bytes.len() && bytes[i] != b'\n' {
                i += 1;
            }
            continue;
        }
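        // Single-character punctuation, plus the two-character ".." range token.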
        match b {
            b'(' => {
                out.push(Token {
                    tok: Tok::LParen,
                    pos: i,
                });
                i += 1;
                continue;
            }
            b')' => {
                out.push(Token {
                    tok: Tok::RParen,
                    pos: i,
                });
                i += 1;
                continue;
            }
            b'[' => {
                out.push(Token {
                    tok: Tok::LBracket,
                    pos: i,
                });
                i += 1;
                continue;
            }
            b']' => {
                out.push(Token {
                    tok: Tok::RBracket,
                    pos: i,
                });
                i += 1;
                continue;
            }
            b',' => {
                out.push(Token {
                    tok: Tok::Comma,
                    pos: i,
                });
                i += 1;
                continue;
            }
            b'.' if i + 1 < bytes.len() && bytes[i + 1] == b'.' => {
                out.push(Token {
                    tok: Tok::DotDot,
                    pos: i,
                });
                i += 2;
                continue;
            }
            b'=' => {
                out.push(Token {
                    tok: Tok::Eq,
                    pos: i,
                });
                i += 1;
                continue;
            }
            _ => {}
        }
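        // String literals are single-quoted; a doubled quote ('') escapes a
        // literal quote character.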
        if b == b'\'' {
            let start = i + 1;
            let mut j = start;
            // Collect the raw bytes of the literal instead of casting each byte
            // to `char`: a quote byte can never occur inside a multi-byte UTF-8
            // sequence, so the collected bytes remain valid UTF-8.
            let mut raw = Vec::new();
            while j < bytes.len() {
                if bytes[j] == b'\'' {
                    if j + 1 < bytes.len() && bytes[j + 1] == b'\'' {
                        // A doubled quote escapes a single quote.
                        raw.push(b'\'');
                        j += 2;
                        continue;
                    }
                    break;
                }
                raw.push(bytes[j]);
                j += 1;
            }
            if j >= bytes.len() {
                return Err(SqlError::Parse {
                    detail: format!("unterminated string literal at offset {i}"),
                });
            }
            let s = String::from_utf8_lossy(&raw).into_owned();
            out.push(Token {
                tok: Tok::Str(s),
                pos: i,
            });
            i = j + 1;
            continue;
        }
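        // Numeric literals: an optional leading minus, digits, and an optional
        // fractional part. A ".." after the digits is left for the range token,
        // so "0..23" lexes as Int(0), DotDot, Int(23).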
        if b.is_ascii_digit() || (b == b'-' && i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit())
        {
            let start = i;
            let mut j = i;
            if bytes[j] == b'-' {
                j += 1;
            }
            while j < bytes.len() && bytes[j].is_ascii_digit() {
                j += 1;
            }
            let is_float = j + 1 < bytes.len()
                && bytes[j] == b'.'
                && bytes[j + 1] != b'.'
                && bytes[j + 1].is_ascii_digit();
            if is_float {
                j += 1;
                while j < bytes.len() && bytes[j].is_ascii_digit() {
                    j += 1;
                }
                let txt = &src[start..j];
                let f: f64 = txt.parse().map_err(|_| SqlError::Parse {
                    detail: format!("invalid float literal '{txt}'"),
                })?;
                out.push(Token {
                    tok: Tok::Float(f),
                    pos: start,
                });
            } else {
                let txt = &src[start..j];
                let n: i64 = txt.parse().map_err(|_| SqlError::Parse {
                    detail: format!("invalid integer literal '{txt}'"),
                })?;
                out.push(Token {
                    tok: Tok::Int(n),
                    pos: start,
                });
            }
            i = j;
            continue;
        }
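        // Identifiers start with a letter or underscore; the keyword NULL is
        // recognized case-insensitively and gets its own token.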
        if b == b'_' || b.is_ascii_alphabetic() {
            let start = i;
            let mut j = i;
            while j < bytes.len() && (bytes[j] == b'_' || bytes[j].is_ascii_alphanumeric()) {
                j += 1;
            }
            let txt = &src[start..j];
            if txt.eq_ignore_ascii_case("NULL") {
                out.push(Token {
                    tok: Tok::Null,
                    pos: start,
                });
            } else {
                out.push(Token {
                    tok: Tok::Ident(txt.to_string()),
                    pos: start,
                });
            }
            i = j;
            continue;
        }
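        // Anything else cannot start a token.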
        return Err(SqlError::Parse {
            detail: format!("unexpected character '{}' at offset {i}", b as char),
        });
    }
    Ok(out)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenize_simple() {
        let toks = tokenize("CREATE ARRAY a (1, 2.5, 'x')").unwrap();
        assert!(matches!(toks[0].tok, Tok::Ident(ref s) if s == "CREATE"));
        assert!(matches!(toks[1].tok, Tok::Ident(ref s) if s == "ARRAY"));
        assert!(matches!(toks[3].tok, Tok::LParen));
        assert!(matches!(toks[4].tok, Tok::Int(1)));
        assert!(matches!(toks[6].tok, Tok::Float(f) if (f - 2.5).abs() < 1e-9));
        assert!(matches!(toks[8].tok, Tok::Str(ref s) if s == "x"));
    }

    #[test]
    fn tokenize_dotdot_range() {
        let toks = tokenize("[0..23]").unwrap();
        assert!(matches!(toks[1].tok, Tok::Int(0)));
        assert!(matches!(toks[2].tok, Tok::DotDot));
        assert!(matches!(toks[3].tok, Tok::Int(23)));
    }

    #[test]
    fn tokenize_negative() {
        let toks = tokenize("-7").unwrap();
        assert!(matches!(toks[0].tok, Tok::Int(-7)));
    }
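
    // Extra checks (not in the original test set): the '' escape inside string
    // literals and "--" comment skipping, sketched against the behavior above.
    #[test]
    fn tokenize_escaped_quote() {
        let toks = tokenize("'it''s'").unwrap();
        assert!(matches!(toks[0].tok, Tok::Str(ref s) if s == "it's"));
    }

    #[test]
    fn tokenize_line_comment() {
        let toks = tokenize("-- note\n42").unwrap();
        assert_eq!(toks.len(), 1);
        assert!(matches!(toks[0].tok, Tok::Int(42)));
    }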
}