1use crate::error::{ODataError, Result};
2
/// A lexical token produced by [`Lexer::next_token`].
///
/// Every payload is either a `String` or absent, so the type supports total
/// equality (`Eq`) in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A `$`-prefixed name such as `$select`; the stored text includes the `$`.
    QueryOption(String),
    /// A bare or backtick-quoted identifier (backticks and escapes removed).
    Identifier(String),
    /// The raw text of a numeric literal (digits and `.` characters).
    Number(String),
    /// The contents of a single-quoted literal, with `''` collapsed to `'`.
    StringLiteral(String),
    /// `,`
    Comma,
    /// `=`
    Equals,
    /// `&`
    Ampersand,
    /// `;`
    Semicolon,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `*`
    Mul,
    /// `:`
    Colon,
    /// `/`
    Slash,
    /// `-`
    Minus,
    /// End of input.
    Eof,
}
38
/// Character-based tokenizer state: the decoded input plus a cursor into it.
///
/// `Clone` allows a caller to snapshot the lexer (for example to peek ahead and
/// roll back); `Debug` makes it printable in diagnostics.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The input, decoded into individual `char`s.
    input: Vec<char>,
    /// Index into `input` of the next unread character.
    position: usize,
}
44
45impl Lexer {
46 pub fn new(input: &str) -> Self {
47 Self {
48 input: input.chars().collect(),
49 position: 0,
50 }
51 }
52
53 pub fn position(&self) -> usize {
54 self.position
55 }
56
57 fn current_char(&self) -> Option<char> {
58 if self.position < self.input.len() {
59 Some(self.input[self.position])
60 } else {
61 None
62 }
63 }
64
65 fn advance(&mut self) {
66 self.position += 1;
67 }
68
69 fn skip_whitespace(&mut self) {
70 while let Some(ch) = self.current_char() {
71 if ch.is_whitespace() {
72 self.advance();
73 } else {
74 break;
75 }
76 }
77 }
78
79 fn read_identifier(&mut self) -> String {
80 let mut result = String::new();
81 while let Some(ch) = self.current_char() {
82 if ch.is_alphanumeric() || ch == '_' || ch == '.' || ch == '-' {
83 result.push(ch);
84 self.advance();
85 } else {
86 break;
87 }
88 }
89 result
90 }
91
92 fn read_number(&mut self) -> String {
93 let mut result = String::new();
94 while let Some(ch) = self.current_char() {
95 if ch.is_numeric() || ch == '.' {
96 result.push(ch);
97 self.advance();
98 } else {
99 break;
100 }
101 }
102 result
103 }
104
105 fn read_string_literal(&mut self) -> Result<String> {
106 self.advance();
108 let mut result = String::new();
109
110 while let Some(ch) = self.current_char() {
111 if ch == '\'' {
112 self.advance();
114 if self.current_char() == Some('\'') {
115 result.push('\'');
116 self.advance();
117 } else {
118 return Ok(result);
120 }
121 } else {
122 result.push(ch);
123 self.advance();
124 }
125 }
126
127 Err(ODataError::ParseError {
128 position: self.position,
129 message: "Unterminated string literal".to_string(),
130 })
131 }
132
133 fn read_backticked_identifier(&mut self) -> Result<String> {
134 self.advance();
136 let mut result = String::new();
137
138 while let Some(ch) = self.current_char() {
139 if ch == '`' {
140 self.advance();
141 return Ok(result);
142 } else if ch == '\\' {
143 self.advance();
145 if let Some(next_ch) = self.current_char() {
146 result.push(next_ch);
147 self.advance();
148 } else {
149 break;
150 }
151 } else {
152 result.push(ch);
153 self.advance();
154 }
155 }
156
157 Err(ODataError::ParseError {
158 position: self.position,
159 message: "Unterminated backticked identifier".to_string(),
160 })
161 }
162
163 pub fn next_token(&mut self) -> Result<Token> {
164 self.skip_whitespace();
165
166 match self.current_char() {
167 None => Ok(Token::Eof),
168 Some('=') => {
169 self.advance();
170 Ok(Token::Equals)
171 }
172 Some(',') => {
173 self.advance();
174 Ok(Token::Comma)
175 }
176 Some('&') => {
177 self.advance();
178 Ok(Token::Ampersand)
179 }
180 Some(';') => {
181 self.advance();
182 Ok(Token::Semicolon)
183 }
184 Some('(') => {
185 self.advance();
186 Ok(Token::LParen)
187 }
188 Some(')') => {
189 self.advance();
190 Ok(Token::RParen)
191 }
192 Some('*') => {
193 self.advance();
194 Ok(Token::Mul)
195 }
196 Some(':') => {
197 self.advance();
198 Ok(Token::Colon)
199 }
200 Some('/') => {
201 self.advance();
202 Ok(Token::Slash)
203 }
204 Some('-') => {
205 self.advance();
206 Ok(Token::Minus)
207 }
208 Some('\'') => {
209 let string = self.read_string_literal()?;
210 Ok(Token::StringLiteral(string))
211 }
212 Some('`') => {
213 let ident = self.read_backticked_identifier()?;
214 Ok(Token::Identifier(ident))
215 }
216 Some('$') => {
217 self.advance();
218 let name = self.read_identifier();
219 Ok(Token::QueryOption(format!("${}", name)))
220 }
221 Some(ch) if ch.is_numeric() => {
222 let number = self.read_number();
223 Ok(Token::Number(number))
224 }
225 Some(ch) if ch.is_alphabetic() || ch == '_' => {
226 let ident = self.read_identifier();
227 Ok(Token::Identifier(ident))
228 }
229 Some(ch) => Err(ODataError::ParseError {
230 position: self.position,
231 message: format!("Unexpected character: '{}'", ch),
232 }),
233 }
234 }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` and checks that the leading tokens match `expected`, in order.
    fn assert_leading_tokens(input: &str, expected: &[Token]) {
        let mut lexer = Lexer::new(input);
        for want in expected {
            assert_eq!(&lexer.next_token().unwrap(), want);
        }
    }

    /// Shorthand for building a `Token::QueryOption`.
    fn option(name: &str) -> Token {
        Token::QueryOption(name.to_string())
    }

    /// Shorthand for building a `Token::Identifier`.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    /// Shorthand for building a `Token::Number`.
    fn number(text: &str) -> Token {
        Token::Number(text.to_string())
    }

    #[test]
    fn test_tokenize_simple_query() {
        assert_leading_tokens(
            "$select=id,name",
            &[
                option("$select"),
                Token::Equals,
                ident("id"),
                Token::Comma,
                ident("name"),
                Token::Eof,
            ],
        );
    }

    #[test]
    fn test_tokenize_with_numbers() {
        assert_leading_tokens(
            "$top=10&$skip=20",
            &[
                option("$top"),
                Token::Equals,
                number("10"),
                Token::Ampersand,
                option("$skip"),
                Token::Equals,
                number("20"),
            ],
        );
    }
}