odatav4_parser/
lexer.rs

1use crate::error::{ODataError, Result};
2
/// Token types produced by the lexer.
///
/// Variants that carry a `String` hold the text of the token as it was read
/// from the input; punctuation variants carry no payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Query option name including its leading `$` (e.g., "$select", "$top")
    QueryOption(String),
    /// Identifier (field name or keyword); also produced for backtick-quoted names
    Identifier(String),
    /// Number literal, kept as its raw source text
    Number(String),
    /// String literal with the surrounding single quotes removed
    StringLiteral(String),
    /// Comma separator
    Comma,
    /// Equals sign
    Equals,
    /// Ampersand (query separator)
    Ampersand,
    /// Semicolon (separator for nested options)
    Semicolon,
    /// Left parenthesis
    LParen,
    /// Right parenthesis
    RParen,
    /// Asterisk/multiplication
    Mul,
    /// Colon (for lambda operators)
    Colon,
    /// Slash (for path navigation)
    Slash,
    /// Minus/hyphen
    Minus,
    /// End of input
    Eof,
}
38
39/// Lexer for tokenizing OData V4 query strings
40pub struct Lexer {
41    input: Vec<char>,
42    position: usize,
43}
44
45impl Lexer {
46    pub fn new(input: &str) -> Self {
47        Self {
48            input: input.chars().collect(),
49            position: 0,
50        }
51    }
52
53    pub fn position(&self) -> usize {
54        self.position
55    }
56
57    fn current_char(&self) -> Option<char> {
58        if self.position < self.input.len() {
59            Some(self.input[self.position])
60        } else {
61            None
62        }
63    }
64
65    fn advance(&mut self) {
66        self.position += 1;
67    }
68
69    fn skip_whitespace(&mut self) {
70        while let Some(ch) = self.current_char() {
71            if ch.is_whitespace() {
72                self.advance();
73            } else {
74                break;
75            }
76        }
77    }
78
79    fn read_identifier(&mut self) -> String {
80        let mut result = String::new();
81        while let Some(ch) = self.current_char() {
82            if ch.is_alphanumeric() || ch == '_' || ch == '.' || ch == '-' {
83                result.push(ch);
84                self.advance();
85            } else {
86                break;
87            }
88        }
89        result
90    }
91
92    fn read_number(&mut self) -> String {
93        let mut result = String::new();
94        while let Some(ch) = self.current_char() {
95            if ch.is_numeric() || ch == '.' {
96                result.push(ch);
97                self.advance();
98            } else {
99                break;
100            }
101        }
102        result
103    }
104
105    fn read_string_literal(&mut self) -> Result<String> {
106        // Skip opening quote
107        self.advance();
108        let mut result = String::new();
109        
110        while let Some(ch) = self.current_char() {
111            if ch == '\'' {
112                // Check for escaped quote ''
113                self.advance();
114                if self.current_char() == Some('\'') {
115                    result.push('\'');
116                    self.advance();
117                } else {
118                    // End of string
119                    return Ok(result);
120                }
121            } else {
122                result.push(ch);
123                self.advance();
124            }
125        }
126        
127        Err(ODataError::ParseError {
128            position: self.position,
129            message: "Unterminated string literal".to_string(),
130        })
131    }
132
133    fn read_backticked_identifier(&mut self) -> Result<String> {
134        // Skip opening backtick
135        self.advance();
136        let mut result = String::new();
137        
138        while let Some(ch) = self.current_char() {
139            if ch == '`' {
140                self.advance();
141                return Ok(result);
142            } else if ch == '\\' {
143                // Handle escaping
144                self.advance();
145                if let Some(next_ch) = self.current_char() {
146                    result.push(next_ch);
147                    self.advance();
148                } else {
149                    break;
150                }
151            } else {
152                result.push(ch);
153                self.advance();
154            }
155        }
156        
157        Err(ODataError::ParseError {
158            position: self.position,
159            message: "Unterminated backticked identifier".to_string(),
160        })
161    }
162
163    pub fn next_token(&mut self) -> Result<Token> {
164        self.skip_whitespace();
165
166        match self.current_char() {
167            None => Ok(Token::Eof),
168            Some('=') => {
169                self.advance();
170                Ok(Token::Equals)
171            }
172            Some(',') => {
173                self.advance();
174                Ok(Token::Comma)
175            }
176            Some('&') => {
177                self.advance();
178                Ok(Token::Ampersand)
179            }
180            Some(';') => {
181                self.advance();
182                Ok(Token::Semicolon)
183            }
184            Some('(') => {
185                self.advance();
186                Ok(Token::LParen)
187            }
188            Some(')') => {
189                self.advance();
190                Ok(Token::RParen)
191            }
192            Some('*') => {
193                self.advance();
194                Ok(Token::Mul)
195            }
196            Some(':') => {
197                self.advance();
198                Ok(Token::Colon)
199            }
200            Some('/') => {
201                self.advance();
202                Ok(Token::Slash)
203            }
204            Some('-') => {
205                self.advance();
206                Ok(Token::Minus)
207            }
208            Some('\'') => {
209                let string = self.read_string_literal()?;
210                Ok(Token::StringLiteral(string))
211            }
212            Some('`') => {
213                let ident = self.read_backticked_identifier()?;
214                Ok(Token::Identifier(ident))
215            }
216            Some('$') => {
217                self.advance();
218                let name = self.read_identifier();
219                Ok(Token::QueryOption(format!("${}", name)))
220            }
221            Some(ch) if ch.is_numeric() => {
222                let number = self.read_number();
223                Ok(Token::Number(number))
224            }
225            Some(ch) if ch.is_alphabetic() || ch == '_' => {
226                let ident = self.read_identifier();
227                Ok(Token::Identifier(ident))
228            }
229            Some(ch) => Err(ODataError::ParseError {
230                position: self.position,
231                message: format!("Unexpected character: '{}'", ch),
232            }),
233        }
234    }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the lexer over `input` and returns every token it yields, up to
    /// and including the final `Token::Eof`.
    fn tokenize(input: &str) -> Vec<Token> {
        let mut lexer = Lexer::new(input);
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token().unwrap();
            let finished = token == Token::Eof;
            tokens.push(token);
            if finished {
                return tokens;
            }
        }
    }

    #[test]
    fn test_tokenize_simple_query() {
        assert_eq!(
            tokenize("$select=id,name"),
            vec![
                Token::QueryOption("$select".to_string()),
                Token::Equals,
                Token::Identifier("id".to_string()),
                Token::Comma,
                Token::Identifier("name".to_string()),
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_tokenize_with_numbers() {
        // The trailing `Eof` is asserted too, so a lexer that emitted extra
        // tokens after the last number would be caught.
        assert_eq!(
            tokenize("$top=10&$skip=20"),
            vec![
                Token::QueryOption("$top".to_string()),
                Token::Equals,
                Token::Number("10".to_string()),
                Token::Ampersand,
                Token::QueryOption("$skip".to_string()),
                Token::Equals,
                Token::Number("20".to_string()),
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_tokenize_string_literal_with_escaped_quote() {
        assert_eq!(
            tokenize("'it''s'"),
            vec![Token::StringLiteral("it's".to_string()), Token::Eof]
        );
    }

    #[test]
    fn test_unterminated_string_literal_is_an_error() {
        let mut lexer = Lexer::new("'abc");
        assert!(lexer.next_token().is_err());
    }
}
263}