use std::fmt::Display;
use std::iter::Peekable;
use std::str::Chars;
use thiserror::Error;
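
/// A single lexical token: either an access token or one of the structural
/// symbols `(`, `)`, `&`, `|`.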
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    #[allow(clippy::enum_variant_names)]
    AccessToken(String),
    OpenParen,
    CloseParen,
    And,
    Or,
}
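
/// A logical operator: `Conjunction` (AND) or `Disjunction` (OR).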
#[derive(Debug, PartialEq, Clone)]
pub enum Operator {
    Conjunction,
    Disjunction,
}

impl Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Token::AccessToken(token) => write!(f, "{:?}", token),
            Token::OpenParen => write!(f, "("),
            Token::CloseParen => write!(f, ")"),
            Token::And => write!(f, "&"),
            Token::Or => write!(f, "|"),
        }
    }
}
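
/// A streaming lexer over an input string. It implements [`Iterator`],
/// yielding one `Result<Token, LexerError>` per lexed token.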
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    inner_peekable_iterator: Peekable<Chars<'a>>,
    position: usize,
}
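
/// Errors produced while lexing. `UnexpectedCharacter` carries the offending
/// character and its 1-based position in the input.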
#[derive(Error, Debug, PartialEq, Clone)]
pub enum LexerError {
    UnexpectedCharacter(char, usize),
}

impl Display for LexerError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            LexerError::UnexpectedCharacter(c, position) => {
                write!(f, "Unexpected character '{}' at position {}", c, position)
            }
        }
    }
}

impl<'a> Lexer<'a> {
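    /// Creates a lexer over `input`. No characters are consumed until the
    /// iterator is driven.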
    pub fn new(input: &'a str) -> Self {
        let inner_peekable_iterator = input.chars().peekable();
        Lexer {
            inner_peekable_iterator,
            position: 0,
        }
    }

    /// Consumes the next character, advancing `position` (1-based) if one was read.
    fn read_char(&mut self) -> Option<char> {
        let c = self.inner_peekable_iterator.next();
        if c.is_some() {
            self.position += 1;
        }
        c
    }

    /// Peeks at the next character without consuming it.
    fn peek_char(&mut self) -> Option<&char> {
        self.inner_peekable_iterator.peek()
    }
}
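
/// Characters permitted in an unquoted access token: ASCII alphanumerics and
/// `_`, `-`, `.`, `:`, `/`.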
fn is_allowed_char_for_unquoted_access_token(c: char) -> bool {
    c.is_ascii_alphanumeric()
        || c == '_'
        || c == '-'
        || c == '.'
        || c == ':'
        || c == '/'
}
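
/// Characters permitted inside a quoted access token: printable ASCII, the
/// space character, and any character at or above U+0080; ASCII control
/// characters are rejected.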
fn is_allowed_char_for_quoted_access_token(c: char) -> bool {
    c.is_ascii_graphic()
        || c == ' '
        || ((c as u32) >= 0x0080 && (c as u32) <= 0xD7FF)
        || ((c as u32) >= 0xE000 && (c as u32) <= 0x10FFFF)
}
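
/// The lexer is driven as an iterator: each call to `next` reads one token or
/// reports an `UnexpectedCharacter` error; errors do not stop iteration.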
impl<'a> Iterator for Lexer<'a> {
    type Item = Result<Token, LexerError>;

    fn next(&mut self) -> Option<Self::Item> {
        let c = self.read_char()?;
        let r = match c {
            '(' => Ok(Token::OpenParen),
            ')' => Ok(Token::CloseParen),
            '&' => Ok(Token::And),
            '|' => Ok(Token::Or),
            '"' => self.handle_quoted_access_token(),
            _ if is_allowed_char_for_unquoted_access_token(c) => {
                self.handle_unquoted_access_token(c)
            }
            _ => Err(LexerError::UnexpectedCharacter(c, self.position)),
        };
        Some(r)
    }
}
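
/// Helpers for scanning the two access-token forms: quoted tokens support the
/// escapes `\"` and `\\`; unquoted tokens end at the first disallowed character.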
impl<'a> Lexer<'a> {
    /// Scans a quoted access token; the opening `"` has already been consumed.
    /// If the input ends before a closing quote, the value read so far is returned.
    fn handle_quoted_access_token(&mut self) -> Result<Token, LexerError> {
        let mut value = String::new();
        while let Some(c) = self.read_char() {
            if !is_allowed_char_for_quoted_access_token(c) {
                return Err(LexerError::UnexpectedCharacter(c, self.position));
            }
            match c {
                '\\' => {
                    // Only `\"` and `\\` are valid escape sequences.
                    if let Some(next_char) = self.read_char() {
                        if next_char == '"' || next_char == '\\' {
                            value.push(next_char);
                        } else {
                            return Err(LexerError::UnexpectedCharacter(next_char, self.position));
                        }
                    }
                }
                '"' => {
                    // Closing quote terminates the token.
                    break;
                }
                _ => {
                    value.push(c);
                }
            }
        }
        Ok(Token::AccessToken(value))
    }

    /// Scans an unquoted access token starting with `first_char`, consuming
    /// characters until one is not allowed in an unquoted token.
    fn handle_unquoted_access_token(&mut self, first_char: char) -> Result<Token, LexerError> {
        let mut value = String::new();
        value.push(first_char);
        while let Some(c) = self.peek_char() {
            if is_allowed_char_for_unquoted_access_token(*c) {
                let c = self.read_char().unwrap();
                value.push(c);
            } else {
                break;
            }
        }
        Ok(Token::AccessToken(value))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lexer_valid() {
        let input =
            "label1&\"label 🕺\"|(\"hello \\\\ \\\"world\"|label4|(label5&label6)))";
        let lexer = Lexer::new(input);
        let tokens: Vec<Result<Token, LexerError>> = lexer.collect();
        assert_eq!(
            tokens,
            vec![
                Ok(Token::AccessToken("label1".to_string())),
                Ok(Token::And),
                Ok(Token::AccessToken("label 🕺".to_string())),
                Ok(Token::Or),
                Ok(Token::OpenParen),
                Ok(Token::AccessToken("hello \\ \"world".to_string())),
                Ok(Token::Or),
                Ok(Token::AccessToken("label4".to_string())),
                Ok(Token::Or),
                Ok(Token::OpenParen),
                Ok(Token::AccessToken("label5".to_string())),
                Ok(Token::And),
                Ok(Token::AccessToken("label6".to_string())),
                Ok(Token::CloseParen),
                Ok(Token::CloseParen),
                Ok(Token::CloseParen),
            ]
        );
    }

    #[test]
    fn test_lexer_valid2() {
        let input = "\"abc!12\"&\"abc\\\\xyz\"&GHI";

        let lexer = Lexer::new(input);
        let tokens: Vec<Result<Token, LexerError>> = lexer.collect();

        assert_eq!(
            tokens,
            vec![
                Ok(Token::AccessToken("abc!12".to_string())),
                Ok(Token::And),
                Ok(Token::AccessToken("abc\\xyz".to_string())),
                Ok(Token::And),
                Ok(Token::AccessToken("GHI".to_string())),
            ]);
    }

    #[test]
    fn test_lexer_invalid() {
        let input = "label1 & [";
        let lexer = Lexer::new(input);
        let tokens: Vec<Result<Token, LexerError>> = lexer.collect();
        assert_eq!(
            tokens,
            vec![
                Ok(Token::AccessToken("label1".to_string())),
                Err(LexerError::UnexpectedCharacter(' ', 7)),
                Ok(Token::And),
                Err(LexerError::UnexpectedCharacter(' ', 9)),
                Err(LexerError::UnexpectedCharacter('[', 10)),
            ]
        );
    }
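
    // A minimal additional check (sketch, not in the original tests): empty
    // input yields no tokens, since `read_char` returns `None` immediately.
    #[test]
    fn test_lexer_empty_input() {
        let lexer = Lexer::new("");
        assert!(lexer.collect::<Vec<_>>().is_empty());
    }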
}