// usiem/components/query/mod.rs

/// Character-based lexer that turns a query string into [`Token`]s.
///
/// The lexer keeps a one-character window (`ch`) over `input`.
/// [`QueryLexer::read_char`] moves that window forward and
/// [`QueryLexer::next_token`] consumes characters until a full token has been
/// recognised.
#[derive(Debug, Clone)]
pub struct QueryLexer {
    /// Characters of the query being tokenised.
    input: Vec<char>,
    /// Index in `input` of the character most recently loaded into `ch`.
    pub position: usize,
    /// Index in `input` of the character the next `read_char` call will load.
    pub read_position: usize,
    /// Character currently under examination, or the end-of-input sentinel
    /// once `read_position` has moved past the last character.
    pub ch: char,
}

/// Returns `true` when `name` is exactly one of the built-in function names
/// recognised by the query language.
///
/// The comparison is case-sensitive and does not trim whitespace.
fn is_function(name: &str) -> bool {
    const BUILTIN_FUNCTIONS: [&str; 10] = [
        "to_number",
        "to_string",
        "lowercase",
        "uppercase",
        "replace",
        "len",
        "floor",
        "trim",
        "to_integer",
        "to_float",
    ];
    BUILTIN_FUNCTIONS.iter().any(|known| *known == name)
}

/// Returns `true` for characters that may start an identifier: ASCII letters,
/// the underscore and the dot (so dotted field names such as
/// `os.actor_process` are read as a single identifier).
fn is_letter(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_' | '.')
}

/// Counts the `*` characters in `input` that are not immediately preceded by a
/// backslash.
///
/// Accepts any character slice; a `&Vec<char>` coerces automatically, so
/// existing call sites keep working.
fn count_asterix(input: &[char]) -> usize {
    // The first character has no predecessor, so it can never be escaped.
    let leading = usize::from(input.first() == Some(&'*'));
    // Every other character is checked together with the one before it.
    let following = input
        .windows(2)
        .filter(|pair| pair[1] == '*' && pair[0] != '\\')
        .count();
    leading + following
}

/// Resolves the character that follows a backslash inside a quoted string.
///
/// `n`, `t`, `r` and `0` map to newline, tab, carriage return and NUL; any
/// other character maps to itself. An escaped `*` yields `Err(())` so that the
/// caller decides how the escaped asterisk is emitted.
fn transform_escape_char(ch: char) -> Result<char, ()> {
    if ch == '*' {
        return Err(());
    }
    let resolved = match ch {
        'n' => '\n',
        't' => '\t',
        'r' => '\r',
        '0' => '\0',
        other => other,
    };
    Ok(resolved)
}

51impl QueryLexer {
52 pub fn new(input: Vec<char>) -> Self {
53 Self {
54 input,
55 position: 0,
56 read_position: 0,
57 ch: '0',
58 }
59 }
60
61 pub fn read_char(&mut self) {
62 if self.read_position >= self.input.len() {
63 self.ch = '0';
64 } else {
65 self.ch = self.input[self.read_position];
66 }
67 self.position = self.read_position;
68 self.read_position += 1;
69 }
70
71 pub fn skip_whitespace(&mut self) {
72 loop {
73 let ch = self.ch;
74 if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
75 self.read_char();
76 } else {
77 return;
78 }
79 }
80 }
81
82 pub fn next_token(&mut self) -> Token {
83 let read_identifier = |l: &mut QueryLexer| -> Vec<char> {
84 let position = l.position;
85 while l.position < l.input.len() && (is_letter(l.ch) || l.ch.is_ascii_digit()) {
86 l.read_char();
87 }
88 l.input[position..l.position].to_vec()
89 };
90 let read_literal_string = |l: &mut QueryLexer| -> Vec<char> {
91 let mut is_escape = false;
92 let mut to_ret = Vec::with_capacity(32);
93 while (is_escape || l.ch != '\'') && l.position < l.input.len() {
94 if l.ch == '\\' {
95 if is_escape {
96 to_ret.push('\\');
97 }
98 is_escape = !is_escape;
99 } else {
100 if is_escape {
101 match transform_escape_char(l.ch) {
102 Ok(ch) => to_ret.push(ch),
103 Err(_) => {
104 to_ret.push(l.ch);
105 }
106 };
107 } else {
108 to_ret.push(l.ch);
109 }
110 is_escape = false;
111 }
112 l.read_char();
113 }
114 to_ret
115 };
116
117 let read_string = |l: &mut QueryLexer| -> Vec<char> {
118 let mut is_escape = false;
119 let mut to_ret = Vec::with_capacity(32);
120 while (is_escape || l.ch != '"') && l.position < l.input.len() {
121 if l.ch == '\\' {
122 if is_escape {
123 to_ret.push('\\');
124 }
125 is_escape = !is_escape;
126 } else {
127 if is_escape {
128 match transform_escape_char(l.ch) {
129 Ok(ch) => to_ret.push(ch),
130 Err(_) => {
131 to_ret.push('\\');
133 to_ret.push(l.ch);
134 }
135 };
136 } else {
137 to_ret.push(l.ch);
138 }
139 is_escape = false;
140 }
141 l.read_char();
142 }
143 to_ret
144 };
145
146 let read_number = |l: &mut QueryLexer| -> Vec<char> {
147 let position = l.position;
148 while l.position < l.input.len() && l.ch.is_ascii_digit() {
149 l.read_char();
150 }
151 l.input[position..l.position].to_vec()
152 };
153
154 let tok: Token;
155 self.skip_whitespace();
156 match self.ch {
157 '=' => {
158 tok = Token::ASSIGN;
159 }
160 '|' => {
161 tok = Token::PIPE;
162 }
163 '+' => {
164 tok = Token::PLUS(self.ch);
165 }
166 '-' => {
167 tok = Token::MINUS(self.ch);
168 }
169 '!' => {
170 tok = Token::BANG(self.ch);
171 }
172 '/' => {
173 tok = Token::SLASH(self.ch);
174 }
175 '*' => {
176 tok = Token::ASTERISK(self.ch);
177 }
178 '<' => {
179 tok = Token::LT(self.ch);
180 }
181 '>' => {
182 tok = Token::GT(self.ch);
183 }
184 ';' => {
185 tok = Token::SEMICOLON(self.ch);
186 }
187 '(' => {
188 tok = Token::LPAREN(self.ch);
189 }
190 ')' => {
191 tok = Token::RPAREN(self.ch);
192 }
193 ',' => {
194 tok = Token::COMMA(self.ch);
195 }
196 '{' => {
197 tok = Token::LBRACE(self.ch);
198 }
199 '}' => {
200 tok = Token::RBRACE(self.ch);
201 }
202 '0' => {
203 tok = Token::EOF;
204 }
205 '\'' => {
206 self.read_char();
207 let data = read_literal_string(self);
208 tok = Token::String(data.iter().collect())
209 }
210 '"' => {
211 self.read_char();
212 let data = read_string(self);
213 if data.len() > 1 {
214 let n_asterix = count_asterix(&data);
215 if n_asterix > 2 {
217 tok = Token::Like(data.iter().collect())
218 } else {
219 let starts_astx = data[0] == '*';
220 let ends_astx = data[data.len() - 1] == '*';
221 if starts_astx && ends_astx {
222 tok = Token::Contains(data.iter().filter(|c| *c != &'*').collect())
223 } else if starts_astx {
224 tok = Token::StartsWith(data.iter().filter(|c| *c != &'*').collect())
225 } else if ends_astx {
226 tok = Token::EndsWith(data.iter().filter(|c| *c != &'*').collect())
227 } else if n_asterix == 0 {
228 tok = Token::String(data.iter().collect())
229 } else {
230 tok = Token::Like(data.iter().collect())
231 }
232 }
233 } else {
234 tok = Token::String(data.iter().collect())
235 }
236 }
237 _ => {
238 if is_letter(self.ch) {
239 let ident: Vec<char> = read_identifier(self);
240 match get_keyword_token(&ident) {
241 Ok(keywork_token) => {
242 return keywork_token;
243 }
244 Err(_err) => {
245 return Token::FIELD(ident.into_iter().collect());
246 }
247 }
248 } else if self.ch.is_ascii_digit() {
249 let ident: Vec<char> = read_number(self);
250 return Token::INT(ident.into_iter().collect());
251 } else {
252 return Token::ILLEGAL;
253 }
254 }
255 }
256 self.read_char();
257 tok
258 }
259}
260
/// Tokens produced by [`QueryLexer::next_token`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A character that does not start any known token.
    ILLEGAL,
    /// End of the input.
    EOF,
    /// An identifier that is neither a keyword nor a built-in function name.
    FIELD(String),
    /// A run of ASCII digits, kept as text.
    INT(String),
    /// `=`
    ASSIGN,
    /// `|`
    PIPE,
    /// `+`
    PLUS(char),
    /// `,`
    COMMA(char),
    /// `;`
    SEMICOLON(char),
    /// `(`
    LPAREN(char),
    /// `)`
    RPAREN(char),
    /// `{`
    LBRACE(char),
    /// `}`
    RBRACE(char),
    /// Name of a built-in function such as `to_string`.
    FUNCTION(String),
    /// The keyword `true`.
    TRUE,
    /// The keyword `false`.
    FALSE,
    /// The keyword `AND`.
    AND,
    /// The keyword `OR`.
    OR,
    /// The keyword `NOT`.
    NOT,
    /// Not produced by the lexer code in this file.
    RETURN,
    /// `-`
    MINUS(char),
    /// `!`
    BANG(char),
    /// `*` outside of a quoted string.
    ASTERISK(char),
    /// `/`
    SLASH(char),
    /// `<`
    LT(char),
    /// `>`
    GT(char),
    /// The keyword `filter`.
    FILTER,
    /// The keyword `fields`.
    FIELDS,
    /// The keyword `as`.
    AS,
    /// A single-quoted string, or a double-quoted string without wildcards.
    String(String),
    /// Not produced by the lexer code in this file.
    RegexField(String),
    /// Double-quoted string whose body begins (but does not end) with `*`;
    /// the `*` characters are removed from the payload.
    StartsWith(String),
    /// Double-quoted string whose body ends (but does not begin) with `*`;
    /// the `*` characters are removed from the payload.
    EndsWith(String),
    /// Double-quoted string with wildcards in any other arrangement; the
    /// payload is the string body as read.
    Like(String),
    /// Double-quoted string whose body both begins and ends with `*`; the `*`
    /// characters are removed from the payload.
    Contains(String),
}

300pub fn get_keyword_token(ident: &[char]) -> Result<Token, String> {
301 let identifier: String = ident.iter().collect();
302 match &identifier[..] {
303 "true" => Ok(Token::TRUE),
304 "false" => Ok(Token::FALSE),
305 "AND" => Ok(Token::AND),
306 "OR" => Ok(Token::OR),
307 "NOT" => Ok(Token::NOT),
308 "filter" => Ok(Token::FILTER),
309 "fields" => Ok(Token::FIELDS),
310 "as" => Ok(Token::AS),
311 _ => {
312 if is_function(&identifier) {
313 return Ok(Token::FUNCTION(identifier));
314 }
315 Err(String::from("Not a keyword"))
316 }
317 }
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `query` to completion and returns every token, including the
    /// terminating `EOF` (or `ILLEGAL`) token.
    fn lex_all(query: &str) -> Vec<Token> {
        let mut lexer = QueryLexer::new(query.chars().collect());
        // Prime the lexer with the first character.
        lexer.read_char();
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token();
            let finished = token == Token::EOF || token == Token::ILLEGAL;
            tokens.push(token);
            if finished {
                return tokens;
            }
        }
    }

    #[test]
    fn should_parse_the_query() {
        let tokens = lex_all("filter field_name2=\"*something\" | fields os.actor_process as osap | filter to_string(osap,'something') = \"12345\"");
        let expected = vec![
            Token::FILTER,
            Token::FIELD(String::from("field_name2")),
            Token::ASSIGN,
            Token::StartsWith(String::from("something")),
            Token::PIPE,
            Token::FIELDS,
            Token::FIELD(String::from("os.actor_process")),
            Token::AS,
            Token::FIELD(String::from("osap")),
            Token::PIPE,
            Token::FILTER,
            Token::FUNCTION(String::from("to_string")),
            Token::LPAREN('('),
            Token::FIELD(String::from("osap")),
            Token::COMMA(','),
            Token::String(String::from("something")),
            Token::RPAREN(')'),
            Token::ASSIGN,
            Token::String(String::from("12345")),
            Token::EOF,
        ];
        assert_eq!(tokens, expected);
    }
}