libdd_trace_obfuscation/
redis_tokenizer.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
/// Kind of token produced by the Redis tokenizer.
///
/// The tokenizer also uses this as its internal state: it records which kind
/// of token the next scan is expected to produce.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RedisTokenType {
    /// The first word on a line (the command name).
    RedisTokenCommand,
    /// Any word following the command on the same line.
    RedisTokenArgument,
}
9
10pub struct RedisTokenizer<'a> {
11    data: &'a str,
12    offset: usize,
13    state: RedisTokenType, // specifies the token we are about to parse
14}
15
16#[derive(Debug)]
17pub struct RedisTokenizerScanResult<'a> {
18    pub token: &'a str,
19    pub token_type: RedisTokenType,
20    pub done: bool,
21}
22
23impl<'a> RedisTokenizer<'a> {
24    pub fn new(query: &str) -> RedisTokenizer<'_> {
25        let mut s = RedisTokenizer {
26            data: query,
27            offset: 0,
28            state: RedisTokenType::RedisTokenCommand,
29        };
30        s.skip_empty_lines();
31        s
32    }
33
34    pub fn scan(&mut self) -> RedisTokenizerScanResult<'a> {
35        let token_type = self.state;
36        let current = self.next_token();
37        RedisTokenizerScanResult {
38            token: &self.data[current.0..current.1],
39            token_type,
40            done: self.curr_char() == 0,
41        }
42    }
43
44    pub fn next_token(&mut self) -> (usize, usize) {
45        let s = match self.state {
46            RedisTokenType::RedisTokenCommand => self.next_cmd(),
47            RedisTokenType::RedisTokenArgument => self.next_arg(),
48        };
49        loop {
50            self.skip_whitespace();
51            if self.curr_char() != b'\n' {
52                break;
53            }
54            self.state = RedisTokenType::RedisTokenCommand;
55            self.offset += 1;
56        }
57        s
58    }
59
60    fn next_cmd(&mut self) -> (usize, usize) {
61        self.skip_whitespace();
62        let start = self.offset;
63        loop {
64            match self.curr_char() {
65                0 => break,
66                b'\n' => {
67                    let span = (start, self.offset);
68                    self.offset += 1;
69                    return span;
70                }
71                b' ' => {
72                    self.state = RedisTokenType::RedisTokenArgument;
73                    break;
74                }
75                _ => self.offset += 1,
76            }
77        }
78        (start, self.offset)
79    }
80
81    fn next_arg(&mut self) -> (usize, usize) {
82        self.skip_whitespace();
83        let start = self.offset;
84        let mut quote = false;
85        let mut escape = false;
86        loop {
87            match self.curr_char() {
88                0 => break,
89                b'\\' => {
90                    if !escape {
91                        escape = true;
92                        self.offset += 1;
93                        continue;
94                    }
95                }
96                b'"' => {
97                    if !escape {
98                        quote = !quote
99                    }
100                }
101                b'\n' => {
102                    if !quote {
103                        let span = (start, self.offset);
104                        self.offset += 1;
105                        self.state = RedisTokenType::RedisTokenCommand;
106                        return span;
107                    }
108                }
109                b' ' => {
110                    if !quote {
111                        return (start, self.offset);
112                    }
113                }
114                _ => {}
115            }
116            escape = false;
117            self.offset += 1;
118        }
119        (start, self.offset)
120    }
121
122    fn skip_whitespace(&mut self) {
123        while matches!(self.curr_char(), b' ' | b'\t' | b'\r') {
124            self.offset += 1;
125        }
126    }
127
128    fn skip_empty_lines(&mut self) {
129        while matches!(self.curr_char(), b' ' | b'\t' | b'\r' | b'\n') {
130            self.offset += 1;
131        }
132    }
133
134    fn curr_char(&self) -> u8 {
135        match self.data.as_bytes().get(self.offset) {
136            Some(&c) => c,
137            None => 0,
138        }
139    }
140}
141
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;

    use super::RedisTokenizer;

    // Table-driven tests: `duplicate_item` stamps out one copy of the test
    // function below per bracketed group, substituting `test_name`, `input`
    // and `expected`.
    //
    // In `expected`, the outer brackets delimit the substitution and the inner
    // brackets form an array literal. Each entry is the expected `Debug`
    // output of one `scan()` result, minus the leading struct name (the test
    // body prepends it).
    #[duplicate_item(
        [
            test_name   [test_redis_tokenizer_1]
            input       [""]
            expected    [[r#"{ token: "", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_2]
            input       ["BAD\"\"INPUT\" \"boo\n  Weird13\\Stuff"]
            expected    [
                [
                    r#"{ token: "BAD\"\"INPUT\"", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"boo\n  Weird13\\Stuff", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_3]
            input       ["CMD"]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_4]
            input       ["\n  \nCMD\n  \n"]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_5]
            input       ["  CMD  "]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_6]
            input       ["CMD1\nCMD2"]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_7]
            input       ["  CMD1  \n  CMD2  "]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_8]
            input       ["CMD1\nCMD2\nCMD3"]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_9]
            input       ["CMD arg"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_10]
            input       ["  CMD  arg  "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_11]
            input       ["CMD arg1 arg2"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_12]
            input       [" 	 CMD   arg1 	  arg2 "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_13]
            input       ["CMD arg1\nCMD2 arg2"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_14]
            input       ["CMD arg1 arg2\nCMD2 arg3\nCMD3\nCMD4 arg4 arg5 arg6"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg3", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD4", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg4", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg5", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg6", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_15]
            input       ["CMD arg1   arg2  \n CMD2  arg3 \n CMD3 \n  CMD4 arg4 arg5 arg6\nCMD5 "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg3", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD4", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg4", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg5", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg6", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD5", token_type: RedisTokenCommand, done: true }"#,
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_16]
            input       [r#"CMD """#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_17]
            input       [r#"CMD "foo bar""#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_18]
            input       [r#"CMD "foo bar\ " baz"#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\\ \"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_19]
            input       ["CMD \"foo \n bar\" \"\"  baz "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo \n bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "\"\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_20]
            input       ["CMD \"foo \\\" bar\" baz"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo \\\" bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_21]
            input       [r#"CMD "foo bar" baz"#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_22]
            input       ["CMD \"foo bar\" baz\nCMD2 \"baz\\\\bar\""]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"baz\\\\bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_23]
            input       [" CMD  \"foo bar\"  baz \n CMD2  \"baz\\\\bar\"  "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"baz\\\\bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
    )]
    #[test]
    fn test_name() {
        let mut tokenizer = RedisTokenizer::new(input);
        // Scan exactly as many tokens as the case expects and compare each
        // result's Debug rendering against the expected string.
        for i in 0..expected.len() {
            let res = tokenizer.scan();
            assert_eq!(
                format!("{res:?}"),
                format!("RedisTokenizerScanResult {}", expected[i])
            );
        }
    }
}