// libdd_trace_obfuscation/redis_tokenizer.rs
//
// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

/// Kind of token produced by [`RedisTokenizer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RedisTokenType {
    /// First token of a line.
    RedisTokenCommand,
    /// Any token that follows the command on the same line.
    RedisTokenArgument,
}

/// Zero-copy tokenizer that splits a Redis query into command and argument tokens.
///
/// Tokens are returned as sub-slices of the original query; nothing is allocated.
pub struct RedisTokenizer<'a> {
    /// The complete query being tokenized.
    data: &'a str,
    /// Byte offset into `data` of the next unread byte.
    offset: usize,
    /// Specifies the token we are about to parse.
    state: RedisTokenType,
}

/// Outcome of a single [`RedisTokenizer::scan`] call.
#[derive(Debug, PartialEq, Eq)]
pub struct RedisTokenizerScanResult<'a> {
    /// The token text, borrowed from the query (quotes and escapes are kept verbatim).
    pub token: &'a str,
    /// Whether `token` was scanned as a command or as an argument.
    pub token_type: RedisTokenType,
    /// `true` once the whole query has been consumed.
    pub done: bool,
}

impl<'a> RedisTokenizer<'a> {
    /// Creates a tokenizer over `query`, positioned after any leading spaces, tabs,
    /// carriage returns and newlines. The first token is scanned as a command.
    pub fn new(query: &str) -> RedisTokenizer<'_> {
        let mut s = RedisTokenizer {
            data: query,
            offset: 0,
            state: RedisTokenType::RedisTokenCommand,
        };
        s.skip_empty_lines();
        s
    }

    /// Scans the next token and reports its type and whether the input is exhausted.
    ///
    /// Calling `scan` again after `done` was reported yields empty tokens.
    pub fn scan(&mut self) -> RedisTokenizerScanResult<'a> {
        // The type must be captured first: scanning may switch `state` for the next token.
        let token_type = self.state;
        let (start, end) = self.next_token();
        let data = self.data;
        RedisTokenizerScanResult {
            token: &data[start..end],
            token_type,
            // Only a genuine end of input counts as done; a NUL byte in the query does not.
            done: self.curr_char().is_none(),
        }
    }

    /// Scans the next token and returns its `(start, end)` byte range within the query.
    ///
    /// After the token, trailing spaces and newlines are consumed; crossing a newline
    /// makes the following token a command.
    pub fn next_token(&mut self) -> (usize, usize) {
        let s = match self.state {
            RedisTokenType::RedisTokenCommand => self.next_cmd(),
            RedisTokenType::RedisTokenArgument => self.next_arg(),
        };
        loop {
            // Only skip spaces between commands (not tabs - Go only skips spaces)
            while self.curr_char() == Some(b' ') {
                self.offset += 1;
            }
            if self.curr_char() != Some(b'\n') {
                break;
            }
            self.state = RedisTokenType::RedisTokenCommand;
            self.offset += 1;
        }
        s
    }

    /// Scans a command token, ending at the first space, newline or end of input.
    ///
    /// A terminating space switches the tokenizer to argument mode; a terminating
    /// newline is consumed and the tokenizer stays in command mode.
    fn next_cmd(&mut self) -> (usize, usize) {
        // Go's scanCommand only skips ASCII spaces before the command (not tabs).
        // Tabs are included in the command token (default case in Go's switch).
        while self.curr_char() == Some(b' ') {
            self.offset += 1;
        }
        let start = self.offset;
        while let Some(c) = self.curr_char() {
            match c {
                b'\n' => {
                    let span = (start, self.offset);
                    self.offset += 1;
                    return span;
                }
                b' ' => {
                    self.state = RedisTokenType::RedisTokenArgument;
                    break;
                }
                _ => self.offset += 1,
            }
        }
        (start, self.offset)
    }

    /// Scans an argument token, honouring double quotes and backslash escapes.
    ///
    /// Spaces and newlines inside a quoted section do not end the argument, and a
    /// backslash prevents the following byte from toggling the quote state or starting
    /// another escape. An unquoted newline ends the argument, is consumed, and
    /// switches back to command mode.
    fn next_arg(&mut self) -> (usize, usize) {
        self.skip_whitespace();
        let start = self.offset;
        let mut quote = false;
        let mut escape = false;
        while let Some(c) = self.curr_char() {
            match c {
                b'\\' if !escape => {
                    // Remember the escape and move on without clearing the flag below.
                    escape = true;
                    self.offset += 1;
                    continue;
                }
                b'"' if !escape => quote = !quote,
                b'\n' if !quote => {
                    let span = (start, self.offset);
                    self.offset += 1;
                    self.state = RedisTokenType::RedisTokenCommand;
                    return span;
                }
                b' ' if !quote => {
                    return (start, self.offset);
                }
                _ => {}
            }
            escape = false;
            self.offset += 1;
        }
        (start, self.offset)
    }

    /// Advances past spaces, tabs and carriage returns.
    fn skip_whitespace(&mut self) {
        while matches!(self.curr_char(), Some(b' ' | b'\t' | b'\r')) {
            self.offset += 1;
        }
    }

    /// Advances past spaces, tabs, carriage returns and newlines.
    fn skip_empty_lines(&mut self) {
        while matches!(self.curr_char(), Some(b' ' | b'\t' | b'\r' | b'\n')) {
            self.offset += 1;
        }
    }

    /// Returns the byte at the current offset, or `None` at end of input.
    ///
    /// Using `Option` rather than a `0` sentinel keeps a NUL byte inside the query
    /// distinguishable from the end of the query.
    fn curr_char(&self) -> Option<u8> {
        self.data.as_bytes().get(self.offset).copied()
    }
}

#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;

    use super::RedisTokenizer;

    // Each entry below expands into one `#[test]` function. `expected` lists, in order,
    // the `Debug` rendering (minus the struct name) of every `scan` result for `input`.
    #[duplicate_item(
        // Empty input: a single empty command token.
        [
            test_name   [test_redis_tokenizer_1]
            input       [""]
            expected    [[r#"{ token: "", token_type: RedisTokenCommand, done: true }"#]];
        ]
        // Quotes inside a command are plain bytes; an unterminated quote in an argument
        // swallows the newline.
        [
            test_name   [test_redis_tokenizer_2]
            input       ["BAD\"\"INPUT\" \"boo\n  Weird13\\Stuff"]
            expected    [
                [
                    r#"{ token: "BAD\"\"INPUT\"", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"boo\n  Weird13\\Stuff", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        // Single command, with and without surrounding blank lines / spaces.
        [
            test_name   [test_redis_tokenizer_3]
            input       ["CMD"]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_4]
            input       ["\n  \nCMD\n  \n"]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_5]
            input       ["  CMD  "]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        // Several commands separated by newlines.
        [
            test_name   [test_redis_tokenizer_6]
            input       ["CMD1\nCMD2"]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_7]
            input       ["  CMD1  \n  CMD2  "]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_8]
            input       ["CMD1\nCMD2\nCMD3"]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        // Commands followed by unquoted arguments.
        [
            test_name   [test_redis_tokenizer_9]
            input       ["CMD arg"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_10]
            input       ["  CMD  arg  "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_11]
            input       ["CMD arg1 arg2"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        // Tabs mixed into the separators (written here as `\t` escapes).
        [
            test_name   [test_redis_tokenizer_12]
            input       [" \t CMD   arg1 \t  arg2 "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        // Multi-line queries mixing commands and arguments.
        [
            test_name   [test_redis_tokenizer_13]
            input       ["CMD arg1\nCMD2 arg2"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_14]
            input       ["CMD arg1 arg2\nCMD2 arg3\nCMD3\nCMD4 arg4 arg5 arg6"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg3", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD4", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg4", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg5", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg6", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_15]
            input       ["CMD arg1   arg2  \n CMD2  arg3 \n CMD3 \n  CMD4 arg4 arg5 arg6\nCMD5 "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg3", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD4", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg4", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg5", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg6", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD5", token_type: RedisTokenCommand, done: true }"#,
                ]
            ];
        ]
        // Quoted arguments: quotes are kept in the token and protect inner spaces/newlines.
        [
            test_name   [test_redis_tokenizer_16]
            input       [r#"CMD """#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_17]
            input       [r#"CMD "foo bar""#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_18]
            input       [r#"CMD "foo bar\ " baz"#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\\ \"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_19]
            input       ["CMD \"foo \n bar\" \"\"  baz "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo \n bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "\"\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        // An escaped quote does not close the quoted section.
        [
            test_name   [test_redis_tokenizer_20]
            input       ["CMD \"foo \\\" bar\" baz"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo \\\" bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_21]
            input       [r#"CMD "foo bar" baz"#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        // Quoted arguments across several lines, including an escaped backslash.
        [
            test_name   [test_redis_tokenizer_22]
            input       ["CMD \"foo bar\" baz\nCMD2 \"baz\\\\bar\""]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"baz\\\\bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_23]
            input       [" CMD  \"foo bar\"  baz \n CMD2  \"baz\\\\bar\"  "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"baz\\\\bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
    )]
    #[test]
    fn test_name() {
        // Scan once per expected entry and compare through the result's `Debug` output.
        let mut scanner = RedisTokenizer::new(input);
        for want in expected {
            let got = scanner.scan();
            assert_eq!(
                format!("{got:?}"),
                format!("RedisTokenizerScanResult {want}")
            );
        }
    }
}