// libdd_trace_obfuscation/redis_tokenizer.rs

// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0
3
/// Kind of token produced by [`RedisTokenizer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RedisTokenType {
    /// The first word of a line: the command name.
    RedisTokenCommand,
    /// Any word following the command on the same line.
    RedisTokenArgument,
}

/// Splits a Redis query into command and argument tokens.
///
/// Tokens are borrowed slices of the original query; nothing is copied.
pub struct RedisTokenizer<'a> {
    /// The query being tokenized.
    data: &'a str,
    /// Byte offset of the next unread byte in `data`.
    offset: usize,
    /// Kind of token the next call to `scan`/`next_token` will parse.
    state: RedisTokenType,
}

/// Result of a single [`RedisTokenizer::scan`] call.
#[derive(Debug, PartialEq, Eq)]
pub struct RedisTokenizerScanResult<'a> {
    /// The token text, borrowed from the query (quotes and escapes are kept verbatim).
    pub token: &'a str,
    /// Whether `token` was parsed as a command or as an argument.
    pub token_type: RedisTokenType,
    /// `true` when no input remains after this token and its trailing separators.
    pub done: bool,
}

impl<'a> RedisTokenizer<'a> {
    /// Creates a tokenizer over `query`, positioned after any leading
    /// spaces, tabs, carriage returns and newlines.
    #[must_use]
    pub fn new(query: &str) -> RedisTokenizer<'_> {
        let mut tokenizer = RedisTokenizer {
            data: query,
            offset: 0,
            state: RedisTokenType::RedisTokenCommand,
        };
        tokenizer.skip_empty_lines();
        tokenizer
    }

    /// Parses the next token and reports whether the input is exhausted.
    ///
    /// Once `done` has been returned as `true`, further calls yield empty tokens.
    pub fn scan(&mut self) -> RedisTokenizerScanResult<'a> {
        // Capture the type before parsing: `next_token` may flip the state.
        let token_type = self.state;
        let (start, end) = self.next_token();
        RedisTokenizerScanResult {
            // Spans always start and end next to an ASCII separator or at the
            // ends of the input, so they fall on UTF-8 character boundaries.
            token: &self.data[start..end],
            token_type,
            done: self.curr_char().is_none(),
        }
    }

    /// Consumes the next token and returns its `(start, end)` byte span in the query.
    ///
    /// After the token, trailing spaces are skipped; every newline met while
    /// doing so resets the tokenizer so that the next token is a command.
    pub fn next_token(&mut self) -> (usize, usize) {
        let span = match self.state {
            RedisTokenType::RedisTokenCommand => self.next_cmd(),
            RedisTokenType::RedisTokenArgument => self.next_arg(),
        };
        loop {
            // Only skip spaces between commands (not tabs - Go only skips spaces)
            self.skip_spaces();
            if self.curr_char() != Some(b'\n') {
                break;
            }
            self.state = RedisTokenType::RedisTokenCommand;
            self.offset += 1;
        }
        span
    }

    /// Scans a command token: everything up to the next space, newline or end of input.
    fn next_cmd(&mut self) -> (usize, usize) {
        // Go's scanCommand only skips ASCII spaces before the command (not tabs).
        // Tabs are included in the command token (default case in Go's switch).
        self.skip_spaces();
        let start = self.offset;
        while let Some(byte) = self.curr_char() {
            match byte {
                b'\n' => {
                    // Command without arguments: consume the newline, stay in command state.
                    let span = (start, self.offset);
                    self.offset += 1;
                    return span;
                }
                b' ' => {
                    // A space ends the command; arguments follow.
                    self.state = RedisTokenType::RedisTokenArgument;
                    break;
                }
                _ => self.offset += 1,
            }
        }
        (start, self.offset)
    }

    /// Scans an argument token, honouring double quotes and backslash escapes.
    ///
    /// Inside an open double quote, spaces and newlines belong to the token.
    /// A backslash prevents the following `"` from toggling the quote state.
    fn next_arg(&mut self) -> (usize, usize) {
        self.skip_whitespace();
        let start = self.offset;
        let mut quote = false;
        let mut escape = false;
        while let Some(byte) = self.curr_char() {
            match byte {
                b'\\' if !escape => {
                    // Remember the escape for the next byte only.
                    escape = true;
                    self.offset += 1;
                    continue;
                }
                b'"' if !escape => quote = !quote,
                b'\n' if !quote => {
                    // Last argument of the line: consume the newline, a command follows.
                    let span = (start, self.offset);
                    self.offset += 1;
                    self.state = RedisTokenType::RedisTokenCommand;
                    return span;
                }
                b' ' if !quote => return (start, self.offset),
                _ => {}
            }
            escape = false;
            self.offset += 1;
        }
        (start, self.offset)
    }

    /// Advances past consecutive ASCII spaces.
    fn skip_spaces(&mut self) {
        while self.curr_char() == Some(b' ') {
            self.offset += 1;
        }
    }

    /// Advances past spaces, tabs and carriage returns (but not newlines).
    fn skip_whitespace(&mut self) {
        while matches!(self.curr_char(), Some(b' ' | b'\t' | b'\r')) {
            self.offset += 1;
        }
    }

    /// Advances past spaces, tabs, carriage returns and newlines.
    fn skip_empty_lines(&mut self) {
        while matches!(self.curr_char(), Some(b' ' | b'\t' | b'\r' | b'\n')) {
            self.offset += 1;
        }
    }

    /// Returns the byte at the current offset, or `None` at end of input.
    ///
    /// Using `Option` instead of a `0` sentinel keeps NUL bytes inside the
    /// query from being mistaken for the end of the input.
    fn curr_char(&self) -> Option<u8> {
        self.data.as_bytes().get(self.offset).copied()
    }
}
139
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;

    use super::RedisTokenizer;

    // Table-driven tests: each bracketed group below supplies a test name, an
    // input query and the expected `Debug` rendering (minus the struct name)
    // of every scan result, in order. `duplicate_item` is expected to stamp
    // out one copy of the function at the bottom per group.
    #[duplicate_item(
        [
            test_name   [test_redis_tokenizer_1]
            input       [""]
            expected    [[r#"{ token: "", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_2]
            input       ["BAD\"\"INPUT\" \"boo\n  Weird13\\Stuff"]
            expected    [
                [
                    r#"{ token: "BAD\"\"INPUT\"", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"boo\n  Weird13\\Stuff", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_3]
            input       ["CMD"]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_4]
            input       ["\n  \nCMD\n  \n"]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_5]
            input       ["  CMD  "]
            expected    [[r#"{ token: "CMD", token_type: RedisTokenCommand, done: true }"#]];
        ]
        [
            test_name   [test_redis_tokenizer_6]
            input       ["CMD1\nCMD2"]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_7]
            input       ["  CMD1  \n  CMD2  "]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_8]
            input       ["CMD1\nCMD2\nCMD3"]
            expected    [
                [
                    r#"{ token: "CMD1", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_9]
            input       ["CMD arg"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_10]
            input       ["  CMD  arg  "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_11]
            input       ["CMD arg1 arg2"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_12]
            input       [" 	 CMD   arg1 	  arg2 "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_13]
            input       ["CMD arg1\nCMD2 arg2"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_14]
            input       ["CMD arg1 arg2\nCMD2 arg3\nCMD3\nCMD4 arg4 arg5 arg6"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg3", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD4", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg4", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg5", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg6", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_15]
            input       ["CMD arg1   arg2  \n CMD2  arg3 \n CMD3 \n  CMD4 arg4 arg5 arg6\nCMD5 "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg1", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg2", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg3", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD3", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "CMD4", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "arg4", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg5", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "arg6", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD5", token_type: RedisTokenCommand, done: true }"#,
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_16]
            input       [r#"CMD """#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_17]
            input       [r#"CMD "foo bar""#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_18]
            input       [r#"CMD "foo bar\ " baz"#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\\ \"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_19]
            input       ["CMD \"foo \n bar\" \"\"  baz "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo \n bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "\"\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_20]
            input       ["CMD \"foo \\\" bar\" baz"]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo \\\" bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_21]
            input       [r#"CMD "foo bar" baz"#]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_22]
            input       ["CMD \"foo bar\" baz\nCMD2 \"baz\\\\bar\""]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"baz\\\\bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
        [
            test_name   [test_redis_tokenizer_23]
            input       [" CMD  \"foo bar\"  baz \n CMD2  \"baz\\\\bar\"  "]
            expected    [
                [
                    r#"{ token: "CMD", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"foo bar\"", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "baz", token_type: RedisTokenArgument, done: false }"#,
                    r#"{ token: "CMD2", token_type: RedisTokenCommand, done: false }"#,
                    r#"{ token: "\"baz\\\\bar\"", token_type: RedisTokenArgument, done: true }"#
                ]
            ];
        ]
    )]
    #[test]
    fn test_name() {
        // Scan exactly as many tokens as the case lists and compare the
        // `Debug` rendering of each result against the expected text.
        let mut tokenizer = RedisTokenizer::new(input);
        for want in expected.iter() {
            let got = tokenizer.scan();
            assert_eq!(
                format!("{got:?}"),
                format!("RedisTokenizerScanResult {want}")
            );
        }
    }
}