Skip to main content

reddb_server/cli/
token.rs

//! Pure lexer that tokenizes raw CLI arguments into typed tokens.
//!
//! No schema knowledge -- just lexical classification. The tokenizer
//! distinguishes positionals, long flags, short flags, short clusters,
//! and the end-of-options separator (`--`).
6
/// A single lexically classified CLI argument, as produced by [`tokenize`].
///
/// A token records only what can be read off the argument text itself;
/// no schema is consulted, so whether a flag is known or expects a value
/// is not decided at this level.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Bare positional value. Besides plain words this also covers `-`
    /// alone, a dash followed by an ASCII digit (e.g. `-42`), the empty
    /// string, and every argument that follows `--`.
    Positional(String),
    /// Long flag: `--flag` (`value` is `None`) or `--flag=value`
    /// (`value` is everything after the first `=`, possibly empty).
    LongFlag { name: String, value: Option<String> },
    /// Short flag: `-f` (`value` is `None`) or `-f=value`
    /// (`value` is everything after the first `=`, possibly empty).
    ShortFlag { name: char, value: Option<String> },
    /// Short cluster: `-abc` (multiple short flags combined, no `=`),
    /// stored in the order they were written.
    ShortCluster(Vec<char>),
    /// End-of-options separator: `--`.
    EndOfOptions,
}
20
21/// Tokenize a slice of CLI arguments into a vector of typed tokens.
22///
23/// Rules:
24/// 1. `--` alone emits `EndOfOptions`; everything after becomes `Positional`.
25/// 2. `--name` or `--name=val` emits `LongFlag`.
26/// 3. `-x` emits `ShortFlag`; `-x=val` emits `ShortFlag` with value;
27///    `-abc` (len > 2, no `=`) emits `ShortCluster`.
28/// 4. `-` alone or `-42` (dash + digit) emits `Positional`.
29/// 5. Everything else emits `Positional`.
30pub fn tokenize(args: &[String]) -> Vec<Token> {
31    let mut tokens = Vec::with_capacity(args.len());
32    let mut past_eoo = false;
33
34    for arg in args {
35        // After `--`, everything is positional regardless of prefix.
36        if past_eoo {
37            tokens.push(Token::Positional(arg.clone()));
38            continue;
39        }
40
41        // Exact `--` is the end-of-options sentinel.
42        if arg == "--" {
43            tokens.push(Token::EndOfOptions);
44            past_eoo = true;
45            continue;
46        }
47
48        // Long flags: starts with `--` (already ruled out bare `--` above).
49        if let Some(rest) = arg.strip_prefix("--") {
50            if let Some(eq_pos) = rest.find('=') {
51                tokens.push(Token::LongFlag {
52                    name: rest[..eq_pos].to_string(),
53                    value: Some(rest[eq_pos + 1..].to_string()),
54                });
55            } else {
56                tokens.push(Token::LongFlag {
57                    name: rest.to_string(),
58                    value: None,
59                });
60            }
61            continue;
62        }
63
64        // Short flags / clusters: starts with `-`, length > 1.
65        if arg.starts_with('-') && arg.len() > 1 {
66            let chars: Vec<char> = arg.chars().collect();
67
68            // `-42` -- dash followed by a digit is a negative number, treat as positional.
69            if chars[1].is_ascii_digit() {
70                tokens.push(Token::Positional(arg.clone()));
71                continue;
72            }
73
74            let flag_char = chars[1];
75
76            // Exactly `-x` (length 2): single short flag, no value.
77            if arg.len() == 2 {
78                tokens.push(Token::ShortFlag {
79                    name: flag_char,
80                    value: None,
81                });
82                continue;
83            }
84
85            // `-x=` (length 3, third char is `=`): short flag with empty value.
86            if arg.len() == 3 && chars[2] == '=' {
87                tokens.push(Token::ShortFlag {
88                    name: flag_char,
89                    value: Some(String::new()),
90                });
91                continue;
92            }
93
94            // `-x=val`: short flag with value (split at first `=`).
95            if let Some(eq_pos) = arg.find('=') {
96                tokens.push(Token::ShortFlag {
97                    name: flag_char,
98                    value: Some(arg[eq_pos + 1..].to_string()),
99                });
100                continue;
101            }
102
103            // `-abc` (length > 2, no `=`): short cluster.
104            tokens.push(Token::ShortCluster(chars[1..].to_vec()));
105            continue;
106        }
107
108        // Everything else is positional (bare words, `-` alone, empty strings).
109        tokens.push(Token::Positional(arg.clone()));
110    }
111
112    tokens
113}
114
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the tokenizer over string literals.
    fn lex(raw: &[&str]) -> Vec<Token> {
        let owned: Vec<String> = raw.iter().map(|piece| piece.to_string()).collect();
        tokenize(&owned)
    }

    /// Expected positional token.
    fn pos(text: &str) -> Token {
        Token::Positional(text.to_string())
    }

    /// Expected long-flag token; `value` is `None` when no `=` was given.
    fn long(name: &str, value: Option<&str>) -> Token {
        Token::LongFlag {
            name: name.to_string(),
            value: value.map(String::from),
        }
    }

    /// Expected short-flag token; `value` is `None` when no `=` was given.
    fn short(name: char, value: Option<&str>) -> Token {
        Token::ShortFlag {
            name,
            value: value.map(String::from),
        }
    }

    /// Expected short-cluster token built from the cluster's letters.
    fn cluster(letters: &str) -> Token {
        Token::ShortCluster(letters.chars().collect())
    }

    #[test]
    fn test_long_flag() {
        assert_eq!(lex(&["--verbose"]), vec![long("verbose", None)]);
    }

    #[test]
    fn test_long_flag_with_value() {
        assert_eq!(lex(&["--output=json"]), vec![long("output", Some("json"))]);
    }

    #[test]
    fn test_long_flag_empty_value() {
        assert_eq!(lex(&["--output="]), vec![long("output", Some(""))]);
    }

    #[test]
    fn test_long_flag_value_with_equals() {
        // Only the first `=` separates name from value.
        assert_eq!(
            lex(&["--config=key=value"]),
            vec![long("config", Some("key=value"))]
        );
    }

    #[test]
    fn test_long_flag_empty_name_with_value() {
        // `--=value` is odd input, but it still lexes as a long flag.
        assert_eq!(lex(&["--=value"]), vec![long("", Some("value"))]);
    }

    #[test]
    fn test_short_flag() {
        assert_eq!(lex(&["-v"]), vec![short('v', None)]);
    }

    #[test]
    fn test_short_flag_with_value() {
        assert_eq!(lex(&["-o=json"]), vec![short('o', Some("json"))]);
    }

    #[test]
    fn test_short_flag_empty_value() {
        // `-o=` carries an explicit, empty value.
        assert_eq!(lex(&["-o="]), vec![short('o', Some(""))]);
    }

    #[test]
    fn test_short_cluster() {
        assert_eq!(lex(&["-vvv"]), vec![cluster("vvv")]);
    }

    #[test]
    fn test_short_cluster_mixed() {
        assert_eq!(lex(&["-abc"]), vec![cluster("abc")]);
    }

    #[test]
    fn test_end_of_options() {
        assert_eq!(lex(&["--"]), vec![Token::EndOfOptions]);
    }

    #[test]
    fn test_after_end_of_options() {
        assert_eq!(
            lex(&["--", "--verbose"]),
            vec![Token::EndOfOptions, pos("--verbose")]
        );
    }

    #[test]
    fn test_after_end_of_options_multiple() {
        assert_eq!(
            lex(&["--", "-v", "--flag=val", "pos"]),
            vec![
                Token::EndOfOptions,
                pos("-v"),
                pos("--flag=val"),
                pos("pos"),
            ]
        );
    }

    #[test]
    fn test_positional() {
        assert_eq!(lex(&["example.com"]), vec![pos("example.com")]);
    }

    #[test]
    fn test_negative_number() {
        assert_eq!(lex(&["-42"]), vec![pos("-42")]);
    }

    #[test]
    fn test_negative_number_float() {
        assert_eq!(lex(&["-3.14"]), vec![pos("-3.14")]);
    }

    #[test]
    fn test_single_dash() {
        assert_eq!(lex(&["-"]), vec![pos("-")]);
    }

    #[test]
    fn test_empty_string() {
        assert_eq!(lex(&[""]), vec![pos("")]);
    }

    #[test]
    fn test_empty_args() {
        assert!(lex(&[]).is_empty());
    }

    #[test]
    fn test_mixed() {
        let expected = vec![
            pos("server"),
            long("path", None),
            pos("/data"),
            long("bind", None),
            pos("0.0.0.0:6380"),
            short('v', None),
        ];
        let actual = lex(&["server", "--path", "/data", "--bind", "0.0.0.0:6380", "-v"]);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_mixed_with_eoo() {
        let expected = vec![
            long("output", Some("json")),
            short('v', None),
            Token::EndOfOptions,
            pos("--not-a-flag"),
            pos("target"),
        ];
        let actual = lex(&["--output=json", "-v", "--", "--not-a-flag", "target"]);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_realistic_command() {
        // red serve --path /data --bind 0.0.0.0:6380 --role primary -v
        let expected = vec![
            pos("server"),
            long("path", None),
            pos("/data"),
            long("bind", Some("0.0.0.0:6380")),
            long("role", None),
            pos("primary"),
            short('v', None),
        ];
        let actual = lex(&[
            "server",
            "--path",
            "/data",
            "--bind=0.0.0.0:6380",
            "--role",
            "primary",
            "-v",
        ]);
        assert_eq!(actual, expected);
    }
}