Skip to main content

flodl_cli/args/
parser.rs

1//! Argv tokenizer + resolver.
2//!
3//! This is the runtime side of `#[derive(FdlArgs)]`. The derive macro
4//! builds an [`ArgsSpec`] from the struct's fields + attributes, calls
5//! [`parse`] on `std::env::args()`, then destructures the resulting
6//! [`ParsedArgs`] into concrete field values.
7//!
8//! The parser is opinionated: it does NOT implement every historical
9//! convention. What it supports is documented in the test block below,
10//! and that set is the contract.
11
12use std::collections::BTreeMap;
13
14// ── Spec (what the CLI declares) ────────────────────────────────────────
15
16/// Declarative spec of what flags and positionals a CLI accepts. Built by
17/// the `#[derive(FdlArgs)]` output at runtime, consumed by [`parse`].
18#[derive(Debug, Clone, Default)]
19pub struct ArgsSpec {
20    pub options: Vec<OptionDecl>,
21    pub positionals: Vec<PositionalDecl>,
22    /// When true, unknown long/short flags are silently skipped (the
23    /// token is consumed, no error is raised), and the required-
24    /// positional check is disabled. Used by fdl's non-strict tail
25    /// validation: the binary re-parses the argv itself, so fdl's job
26    /// is to enforce declared contracts (choices on known flags,
27    /// positional choices when unambiguous) without blocking
28    /// pass-through flags the author chose to allow.
29    ///
30    /// Defaults to false so derive binaries stay strict by default.
31    pub lenient_unknowns: bool,
32}
33
/// Declaration of a single option (long flag, optionally with short alias).
///
/// Implements `PartialEq`/`Eq` so declarations can be compared directly,
/// e.g. with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OptionDecl {
    /// Long name (without `--` prefix).
    pub long: String,
    /// Single-character short alias (without `-` prefix).
    pub short: Option<char>,
    /// True for value-carrying options; false for presence-only flags (bool).
    pub takes_value: bool,
    /// True if bare `--foo` is legal (field type is bool, or a default is
    /// declared for an `Option<T>`). Ignored when `takes_value = false`.
    pub allows_bare: bool,
    /// True for list-typed options (`Vec<T>`): multiple occurrences and
    /// comma-separated values accumulate.
    pub repeatable: bool,
    /// Restrict values to this set (validated at parse time).
    pub choices: Option<Vec<String>>,
}
52
/// Declaration of a single positional argument.
///
/// Implements `PartialEq`/`Eq` so declarations can be compared directly,
/// e.g. with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PositionalDecl {
    /// Field name (used in error messages).
    pub name: String,
    /// When true, absence is a parse error.
    pub required: bool,
    /// When true, consumes all remaining positionals; must be the last decl.
    pub variadic: bool,
    /// Restrict values to this set.
    pub choices: Option<Vec<String>>,
}
65
66// ── Output (what was passed) ────────────────────────────────────────────
67
68/// Intermediate parsed result, shaped for the derive macro's field
69/// extraction. Absence is encoded by a missing map entry.
70#[derive(Debug, Default)]
71pub struct ParsedArgs {
72    /// Keyed by option long name.
73    pub options: BTreeMap<String, OptionState>,
74    /// Positionals in declaration order; variadic drains the tail.
75    pub positionals: Vec<String>,
76}
77
/// What happened to a single option on the command line.
///
/// Implements `PartialEq`/`Eq` so parse results can be asserted on
/// directly (`assert_eq!(state, OptionState::BarePresent)`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OptionState {
    /// Flag was passed with no value (bare `--foo` or `-f`).
    BarePresent,
    /// Flag was passed with value(s). Length 1 for scalar, >=1 for list.
    WithValues(Vec<String>),
}
86
87// ── Parse ───────────────────────────────────────────────────────────────
88
89/// Parse argv against a spec. `args[0]` is the program name and is ignored.
90///
91/// Returns a human-readable error string on failure. The caller prints it
92/// to stderr and exits with a non-zero code (see [`super::parse_or_schema`]).
93pub fn parse(spec: &ArgsSpec, args: &[String]) -> Result<ParsedArgs, String> {
94    let mut out = ParsedArgs::default();
95    let mut i = 1usize;
96    let mut stop_flags = false;
97
98    while i < args.len() {
99        let tok = &args[i];
100
101        if !stop_flags && tok == "--" {
102            stop_flags = true;
103            i += 1;
104            continue;
105        }
106
107        if !stop_flags && tok.starts_with("--") {
108            // Long flag: `--name` or `--name=value`.
109            let rest = &tok[2..];
110            let (name, inline_value) = match rest.split_once('=') {
111                Some((n, v)) => (n, Some(v.to_string())),
112                None => (rest, None),
113            };
114            match find_long(spec, name) {
115                Some(decl) => {
116                    i = consume_flag(decl, inline_value, args, i, &mut out)?;
117                }
118                None if spec.lenient_unknowns => {
119                    // Unknown flag tolerated: consume just this token.
120                    // We deliberately don't look ahead to consume a
121                    // value — fdl has no way to know whether the unknown
122                    // flag takes one, and the binary will re-parse the
123                    // forwarded tail authoritatively anyway.
124                    i += 1;
125                }
126                None => return Err(unknown_long_error(spec, name)),
127            }
128            continue;
129        }
130
131        if !stop_flags && tok.starts_with('-') && tok.len() >= 2 {
132            // Short flag: `-x`, `-xyz` (cluster), `-x=val`, `-xval` rejected.
133            let rest = &tok[1..];
134            if let Some((head, inline_value)) = rest.split_once('=') {
135                // `-x=value` — only valid if head is a single char.
136                if head.chars().count() != 1 {
137                    return Err(format!(
138                        "invalid short-flag syntax `{tok}`: `-x=value` requires a single-letter short"
139                    ));
140                }
141                let c = head.chars().next().unwrap();
142                match find_short(spec, c) {
143                    Some(decl) => {
144                        i = consume_flag(decl, Some(inline_value.to_string()), args, i, &mut out)?;
145                    }
146                    None if spec.lenient_unknowns => {
147                        i += 1;
148                    }
149                    None => return Err(format!("unknown short flag `-{c}`")),
150                }
151                continue;
152            }
153            // Cluster: each char is an independent flag. Only the last
154            // may take a value (consumes next arg); all before must be
155            // presence-only (takes_value = false).
156            let chars: Vec<char> = rest.chars().collect();
157            if spec.lenient_unknowns && chars.iter().any(|c| find_short(spec, *c).is_none()) {
158                // If any char in the cluster is unknown, we can't
159                // safely partition the cluster (unknown `takes_value`
160                // makes cluster interpretation ambiguous). Skip the
161                // whole token and let the binary handle it.
162                i += 1;
163                continue;
164            }
165            for (pos, c) in chars.iter().enumerate() {
166                let decl = find_short(spec, *c)
167                    .ok_or_else(|| format!("unknown short flag `-{c}`"))?;
168                let is_last = pos == chars.len() - 1;
169                if !is_last && decl.takes_value {
170                    return Err(format!(
171                        "short `-{c}` takes a value and cannot be clustered mid-token `{tok}`"
172                    ));
173                }
174                if is_last {
175                    i = consume_flag(decl, None, args, i, &mut out)?;
176                } else {
177                    record_option(&mut out, decl, None, spec)?;
178                }
179            }
180            if chars.is_empty() {
181                // bare `-` (no flag letter): treat as positional.
182                out.positionals.push(tok.clone());
183                i += 1;
184            }
185            continue;
186        }
187
188        // Positional.
189        out.positionals.push(tok.clone());
190        i += 1;
191    }
192
193    // Required positional check. Skipped in lenient mode: orphan unknown
194    // flags may have been silently dropped, so the collected positionals
195    // are an unreliable count of what the user actually wrote. The binary
196    // will re-check arity authoritatively.
197    if !spec.lenient_unknowns {
198        let required_count = spec.positionals.iter().filter(|p| p.required).count();
199        if out.positionals.len() < required_count {
200            let missing = &spec.positionals[out.positionals.len()].name;
201            return Err(format!("missing required argument <{missing}>"));
202        }
203    }
204
205    // Positional choice validation.
206    for (idx, value) in out.positionals.iter().enumerate() {
207        let decl = positional_decl_for(spec, idx);
208        if let Some(d) = decl {
209            if let Some(choices) = &d.choices {
210                if !choices.iter().any(|c| c == value) {
211                    return Err(format!(
212                        "invalid value `{value}` for <{}> -- allowed: {}",
213                        d.name,
214                        choices.join(", ")
215                    ));
216                }
217            }
218        }
219    }
220
221    Ok(out)
222}
223
224/// Consume one flag — given the decl and optional inline value — and
225/// advance the argv cursor accordingly. Returns the new index.
226fn consume_flag(
227    decl: &OptionDecl,
228    inline_value: Option<String>,
229    args: &[String],
230    i: usize,
231    out: &mut ParsedArgs,
232) -> Result<usize, String> {
233    if !decl.takes_value {
234        // Presence-only flag: must NOT have an inline value.
235        if inline_value.is_some() {
236            return Err(format!("flag `--{}` takes no value", decl.long));
237        }
238        record_option(out, decl, None, &ArgsSpec::default())?;
239        return Ok(i + 1);
240    }
241
242    // Value-taking option.
243    if let Some(v) = inline_value {
244        record_option(out, decl, Some(v), &ArgsSpec::default())?;
245        return Ok(i + 1);
246    }
247
248    // Look at next token: if it exists and is not itself a flag, consume.
249    let next_idx = i + 1;
250    let next_is_flag = args
251        .get(next_idx)
252        .map(|s| s.starts_with('-') && s != "-" && s != "--")
253        .unwrap_or(true); // absent counts as "no value available"
254
255    if !next_is_flag {
256        let v = args[next_idx].clone();
257        record_option(out, decl, Some(v), &ArgsSpec::default())?;
258        return Ok(i + 2);
259    }
260
261    // No value available — bare flag. Only valid if the spec allows it.
262    if !decl.allows_bare {
263        return Err(format!("`--{}` requires a value", decl.long));
264    }
265    record_option(out, decl, None, &ArgsSpec::default())?;
266    Ok(i + 1)
267}
268
269/// Record one occurrence of an option. Handles choice validation and
270/// repeatable accumulation.
271fn record_option(
272    out: &mut ParsedArgs,
273    decl: &OptionDecl,
274    value: Option<String>,
275    _spec: &ArgsSpec,
276) -> Result<(), String> {
277    // Choice validation (only applies when a value is present).
278    if let (Some(v), Some(choices)) = (&value, &decl.choices) {
279        for part in split_list_value(v) {
280            if !choices.iter().any(|c| c == part) {
281                return Err(format!(
282                    "invalid value `{part}` for `--{}` -- allowed: {}",
283                    decl.long,
284                    choices.join(", ")
285                ));
286            }
287        }
288    }
289
290    let key = decl.long.clone();
291    match (value, decl.repeatable) {
292        (None, _) => {
293            // Bare flag: first occurrence wins (BarePresent).
294            out.options.entry(key).or_insert(OptionState::BarePresent);
295        }
296        (Some(v), false) => {
297            // Scalar option: last occurrence wins.
298            out.options.insert(key, OptionState::WithValues(vec![v]));
299        }
300        (Some(v), true) => {
301            // List option: accumulate, with comma-split inside each value.
302            let parts: Vec<String> = split_list_value(&v).into_iter().map(String::from).collect();
303            let entry = out
304                .options
305                .entry(key)
306                .or_insert(OptionState::WithValues(Vec::new()));
307            if let OptionState::WithValues(list) = entry {
308                list.extend(parts);
309            }
310        }
311    }
312    Ok(())
313}
314
/// Break a comma-separated list value into its items. Each item has its
/// surrounding whitespace trimmed, and items that end up empty are
/// discarded (so `--tags a,,b` = `["a", "b"]`).
fn split_list_value(v: &str) -> Vec<&str> {
    let mut items = Vec::new();
    for raw in v.split(',') {
        let item = raw.trim();
        if !item.is_empty() {
            items.push(item);
        }
    }
    items
}
320
321fn find_long<'a>(spec: &'a ArgsSpec, name: &str) -> Option<&'a OptionDecl> {
322    spec.options.iter().find(|o| o.long == name)
323}
324
325fn find_short(spec: &ArgsSpec, c: char) -> Option<&OptionDecl> {
326    spec.options.iter().find(|o| o.short == Some(c))
327}
328
329fn positional_decl_for(spec: &ArgsSpec, idx: usize) -> Option<&PositionalDecl> {
330    // Direct index up to the variadic; beyond that, re-use the variadic decl.
331    if let Some(decl) = spec.positionals.get(idx) {
332        return Some(decl);
333    }
334    spec.positionals.iter().rev().find(|d| d.variadic)
335}
336
337/// "did you mean" error for unknown long flags.
338fn unknown_long_error(spec: &ArgsSpec, name: &str) -> String {
339    let suggestion = spec
340        .options
341        .iter()
342        .filter(|o| similar(&o.long, name))
343        .map(|o| format!("--{}", o.long))
344        .next();
345    match suggestion {
346        Some(s) => format!("unknown flag `--{name}`, did you mean `{s}`?"),
347        None => format!("unknown flag `--{name}`"),
348    }
349}
350
351/// "did you mean" similarity: edit distance ≤ 2 qualifies.
352///
353/// Simple Levenshtein on char vectors. The input sizes here are tiny
354/// (flag names), so an O(n*m) implementation is fine.
355fn similar(candidate: &str, target: &str) -> bool {
356    if candidate == target {
357        return false;
358    }
359    levenshtein(candidate, target) <= 2
360}
361
/// Levenshtein edit distance between `a` and `b`, counted in `char`s:
/// the minimum number of single-character insertions, deletions and
/// substitutions that turn one string into the other.
///
/// Keeps a single row of the dynamic-programming table and updates it in
/// place, carrying the previous row's diagonal cell in a local.
fn levenshtein(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();
    // row[j] = distance between the prefix of `a` consumed so far and the
    // first `j` chars of `b`. Initially that prefix is empty.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, ca) in a.chars().enumerate() {
        // Value of the cell up-and-left of the one being computed.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, cb) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitute = diagonal + usize::from(ca != *cb);
            let delete = above + 1;
            let insert = row[j] + 1;
            row[j + 1] = delete.min(insert).min(substitute);
            diagonal = above;
        }
    }
    row[target.len()]
}
386
387// ── Tests ───────────────────────────────────────────────────────────────
388
#[cfg(test)]
mod tests {
    use super::*;

    // ── Fixture helpers ─────────────────────────────────────────────────

    /// Common constructor behind `flag`, `value` and `list`; never sets
    /// `choices`.
    fn opt(
        long: &str,
        short: Option<char>,
        takes_value: bool,
        allows_bare: bool,
        repeatable: bool,
    ) -> OptionDecl {
        OptionDecl {
            long: long.to_string(),
            short,
            takes_value,
            allows_bare,
            repeatable,
            choices: None,
        }
    }

    /// Presence-only (bool) flag.
    fn flag(long: &str, short: Option<char>) -> OptionDecl {
        opt(long, short, false, true, false)
    }

    /// Scalar value option; `bare_ok` controls whether bare `--foo` is legal.
    fn value(long: &str, short: Option<char>, bare_ok: bool) -> OptionDecl {
        opt(long, short, true, bare_ok, false)
    }

    /// Repeatable list option that always requires a value.
    fn list(long: &str, short: Option<char>) -> OptionDecl {
        opt(long, short, true, false, true)
    }

    /// Positional declaration without choices.
    fn pos(name: &str, required: bool, variadic: bool) -> PositionalDecl {
        PositionalDecl {
            name: name.to_string(),
            required,
            variadic,
            choices: None,
        }
    }

    /// Build an argv with a dummy program name in slot 0.
    fn argv(parts: &[&str]) -> Vec<String> {
        let mut full = vec![String::from("prog")];
        full.extend(parts.iter().map(|p| p.to_string()));
        full
    }

    /// Spec with the given declarations and every other field at its default.
    fn spec_of(options: Vec<OptionDecl>, positionals: Vec<PositionalDecl>) -> ArgsSpec {
        ArgsSpec {
            options,
            positionals,
            ..ArgsSpec::default()
        }
    }

    /// Owned copies of string literals, for comparing against parse output.
    fn strs(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| s.to_string()).collect()
    }

    // ── Long flags ──────────────────────────────────────────────────────

    #[test]
    fn parses_long_flag_with_value() {
        let spec = spec_of(vec![value("model", None, false)], vec![]);
        let out = parse(&spec, &argv(&["--model", "mlp"])).unwrap();
        match out.options.get("model") {
            Some(OptionState::WithValues(v)) => assert_eq!(v, &strs(&["mlp"])),
            other => panic!("expected WithValues, got {:?}", other),
        }
    }

    #[test]
    fn parses_long_flag_with_equals() {
        let spec = spec_of(vec![value("model", None, false)], vec![]);
        let out = parse(&spec, &argv(&["--model=mlp"])).unwrap();
        if let Some(OptionState::WithValues(v)) = out.options.get("model") {
            assert_eq!(v, &strs(&["mlp"]));
        } else {
            panic!("expected WithValues");
        }
    }

    #[test]
    fn bare_flag_without_default_errors() {
        let spec = spec_of(vec![value("report", None, false)], vec![]);
        let err = parse(&spec, &argv(&["--report"])).unwrap_err();
        assert!(err.contains("requires a value"), "got: {err}");
    }

    #[test]
    fn bare_flag_with_default_is_present() {
        let spec = spec_of(vec![value("report", None, true)], vec![]);
        let out = parse(&spec, &argv(&["--report"])).unwrap();
        assert!(matches!(out.options.get("report"), Some(OptionState::BarePresent)));
    }

    #[test]
    fn bool_flag_presence() {
        let spec = spec_of(vec![flag("validate", None)], vec![]);
        let out = parse(&spec, &argv(&["--validate"])).unwrap();
        assert!(matches!(out.options.get("validate"), Some(OptionState::BarePresent)));
    }

    #[test]
    fn bool_flag_rejects_value() {
        let spec = spec_of(vec![flag("validate", None)], vec![]);
        let err = parse(&spec, &argv(&["--validate=yes"])).unwrap_err();
        assert!(err.contains("takes no value"), "got: {err}");
    }

    // ── Short flags ─────────────────────────────────────────────────────

    #[test]
    fn short_flag() {
        let spec = spec_of(vec![flag("verbose", Some('v'))], vec![]);
        let out = parse(&spec, &argv(&["-v"])).unwrap();
        assert!(matches!(out.options.get("verbose"), Some(OptionState::BarePresent)));
    }

    #[test]
    fn short_clustering_for_bool_flags() {
        let spec = spec_of(vec![flag("a", Some('a')), flag("b", Some('b'))], vec![]);
        let out = parse(&spec, &argv(&["-ab"])).unwrap();
        assert!(out.options.contains_key("a"));
        assert!(out.options.contains_key("b"));
    }

    #[test]
    fn short_cluster_last_may_take_value() {
        let spec = spec_of(
            vec![flag("a", Some('a')), value("model", Some('m'), false)],
            vec![],
        );
        let out = parse(&spec, &argv(&["-am", "mlp"])).unwrap();
        assert!(out.options.contains_key("a"));
        if let Some(OptionState::WithValues(v)) = out.options.get("model") {
            assert_eq!(v, &strs(&["mlp"]));
        } else {
            panic!("expected model value");
        }
    }

    // ── Lists ───────────────────────────────────────────────────────────

    #[test]
    fn list_option_accumulates_across_repeats_and_commas() {
        let spec = spec_of(vec![list("tags", Some('t'))], vec![]);
        let out = parse(&spec, &argv(&["--tags", "a,b", "-t", "c"])).unwrap();
        if let Some(OptionState::WithValues(v)) = out.options.get("tags") {
            assert_eq!(v, &strs(&["a", "b", "c"]));
        } else {
            panic!("expected list values");
        }
    }

    // ── Positionals ─────────────────────────────────────────────────────

    #[test]
    fn positionals_in_order() {
        let spec = spec_of(
            vec![],
            vec![pos("first", true, false), pos("second", false, false)],
        );
        let out = parse(&spec, &argv(&["a", "b"])).unwrap();
        assert_eq!(out.positionals, strs(&["a", "b"]));
    }

    #[test]
    fn missing_required_positional_errors() {
        let spec = spec_of(vec![], vec![pos("first", true, false)]);
        let err = parse(&spec, &argv(&[])).unwrap_err();
        assert!(err.contains("missing required argument"), "got: {err}");
    }

    #[test]
    fn variadic_positional_absorbs_tail() {
        let spec = spec_of(vec![], vec![pos("files", false, true)]);
        let out = parse(&spec, &argv(&["a", "b", "c"])).unwrap();
        assert_eq!(out.positionals, strs(&["a", "b", "c"]));
    }

    #[test]
    fn double_dash_stops_flag_parsing() {
        let spec = spec_of(vec![flag("verbose", None)], vec![pos("rest", false, true)]);
        let out = parse(&spec, &argv(&["--", "--verbose", "-x"])).unwrap();
        assert!(!out.options.contains_key("verbose"));
        assert_eq!(out.positionals, strs(&["--verbose", "-x"]));
    }

    // ── Errors and validation ───────────────────────────────────────────

    #[test]
    fn unknown_flag_suggests_similar() {
        let spec = spec_of(vec![value("model", None, false)], vec![]);
        let err = parse(&spec, &argv(&["--modl", "mlp"])).unwrap_err();
        assert!(err.contains("did you mean"), "got: {err}");
    }

    #[test]
    fn choices_validated_at_parse_time() {
        let model = OptionDecl {
            choices: Some(strs(&["mlp", "lenet"])),
            ..value("model", None, false)
        };
        let spec = spec_of(vec![model], vec![]);
        let err = parse(&spec, &argv(&["--model", "foobar"])).unwrap_err();
        assert!(err.contains("allowed"), "got: {err}");
    }

    #[test]
    fn bare_dash_is_positional() {
        let spec = spec_of(vec![], vec![pos("target", true, false)]);
        let out = parse(&spec, &argv(&["-"])).unwrap();
        assert_eq!(out.positionals, strs(&["-"]));
    }

    #[test]
    fn scalar_last_write_wins() {
        let spec = spec_of(vec![value("model", None, false)], vec![]);
        let out = parse(&spec, &argv(&["--model", "a", "--model", "b"])).unwrap();
        if let Some(OptionState::WithValues(v)) = out.options.get("model") {
            assert_eq!(v, &strs(&["b"]));
        } else {
            panic!("expected last-write-wins");
        }
    }
}
657            Some(OptionState::WithValues(v)) => assert_eq!(v, &vec!["b".to_string()]),
658            _ => panic!("expected last-write-wins"),
659        }
660    }
661}