// agentnative 0.1.2
// The agent-native CLI linter — check whether your CLI follows agent-readiness principles

//! Shared `--help` probe + lazy parsers.
//!
//! The runner spawns `<binary> --help` exactly once per target. The captured
//! text is parsed on demand into three views — flags, env hints, subcommands.
//! Behavioral checks that need to inspect the help surface share the same
//! `HelpOutput` for a given target so none of them re-spawn the binary.
//!
//! Parsers are English-only by convention: we match on clap's output shape
//! (`Commands:`, `[env: FOO]`, leading-whitespace flag lines). Localized help
//! is a named exception in `docs/coverage-matrix.md` — checks that consume
//! these parsers should Skip, not Warn, when the raw text lacks an English
//! help surface.

use std::sync::OnceLock;

use anyhow::Result;

use super::{BinaryRunner, RunStatus};

/// One option advertised by a tool's `--help` text.
///
/// `short` holds the single-character spelling (e.g., `-q`) and `long` the
/// GNU-style spelling (e.g., `--quiet`); both are stored with their leading
/// dashes. The parser only emits a `Flag` when at least one of the two is set.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Flag {
    /// Single-dash spelling, dash included (e.g., `-q`).
    pub short: Option<String>,
    /// Double-dash spelling, dashes included (e.g., `--quiet`).
    pub long: Option<String>,
}

impl Flag {
    /// Report whether `name` refers to this flag through either spelling.
    ///
    /// `name` may be given with dashes (`-s`, `--long`) or bare (`s`,
    /// `long`). A bare one-byte name is treated as a short flag; any other
    /// bare name is treated as a long flag. A name that already starts with
    /// a dash is compared verbatim.
    pub fn matches(&self, name: &str) -> bool {
        // Normalize the query to the dashed spelling stored on the struct.
        let wanted = match name {
            dashed if dashed.starts_with('-') => dashed.to_string(),
            single if single.len() == 1 => format!("-{single}"),
            bare => format!("--{bare}"),
        };
        [&self.short, &self.long]
            .iter()
            .any(|form| form.as_deref() == Some(wanted.as_str()))
    }
}

/// An environment variable advertised by the help text through clap's
/// `[env: FOO]` annotation, surfaced as first-class data so checks don't
/// have to re-scan the raw text.
///
/// Only the variable name is kept; the flag the annotation was attached to
/// and any default value after `=` are not recorded here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EnvHint {
    /// Environment variable name, e.g., `RIPGREP_CONFIG_PATH`.
    pub var: String,
}

/// Shared, lazily-parsed view over `<binary> --help`. Construct via
/// [`HelpOutput::probe`] in runner code, or [`HelpOutput::from_raw`] in tests.
///
/// Each parsed view sits behind its own `OnceLock`, so the matching parser
/// runs on the first accessor call and later calls reuse the stored result.
pub struct HelpOutput {
    /// Captured help text that every parser reads from.
    raw: String,
    /// Cache behind [`HelpOutput::flags`].
    flags: OnceLock<Vec<Flag>>,
    /// Cache behind [`HelpOutput::env_hints`].
    env_hints: OnceLock<Vec<EnvHint>>,
    /// Cache behind [`HelpOutput::subcommands`]. Reserved for P3/P6
    /// subcommand-structure checks; no current behavioral check consumes
    /// it, so the compiler would flag it as dead code without this allow.
    #[allow(dead_code)]
    subcommands: OnceLock<Vec<String>>,
}

impl HelpOutput {
    /// Build a `HelpOutput` from already-captured help text. The primary
    /// seam for unit tests — pass a fixture string and exercise the parsers
    /// without spawning a binary. Nothing is parsed until an accessor is
    /// called.
    pub fn from_raw(raw: impl Into<String>) -> Self {
        Self {
            raw: raw.into(),
            flags: OnceLock::new(),
            env_hints: OnceLock::new(),
            subcommands: OnceLock::new(),
        }
    }

    /// Spawn `<binary> --help` via the shared `BinaryRunner` and capture
    /// stdout followed by stderr.
    ///
    /// A timeout or crash is not treated as a failure: whatever output was
    /// captured before it (possibly none) is kept, because a misbehaving
    /// `--help` is a signal the check consumers can use, not a hard runner
    /// failure.
    ///
    /// # Errors
    ///
    /// Returns an error when the runner reports `NotFound`,
    /// `PermissionDenied`, or `Error`.
    pub fn probe(runner: &BinaryRunner) -> Result<Self> {
        let help = runner.run(&["--help"], &[]);
        match help.status {
            RunStatus::NotFound => {
                anyhow::bail!("binary not found when probing --help")
            }
            RunStatus::PermissionDenied => {
                anyhow::bail!("permission denied when probing --help")
            }
            RunStatus::Error(ref msg) => anyhow::bail!("--help probe failed: {msg}"),
            // Ok / Timeout / Crash — capture whatever output is available.
            // Some tools print help to stderr, or crash after writing usage.
            RunStatus::Ok | RunStatus::Timeout | RunStatus::Crash { .. } => {
                // +1 leaves room for the separator that may be inserted below.
                let mut raw = String::with_capacity(help.stdout.len() + help.stderr.len() + 1);
                raw.push_str(&help.stdout);
                // The parsers are line-based. If stdout does not end in a
                // newline, its last line would otherwise fuse with the first
                // stderr line, so keep the two streams on separate lines.
                if !raw.is_empty() && !raw.ends_with('\n') && !help.stderr.is_empty() {
                    raw.push('\n');
                }
                raw.push_str(&help.stderr);
                Ok(Self::from_raw(raw))
            }
        }
    }

    /// The help text the parsers operate on: the exact string given to
    /// [`HelpOutput::from_raw`], or stdout followed by stderr when built by
    /// [`HelpOutput::probe`].
    pub fn raw(&self) -> &str {
        &self.raw
    }

    /// Flags parsed out of the help surface. Lazy + cached on first call.
    pub fn flags(&self) -> &[Flag] {
        self.flags.get_or_init(|| parse_flags(&self.raw))
    }

    /// `[env: FOO]` hints parsed out of the help surface. Lazy + cached.
    pub fn env_hints(&self) -> &[EnvHint] {
        self.env_hints.get_or_init(|| parse_env_hints(&self.raw))
    }

    /// Subcommand names parsed out of the help surface. Lazy + cached.
    /// Reserved for P3/P6 checks; no behavioral check consumes this yet.
    #[allow(dead_code)]
    pub fn subcommands(&self) -> &[String] {
        self.subcommands
            .get_or_init(|| parse_subcommands(&self.raw))
    }
}

/// Parse flag declarations from clap-style help text.
///
/// A "flag line" is a line that starts with whitespace (spaces or tabs) and
/// then a dash. The header portion (before the description) ends at the
/// first run of two spaces — clap's canonical shape — or at the first tab.
/// The header is split on commas, and the first whitespace-separated token
/// of each piece is classified as short (`-s`) or long (`--long`); value
/// placeholders such as `<FILE>` that follow the flag are ignored.
///
/// Returns one [`Flag`] per flag line, in the order the lines appear. Lines
/// that yield neither a short nor a long form are skipped.
fn parse_flags(raw: &str) -> Vec<Flag> {
    let mut flags = Vec::new();
    for line in raw.lines() {
        // Flag lines are indented. Accept tabs as well as spaces so that
        // tab-indented help text is not silently dropped.
        if !line.starts_with(|c: char| matches!(c, ' ' | '\t')) {
            continue;
        }
        let trimmed = line.trim_start();
        // Must begin with a dash; separator / heading lines like `---` are
        // not flags.
        if !trimmed.starts_with('-') || trimmed.starts_with("---") {
            continue;
        }
        // Header = everything before the description gap (two spaces or a
        // tab). When there's no description on the same line the whole
        // remainder is the header.
        let header = trimmed
            .split("  ")
            .next()
            .and_then(|h| h.split('\t').next())
            .unwrap_or(trimmed);

        let mut short: Option<String> = None;
        let mut long: Option<String> = None;
        for piece in header.split(',') {
            // Only the first token of each comma-separated piece can be the
            // flag itself; anything after it is a value placeholder.
            let Some(candidate) = piece.split_whitespace().next() else {
                continue;
            };
            // Try the long form first: `--x` would otherwise never reach the
            // short parser anyway, and the order keeps the intent explicit.
            if let Some(long_name) = parse_long_flag(candidate) {
                long = Some(long_name);
            } else if let Some(short_name) = parse_short_flag(candidate) {
                short = Some(short_name);
            }
        }
        if short.is_some() || long.is_some() {
            flags.push(Flag { short, long });
        }
    }
    flags
}

/// Extract a `--long` flag name from a token like `--long`, `--long=<VAL>`,
/// `--long[=<VAL>]`, or `--long <VAL>`.
///
/// The returned string keeps the leading `--` and stops at the first
/// character that is not an ASCII letter, digit, `-`, or `_`.
///
/// Returns `None` when `candidate` is not a long flag: it lacks the `--`
/// prefix, has no name characters after it, or the name itself starts with
/// another dash (e.g. `---` separators).
fn parse_long_flag(candidate: &str) -> Option<String> {
    let name = candidate.strip_prefix("--")?;
    // A third dash means this is a rule/separator such as `---`, not a flag.
    if name.starts_with('-') {
        return None;
    }
    // Walk the name chars: letters, digits, dashes, underscores.
    let name_len = name
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '-' || c == '_'))
        .unwrap_or(name.len());
    if name_len == 0 {
        return None;
    }
    // `name_len` is a byte offset at a char boundary (from `find` or the
    // full length), so the slice cannot split a multi-byte character.
    Some(candidate[..2 + name_len].to_string())
}

/// Extract a `-s` short flag from a token like `-s`, `-s<VAL>`, or `-s,`.
///
/// Only the byte right after the dash is inspected: it must be an ASCII
/// letter, an ASCII digit, or `?`. Anything that follows it is ignored.
/// Returns `None` for every other token, including `--long` and a lone `-`.
fn parse_short_flag(candidate: &str) -> Option<String> {
    match candidate.as_bytes() {
        [b'-', flag, ..] if flag.is_ascii_alphanumeric() || *flag == b'?' => {
            Some(format!("-{}", char::from(*flag)))
        }
        _ => None,
    }
}

/// Collect clap's `[env: FOO_BAR]` / `[env: FOO_BAR=<default>]` annotations.
///
/// The text is scanned left to right for the literal `[env:` tag. For each
/// tag, everything up to the next `]` (or the end of the text when there is
/// none) is taken as the annotation body; the part before the first `=` is
/// the candidate name. Candidates that fail [`is_env_var_name`] are dropped.
/// Scanning resumes at the closing bracket, and repeated names are kept —
/// one `EnvHint` per occurrence — so callers can count them if useful.
fn parse_env_hints(raw: &str) -> Vec<EnvHint> {
    const TAG: &str = "[env:";
    let mut hints = Vec::new();
    let mut cursor = raw;
    while let Some((_, tail)) = cursor.split_once(TAG) {
        // Split the tail into the annotation body and whatever follows it.
        let (body, remainder) = match tail.find(']') {
            Some(close) => tail.split_at(close),
            None => (tail, ""),
        };
        let var = body.trim().split('=').next().unwrap_or("").trim();
        if is_env_var_name(var) {
            hints.push(EnvHint {
                var: var.to_string(),
            });
        }
        cursor = remainder;
    }
    hints
}

/// Check that `s` looks like a conventional environment variable name:
/// non-empty, made only of ASCII uppercase letters, ASCII digits, and
/// underscores, and not starting with a digit.
fn is_env_var_name(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // The leading character may be a letter or underscore, never a digit.
        Some(lead) if lead.is_ascii_uppercase() || lead == '_' => {
            chars.all(|c| matches!(c, 'A'..='Z' | '0'..='9' | '_'))
        }
        _ => false,
    }
}

/// Parse the `Commands:` / `Subcommands:` / `SUBCOMMANDS:` block.
///
/// Inside a block, the first whitespace-separated token of each entry line
/// is collected when it passes [`is_subcommand_name`]. The indentation of
/// the first line after the header defines the entry column; lines indented
/// deeper than that are treated as wrapped or next-line descriptions and
/// skipped, so their first word is not mistaken for a subcommand.
///
/// A blank line closes the current block (a later header can open another
/// one); a non-indented line ends the scan entirely. Indentation may be
/// spaces or tabs.
#[allow(dead_code)]
fn parse_subcommands(raw: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut in_section = false;
    // Indent width (in bytes) of the entries in the current block, learned
    // from the first line that follows the header.
    let mut entry_indent: Option<usize> = None;
    for line in raw.lines() {
        let trimmed = line.trim();
        if matches!(trimmed, "Commands:" | "Subcommands:" | "SUBCOMMANDS:") {
            in_section = true;
            entry_indent = None;
            continue;
        }
        if !in_section {
            continue;
        }
        if trimmed.is_empty() {
            // Blank line ends the block.
            in_section = false;
            continue;
        }
        if !line.starts_with(|c: char| matches!(c, ' ' | '\t')) {
            // A new top-level section header ended the commands block.
            break;
        }
        let indent = line.len() - line.trim_start().len();
        let expected = *entry_indent.get_or_insert(indent);
        if indent > expected {
            // Continuation of the previous entry's description.
            continue;
        }
        if let Some(name) = trimmed.split_whitespace().next() {
            if is_subcommand_name(name) {
                out.push(name.to_string());
            }
        }
    }
    out
}

/// Subcommand names are kebab-case/snake_case identifiers: ASCII letters,
/// digits, `-`, and `_`, starting with a letter or digit. Anything else —
/// `[options]`, `<ARG>`, punctuation — is not a subcommand.
#[allow(dead_code)]
fn is_subcommand_name(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some(first) if first.is_ascii_alphanumeric() => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
        }
        _ => false,
    }
}

// Unit tests for the help parsers. Every test runs against an in-memory
// fixture string; no binary is spawned.
#[cfg(test)]
mod tests {
    use super::*;

    // A fixture modeled on ripgrep's `--help` — short+long flags, long-only
    // flags, `=VALUE` shapes, and one `[env: ...]` hint.
    const RIPGREP_HELP: &str = r#"ripgrep 14.1

Usage: rg [OPTIONS] PATTERN [PATH ...]

Options:
  -e, --regexp=PATTERN          A pattern to search for.
      --no-messages             Suppress some error messages.
  -q, --quiet                   Do not print anything to stdout.
  -v, --invert-match            Invert matching.
      --null                    Print a NUL byte after file paths.
      --color=<WHEN>            When to use color. [env: RIPGREP_COLOR=]
      --help                    Show this help message.
  -V, --version                 Show version.
"#;

    // Modeled on clap's generated help, with a subcommand block and [env: ...].
    const CLAP_HELP: &str = r#"anc — the agent-native CLI linter

Usage: anc <COMMAND>

Commands:
  check        Run checks against a CLI project or binary
  completions  Generate shell completions
  generate     Regenerate build artifacts
  help         Print this message or the help of the given subcommand

Options:
  -q, --quiet      Suppress non-essential output [env: AGENTNATIVE_QUIET=]
  -h, --help       Print help
  -V, --version    Print version
"#;

    // Tool with no flags, env hints, or subcommand block — used to check
    // that the subcommand parser returns empty when there is no header.
    const BARE_HELP: &str = r#"xurl-rs 0.1
A tiny HTTP client.

Usage: xurl-rs URL
"#;

    // Localized help — section headers and descriptions are not English, but
    // the flag lines keep the usual `-H, --header` shape. Used to check that
    // the parsers do not panic and that the English-keyed parsers (env
    // hints, subcommands) come back empty.
    const NON_ENGLISH_HELP: &str = r#"用法: outil [选项]

参数:
  URL                       目标网址

选项:
  -H, --header <HEADER>     自定义请求头
  -X, --request <METHOD>    HTTP 方法
"#;

    #[test]
    fn parse_flags_extracts_short_and_long() {
        let flags = parse_flags(RIPGREP_HELP);
        assert!(flags.iter().any(|f| f.short.as_deref() == Some("-q")));
        assert!(flags.iter().any(|f| f.long.as_deref() == Some("--quiet")));
        assert!(
            flags
                .iter()
                .any(|f| f.long.as_deref() == Some("--no-messages"))
        );
        assert!(flags.iter().any(|f| f.long.as_deref() == Some("--null")));
    }

    #[test]
    fn parse_flags_handles_equals_and_values() {
        let flags = parse_flags(RIPGREP_HELP);
        // --regexp=PATTERN — the value shape must not leak into the long name.
        let regexp = flags
            .iter()
            .find(|f| f.long.as_deref() == Some("--regexp"))
            .expect("regexp flag parsed");
        // The short form on the same line must land on the same `Flag`.
        assert_eq!(regexp.short.as_deref(), Some("-e"));
    }

    #[test]
    fn parse_flags_ignores_prose_dashes() {
        // A line starting with '---' (separator) must not become a flag.
        let src = "Usage: foo [OPTIONS]\n\n-------\n\nOptions:\n  -q, --quiet    Quiet mode.\n";
        let flags = parse_flags(src);
        assert_eq!(flags.len(), 1);
        assert_eq!(flags[0].short.as_deref(), Some("-q"));
    }

    #[test]
    fn parse_env_hints_captures_clap_style() {
        // `[env: RIPGREP_COLOR=]` — the trailing `=` must not be part of the name.
        let hints = parse_env_hints(RIPGREP_HELP);
        assert!(hints.iter().any(|h| h.var == "RIPGREP_COLOR"));
    }

    #[test]
    fn parse_env_hints_multiple_occurrences() {
        // NOTE(review): despite the name, CLAP_HELP contains a single
        // `[env: ...]` hint, so this only checks that the hint is found —
        // it does not exercise several occurrences in one text.
        let hints = parse_env_hints(CLAP_HELP);
        assert!(hints.iter().any(|h| h.var == "AGENTNATIVE_QUIET"));
    }

    #[test]
    fn parse_env_hints_rejects_invalid_names() {
        // `[env: lowercase]` or `[env: 1ABC]` must not parse as env hints.
        let src = "  --flag   [env: lowercase] [env: 1ABC] [env: VALID_1]";
        let hints = parse_env_hints(src);
        assert_eq!(hints.len(), 1);
        assert_eq!(hints[0].var, "VALID_1");
    }

    #[test]
    fn parse_subcommands_reads_commands_block() {
        let subs = parse_subcommands(CLAP_HELP);
        assert!(subs.iter().any(|s| s == "check"));
        assert!(subs.iter().any(|s| s == "generate"));
        assert!(subs.iter().any(|s| s == "completions"));
    }

    #[test]
    fn parse_subcommands_empty_without_block() {
        // No `Commands:` header anywhere in the text.
        let subs = parse_subcommands(BARE_HELP);
        assert!(subs.is_empty());
    }

    #[test]
    fn parse_non_english_help_degrades_cleanly() {
        // The localized fixture has no `Commands:` header and no
        // `[env: ...]` hint, so those two parsers must return empty.
        let flags = parse_flags(NON_ENGLISH_HELP);
        // The options block still uses `-H, --header` syntax, so the flag
        // parser may detect the flags themselves — the non-English text is
        // in the headers and descriptions, not the flag names. The check
        // here is only that parsing doesn't panic and that every returned
        // `Flag` carries at least one form.
        for f in &flags {
            assert!(f.short.is_some() || f.long.is_some());
        }
        assert!(parse_env_hints(NON_ENGLISH_HELP).is_empty());
        assert!(parse_subcommands(NON_ENGLISH_HELP).is_empty());
    }

    #[test]
    fn help_output_lazy_parse_is_idempotent() {
        let help = HelpOutput::from_raw(RIPGREP_HELP);
        // Both calls must hand back the same slice; pointer identity shows
        // the second call reused the value stored by the first.
        let first = help.flags().as_ptr();
        let second = help.flags().as_ptr();
        assert_eq!(first, second);
        // And the data is stable across calls.
        assert_eq!(help.flags().len(), help.flags().len());
    }

    #[test]
    fn flag_matches_accepts_various_spellings() {
        let f = Flag {
            short: Some("-q".into()),
            long: Some("--quiet".into()),
        };
        // Dashed spellings are compared verbatim; bare ones get dashes added.
        assert!(f.matches("-q"));
        assert!(f.matches("--quiet"));
        assert!(f.matches("quiet"));
        assert!(f.matches("q"));
        assert!(!f.matches("--verbose"));
    }

    #[test]
    fn is_env_var_name_edges() {
        assert!(is_env_var_name("FOO"));
        assert!(is_env_var_name("FOO_BAR"));
        assert!(is_env_var_name("_UNDERSCORE"));
        assert!(!is_env_var_name(""));
        assert!(!is_env_var_name("lower"));
        assert!(!is_env_var_name("1LEADING"));
        assert!(!is_env_var_name("foo-bar"));
    }

    #[test]
    fn parse_short_flag_accepts_digits_and_question() {
        assert_eq!(parse_short_flag("-q"), Some("-q".into()));
        assert_eq!(parse_short_flag("-1"), Some("-1".into()));
        assert_eq!(parse_short_flag("-?"), Some("-?".into()));
        // Double-dash tokens, a lone dash, and punctuation are not short flags.
        assert_eq!(parse_short_flag("--long"), None);
        assert_eq!(parse_short_flag("-"), None);
        assert_eq!(parse_short_flag("-,"), None);
    }
}