Skip to main content

mati_core/hooks/
decide.rs

1//! Shared enforcement core for `mati hook-decide`.
2//!
3//! Pure functions — no I/O, no daemon calls. Testable without a running daemon.
4//! Platform adapters in `cli::hook_decide` map these semantic outcomes to
5//! protocol-specific output (Claude JSON, Codex exit codes).
6
7use std::collections::HashMap;
8
9// ── Types ───────────────────────────────────────────────────────────────────
10
/// The family a detected file-reading shell command belongs to.
///
/// The family determines how positional arguments are interpreted when the
/// target file path(s) are extracted from the command line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommandClass {
    /// `cat`, `less`, `head`, `tail`, `bat` — the file path is the first
    /// non-flag argument.
    CatLike,
    /// `grep`, `rg`, `sed`, `awk` — the file path is the last non-flag
    /// argument.
    GrepLike,
}
19
/// The semantic outcome of an enforcement check. Platform adapters translate
/// each variant into their own protocol output.
///
/// There is deliberately no `FailOpen` variant: that is a daemon-readiness
/// outcome which the adapter resolves before it ever calls `evaluate()`.
#[derive(Debug, Clone, PartialEq)]
pub enum Decision {
    /// Nothing to enforce — the access is allowed unconditionally.
    Allow,
    /// A confirmed gotcha exists and the agent has NOT consulted the record —
    /// the read is blocked.
    Deny { file_key: String, reason: String },
    /// A confirmed gotcha exists but the agent already consulted the record —
    /// the read is allowed, with the context surfaced for awareness.
    AlreadyConsulted { context: String },
    /// Medium confidence (0.3–0.6) with quality >= 0.4 — advisory context
    /// only.
    Advisory { context: String },
    /// The record is too stale to trust; the adapter decides whether to
    /// inject the warning.
    Liability { staleness: f32, context: String },
    /// The record is fully excluded from enforcement.
    Tombstone,
    /// The store holds no file record for this path.
    NoRecord,
    /// The command is not a file-reading operation.
    NotFileRead,
}
43
/// Side effects the adapter is expected to fire once a decision is made.
/// Every variant corresponds 1:1 to an existing daemon socket command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HookEvent {
    /// A record was accessed — daemon `log_hit`.
    Hit { key: String },
    /// No record was found — daemon `log_miss`.
    Miss { key: String },
    /// Pre-read/pre-bash denied an unconsulted read — daemon
    /// `log_compliance_miss`.
    BlockedUnconsultedRead { key: String },
    /// A Codex shell command was blocked — daemon `log_codex_shell_miss`.
    CodexShellBlocked { key: String },
    /// Post-bash confirmed a consulted read — daemon `log_compliance_hit`.
    ComplianceHit { key: String },
    /// Claude edit gate: the edit was DEFERRED because a recent consultation
    /// exists — records `AllowAfterReceipt` with reason `edit_after_receipt`
    /// (Plane 2).
    EditConsulted { key: String },
    /// Claude edit gate: the edit was DENIED (no recent consult) — records
    /// `Deny` with reason `edit_blocked_unconsulted` (Plane 2).
    EditBlocked { key: String },
    /// An enterprise floor mandate DENIED an unconsulted access to a
    /// consult-required path — records `Deny` with reason
    /// `floor_consult_required`. Kept distinct from a local-gotcha deny so the
    /// audit/report can tell an org mandate apart from a repo rule.
    FloorConsultBlocked { key: String },
}
69
70/// Input to the enforcement decision engine.
71pub struct EnforcementInput {
72    /// Repo-relative file path (e.g. `"src/main.rs"`).
73    pub rel_path: String,
74    /// File record JSON from `hook_evaluate`, or `None` if no record.
75    pub file_record: Option<serde_json::Value>,
76    /// Gotcha records keyed by gotcha key, from `hook_evaluate`.
77    pub gotcha_records: HashMap<String, serde_json::Value>,
78    /// Whether this file was already consulted via `mem_get` this session.
79    pub already_consulted: bool,
80}
81
82/// Result of `evaluate()`.
83pub struct EnforcementResult {
84    pub decision: Decision,
85    pub events: Vec<HookEvent>,
86}
87
88// ── Command Classification ──────────────────────────────────────────────────
89
/// Command words classified as [`CommandClass::CatLike`].
const CAT_LIKE: &[&str] = &[
    "cat",
    "less",
    "head",
    "tail",
    "bat",
];
/// Command words classified as [`CommandClass::GrepLike`].
const GREP_LIKE: &[&str] = &[
    "grep",
    "egrep",
    "fgrep",
    "rg",
    "sed",
    "awk",
];
92
/// Does `trimmed` begin with the command `word` as a whole word?
///
/// True when `trimmed` is exactly `word`, or when `word` is immediately
/// followed by an ASCII whitespace byte. This is what keeps `"catch"` from
/// being mistaken for `"cat"`.
fn matches_command_word(trimmed: &str, word: &str) -> bool {
    match trimmed.strip_prefix(word) {
        None => false,
        // Nothing after the word (exact match) or a whitespace separator.
        Some(tail) => tail
            .bytes()
            .next()
            .map_or(true, |b| b.is_ascii_whitespace()),
    }
}
107
/// Wrapper words that precede the real command without changing what it
/// reads: `sudo cat …`, `env LOG=1 cat …`, `nice cat …`. They are stripped
/// before classification so the read gate sees `cat` rather than the wrapper.
///
/// Wrapper *flags* (e.g. `sudo -u root`) are intentionally NOT parsed:
/// guessing which flags take a value risks stripping a real argument, so that
/// narrow case stays a tracked gap instead of being handled unsafely.
const PREFIX_WORDS: &[&str] = &[
    "sudo",
    "doas",
    "env",
    "nice",
    "ionice",
    "nohup",
    "setsid",
    "stdbuf",
    "command",
    "time",
];
117
/// Is `tok` a shell environment assignment of the form `NAME=VALUE`?
///
/// `NAME` is everything before the first `=`. It must be non-empty, start
/// with an ASCII letter or `_`, and contain only ASCII alphanumerics or `_`.
/// The value part is not inspected.
fn is_env_assignment(tok: &str) -> bool {
    let Some((name, _value)) = tok.split_once('=') else {
        return false;
    };
    let mut rest = name.chars();
    // An empty name yields `None` here and is rejected.
    let leading_ok = matches!(rest.next(), Some(c) if c.is_ascii_alphabetic() || c == '_');
    leading_ok && rest.all(|c| c.is_ascii_alphanumeric() || c == '_')
}
131
132/// Normalize a command for detection: strip leading env assignments and wrapper
133/// prefixes (`sudo`/`env`/`nice`/…), then reduce the command word to its
134/// basename (`/bin/cat` → `cat`). Returns the effective command, left-trimmed.
135/// Pure; closes the prefix and absolute-path bypass classes for the read gate.
136fn effective_command(cmd: &str) -> String {
137    let mut rest = cmd.trim_start();
138    loop {
139        let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
140        let tok = &rest[..end];
141        if tok.is_empty() {
142            break;
143        }
144        if is_env_assignment(tok) || PREFIX_WORDS.contains(&tok) {
145            rest = rest[end..].trim_start();
146            continue;
147        }
148        break;
149    }
150    let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
151    let (word, args) = rest.split_at(end);
152    let base = word.rsplit('/').next().unwrap_or(word);
153    let mut out = String::with_capacity(base.len() + args.len());
154    out.push_str(base);
155    out.push_str(args);
156    out
157}
158
159/// Classify a bash command string. Returns `None` for non-file-read commands.
160pub fn classify_command(cmd: &str) -> Option<CommandClass> {
161    let eff = effective_command(cmd);
162    let trimmed = eff.as_str();
163    for &word in CAT_LIKE {
164        if matches_command_word(trimmed, word) {
165            return Some(CommandClass::CatLike);
166        }
167    }
168    for &word in GREP_LIKE {
169        if matches_command_word(trimmed, word) {
170            return Some(CommandClass::GrepLike);
171        }
172    }
173    None
174}
175
176// ── File Path Extraction ────────────────────────────────────────────────────
177
178/// Extract the PRIMARY target file path from a classified command — the first
179/// file for cat-like, the last positional for grep-like. The single-path
180/// companion to [`extract_file_paths`]; see it for the tokenizer + grammar.
181pub fn extract_file_path(cmd: &str, class: CommandClass) -> Option<String> {
182    let paths = extract_file_paths(cmd, class);
183    match class {
184        CommandClass::CatLike => paths.into_iter().next(),
185        CommandClass::GrepLike => paths.into_iter().next_back(),
186    }
187}
188
189/// Extract ALL target file paths from a classified command, in order. The
190/// multi-file companion to [`extract_file_path`] — lets the read gate catch a
191/// gotcha on a non-first file (`cat a.rs b.rs`, `grep pat f1 f2`).
192///
193/// Tokenizes the command shell-style (honoring quotes) and reads files by each
194/// command's real grammar:
195/// - CatLike (`cat/less/head/tail/bat FILE...`): every positional is a file.
196/// - GrepLike (`grep/rg/sed/awk [flags] PATTERN [FILE...]`): the first
197///   positional is the search PATTERN; the rest are files (none after the
198///   pattern ⇒ stdin, so no files).
199///
200/// Using real tokens + position — instead of a "the file is whatever's quoted"
201/// heuristic — is what lets `grep -r "secret" src/db.rs` resolve to `src/db.rs`
202/// (the path) not `secret` (the quoted pattern), while still handling quoted
203/// paths that contain spaces. Stops at pipe (`|`), semicolon (`;`), `&&`, `||`.
204pub fn extract_file_paths(cmd: &str, class: CommandClass) -> Vec<String> {
205    // Same normalization as `classify_command` so prefixes/abs-paths don't
206    // throw off extraction (`sudo cat foo` must extract `foo`, not `cat`).
207    let eff = effective_command(cmd);
208    let cmd_part = split_at_shell_operator(&eff);
209    let positionals = positional_args(&shell_tokens(cmd_part));
210
211    match class {
212        CommandClass::CatLike => positionals,
213        CommandClass::GrepLike => {
214            if positionals.len() >= 2 {
215                positionals[1..].to_vec()
216            } else {
217                Vec::new()
218            }
219        }
220    }
221}
222
/// Return the part of `s` that precedes the first unquoted shell operator
/// (`|`, `||`, `;`, `&&`), or all of `s` when there is none.
///
/// Operators inside single- or double-quoted spans are ignored. A lone `&` is
/// not treated as an operator. An unterminated quote swallows the rest of the
/// input, so nothing after it can split.
fn split_at_shell_operator(s: &str) -> &str {
    // The quote byte we are currently inside, if any.
    let mut open_quote: Option<u8> = None;
    let mut scan = s.bytes().enumerate().peekable();

    while let Some((idx, byte)) = scan.next() {
        if let Some(quote) = open_quote {
            if byte == quote {
                open_quote = None;
            }
            continue;
        }
        match byte {
            b'"' | b'\'' => open_quote = Some(byte),
            // `|` covers both pipe and `||`: either way the command ends here.
            b'|' | b';' => return &s[..idx],
            b'&' if matches!(scan.peek(), Some(&(_, b'&'))) => return &s[..idx],
            _ => {}
        }
    }
    s
}
257
/// Break a command into shell-style tokens, honoring single and double quotes
/// (the quote characters themselves are dropped from the output).
///
/// This is not a full shell parser — it does just enough for path extraction:
/// a quoted path keeps its spaces inside one token, and a quoted pattern
/// becomes its inner text. Quoted and unquoted text that touch form a single
/// token. Runs of whitespace separate tokens; an unterminated quote runs to
/// the end of input (best effort). An empty quoted string yields an empty
/// token.
fn shell_tokens(s: &str) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    // `Some` while a token is being built (even if it is still empty).
    let mut pending: Option<String> = None;
    // The quote character we are currently inside, if any.
    let mut open: Option<char> = None;

    for ch in s.chars() {
        match open {
            Some(quote) if ch == quote => open = None,
            Some(_) => pending.get_or_insert_with(String::new).push(ch),
            None if ch == '\'' || ch == '"' => {
                // Opening a quote starts a token even when nothing follows.
                pending.get_or_insert_with(String::new);
                open = Some(ch);
            }
            None if ch.is_whitespace() => {
                if let Some(done) = pending.take() {
                    out.push(done);
                }
            }
            None => pending.get_or_insert_with(String::new).push(ch),
        }
    }

    out.extend(pending);
    out
}
297
/// Collect the positional (non-flag) arguments that follow the command word,
/// preserving order.
///
/// The first token is the command word and is always skipped. Tokens starting
/// with `-` are flags. An all-digit token directly after a flag is taken to
/// be that flag's value rather than a file (`tail -n 100 file`,
/// `head -c 5 file`) and is skipped too. Empty tokens are dropped.
fn positional_args(tokens: &[String]) -> Vec<String> {
    let mut collected = Vec::new();
    let mut follows_flag = false;

    for tok in tokens.iter().skip(1) {
        let is_flag = tok.starts_with('-');
        let is_flag_value =
            follows_flag && !tok.is_empty() && tok.bytes().all(|b| b.is_ascii_digit());
        // Only a flag arms the "next numeric token is a value" rule.
        follows_flag = is_flag;

        if !(is_flag || is_flag_value || tok.is_empty()) {
            collected.push(tok.clone());
        }
    }
    collected
}
320
321// ── apply_patch envelope parsing ────────────────────────────────────────────
322
/// Upper bound on how many files of a single `apply_patch` are gated.
///
/// Patches touching more files than this are rare; the cap bounds the number
/// of per-file daemon round-trips so the hook stays well inside its deadline.
/// Files past the cap are NOT gated (fail-open bias for the edit path), and
/// the caller logs the truncation.
pub const MAX_APPLY_PATCH_FILES: usize = 50;
328
/// Collect the target file paths named in a Codex `apply_patch` envelope.
///
/// Codex passes the whole patch as one string in `tool_input.command`:
///
/// ```text
/// *** Begin Patch
/// *** Update File: src/a.rs
/// @@ ...
///  context
/// -old
/// +new
/// *** Add File: src/b.rs
/// +contents
/// *** Delete File: src/c.rs
/// *** Move to: src/a_renamed.rs
/// *** End Patch
/// ```
///
/// A marker only counts when it starts at column 0. Diff body lines carry a
/// space/`+`/`-`/`@@` prefix, so body content that merely contains
/// `*** Update File:` (e.g. `+*** Update File: x`) is never mistaken for an
/// envelope marker. Paths are whitespace-trimmed; blank paths are ignored.
///
/// Returns the paths in first-seen order without duplicates; normalizing each
/// path is the caller's job. Add/Update/Delete targets and both ends of a
/// rename are included — `evaluate()` allows any path that has no confirmed
/// gotcha, so over-collecting is harmless.
pub fn extract_apply_patch_files(patch: &str) -> Vec<String> {
    const MARKERS: [&str; 4] = [
        "*** Update File: ",
        "*** Add File: ",
        "*** Delete File: ",
        "*** Move to: ",
    ];

    let targets = patch
        .lines()
        // The markers are mutually exclusive prefixes, so at most one matches.
        .filter_map(|line| MARKERS.iter().find_map(|marker| line.strip_prefix(*marker)))
        .map(str::trim)
        .filter(|path| !path.is_empty());

    let mut seen: Vec<String> = Vec::new();
    for path in targets {
        if !seen.iter().any(|known| known == path) {
            seen.push(path.to_owned());
        }
    }
    seen
}
375
376// ── Path Normalization ──────────────────────────────────────────────────────
377
/// Normalize `file_path` to a lexical repo-relative path.
///
/// - Strips the `repo_root` prefix when `file_path` lies at or below it. The
///   root may be given with or without trailing `/`; the match must end on a
///   path-component boundary, so root `/project` never strips
///   `/projectile/x`.
/// - Collapses empty, `.` and `..` components lexically (no filesystem
///   access).
/// - Does NOT resolve symlinks — memory keys are lexical paths.
///
/// Returns `"."` when nothing is left after normalization (including when
/// `file_path` is the repo root itself). If `..` would climb above the start
/// of the path, the root-stripped input is returned unchanged: such a path is
/// out of scope and will not match any store key.
pub fn normalize_path(file_path: &str, repo_root: Option<&str>) -> String {
    let stripped = repo_root
        // Tolerate a root written as `/repo/`: compare without the slashes.
        .and_then(|root| file_path.strip_prefix(root.trim_end_matches('/')))
        // The remainder must be empty (the root itself) or start a new
        // component; otherwise the prefix only matched part of a name.
        .and_then(|rest| {
            if rest.is_empty() {
                Some(rest)
            } else {
                rest.strip_prefix('/')
            }
        })
        .unwrap_or(file_path);

    let mut components: Vec<&str> = Vec::new();
    for part in stripped.split('/') {
        match part {
            "" | "." => continue,
            ".." => {
                if components.pop().is_none() {
                    // Path escapes above root — out of scope.
                    // Return as-is; it won't match any store key.
                    return stripped.to_string();
                }
            }
            c => components.push(c),
        }
    }

    if components.is_empty() {
        ".".to_string()
    } else {
        components.join("/")
    }
}
413
414// ── Core Decision Engine ────────────────────────────────────────────────────
415
416/// Evaluate the enforcement decision for a file access.
417///
418/// Pure function — all data comes from `input`, no I/O. The decision matrix
419/// matches ARCHITECTURE.md §10.1.
420pub fn evaluate(input: &EnforcementInput) -> EnforcementResult {
421    let file_key = format!("file:{}", input.rel_path);
422
423    // ── No record ───────────────────────────────────────────────────────
424    let file_record = match &input.file_record {
425        Some(r) if r.is_object() => r,
426        _ => {
427            return EnforcementResult {
428                decision: Decision::NoRecord,
429                events: vec![HookEvent::Miss { key: file_key }],
430            };
431        }
432    };
433
434    // ── Extract scores ──────────────────────────────────────────────────
435    let confidence = json_f32(file_record, "/confidence/value");
436    let quality = json_f32(file_record, "/quality/value");
437    let staleness = json_f32(file_record, "/staleness/value");
438    let staleness_tier = json_str(file_record, "/staleness/tier");
439
440    // ── Tombstone — fully excluded ──────────────────────────────────────
441    if staleness_tier == "tombstone" {
442        return EnforcementResult {
443            decision: Decision::Tombstone,
444            events: vec![],
445        };
446    }
447
448    // ── Liability — too stale to trust ──────────────────────────────────
449    if staleness_tier == "liability" {
450        return EnforcementResult {
451            decision: Decision::Liability {
452                staleness,
453                context: format!(
454                    "WARNING: STALE record for {} is a liability (staleness {:.2}). \
455                     Read the file directly — the cached record is too stale to trust.",
456                    input.rel_path, staleness
457                ),
458            },
459            events: vec![HookEvent::Hit { key: file_key }],
460        };
461    }
462
463    // ── Build context + check gotchas ───────────────────────────────────
464    let purpose = json_str(file_record, "/value");
465    let mut context_lines: Vec<String> = Vec::new();
466    if !purpose.is_empty() {
467        context_lines.push(format!("Purpose: {purpose}"));
468    }
469
470    let mut deny_signal = false;
471    let gotcha_keys = json_string_array(file_record, "/payload/gotcha_keys");
472
473    for gkey in &gotcha_keys {
474        let grec = match input.gotcha_records.get(gkey.as_str()) {
475            Some(r) if r.is_object() => r,
476            _ => continue,
477        };
478
479        let confirmed = json_bool(grec, "/payload/confirmed");
480        let gconfidence = json_f32(grec, "/confidence/value");
481        let gquality = json_f32(grec, "/quality/value");
482        let rule = json_str(grec, "/value");
483
484        // Only confirmed, injectable gotchas contribute to the injected
485        // context (P4: unconfirmed gotchas never influence injection). Gating
486        // the rule push here also bounds the payload — without it, every
487        // attached gotcha, including unconfirmed Layer-0 stubs, was dumped into
488        // the context (a single hotspot file with 1k+ stubs produced ~47 KB).
489        if confirmed && gconfidence >= 0.6 && gquality >= 0.4 {
490            deny_signal = true;
491            if !rule.is_empty() {
492                context_lines.push(format!("\u{26a0} {rule}"));
493            }
494        }
495    }
496
497    // Staleness warning for moderately stale records.
498    if staleness >= 0.4 {
499        context_lines.push(format!(
500            "Warning: record staleness {staleness:.2} — verify critical details."
501        ));
502    }
503
504    // Blast radius warning for high-impact files.
505    {
506        let blast_tier = json_str(file_record, "/payload/blast_radius/tier");
507        if blast_tier == "high" || blast_tier == "critical" {
508            let blast_direct = file_record
509                .pointer("/payload/blast_radius/direct")
510                .and_then(|v| v.as_u64())
511                .unwrap_or(0);
512            context_lines.push(format!(
513                "\u{26a0} Blast radius: {blast_direct} direct importers ({blast_tier}) — modify carefully"
514            ));
515        }
516    }
517
518    // ── Deny path ───────────────────────────────────────────────────────
519    if deny_signal {
520        if input.already_consulted {
521            let context = if context_lines.is_empty() {
522                format!(
523                    "Gotcha exists for {} — proceed with awareness",
524                    input.rel_path
525                )
526            } else {
527                context_lines.join("\n")
528            };
529            // AllowAfterReceipt enforcement event: the read is being allowed
530            // because a valid consultation receipt exists. ComplianceHit
531            // (SessionLog v2) triggers the AllowAfterReceipt record.
532            return EnforcementResult {
533                decision: Decision::AlreadyConsulted { context },
534                events: vec![HookEvent::ComplianceHit { key: file_key }],
535            };
536        }
537
538        // The reason is a SEMANTIC string — JSON escaping is the output
539        // layer's job (`escape_json_string` in the adapter). Pre-escaping
540        // here double-escapes: a path with a quote would render as `\"` in
541        // the message the agent sees.
542        let safe_path = &input.rel_path;
543        let staleness_note = if staleness >= 0.4 {
544            format!(" (staleness {staleness:.2} — verify critical details)")
545        } else {
546            String::new()
547        };
548
549        return EnforcementResult {
550            decision: Decision::Deny {
551                file_key: file_key.clone(),
552                reason: format!(
553                    "[mati] Confirmed gotcha on {safe_path} — \
554                     call mem_get(\"file:{safe_path}\") and read the record \
555                     before accessing this file.{staleness_note}"
556                ),
557            },
558            events: vec![HookEvent::BlockedUnconsultedRead { key: file_key }],
559        };
560    }
561
562    // ── Advisory path (medium confidence) ───────────────────────────────
563    if confidence >= 0.3 && quality >= 0.4 {
564        let context = if context_lines.is_empty() {
565            format!(
566                "Record exists for {} — confidence {confidence:.2}",
567                input.rel_path
568            )
569        } else {
570            context_lines.join("\n")
571        };
572        return EnforcementResult {
573            decision: Decision::Advisory { context },
574            events: vec![HookEvent::Hit { key: file_key }],
575        };
576    }
577
578    // ── Default: allow, no injection ────────────────────────────────────
579    EnforcementResult {
580        decision: Decision::Allow,
581        events: vec![],
582    }
583}
584
585// ── JSON helpers ────────────────────────────────────────────────────────────
586
587fn json_f32(val: &serde_json::Value, pointer: &str) -> f32 {
588    val.pointer(pointer)
589        .and_then(|v| v.as_f64())
590        .map(|f| f as f32)
591        .unwrap_or(0.0)
592}
593
594fn json_str(val: &serde_json::Value, pointer: &str) -> String {
595    val.pointer(pointer)
596        .and_then(|v| v.as_str())
597        .unwrap_or("")
598        .to_string()
599}
600
601fn json_bool(val: &serde_json::Value, pointer: &str) -> bool {
602    val.pointer(pointer)
603        .and_then(|v| v.as_bool())
604        .unwrap_or(false)
605}
606
607fn json_string_array(val: &serde_json::Value, pointer: &str) -> Vec<String> {
608    val.pointer(pointer)
609        .and_then(|v| v.as_array())
610        .map(|arr| {
611            arr.iter()
612                .filter_map(|v| v.as_str().map(|s| s.to_string()))
613                .collect()
614        })
615        .unwrap_or_default()
616}
617
618// ── Tests ───────────────────────────────────────────────────────────────────
619
620#[cfg(test)]
621mod tests {
622    use super::*;
623    use serde_json::json;
624
625    // ── extract_apply_patch_files ────────────────────────────────────────
626
627    #[test]
628    fn apply_patch_single_update() {
629        let patch =
630            "*** Begin Patch\n*** Update File: src/main.rs\n@@\n-old\n+new\n*** End Patch\n";
631        assert_eq!(extract_apply_patch_files(patch), vec!["src/main.rs"]);
632    }
633
634    #[test]
635    fn apply_patch_multi_file_add_update_delete() {
636        let patch = "*** Begin Patch\n\
637            *** Update File: src/a.rs\n@@\n+x\n\
638            *** Add File: src/b.rs\n+y\n\
639            *** Delete File: src/c.rs\n\
640            *** End Patch\n";
641        assert_eq!(
642            extract_apply_patch_files(patch),
643            vec!["src/a.rs", "src/b.rs", "src/c.rs"]
644        );
645    }
646
647    #[test]
648    fn apply_patch_rename_includes_source_and_destination() {
649        let patch =
650            "*** Begin Patch\n*** Update File: src/old.rs\n*** Move to: src/new.rs\n@@\n+x\n*** End Patch\n";
651        assert_eq!(
652            extract_apply_patch_files(patch),
653            vec!["src/old.rs", "src/new.rs"]
654        );
655    }
656
657    #[test]
658    fn apply_patch_ignores_marker_inside_diff_body() {
659        // A diff line that ADDS text resembling a marker must NOT be parsed as
660        // an envelope marker: diff body lines are prefixed (+/-/space), so they
661        // never begin at column 0 with "*** ".
662        let patch = "*** Begin Patch\n\
663            *** Update File: src/real.rs\n@@\n\
664            +*** Update File: src/fake.rs\n\
665            + *** Add File: src/also_fake.rs\n\
666            *** End Patch\n";
667        assert_eq!(extract_apply_patch_files(patch), vec!["src/real.rs"]);
668    }
669
670    #[test]
671    fn apply_patch_dedups_repeated_path() {
672        let patch =
673            "*** Begin Patch\n*** Update File: src/a.rs\n*** Update File: src/a.rs\n*** End Patch\n";
674        assert_eq!(extract_apply_patch_files(patch), vec!["src/a.rs"]);
675    }
676
677    #[test]
678    fn apply_patch_empty_or_no_markers() {
679        assert!(extract_apply_patch_files("").is_empty());
680        assert!(extract_apply_patch_files("just some text\nno markers here").is_empty());
681        assert!(extract_apply_patch_files("*** Begin Patch\n*** End Patch\n").is_empty());
682    }
683
684    #[test]
685    fn apply_patch_trims_trailing_whitespace() {
686        let patch = "*** Update File: src/spaced.rs   \n";
687        assert_eq!(extract_apply_patch_files(patch), vec!["src/spaced.rs"]);
688    }
689
690    // ── classify_command ─────────────────────────────────────────────────
691
692    #[test]
693    fn classify_cat() {
694        assert_eq!(
695            classify_command("cat src/main.rs"),
696            Some(CommandClass::CatLike)
697        );
698    }
699
700    #[test]
701    fn classify_head_with_flag() {
702        assert_eq!(
703            classify_command("head -n 10 file.rs"),
704            Some(CommandClass::CatLike)
705        );
706    }
707
708    #[test]
709    fn classify_leading_whitespace() {
710        assert_eq!(classify_command("  cat file"), Some(CommandClass::CatLike));
711    }
712
713    #[test]
714    fn classify_less() {
715        assert_eq!(
716            classify_command("less README.md"),
717            Some(CommandClass::CatLike)
718        );
719    }
720
721    #[test]
722    fn classify_tail() {
723        assert_eq!(
724            classify_command("tail -f log.txt"),
725            Some(CommandClass::CatLike)
726        );
727    }
728
729    #[test]
730    fn classify_bat() {
731        assert_eq!(
732            classify_command("bat src/lib.rs"),
733            Some(CommandClass::CatLike)
734        );
735    }
736
737    #[test]
738    fn classify_grep() {
739        assert_eq!(
740            classify_command("grep -rn pattern src/"),
741            Some(CommandClass::GrepLike)
742        );
743    }
744
745    #[test]
746    fn classify_rg() {
747        assert_eq!(
748            classify_command("rg TODO src/"),
749            Some(CommandClass::GrepLike)
750        );
751    }
752
753    #[test]
754    fn classify_sed() {
755        assert_eq!(
756            classify_command("sed -i 's/a/b/' file.rs"),
757            Some(CommandClass::GrepLike)
758        );
759    }
760
761    #[test]
762    fn classify_awk() {
763        assert_eq!(
764            classify_command("awk '{print $1}' file.rs"),
765            Some(CommandClass::GrepLike)
766        );
767    }
768
769    #[test]
770    fn classify_ls_is_none() {
771        assert_eq!(classify_command("ls -la"), None);
772    }
773
774    #[test]
775    fn classify_cd_is_none() {
776        assert_eq!(classify_command("cd /tmp"), None);
777    }
778
779    #[test]
780    fn classify_catch_is_none() {
781        assert_eq!(classify_command("catch errors"), None);
782    }
783
784    #[test]
785    fn classify_catalog_is_none() {
786        assert_eq!(classify_command("catalog"), None);
787    }
788
789    #[test]
790    fn classify_grep_bare_is_none() {
791        // "grep" with no args — still classifies (extraction returns None later)
792        assert_eq!(classify_command("grep"), Some(CommandClass::GrepLike));
793    }
794
795    // ── extract_file_path ───────────────────────────────────────────────
796
797    #[test]
798    fn extract_cat_simple() {
799        assert_eq!(
800            extract_file_path("cat src/main.rs", CommandClass::CatLike),
801            Some("src/main.rs".into())
802        );
803    }
804
805    #[test]
806    fn extract_cat_with_flag() {
807        assert_eq!(
808            extract_file_path("cat -n src/main.rs", CommandClass::CatLike),
809            Some("src/main.rs".into())
810        );
811    }
812
813    #[test]
814    fn extract_cat_quoted_path() {
815        assert_eq!(
816            extract_file_path(r#"cat "path with spaces/file.rs""#, CommandClass::CatLike),
817            Some("path with spaces/file.rs".into())
818        );
819    }
820
821    #[test]
822    fn extract_cat_with_pipe() {
823        assert_eq!(
824            extract_file_path("cat file.rs | grep foo", CommandClass::CatLike),
825            Some("file.rs".into())
826        );
827    }
828
829    #[test]
830    fn extract_cat_with_semicolon() {
831        assert_eq!(
832            extract_file_path("cat file.rs; echo done", CommandClass::CatLike),
833            Some("file.rs".into())
834        );
835    }
836
837    #[test]
838    fn extract_cat_with_and() {
839        assert_eq!(
840            extract_file_path("cat file.rs && echo ok", CommandClass::CatLike),
841            Some("file.rs".into())
842        );
843    }
844
845    #[test]
846    fn extract_grep_last_arg() {
847        assert_eq!(
848            extract_file_path("grep -rn pattern src/main.rs", CommandClass::GrepLike),
849            Some("src/main.rs".into())
850        );
851    }
852
853    #[test]
854    fn extract_grep_quoted_file() {
855        assert_eq!(
856            extract_file_path(r#"grep pattern "src/main.rs""#, CommandClass::GrepLike),
857            Some("src/main.rs".into())
858        );
859    }
860
861    #[test]
862    fn extract_grep_strips_single_quotes() {
863        assert_eq!(
864            extract_file_path("grep 'pattern' file.rs", CommandClass::GrepLike),
865            Some("file.rs".into())
866        );
867    }
868
869    #[test]
870    fn extract_no_args() {
871        assert_eq!(extract_file_path("cat", CommandClass::CatLike), None);
872    }
873
874    #[test]
875    fn extract_only_flags() {
876        assert_eq!(extract_file_path("cat -n -v", CommandClass::CatLike), None);
877    }
878
879    // ── normalize_path ──────────────────────────────────────────────────
880
881    #[test]
882    fn normalize_strips_prefix() {
883        assert_eq!(
884            normalize_path("/home/user/project/src/main.rs", Some("/home/user/project")),
885            "src/main.rs"
886        );
887    }
888
889    #[test]
890    fn normalize_dot_slash() {
891        assert_eq!(normalize_path("./src/main.rs", None), "src/main.rs");
892    }
893
894    #[test]
895    fn normalize_dotdot() {
896        assert_eq!(normalize_path("src/../src/main.rs", None), "src/main.rs");
897    }
898
899    #[test]
900    fn normalize_already_relative() {
901        assert_eq!(normalize_path("src/main.rs", None), "src/main.rs");
902    }
903
904    #[test]
905    fn normalize_no_repo_root() {
906        assert_eq!(
907            normalize_path("/abs/path/file.rs", None),
908            "abs/path/file.rs"
909        );
910    }
911
912    #[test]
913    fn normalize_trailing_slash_root() {
914        // repo_root should not have trailing slash, but handle it gracefully.
915        assert_eq!(
916            normalize_path("/project/src/file.rs", Some("/project")),
917            "src/file.rs"
918        );
919    }
920
921    #[test]
922    fn normalize_leading_dotdot_returns_unchanged() {
923        // Path escaping above root is out-of-scope — return as-is.
924        assert_eq!(normalize_path("../other/file.rs", None), "../other/file.rs");
925    }
926
927    #[test]
928    fn normalize_deep_dotdot_escape_returns_unchanged() {
929        assert_eq!(normalize_path("foo/../../bar.rs", None), "foo/../../bar.rs");
930    }
931
932    #[test]
933    fn normalize_dotdot_within_scope_ok() {
934        // src/../lib/file.rs stays within the repo — collapses fine.
935        assert_eq!(normalize_path("src/../lib/file.rs", None), "lib/file.rs");
936    }
937
938    // ── evaluate ────────────────────────────────────────────────────────
939
940    fn make_file_record(
941        confidence: f32,
942        quality: f32,
943        staleness: f32,
944        staleness_tier: &str,
945        gotcha_keys: &[&str],
946    ) -> serde_json::Value {
947        json!({
948            "value": "Test file purpose",
949            "confidence": { "value": confidence },
950            "quality": { "value": quality },
951            "staleness": { "value": staleness, "tier": staleness_tier },
952            "payload": {
953                "gotcha_keys": gotcha_keys,
954            }
955        })
956    }
957
958    fn make_gotcha(confirmed: bool, confidence: f32, quality: f32) -> serde_json::Value {
959        json!({
960            "value": "Do not use unwrap here",
961            "confidence": { "value": confidence },
962            "quality": { "value": quality },
963            "payload": { "confirmed": confirmed }
964        })
965    }
966
967    #[test]
968    fn eval_no_record() {
969        let input = EnforcementInput {
970            rel_path: "src/main.rs".into(),
971            file_record: None,
972            gotcha_records: HashMap::new(),
973            already_consulted: false,
974        };
975        let result = evaluate(&input);
976        assert_eq!(result.decision, Decision::NoRecord);
977        assert_eq!(result.events.len(), 1);
978        assert!(matches!(&result.events[0], HookEvent::Miss { key } if key == "file:src/main.rs"));
979    }
980
981    #[test]
982    fn eval_tombstone() {
983        let input = EnforcementInput {
984            rel_path: "src/old.rs".into(),
985            file_record: Some(make_file_record(0.8, 0.5, 0.95, "tombstone", &[])),
986            gotcha_records: HashMap::new(),
987            already_consulted: false,
988        };
989        let result = evaluate(&input);
990        assert_eq!(result.decision, Decision::Tombstone);
991        assert!(result.events.is_empty());
992    }
993
994    #[test]
995    fn eval_liability() {
996        let input = EnforcementInput {
997            rel_path: "src/stale.rs".into(),
998            file_record: Some(make_file_record(0.8, 0.5, 0.85, "liability", &[])),
999            gotcha_records: HashMap::new(),
1000            already_consulted: false,
1001        };
1002        let result = evaluate(&input);
1003        assert!(
1004            matches!(&result.decision, Decision::Liability { staleness, .. } if *staleness > 0.8)
1005        );
1006        assert_eq!(result.events.len(), 1);
1007        assert!(matches!(&result.events[0], HookEvent::Hit { .. }));
1008    }
1009
1010    #[test]
1011    fn eval_confirmed_gotcha_denies() {
1012        let mut gotchas = HashMap::new();
1013        gotchas.insert("gotcha:test".to_string(), make_gotcha(true, 0.7, 0.5));
1014
1015        let input = EnforcementInput {
1016            rel_path: "src/main.rs".into(),
1017            file_record: Some(make_file_record(0.7, 0.5, 0.1, "fresh", &["gotcha:test"])),
1018            gotcha_records: gotchas,
1019            already_consulted: false,
1020        };
1021        let result = evaluate(&input);
1022        assert!(matches!(&result.decision, Decision::Deny { .. }));
1023        assert!(matches!(
1024            &result.events[0],
1025            HookEvent::BlockedUnconsultedRead { key } if key == "file:src/main.rs"
1026        ));
1027    }
1028
1029    #[test]
1030    fn eval_unconfirmed_gotcha_allows() {
1031        let mut gotchas = HashMap::new();
1032        gotchas.insert("gotcha:test".to_string(), make_gotcha(false, 0.7, 0.5));
1033
1034        let input = EnforcementInput {
1035            rel_path: "src/main.rs".into(),
1036            file_record: Some(make_file_record(0.7, 0.5, 0.1, "fresh", &["gotcha:test"])),
1037            gotcha_records: gotchas,
1038            already_consulted: false,
1039        };
1040        let result = evaluate(&input);
1041        // No deny signal — falls through to advisory (confidence 0.7 >= 0.3, quality 0.5 >= 0.4).
1042        // P4: the unconfirmed gotcha's rule must NOT leak into the injected
1043        // context — only confirmed gotchas contribute to injection.
1044        match &result.decision {
1045            Decision::Advisory { context } => assert!(
1046                !context.contains("Do not use unwrap here"),
1047                "unconfirmed gotcha rule leaked into injected context: {context:?}"
1048            ),
1049            other => panic!("expected Advisory, got {other:?}"),
1050        }
1051    }
1052
1053    #[test]
1054    fn eval_low_confidence_gotcha_allows() {
1055        let mut gotchas = HashMap::new();
1056        gotchas.insert("gotcha:test".to_string(), make_gotcha(true, 0.4, 0.5));
1057
1058        let input = EnforcementInput {
1059            rel_path: "src/main.rs".into(),
1060            file_record: Some(make_file_record(0.7, 0.5, 0.1, "fresh", &["gotcha:test"])),
1061            gotcha_records: gotchas,
1062            already_consulted: false,
1063        };
1064        let result = evaluate(&input);
1065        assert!(matches!(&result.decision, Decision::Advisory { .. }));
1066    }
1067
1068    #[test]
1069    fn eval_low_quality_gotcha_allows() {
1070        let mut gotchas = HashMap::new();
1071        gotchas.insert("gotcha:test".to_string(), make_gotcha(true, 0.7, 0.2));
1072
1073        let input = EnforcementInput {
1074            rel_path: "src/main.rs".into(),
1075            file_record: Some(make_file_record(0.7, 0.5, 0.1, "fresh", &["gotcha:test"])),
1076            gotcha_records: gotchas,
1077            already_consulted: false,
1078        };
1079        let result = evaluate(&input);
1080        assert!(matches!(&result.decision, Decision::Advisory { .. }));
1081    }
1082
1083    #[test]
1084    fn eval_consulted_downgrades_deny() {
1085        let mut gotchas = HashMap::new();
1086        gotchas.insert("gotcha:test".to_string(), make_gotcha(true, 0.7, 0.5));
1087
1088        let input = EnforcementInput {
1089            rel_path: "src/main.rs".into(),
1090            file_record: Some(make_file_record(0.7, 0.5, 0.1, "fresh", &["gotcha:test"])),
1091            gotcha_records: gotchas,
1092            already_consulted: true,
1093        };
1094        let result = evaluate(&input);
1095        assert!(matches!(
1096            &result.decision,
1097            Decision::AlreadyConsulted { .. }
1098        ));
1099        // AlreadyConsulted emits ComplianceHit so the v2 SessionLog dispatch
1100        // records an AllowAfterReceipt enforcement event (not a fresh receipt).
1101        assert!(matches!(&result.events[0], HookEvent::ComplianceHit { .. }));
1102    }
1103
1104    #[test]
1105    fn eval_medium_confidence_advisory() {
1106        let input = EnforcementInput {
1107            rel_path: "src/main.rs".into(),
1108            file_record: Some(make_file_record(0.45, 0.5, 0.1, "fresh", &[])),
1109            gotcha_records: HashMap::new(),
1110            already_consulted: false,
1111        };
1112        let result = evaluate(&input);
1113        assert!(matches!(&result.decision, Decision::Advisory { .. }));
1114        assert!(matches!(&result.events[0], HookEvent::Hit { .. }));
1115    }
1116
1117    #[test]
1118    fn eval_low_everything_allows() {
1119        let input = EnforcementInput {
1120            rel_path: "src/main.rs".into(),
1121            file_record: Some(make_file_record(0.1, 0.1, 0.1, "fresh", &[])),
1122            gotcha_records: HashMap::new(),
1123            already_consulted: false,
1124        };
1125        let result = evaluate(&input);
1126        assert_eq!(result.decision, Decision::Allow);
1127        assert!(result.events.is_empty());
1128    }
1129
1130    #[test]
1131    fn eval_staleness_warning_appended() {
1132        let input = EnforcementInput {
1133            rel_path: "src/main.rs".into(),
1134            file_record: Some(make_file_record(0.5, 0.5, 0.5, "stale", &[])),
1135            gotcha_records: HashMap::new(),
1136            already_consulted: false,
1137        };
1138        let result = evaluate(&input);
1139        if let Decision::Advisory { context } = &result.decision {
1140            assert!(context.contains("staleness 0.50"));
1141        } else {
1142            panic!("expected Advisory, got {:?}", result.decision);
1143        }
1144    }
1145
1146    #[test]
1147    fn eval_multiple_gotchas_one_deny() {
1148        let mut gotchas = HashMap::new();
1149        gotchas.insert("gotcha:safe".to_string(), make_gotcha(false, 0.7, 0.5));
1150        gotchas.insert("gotcha:danger".to_string(), make_gotcha(true, 0.8, 0.6));
1151
1152        let input = EnforcementInput {
1153            rel_path: "src/main.rs".into(),
1154            file_record: Some(make_file_record(
1155                0.7,
1156                0.5,
1157                0.1,
1158                "fresh",
1159                &["gotcha:safe", "gotcha:danger"],
1160            )),
1161            gotcha_records: gotchas,
1162            already_consulted: false,
1163        };
1164        let result = evaluate(&input);
1165        assert!(matches!(&result.decision, Decision::Deny { .. }));
1166    }
1167
1168    #[test]
1169    fn eval_deny_includes_staleness_note() {
1170        let mut gotchas = HashMap::new();
1171        gotchas.insert("gotcha:test".to_string(), make_gotcha(true, 0.7, 0.5));
1172
1173        let input = EnforcementInput {
1174            rel_path: "src/main.rs".into(),
1175            file_record: Some(make_file_record(0.7, 0.5, 0.5, "stale", &["gotcha:test"])),
1176            gotcha_records: gotchas,
1177            already_consulted: false,
1178        };
1179        let result = evaluate(&input);
1180        if let Decision::Deny { reason, .. } = &result.decision {
1181            assert!(reason.contains("staleness"));
1182        } else {
1183            panic!("expected Deny");
1184        }
1185    }
1186
1187    #[test]
1188    fn eval_invalid_json_allows() {
1189        let input = EnforcementInput {
1190            rel_path: "src/main.rs".into(),
1191            file_record: Some(json!("not an object")),
1192            gotcha_records: HashMap::new(),
1193            already_consulted: false,
1194        };
1195        let result = evaluate(&input);
1196        // Invalid record treated as no-record.
1197        assert_eq!(result.decision, Decision::NoRecord);
1198    }
1199
1200    #[test]
1201    fn eval_never_produces_fail_open() {
1202        // FailOpen is NOT in the Decision enum at all — this test documents the contract.
1203        // The enum has no FailOpen variant, so this is a compile-time guarantee.
1204        // This test verifies the doc comment claim by testing boundary cases.
1205        let cases: Vec<EnforcementInput> = vec![
1206            EnforcementInput {
1207                rel_path: "x".into(),
1208                file_record: None,
1209                gotcha_records: HashMap::new(),
1210                already_consulted: false,
1211            },
1212            EnforcementInput {
1213                rel_path: "x".into(),
1214                file_record: Some(json!(null)),
1215                gotcha_records: HashMap::new(),
1216                already_consulted: false,
1217            },
1218            EnforcementInput {
1219                rel_path: "x".into(),
1220                file_record: Some(json!({})),
1221                gotcha_records: HashMap::new(),
1222                already_consulted: false,
1223            },
1224        ];
1225        for input in cases {
1226            let result = evaluate(&input);
1227            // If Decision had a FailOpen variant, we'd match against it here.
1228            // Since it doesn't, this documents that the pure core never fails open.
1229            assert!(matches!(
1230                result.decision,
1231                Decision::Allow
1232                    | Decision::Deny { .. }
1233                    | Decision::AlreadyConsulted { .. }
1234                    | Decision::Advisory { .. }
1235                    | Decision::Liability { .. }
1236                    | Decision::Tombstone
1237                    | Decision::NoRecord
1238                    | Decision::NotFileRead
1239            ));
1240        }
1241    }
1242
1243    #[test]
1244    fn eval_context_includes_purpose_and_rules() {
1245        let mut gotchas = HashMap::new();
1246        gotchas.insert("gotcha:test".to_string(), make_gotcha(true, 0.7, 0.5));
1247
1248        let input = EnforcementInput {
1249            rel_path: "src/main.rs".into(),
1250            file_record: Some(make_file_record(0.7, 0.5, 0.1, "fresh", &["gotcha:test"])),
1251            gotcha_records: gotchas,
1252            already_consulted: true,
1253        };
1254        let result = evaluate(&input);
1255        if let Decision::AlreadyConsulted { context } = &result.decision {
1256            assert!(context.contains("Purpose: Test file purpose"));
1257            assert!(context.contains("Do not use unwrap here"));
1258        } else {
1259            panic!("expected AlreadyConsulted, got {:?}", result.decision);
1260        }
1261    }
1262
1263    #[test]
1264    fn eval_blast_radius_warning_for_critical_file() {
1265        let mut file_record = make_file_record(0.5, 0.5, 0.1, "fresh", &[]);
1266        // Inject blast_radius into payload
1267        file_record
1268            .as_object_mut()
1269            .unwrap()
1270            .get_mut("payload")
1271            .unwrap()
1272            .as_object_mut()
1273            .unwrap()
1274            .insert(
1275                "blast_radius".into(),
1276                json!({ "direct": 45, "transitive": 10, "score": 48.0, "tier": "critical" }),
1277            );
1278
1279        let input = EnforcementInput {
1280            rel_path: "src/core.rs".into(),
1281            file_record: Some(file_record),
1282            gotcha_records: HashMap::new(),
1283            already_consulted: false,
1284        };
1285        let result = evaluate(&input);
1286        if let Decision::Advisory { context } = &result.decision {
1287            assert!(
1288                context.contains("Blast radius"),
1289                "advisory context must include blast radius warning, got: {context}"
1290            );
1291            assert!(context.contains("45"), "warning must include direct count");
1292            assert!(context.contains("critical"), "warning must include tier");
1293        } else {
1294            panic!("expected Advisory, got {:?}", result.decision);
1295        }
1296    }
1297
1298    #[test]
1299    fn eval_no_blast_warning_for_low_file() {
1300        let mut file_record = make_file_record(0.5, 0.5, 0.1, "fresh", &[]);
1301        file_record
1302            .as_object_mut()
1303            .unwrap()
1304            .get_mut("payload")
1305            .unwrap()
1306            .as_object_mut()
1307            .unwrap()
1308            .insert(
1309                "blast_radius".into(),
1310                json!({ "direct": 2, "transitive": 0, "score": 2.0, "tier": "low" }),
1311            );
1312
1313        let input = EnforcementInput {
1314            rel_path: "src/leaf.rs".into(),
1315            file_record: Some(file_record),
1316            gotcha_records: HashMap::new(),
1317            already_consulted: false,
1318        };
1319        let result = evaluate(&input);
1320        if let Decision::Advisory { context } = &result.decision {
1321            assert!(
1322                !context.contains("Blast radius"),
1323                "low blast radius file should NOT have warning, got: {context}"
1324            );
1325        } else {
1326            panic!("expected Advisory, got {:?}", result.decision);
1327        }
1328    }
1329
1330    // ── detection hardening: prefixes, abs-path, numeric flag values ──────
1331
1332    #[test]
1333    fn classify_strips_sudo_prefix() {
1334        assert_eq!(
1335            classify_command("sudo cat src/secret.rs"),
1336            Some(CommandClass::CatLike)
1337        );
1338    }
1339
1340    #[test]
1341    fn classify_strips_env_assignment_prefix() {
1342        assert_eq!(
1343            classify_command("env LOG=1 cat src/secret.rs"),
1344            Some(CommandClass::CatLike)
1345        );
1346        assert_eq!(
1347            classify_command("LOG=1 DEBUG=2 cat src/secret.rs"),
1348            Some(CommandClass::CatLike)
1349        );
1350    }
1351
1352    #[test]
1353    fn classify_reduces_absolute_path_to_basename() {
1354        assert_eq!(
1355            classify_command("/bin/cat src/secret.rs"),
1356            Some(CommandClass::CatLike)
1357        );
1358    }
1359
1360    #[test]
1361    fn classify_prefix_on_non_read_stays_none() {
1362        // Stripping a wrapper must not invent a read: `sudo rm` is still not one.
1363        assert_eq!(classify_command("sudo rm -rf build"), None);
1364        assert_eq!(classify_command("env X=1 ls"), None);
1365    }
1366
1367    #[test]
1368    fn extract_through_sudo_prefix() {
1369        assert_eq!(
1370            extract_file_path("sudo cat src/secret.rs", CommandClass::CatLike),
1371            Some("src/secret.rs".to_string())
1372        );
1373    }
1374
1375    #[test]
1376    fn extract_through_abs_path() {
1377        assert_eq!(
1378            extract_file_path("/bin/cat src/secret.rs", CommandClass::CatLike),
1379            Some("src/secret.rs".to_string())
1380        );
1381    }
1382
1383    #[test]
1384    fn extract_skips_numeric_flag_value() {
1385        // The `100`/`5` are arguments to `-n`/`-c`, not the file.
1386        assert_eq!(
1387            extract_file_path("tail -n 100 src/secret.rs", CommandClass::CatLike),
1388            Some("src/secret.rs".to_string())
1389        );
1390        assert_eq!(
1391            extract_file_path("head -c 5 src/secret.rs", CommandClass::CatLike),
1392            Some("src/secret.rs".to_string())
1393        );
1394    }
1395
1396    #[test]
1397    fn extract_keeps_attached_numeric_flag() {
1398        // `-5` is itself a flag (starts with '-'), already filtered; path follows.
1399        assert_eq!(
1400            extract_file_path("head -5 src/db.rs", CommandClass::CatLike),
1401            Some("src/db.rs".to_string())
1402        );
1403    }
1404
1405    #[test]
1406    fn sudo_with_flags_is_a_known_gap() {
1407        // Documents the boundary of the prefix fix: wrapper-flag arity is
1408        // ambiguous (`-u` takes a value), so this is intentionally NOT handled.
1409        // Pinned so a future change that closes it also updates the eval
1410        // baseline (tests/fixtures/eval/baseline.json :: adv-sudo-uroot-cat).
1411        assert_eq!(classify_command("sudo -u root cat src/secret.rs"), None);
1412    }
1413
1414    // ── quote-aware tokenizer: grep grammar (pattern vs path) ─────────────
1415
1416    #[test]
1417    fn extract_grep_quoted_pattern_picks_the_path() {
1418        // The quoted token is the PATTERN; the file is the last positional.
1419        assert_eq!(
1420            extract_file_path("grep -r \"secret\" src/db.rs", CommandClass::GrepLike),
1421            Some("src/db.rs".to_string())
1422        );
1423        assert_eq!(
1424            extract_file_path("grep \"pat\" \"src/db.rs\"", CommandClass::GrepLike),
1425            Some("src/db.rs".to_string())
1426        );
1427    }
1428
1429    #[test]
1430    fn extract_grep_without_file_reads_stdin() {
1431        // Only a pattern, no file -> no path to gate.
1432        assert_eq!(
1433            extract_file_path("grep \"secret\"", CommandClass::GrepLike),
1434            None
1435        );
1436    }
1437
1438    #[test]
1439    fn extract_cat_quoted_path_with_spaces() {
1440        // The tokenizer keeps a quoted path's spaces as a single token.
1441        assert_eq!(
1442            extract_file_path("cat \"src/with space.rs\"", CommandClass::CatLike),
1443            Some("src/with space.rs".to_string())
1444        );
1445    }
1446
1447    #[test]
1448    fn shell_tokens_honor_quotes() {
1449        assert_eq!(
1450            shell_tokens("grep -r \"a b\" file.rs"),
1451            vec!["grep", "-r", "a b", "file.rs"]
1452        );
1453        assert_eq!(
1454            shell_tokens("awk '{print $1}' src/db.rs"),
1455            vec!["awk", "{print $1}", "src/db.rs"]
1456        );
1457    }
1458
1459    // ── multi-file extraction (`cat a.rs b.rs`, `grep pat f1 f2`) ─────────
1460
1461    #[test]
1462    fn extract_file_paths_cat_returns_all_files() {
1463        assert_eq!(
1464            extract_file_paths("cat src/a.rs src/b.rs", CommandClass::CatLike),
1465            vec!["src/a.rs", "src/b.rs"]
1466        );
1467        assert_eq!(
1468            extract_file_paths("cat -n src/only.rs", CommandClass::CatLike),
1469            vec!["src/only.rs"]
1470        );
1471    }
1472
1473    #[test]
1474    fn extract_file_paths_grep_drops_the_pattern() {
1475        // grep PATTERN FILE... — the pattern is not a file.
1476        assert_eq!(
1477            extract_file_paths("grep -i secret src/a.rs src/b.rs", CommandClass::GrepLike),
1478            vec!["src/a.rs", "src/b.rs"]
1479        );
1480        // Only a pattern, no file -> no paths.
1481        assert!(extract_file_paths("grep secret", CommandClass::GrepLike).is_empty());
1482    }
1483
1484    #[test]
1485    fn extract_file_path_is_the_primary_of_paths() {
1486        // singular = first cat file / last grep file.
1487        assert_eq!(
1488            extract_file_path("cat a.rs b.rs", CommandClass::CatLike).as_deref(),
1489            Some("a.rs")
1490        );
1491        assert_eq!(
1492            extract_file_path("grep pat f1 f2", CommandClass::GrepLike).as_deref(),
1493            Some("f2")
1494        );
1495    }
1496}