Skip to main content

team_core/
attachments.rs

//! T-32 attachment policy layer.
//!
//! The agent receives a message body containing `📎 attachment: <path>`
//! and asks the broker to read the file. This module owns the
//! decision: accept (return the bytes) or reject (return a reason). It
//! is transport-agnostic — the MCP `read_attachment` tool is one caller;
//! a future REST surface or CLI debug helper would call the same
//! `check_and_read` entry point.
//!
//! Three independent guards are layered before any read:
//! 1. **Path-traversal**: canonicalize the operator-supplied path and
//!    confirm it is a descendant of one of `allowed_roots`.
//! 2. **Size**: stat the file and reject if it exceeds `max_size_bytes`
//!    before any bytes are read.
//! 3. **Scanner** (optional): hand the canonical path to an external
//!    command with a timeout; non-zero exit or timeout → reject.
//!
//! Bytes are returned as-is on accept. No envelope wrapping, no
//! "treat as data" framing — those are prompt-injection mitigations
//! and live in the hook layer per owner ratify.
//!
//! `enabled = false` short-circuits with `RejectReason::Disabled` —
//! no filesystem cost when the operator has flipped the flag.
24
25use std::fs;
26use std::io::Write;
27use std::path::{Path, PathBuf};
28use std::process::{Command, Stdio};
29use std::sync::mpsc;
30use std::thread;
31use std::time::{Duration, SystemTime};
32
33use crate::compose::{AttachmentScanner, Attachments};
34
/// Why the broker declined to hand an attachment back.
///
/// Each variant maps to one refusal point in the read path. Callers
/// turn a value into operator-facing text with `human()`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RejectReason {
    /// The `enabled` flag in the attachments config is `false`.
    Disabled,
    /// No entry of `allowed_roots` survived `$HOME` expansion and
    /// canonicalization, so there is nothing a path could fall under.
    NoAllowedRoots,
    /// Canonicalizing the requested path failed; the payload is the
    /// stringified I/O error.
    PathUnresolvable(String),
    /// The canonical path is not equal to, or nested under, any
    /// allowed root. Carries the canonical path that was tested.
    OutsideAllowedRoots { resolved: PathBuf },
    /// The file is bigger than the configured cap. `size` is the
    /// observed length, `cap` the configured `max_size_bytes`.
    TooLarge { size: u64, cap: u64 },
    /// The scanner reported a rejection. `detail` is whatever the
    /// scanner implementation supplied (stderr excerpt, timeout or
    /// spawn-failure text for the production scanner).
    ScannerRejected { detail: String },
    /// The config names a scanner but the caller handed `None` to
    /// `check_and_read`. Surfaced as a rejection rather than
    /// silently skipping the scan, so a dropped argument upstream
    /// cannot quietly turn scanning off.
    ScannerNotProvided,
    /// Stat or read of the already-resolved path failed; the payload
    /// is the stringified I/O error.
    ReadFailed(String),
}
69
70impl RejectReason {
71    /// One-liner suitable for inclusion in an operator-facing
72    /// notification. Avoids markdown chars so HTML / plain renderers
73    /// both reproduce it byte-for-byte (T-134 coordination with
74    /// wren — no `_*<>&` that would need escaping in either path).
75    pub fn human(&self) -> String {
76        match self {
77            Self::Disabled => "attachments are disabled in this team's compose".into(),
78            Self::NoAllowedRoots => {
79                "no allowed_roots resolved — check attachments.allowed_roots config".into()
80            }
81            Self::PathUnresolvable(e) => format!("could not resolve path: {e}"),
82            Self::OutsideAllowedRoots { resolved } => format!(
83                "path resolves outside allowed_roots: {}",
84                resolved.display()
85            ),
86            Self::TooLarge { size, cap } => {
87                format!("file size {size} bytes exceeds the {cap}-byte cap")
88            }
89            Self::ScannerRejected { detail } => format!("scanner rejected: {detail}"),
90            Self::ScannerNotProvided => {
91                "scanner is configured but the broker did not run it (internal misconfiguration)"
92                    .into()
93            }
94            Self::ReadFailed(e) => format!("read failed: {e}"),
95        }
96    }
97}
98
99/// External-scanner abstraction. Implementations spawn the operator's
100/// configured command, wait up to `timeout`, and return the outcome.
101/// Trait-object shape keeps the read path testable without spawning
102/// real processes — the test seam is the Mock impl in `#[cfg(test)]`.
103pub trait Scanner: Send + Sync {
104    fn scan(&self, path: &Path, timeout: Duration) -> ScanOutcome;
105}
106
/// Verdict returned by a [`Scanner`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScanOutcome {
    /// The scanner raised no objection.
    Clean,
    /// The scanner refused the file; `detail` explains why.
    Rejected { detail: String },
}
112
113/// Resolve `$HOME` and other allow-list roots to canonical paths.
114/// Performed at check-time so a snapshot taken on machine A still
115/// resolves correctly when restored on machine B (different `$HOME`).
116/// Roots that fail to canonicalize are dropped — an operator with a
117/// stale path entry doesn't break the whole policy.
118pub fn resolve_allowed_roots(cfg: &Attachments) -> Vec<PathBuf> {
119    let home = std::env::var_os("HOME").map(PathBuf::from);
120    cfg.allowed_roots
121        .iter()
122        .map(|s| s.as_str())
123        .filter_map(|spec| {
124            let raw = if spec == "$HOME" {
125                home.clone()?
126            } else if let Some(rest) = spec.strip_prefix("$HOME/") {
127                home.clone().map(|h| h.join(rest))?
128            } else {
129                PathBuf::from(spec)
130            };
131            raw.canonicalize().ok()
132        })
133        .collect()
134}
135
/// Report whether `resolved` equals, or lies beneath, at least one
/// entry of `roots`.
///
/// No filesystem access happens here: the comparison is
/// `Path::starts_with`, which matches whole path components, so a
/// sibling such as `/tmp/team-evil` is not considered inside
/// `/tmp/team`. Callers are expected to pass canonical paths on both
/// sides; this function does not resolve `..` or symlinks itself.
pub fn is_within_any_root(resolved: &Path, roots: &[PathBuf]) -> bool {
    for root in roots {
        if resolved.starts_with(root) {
            return true;
        }
    }
    false
}
142
143/// Attempt to read the file the operator pointed at, applying every
144/// configured guard. The scanner is plumbed through as a trait object
145/// so callers (production: real `Command`; tests: mock) share the
146/// same control flow.
147pub fn check_and_read(
148    cfg: &Attachments,
149    raw_path: &Path,
150    scanner: Option<&dyn Scanner>,
151) -> Result<Vec<u8>, RejectReason> {
152    if !cfg.enabled {
153        return Err(RejectReason::Disabled);
154    }
155    let roots = resolve_allowed_roots(cfg);
156    if roots.is_empty() {
157        return Err(RejectReason::NoAllowedRoots);
158    }
159    let resolved = raw_path
160        .canonicalize()
161        .map_err(|e| RejectReason::PathUnresolvable(e.to_string()))?;
162    if !is_within_any_root(&resolved, &roots) {
163        return Err(RejectReason::OutsideAllowedRoots { resolved });
164    }
165    let metadata = fs::metadata(&resolved).map_err(|e| RejectReason::ReadFailed(e.to_string()))?;
166    if metadata.len() > cfg.max_size_bytes {
167        return Err(RejectReason::TooLarge {
168            size: metadata.len(),
169            cap: cfg.max_size_bytes,
170        });
171    }
172    // Tight scanner contract: if the operator configured a scanner,
173    // the caller MUST hand one to `check_and_read`. A `None` here
174    // surfaces as `ScannerNotProvided` rather than silently skipping
175    // the scan — caught by the unit test below, so a refactor that
176    // drops the arg upstream fails loudly instead of disabling
177    // malware checking.
178    if let Some(spec) = cfg.scanner.as_ref() {
179        let Some(s) = scanner else {
180            return Err(RejectReason::ScannerNotProvided);
181        };
182        let outcome = s.scan(&resolved, Duration::from_secs(spec.timeout_seconds));
183        if let ScanOutcome::Rejected { detail } = outcome {
184            return Err(RejectReason::ScannerRejected { detail });
185        }
186    }
187    fs::read(&resolved).map_err(|e| RejectReason::ReadFailed(e.to_string()))
188}
189
190/// T-32b: outcome of a successful read — bytes plus metadata the
191/// caller surfaces to the agent (and writes to the audit log).
192#[derive(Debug, Clone)]
193pub struct AcceptedAttachment {
194    pub bytes: Vec<u8>,
195    pub blake3_hex: String,
196    pub size: u64,
197    /// Canonicalized path that passed the policy check. Returned so
198    /// the audit log captures the resolved-not-the-typed path.
199    pub resolved: PathBuf,
200}
201
202/// Wrapper that runs `check_and_read` and packages the result with
203/// the metadata team-mcp's `read_attachment` tool returns to the
204/// agent. Centralises hashing so the staging-tempfile name (content-
205/// addressed) and the audit log entry stay in sync.
206pub fn check_and_read_with_metadata(
207    cfg: &Attachments,
208    raw_path: &Path,
209    scanner: Option<&dyn Scanner>,
210) -> Result<AcceptedAttachment, RejectReason> {
211    let bytes = check_and_read(cfg, raw_path, scanner)?;
212    let blake3_hex = blake3::hash(&bytes).to_hex().to_string();
213    let resolved = raw_path
214        .canonicalize()
215        .map_err(|e| RejectReason::PathUnresolvable(e.to_string()))?;
216    let size = bytes.len() as u64;
217    Ok(AcceptedAttachment {
218        bytes,
219        blake3_hex,
220        size,
221        resolved,
222    })
223}
224
/// Location of the T-32b staging directory for a given compose root:
/// `<compose_root>/state/attachments-staging`. Purely a path
/// computation — nothing is created or checked on disk.
pub fn staging_dir(compose_root: &Path) -> PathBuf {
    let mut dir = compose_root.to_path_buf();
    dir.push("state/attachments-staging");
    dir
}
231
232/// Write `accepted.bytes` to the staging dir under a content-
233/// addressed name. Idempotent: if the file already exists with the
234/// expected size, we skip the write (operator may have read the
235/// same attachment recently). Returns the staged path so the agent
236/// can `read_file()` it directly.
237pub fn stage_to_tempfile(
238    staging_dir: &Path,
239    accepted: &AcceptedAttachment,
240) -> Result<PathBuf, std::io::Error> {
241    fs::create_dir_all(staging_dir)?;
242    let path = staging_dir.join(&accepted.blake3_hex);
243    let needs_write = !matches!(fs::metadata(&path), Ok(m) if m.len() == accepted.size);
244    if needs_write {
245        // Atomic write via tempfile + rename so a crash mid-write
246        // can't leave a half-baked stage file with the canonical name.
247        let tmp = staging_dir.join(format!("{}.tmp", &accepted.blake3_hex));
248        fs::write(&tmp, &accepted.bytes)?;
249        fs::rename(&tmp, &path)?;
250    } else {
251        // Bump mtime so the sweep doesn't reap a freshly-touched file.
252        let _ = touch(&path);
253    }
254    Ok(path)
255}
256
/// Set the modification time of an existing file to the current
/// system time. The file is opened for writing without `create`, so a
/// missing path yields an error instead of a new empty file.
fn touch(path: &Path) -> std::io::Result<()> {
    let stamp = SystemTime::now();
    fs::File::options()
        .write(true)
        .open(path)?
        .set_modified(stamp)
}
263
/// T-32b: drop staged files whose mtime is older than `now - ttl`.
/// Intended as a best-effort cleanup; returns the number of files
/// actually removed so callers can log a single summary line.
///
/// A missing staging dir is not an error (`Ok(0)`). Per-entry
/// problems never abort the sweep: an entry that cannot be listed,
/// stat'd, or removed is skipped, and an entry whose mtime cannot be
/// read is kept rather than assumed stale.
///
/// # Errors
/// Only a failure to open `staging_dir` for listing (other than
/// "not found") is returned.
pub fn sweep_expired(staging_dir: &Path, ttl: Duration) -> std::io::Result<usize> {
    // Let `read_dir` report a missing directory instead of probing
    // with `exists()` first — one syscall fewer and no window for the
    // directory to vanish between the probe and the listing.
    let entries = match fs::read_dir(staging_dir) {
        Ok(entries) => entries,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
        Err(e) => return Err(e),
    };
    // A ttl reaching back before the representable range clamps to
    // the epoch, i.e. nothing is old enough to reap.
    let cutoff = SystemTime::now()
        .checked_sub(ttl)
        .unwrap_or(SystemTime::UNIX_EPOCH);
    let mut reaped = 0usize;
    for entry in entries {
        let Ok(entry) = entry else { continue };
        // Unknown mtime means unknown age: keep the file.
        let Ok(mtime) = entry.metadata().and_then(|m| m.modified()) else {
            continue;
        };
        if mtime < cutoff && fs::remove_file(entry.path()).is_ok() {
            reaped += 1;
        }
    }
    Ok(reaped)
}
291
292/// Production scanner: spawns the operator-configured command with
293/// the resolved path as a single argument, waits up to `timeout`,
294/// captures stderr for the reject detail. The wait uses
295/// `std::thread::spawn` + `mpsc::recv_timeout` so team-core stays
296/// sync (no tokio dep — owner-ratify variant 4).
297pub struct RealScanner;
298
299impl Scanner for RealScanner {
300    fn scan(&self, path: &Path, timeout: Duration) -> ScanOutcome {
301        // Look up the configured command from the closure-captured
302        // spec via the wrapper below — the trait API doesn't carry
303        // the command, callers construct via `RealScanner::for_spec`.
304        // Default impl: panic-free fallback for the trait when used
305        // without the wrapper. In practice the wrapper is the only
306        // caller (see `for_spec`).
307        let _ = (path, timeout);
308        ScanOutcome::Rejected {
309            detail: "RealScanner used without a configured command".into(),
310        }
311    }
312}
313
314impl RealScanner {
315    /// Bind a `RealScanner` to a specific scanner spec. Returns a
316    /// boxed trait object so the call site stays uniform with the
317    /// `MockScanner` shape used in tests.
318    pub fn for_spec(spec: &AttachmentScanner) -> Box<dyn Scanner> {
319        Box::new(RealScannerForSpec {
320            command: spec.command.clone(),
321        })
322    }
323}
324
325struct RealScannerForSpec {
326    command: String,
327}
328
329impl Scanner for RealScannerForSpec {
330    fn scan(&self, path: &Path, timeout: Duration) -> ScanOutcome {
331        let cmd = self.command.clone();
332        let path_owned = path.to_path_buf();
333        let (tx, rx) = mpsc::channel();
334        thread::spawn(move || {
335            let result = Command::new(&cmd)
336                .arg(&path_owned)
337                .stdin(Stdio::null())
338                .stdout(Stdio::piped())
339                .stderr(Stdio::piped())
340                .output();
341            let _ = tx.send(result);
342        });
343        match rx.recv_timeout(timeout) {
344            Ok(Ok(output)) => {
345                if output.status.success() {
346                    ScanOutcome::Clean
347                } else {
348                    let detail = String::from_utf8_lossy(&output.stderr).trim().to_string();
349                    let detail = if detail.is_empty() {
350                        format!("exited with status {}", output.status)
351                    } else {
352                        truncate_for_reject(&detail)
353                    };
354                    ScanOutcome::Rejected { detail }
355                }
356            }
357            Ok(Err(e)) => ScanOutcome::Rejected {
358                detail: format!("scanner spawn failed: {e}"),
359            },
360            Err(_) => ScanOutcome::Rejected {
361                detail: format!("scanner timed out after {}s", timeout.as_secs()),
362            },
363        }
364    }
365}
366
/// Limit scanner output to at most 512 characters (Unicode scalar
/// values, not bytes) so a noisy scanner cannot inflate the reject
/// notification or the audit line. Input within the limit is
/// returned unchanged; longer input is cut after the 512th character
/// and a single `…` is appended.
fn truncate_for_reject(s: &str) -> String {
    const CAP: usize = 512;
    // The character at index CAP exists only when the input is over
    // the limit; its byte offset is exactly where to cut.
    match s.char_indices().nth(CAP) {
        None => s.to_owned(),
        Some((cut, _)) => {
            let mut clipped = String::with_capacity(cut + '…'.len_utf8());
            clipped.push_str(&s[..cut]);
            clipped.push('…');
            clipped
        }
    }
}
381
382/// T-32b audit log entry. Written as a single JSON line per attempt
383/// so the file is `tail -f`-friendly + parseable with `jq`.
384#[derive(Debug, Clone, serde::Serialize)]
385pub struct AuditEntry<'a> {
386    /// RFC3339 UTC timestamp.
387    pub ts: String,
388    /// Operator-supplied path verbatim — preserved so a typo is
389    /// debuggable directly from the log.
390    pub path: &'a str,
391    /// Canonicalized path (`None` when the path failed to resolve).
392    pub resolved: Option<&'a str>,
393    pub outcome: &'static str,
394    pub size: Option<u64>,
395    pub blake3: Option<&'a str>,
396    pub reason: Option<String>,
397}
398
399/// Append a single JSON-line audit entry. No-op when
400/// `audit_log_path` is `None`. Errors creating the parent dir or
401/// opening the file are surfaced to the caller — production paths
402/// log-and-continue so a misconfigured audit dir doesn't block real
403/// reads.
404pub fn append_audit(audit_log_path: Option<&Path>, entry: &AuditEntry<'_>) -> std::io::Result<()> {
405    let Some(p) = audit_log_path else {
406        return Ok(());
407    };
408    if let Some(parent) = p.parent() {
409        fs::create_dir_all(parent)?;
410    }
411    let line = serde_json::to_string(entry).map_err(std::io::Error::other)?;
412    let mut f = fs::OpenOptions::new().append(true).create(true).open(p)?;
413    f.write_all(line.as_bytes())?;
414    f.write_all(b"\n")?;
415    Ok(())
416}
417
418/// Helper: format an RFC3339 UTC timestamp suitable for audit
419/// entries. Pulled out so tests can pin the format independently of
420/// the call site.
421pub fn now_rfc3339() -> String {
422    chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true)
423}
424
425#[cfg(test)]
426mod tests {
427    use super::*;
428    use std::sync::Mutex;
429    use tempfile::TempDir;
430
431    /// Test scanner — accepts when configured to, rejects with a
432    /// canned detail otherwise. The Mutex tracks which paths it was
433    /// called on so tests can assert the scanner ran (or didn't).
434    struct MockScanner {
435        outcome: ScanOutcome,
436        calls: Mutex<Vec<PathBuf>>,
437    }
438
439    impl MockScanner {
440        fn clean() -> Self {
441            Self {
442                outcome: ScanOutcome::Clean,
443                calls: Mutex::new(Vec::new()),
444            }
445        }
446        fn rejecting(detail: &str) -> Self {
447            Self {
448                outcome: ScanOutcome::Rejected {
449                    detail: detail.into(),
450                },
451                calls: Mutex::new(Vec::new()),
452            }
453        }
454        fn call_count(&self) -> usize {
455            self.calls.lock().unwrap().len()
456        }
457    }
458
459    impl Scanner for MockScanner {
460        fn scan(&self, path: &Path, _timeout: Duration) -> ScanOutcome {
461            self.calls.lock().unwrap().push(path.to_path_buf());
462            self.outcome.clone()
463        }
464    }
465
466    fn cfg_with_root(root: &Path, max: u64) -> Attachments {
467        Attachments {
468            enabled: true,
469            max_size_bytes: max,
470            allowed_roots: vec![root.to_string_lossy().into_owned()],
471            scanner: None,
472            audit_log_path: None,
473            tempfile_ttl_seconds: 6 * 60 * 60,
474        }
475    }
476
477    #[test]
478    fn disabled_short_circuits_before_any_filesystem_work() {
479        // `enabled: false` returns Disabled even when the path would
480        // pass every other check — the operator's flip is honoured
481        // synchronously, no read attempted.
482        let dir = TempDir::new().unwrap();
483        let p = dir.path().join("ok.txt");
484        fs::write(&p, b"hi").unwrap();
485        let mut cfg = cfg_with_root(dir.path(), 1024);
486        cfg.enabled = false;
487        assert_eq!(
488            check_and_read(&cfg, &p, None).unwrap_err(),
489            RejectReason::Disabled
490        );
491    }
492
493    #[test]
494    fn unresolvable_path_returns_path_unresolvable() {
495        let dir = TempDir::new().unwrap();
496        let cfg = cfg_with_root(dir.path(), 1024);
497        let missing = dir.path().join("nope.txt");
498        let err = check_and_read(&cfg, &missing, None).unwrap_err();
499        assert!(
500            matches!(err, RejectReason::PathUnresolvable(_)),
501            "got {err:?}"
502        );
503    }
504
505    #[test]
506    fn path_outside_allowed_roots_is_rejected() {
507        // Two tempdirs: file lives in `outside`, allow-list points at
508        // `inside`. Canonicalize on both sides so symlinks (macOS
509        // /var → /private/var) don't false-positive.
510        let inside = TempDir::new().unwrap();
511        let outside = TempDir::new().unwrap();
512        let p = outside.path().join("leak.txt");
513        fs::write(&p, b"x").unwrap();
514        let cfg = cfg_with_root(inside.path(), 1024);
515        let err = check_and_read(&cfg, &p, None).unwrap_err();
516        assert!(
517            matches!(err, RejectReason::OutsideAllowedRoots { .. }),
518            "got {err:?}"
519        );
520    }
521
522    #[test]
523    fn file_above_size_cap_is_rejected_before_read() {
524        let dir = TempDir::new().unwrap();
525        let p = dir.path().join("big.bin");
526        fs::write(&p, vec![0u8; 16]).unwrap();
527        let cfg = cfg_with_root(dir.path(), 8);
528        let err = check_and_read(&cfg, &p, None).unwrap_err();
529        match err {
530            RejectReason::TooLarge { size, cap } => {
531                assert_eq!(size, 16);
532                assert_eq!(cap, 8);
533            }
534            other => panic!("expected TooLarge, got {other:?}"),
535        }
536    }
537
538    #[test]
539    fn happy_path_returns_bytes_unmodified() {
540        let dir = TempDir::new().unwrap();
541        let p = dir.path().join("ok.md");
542        fs::write(&p, b"hello attachments").unwrap();
543        let cfg = cfg_with_root(dir.path(), 1024);
544        let bytes = check_and_read(&cfg, &p, None).unwrap();
545        assert_eq!(bytes, b"hello attachments");
546    }
547
548    #[test]
549    fn scanner_clean_passes_through() {
550        let dir = TempDir::new().unwrap();
551        let p = dir.path().join("ok.md");
552        fs::write(&p, b"hi").unwrap();
553        let mut cfg = cfg_with_root(dir.path(), 1024);
554        cfg.scanner = Some(crate::compose::AttachmentScanner {
555            command: "true".into(),
556            timeout_seconds: 30,
557        });
558        let scanner = MockScanner::clean();
559        let bytes = check_and_read(&cfg, &p, Some(&scanner)).unwrap();
560        assert_eq!(bytes, b"hi");
561        assert_eq!(scanner.call_count(), 1, "scanner ran exactly once");
562    }
563
564    #[test]
565    fn scanner_reject_blocks_read() {
566        // Bytes must NOT come back — the agent never sees the file
567        // content even though the size + path checks both passed.
568        let dir = TempDir::new().unwrap();
569        let p = dir.path().join("malware.exe");
570        fs::write(&p, b"would-be-bad").unwrap();
571        let mut cfg = cfg_with_root(dir.path(), 1024);
572        cfg.scanner = Some(crate::compose::AttachmentScanner {
573            command: "false".into(),
574            timeout_seconds: 30,
575        });
576        let scanner = MockScanner::rejecting("EICAR test signature");
577        let err = check_and_read(&cfg, &p, Some(&scanner)).unwrap_err();
578        match err {
579            RejectReason::ScannerRejected { detail } => {
580                assert!(
581                    detail.contains("EICAR"),
582                    "scanner detail must surface to the reason: {detail}"
583                );
584            }
585            other => panic!("expected ScannerRejected, got {other:?}"),
586        }
587    }
588
589    #[test]
590    fn scanner_only_runs_after_path_and_size_pass() {
591        // Order matters for cost: an oversize file shouldn't pay the
592        // scanner cost. Pin the order with a scanner that would
593        // reject — if it ran, we'd see ScannerRejected; we expect
594        // TooLarge instead.
595        let dir = TempDir::new().unwrap();
596        let p = dir.path().join("big.bin");
597        fs::write(&p, vec![0u8; 100]).unwrap();
598        let mut cfg = cfg_with_root(dir.path(), 8);
599        cfg.scanner = Some(crate::compose::AttachmentScanner {
600            command: "false".into(),
601            timeout_seconds: 30,
602        });
603        let scanner = MockScanner::rejecting("would-reject");
604        let err = check_and_read(&cfg, &p, Some(&scanner)).unwrap_err();
605        assert!(matches!(err, RejectReason::TooLarge { .. }), "got {err:?}");
606        assert_eq!(scanner.call_count(), 0, "scanner short-circuited");
607    }
608
609    #[test]
610    fn human_message_avoids_markdown_chars() {
611        // T-134 coordination: messages flow through team-bot's
612        // render path. Both render_plain and the upcoming HTML
613        // renderer must reproduce these byte-for-byte. Pinning a
614        // representative sample.
615        let r = RejectReason::TooLarge { size: 100, cap: 50 };
616        let s = r.human();
617        for c in ['<', '>', '&', '*', '_'] {
618            assert!(!s.contains(c), "human() message contains `{c}`: {s}");
619        }
620    }
621
622    #[test]
623    fn empty_allowed_roots_returns_no_allowed_roots() {
624        let dir = TempDir::new().unwrap();
625        let p = dir.path().join("ok.txt");
626        fs::write(&p, b"hi").unwrap();
627        let mut cfg = cfg_with_root(dir.path(), 1024);
628        cfg.allowed_roots = vec![];
629        let err = check_and_read(&cfg, &p, None).unwrap_err();
630        assert_eq!(err, RejectReason::NoAllowedRoots);
631    }
632
633    #[test]
634    fn scanner_configured_but_caller_passes_none_returns_scanner_not_provided() {
635        // Tight contract per peer review: a caller path that drops
636        // the scanner argument while the compose still configures
637        // one must surface as ScannerNotProvided. The previous shape
638        // silently skipped the scan, which would let a refactor
639        // disable malware checking with zero test failure.
640        let dir = TempDir::new().unwrap();
641        let p = dir.path().join("ok.md");
642        fs::write(&p, b"hi").unwrap();
643        let mut cfg = cfg_with_root(dir.path(), 1024);
644        cfg.scanner = Some(crate::compose::AttachmentScanner {
645            command: "true".into(),
646            timeout_seconds: 30,
647        });
648        let err = check_and_read(&cfg, &p, None).unwrap_err();
649        assert_eq!(err, RejectReason::ScannerNotProvided);
650    }
651
652    #[test]
653    fn is_within_any_root_handles_descendant_and_equal_paths() {
654        let root = PathBuf::from("/tmp/team");
655        let descendant = PathBuf::from("/tmp/team/sub/file.md");
656        let elsewhere = PathBuf::from("/tmp/other/file.md");
657        assert!(is_within_any_root(&descendant, std::slice::from_ref(&root)));
658        assert!(is_within_any_root(&root, std::slice::from_ref(&root)));
659        assert!(!is_within_any_root(&elsewhere, &[root]));
660    }
661
662    // ── T-32b: staging + sweep + audit + scanner ───────────────────
663
664    fn accepted_for(bytes: &[u8]) -> AcceptedAttachment {
665        AcceptedAttachment {
666            bytes: bytes.to_vec(),
667            blake3_hex: blake3::hash(bytes).to_hex().to_string(),
668            size: bytes.len() as u64,
669            resolved: PathBuf::from("/dev/null"),
670        }
671    }
672
673    #[test]
674    fn stage_to_tempfile_writes_and_returns_content_addressed_path() {
675        let dir = TempDir::new().unwrap();
676        let staging = dir.path().join("attachments-staging");
677        let accepted = accepted_for(b"hello attachments");
678        let staged = stage_to_tempfile(&staging, &accepted).unwrap();
679        assert!(staged.exists(), "staged file present: {}", staged.display());
680        assert_eq!(fs::read(&staged).unwrap(), b"hello attachments");
681        // Filename is the blake3 hex — content-addressed.
682        assert_eq!(staged.file_name().unwrap(), accepted.blake3_hex.as_str());
683    }
684
685    #[test]
686    fn stage_to_tempfile_is_idempotent_for_identical_content() {
687        // Same content → same path → no overwrite. Operator reading
688        // the same attachment twice in a session should not multiply
689        // staging-dir size.
690        let dir = TempDir::new().unwrap();
691        let staging = dir.path().join("attachments-staging");
692        let accepted = accepted_for(b"same bytes");
693        let p1 = stage_to_tempfile(&staging, &accepted).unwrap();
694        let p2 = stage_to_tempfile(&staging, &accepted).unwrap();
695        assert_eq!(p1, p2);
696        // Only one file in the staging dir.
697        let entries: Vec<_> = fs::read_dir(&staging).unwrap().flatten().collect();
698        assert_eq!(entries.len(), 1, "no duplicates: {entries:?}");
699    }
700
701    #[test]
702    fn sweep_expired_drops_stale_files_and_returns_count() {
703        let dir = TempDir::new().unwrap();
704        let staging = dir.path().join("staging");
705        fs::create_dir_all(&staging).unwrap();
706        // Two stage files: one freshly written, one with mtime
707        // pushed back beyond the TTL.
708        let fresh = staging.join("fresh");
709        fs::write(&fresh, b"fresh").unwrap();
710        let stale = staging.join("stale");
711        fs::write(&stale, b"stale").unwrap();
712        let old = SystemTime::now() - Duration::from_secs(7200);
713        fs::OpenOptions::new()
714            .write(true)
715            .open(&stale)
716            .unwrap()
717            .set_modified(old)
718            .unwrap();
719
720        let reaped = sweep_expired(&staging, Duration::from_secs(3600)).unwrap();
721        assert_eq!(reaped, 1);
722        assert!(fresh.exists(), "fresh file kept");
723        assert!(!stale.exists(), "stale file reaped");
724    }
725
726    #[test]
727    fn sweep_expired_returns_zero_when_dir_missing() {
728        // Best-effort cleanup — a missing staging dir on first boot
729        // is normal, not an error.
730        let dir = TempDir::new().unwrap();
731        let nonexistent = dir.path().join("never-created");
732        let reaped = sweep_expired(&nonexistent, Duration::from_secs(60)).unwrap();
733        assert_eq!(reaped, 0);
734    }
735
736    #[test]
737    fn audit_no_op_when_path_unset() {
738        // The configured-but-unset code path is the most common one
739        // (default-no-audit); the helper has to be silent and
740        // success-returning so callers don't need a separate guard.
741        let entry = AuditEntry {
742            ts: now_rfc3339(),
743            path: "/whatever",
744            resolved: None,
745            outcome: "accept",
746            size: Some(0),
747            blake3: None,
748            reason: None,
749        };
750        append_audit(None, &entry).unwrap();
751    }
752
753    #[test]
754    fn audit_appends_jsonl_lines() {
755        let dir = TempDir::new().unwrap();
756        let log = dir.path().join("audit.log");
757        for i in 0..3 {
758            let entry = AuditEntry {
759                ts: format!("2026-05-10T15:00:0{i}Z"),
760                path: "/some/path",
761                resolved: Some("/canonical/path"),
762                outcome: if i == 2 { "reject" } else { "accept" },
763                size: Some(i),
764                blake3: Some("abcdef"),
765                reason: if i == 2 {
766                    Some("too large".into())
767                } else {
768                    None
769                },
770            };
771            append_audit(Some(&log), &entry).unwrap();
772        }
773        let body = fs::read_to_string(&log).unwrap();
774        let lines: Vec<&str> = body.lines().collect();
775        assert_eq!(lines.len(), 3);
776        // Each line is parseable JSON with the expected shape.
777        for (i, line) in lines.iter().enumerate() {
778            let v: serde_json::Value = serde_json::from_str(line).unwrap();
779            assert!(v["ts"].is_string());
780            assert_eq!(v["size"].as_i64().unwrap(), i as i64);
781        }
782    }
783
784    #[test]
785    fn audit_creates_parent_dir_on_first_write() {
786        // Operator points `audit_log_path` at a fresh subdir under
787        // state/. The helper has to create the dir before opening
788        // the file — no manual `mkdir -p` step.
789        let dir = TempDir::new().unwrap();
790        let log = dir.path().join("audit/attempts.log");
791        let entry = AuditEntry {
792            ts: now_rfc3339(),
793            path: "/x",
794            resolved: None,
795            outcome: "accept",
796            size: None,
797            blake3: None,
798            reason: None,
799        };
800        append_audit(Some(&log), &entry).unwrap();
801        assert!(log.exists());
802    }
803
804    #[test]
805    fn check_and_read_with_metadata_returns_blake3_and_size() {
806        let dir = TempDir::new().unwrap();
807        let p = dir.path().join("ok.md");
808        fs::write(&p, b"twelve chars").unwrap();
809        let cfg = cfg_with_root(dir.path(), 1024);
810        let acc = check_and_read_with_metadata(&cfg, &p, None).unwrap();
811        assert_eq!(acc.size, 12);
812        assert_eq!(acc.bytes, b"twelve chars");
813        assert_eq!(
814            acc.blake3_hex,
815            blake3::hash(b"twelve chars").to_hex().to_string()
816        );
817    }
818
819    #[test]
820    fn real_scanner_for_spec_clean_path_returns_clean() {
821        // `/usr/bin/true` — universally available on linux runners.
822        if !Path::new("/usr/bin/true").exists() {
823            return;
824        }
825        let dir = TempDir::new().unwrap();
826        let dummy = dir.path().join("any");
827        fs::write(&dummy, b"x").unwrap();
828        let scanner = RealScanner::for_spec(&AttachmentScanner {
829            command: "/usr/bin/true".into(),
830            timeout_seconds: 5,
831        });
832        assert_eq!(
833            scanner.scan(&dummy, Duration::from_secs(5)),
834            ScanOutcome::Clean
835        );
836    }
837
838    #[test]
839    fn real_scanner_for_spec_nonzero_exit_returns_rejected() {
840        if !Path::new("/usr/bin/false").exists() {
841            return;
842        }
843        let dir = TempDir::new().unwrap();
844        let dummy = dir.path().join("any");
845        fs::write(&dummy, b"x").unwrap();
846        let scanner = RealScanner::for_spec(&AttachmentScanner {
847            command: "/usr/bin/false".into(),
848            timeout_seconds: 5,
849        });
850        match scanner.scan(&dummy, Duration::from_secs(5)) {
851            ScanOutcome::Rejected { detail } => {
852                // `false` writes nothing to stderr; we surface
853                // the exit-status fallback message.
854                assert!(
855                    detail.contains("status") || !detail.is_empty(),
856                    "non-empty detail: {detail}"
857                );
858            }
859            other => panic!("expected Rejected, got {other:?}"),
860        }
861    }
862
863    #[test]
864    fn real_scanner_for_spec_timeout_returns_rejected() {
865        // `/bin/sleep 5` against a sub-second timeout — the
866        // `recv_timeout` path. Detail mentions the timeout so
867        // operators can tune `scanner.timeout_seconds` from log
868        // output alone.
869        //
870        // The previous shape wrote a `#!/bin/sh\nsleep 5` script
871        // into a TempDir, chmod'd it +x, and pointed the scanner
872        // at it. That hit a flake on otis's #151 CI run: the
873        // chmod/exec-bit visibility race on a forked subprocess
874        // surfaced as a "scanner spawn failed: permission denied"
875        // result instead of a timeout. Using `/bin/sleep` directly
876        // — present on every linux runner — eliminates the FS
877        // race entirely. The `path` arg passed to scan() is
878        // ceremonial here (the trait forwards it as a process
879        // arg; sleep treats it as a duration, which is exactly
880        // what we want).
881        if !Path::new("/bin/sleep").exists() {
882            return;
883        }
884        let scanner: Box<dyn Scanner> = Box::new(RealScannerForSpec {
885            command: "/bin/sleep".into(),
886        });
887        let outcome = scanner.scan(Path::new("5"), Duration::from_millis(500));
888        match outcome {
889            ScanOutcome::Rejected { detail } => {
890                assert!(
891                    detail.contains("timed out"),
892                    "timeout reason in detail: {detail}"
893                );
894            }
895            other => panic!("expected Rejected on timeout, got {other:?}"),
896        }
897    }
898
899    #[test]
900    fn truncate_for_reject_caps_long_strings() {
901        let long: String = "x".repeat(1000);
902        let out = truncate_for_reject(&long);
903        assert!(out.chars().count() <= 513, "<= cap + ellipsis");
904        assert!(out.ends_with('…'));
905    }
906
907    #[test]
908    fn truncate_for_reject_passes_short_strings_through() {
909        assert_eq!(truncate_for_reject("clean"), "clean");
910    }
911}