travelagent-core 1.11.1

//! Phase I5: spec↔test linkage primitives for Sparring Review mode.
//!
//! Two ways a generated test file links back to a captured spec
//! comment:
//!
//! 1. **Inline marker.** The test body contains a line matching
//!    `<line-comment-prefix> trv-spec: <spec_id>` — the authoritative
//!    link. Agents produce this by convention when they generate a
//!    test via `trv_write_test_from_spec`; `trv_propose_accept_test`
//!    enforces it at propose time. Supported comment prefixes cover
//!    `//` (Rust/C/Go/JS/Kotlin/Swift/...), `#` (Python/Ruby/shell),
//!    `--` (SQL/Haskell/Lua), and `;` (Lisps/asm).
//! 2. **Filename fallback.** The test file's basename contains
//!    `spec_<first8>` (first eight characters of the spec id). Used
//!    for languages whose line-comment syntax the marker parser
//!    doesn't recognise (rare in practice) and as a defence in depth
//!    for tests whose body the agent forgot to mark.
//!
//! Accepting either proof keeps the contract flexible without
//! dropping the "every generated test points at a spec" invariant.
//! The reconciliation panel (I5-2) scans the repo with
//! [`scan_spec_links`] to build a `spec_id → test_path` map.

use std::collections::HashMap;
use std::path::{Path, PathBuf};

/// Short-id prefix length used by the filename fallback. Matches the
/// first segment of the UUID that `uuid::Uuid::new_v4().to_string()`
/// emits (`xxxxxxxx-...`), which is what trv uses for comment ids.
///
/// The length is counted in characters, not bytes: `short_id` cuts
/// the id at the byte offset of the character at this index.
pub const SPEC_SHORT_ID_LEN: usize = 8;

/// Line-comment prefixes trv recognises when looking for the
/// `trv-spec: <id>` marker.
///
/// `strip_comment_prefix` tries the entries in slice order and stops
/// at the first one that matches, so if one entry is ever a prefix of
/// another the longer one must be listed first — `--` before `-`
/// would matter if we ever added a single-dash style. None of the
/// current entries overlap.
const COMMENT_PREFIXES: &[&str] = &["//", "#", "--", ";"];

/// Where a spec comment stands relative to its generated test.
///
/// As of v1.4.0 only `Unlinked` and `Linked` are ever produced.
/// `Reconciling` already exists in the type so that the v1.4.1
/// test-runner integration (I4c-3) can start emitting it without
/// breaking callers or persisted state.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SparringStatus {
    /// No generated test points at this spec yet.
    Unlinked,
    /// A generated test claims this spec, either through an inline
    /// marker or through its filename.
    Linked,
    /// A linked test was run and is failing, i.e. author intent and
    /// reviewer spec disagree. Reserved for the I4c-3 test-runner
    /// integration; nothing produces it in v1.4.0.
    Reconciling,
}

impl SparringStatus {
    /// Stable wire token for this status.
    ///
    /// Shared by the status bar, JSON responses, and (future) MCP
    /// notifications so that no caller spells the string by hand and
    /// lets it drift.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        use SparringStatus::{Linked, Reconciling, Unlinked};

        match self {
            Reconciling => "reconciling",
            Linked => "linked",
            Unlinked => "unlinked",
        }
    }
}

/// Validate that `test_body` or `test_path` carries a spec-link proof
/// for `spec_id`.
///
/// Two proofs are accepted, checked in this order:
///
/// 1. some line of `test_body` is a `trv-spec: <id>` marker (as parsed
///    by [`find_marker_spec_id`]) whose id equals `spec_id` exactly;
/// 2. the basename of `test_path` contains `spec_<first8>` (see
///    [`filename_claims_spec`]).
///
/// Every marker line is considered, not only the first one: a single
/// test file may carry markers for several specs, and the repo scanner
/// links each of them, so propose-time validation has to accept each
/// of them as well.
///
/// `spec_id` is the full id (e.g. `"e2a1c7f0-..."`); callers typically
/// pass the id they already hold from `trv_list_spec_comments`.
///
/// # Errors
/// Returns `Err(message)` when the body has no recognised marker for
/// this spec *and* the basename lacks the short-id fragment. The
/// message describes *both* options so the agent knows it can add the
/// marker *or* rename the file.
pub fn validate_spec_link(test_path: &str, test_body: &str, spec_id: &str) -> Result<(), String> {
    // Feed the shared parser one line at a time so that a marker for
    // `spec_id` is found even when it is not the first marker in the
    // body.
    let body_has_marker = test_body
        .lines()
        .any(|line| find_marker_spec_id(line).as_deref() == Some(spec_id));
    if body_has_marker || filename_claims_spec(test_path, spec_id) {
        return Ok(());
    }
    Err(format!(
        "test_body has no `trv-spec: {spec_id}` marker and basename of \
         `{test_path}` lacks `spec_{short}` — add a line-comment marker \
         (`// trv-spec: {spec_id}`, `# trv-spec: ...`, `-- trv-spec: ...`, \
         or `; trv-spec: ...`) to the body or name the file to include \
         `spec_{short}`",
        short = short_id(spec_id),
    ))
}

/// Return the spec id named by the first `trv-spec:` marker in `body`,
/// or `None` when no line carries one.
///
/// A line is a marker when, after leading whitespace, it starts with a
/// recognised comment prefix, optionally more whitespace, and then the
/// literal `trv-spec:`. The id is the text that follows, cut at the
/// first whitespace character or `*`, with any trailing characters
/// that are neither ASCII hex digits nor `-` trimmed off. A marker
/// whose id comes out empty is ignored and the search continues on the
/// following lines.
///
/// This is public so the scanner and the propose-time validator can
/// share one parser — if they diverged, a marker accepted at propose
/// time could go unfound during reconciliation.
#[must_use]
pub fn find_marker_spec_id(body: &str) -> Option<String> {
    body.lines().find_map(|line| {
        let payload = strip_comment_prefix(line.trim_start())?
            .trim_start()
            .strip_prefix("trv-spec:")?;
        // Ids are UUIDs, so the token ends at whitespace. `*` also ends
        // it — presumably so a trailing block-comment closer is not
        // swallowed into the id (TODO confirm).
        let token = payload
            .trim_start()
            .split(|c: char| c == '*' || c.is_whitespace())
            .next()
            .unwrap_or("");
        let id = token.trim_end_matches(|c: char| !(c.is_ascii_hexdigit() || c == '-'));
        (!id.is_empty()).then(|| id.to_string())
    })
}

/// Report whether the basename of `test_path` contains
/// `spec_<first8>` — the filename fallback proof.
///
/// Only the final path component is examined, so directory names such
/// as `.specs/` can never produce a match. The comparison is a plain
/// case-sensitive substring test. Returns `false` when the path has no
/// final component or that component is not valid UTF-8.
#[must_use]
pub fn filename_claims_spec(test_path: &str, spec_id: &str) -> bool {
    let basename = Path::new(test_path)
        .file_name()
        .and_then(|name| name.to_str());
    match basename {
        Some(base) => base.contains(format!("spec_{}", short_id(spec_id)).as_str()),
        None => false,
    }
}

/// Return the first `SPEC_SHORT_ID_LEN` characters of `spec_id`, or
/// the whole string when it is shorter than that.
///
/// The cut position comes from `char_indices`, so it always lands on a
/// character boundary and the slice cannot panic on multi-byte input.
fn short_id(spec_id: &str) -> &str {
    match spec_id.char_indices().nth(SPEC_SHORT_ID_LEN) {
        Some((end, _)) => &spec_id[..end],
        None => spec_id,
    }
}

/// Strip the first entry of `COMMENT_PREFIXES` that `line` starts with
/// and return the remainder; `None` when `line` starts with none of
/// them. The caller is expected to have removed leading whitespace.
fn strip_comment_prefix(line: &str) -> Option<&str> {
    COMMENT_PREFIXES
        .iter()
        .copied()
        .find_map(|prefix| line.strip_prefix(prefix))
}

/// Walk `repo_root` looking for test files that claim any of the
/// supplied `spec_ids`. Returns a map from spec id to the first
/// (repo-relative, forward-slash) path found claiming it.
///
/// Each regular file is offered to both proofs: the filename fallback
/// (`claim_via_filename`) first, then the inline marker
/// (`claim_via_marker`). Whichever file claims a spec first keeps it;
/// later claims never overwrite an existing entry.
///
/// To keep the scan cheap on real repos:
///
/// * directories listed in `SCAN_SKIP_DIRS` and every dot-directory
///   other than `.travelagent` are not descended into;
/// * entries that are neither plain directories nor plain files are
///   ignored — the type comes from `DirEntry::file_type`, which does
///   not follow symlinks;
/// * files larger than `MAX_SCAN_BYTES` are never read for markers
///   (their *name* can still claim a spec via the filename fallback);
/// * files with a NUL byte in their first `SNIFF_BYTES` bytes are
///   treated as binary and not searched for markers;
/// * the walk stops as soon as every requested spec has a link, since
///   nothing found afterwards could change the result.
///
/// Directories that cannot be read and entries whose type cannot be
/// determined are skipped silently; the scan is best-effort.
///
/// Returns an empty map without touching the filesystem when
/// `spec_ids` is empty.
#[must_use]
pub fn scan_spec_links(repo_root: &Path, spec_ids: &[String]) -> HashMap<String, PathBuf> {
    use std::collections::HashSet;

    if spec_ids.is_empty() {
        return HashMap::new();
    }
    let wanted: HashSet<&str> = spec_ids.iter().map(|s| s.as_str()).collect();
    // Precompute the short-id → full-id reverse map for the filename
    // fallback so a single pass over an entry can resolve either
    // claim without re-hashing.
    let short_to_full: HashMap<String, &String> = spec_ids
        .iter()
        .map(|s| (short_id(s).to_string(), s))
        .collect();

    let mut out: HashMap<String, PathBuf> = HashMap::new();
    // Explicit stack instead of recursion: directories are visited
    // depth-first in whatever order `read_dir` yields them.
    let mut stack: Vec<PathBuf> = vec![repo_root.to_path_buf()];

    while let Some(dir) = stack.pop() {
        let Ok(entries) = std::fs::read_dir(&dir) else {
            continue;
        };
        for entry in entries.flatten() {
            let Ok(file_type) = entry.file_type() else {
                continue;
            };
            let path = entry.path();
            let name = entry.file_name();
            let name_str = name.to_string_lossy();
            if file_type.is_dir() {
                if SCAN_SKIP_DIRS.contains(&name_str.as_ref()) {
                    continue;
                }
                if name_str.starts_with('.') && name_str.as_ref() != ".travelagent" {
                    // Skip other dotdirs to keep scans cheap.
                    continue;
                }
                stack.push(path);
                continue;
            }
            if !file_type.is_file() {
                continue;
            }
            let rel = path.strip_prefix(repo_root).unwrap_or(&path);
            let rel_str = rel.to_string_lossy().replace('\\', "/");
            claim_via_filename(&rel_str, &short_to_full, &mut out);
            // A file that already claimed a spec by name is still read:
            // its body may carry markers for other specs.
            claim_via_marker(&path, &rel_str, &wanted, &mut out);

            // Both helpers only ever insert keys taken from `spec_ids`
            // and never overwrite, so once every wanted id has an entry
            // the map is final and the rest of the walk is wasted I/O.
            if out.len() >= wanted.len() {
                return out;
            }
        }
    }

    out
}

/// Record filename-fallback claims for one file.
///
/// `rel_str` is the path to store; only its basename is inspected.
/// For every `(short, full)` pair in `short_to_full` whose
/// `spec_<short>` fragment occurs in the basename, `full` is mapped to
/// `rel_str` in `out` — unless `full` already has an entry, in which
/// case the earlier path is kept (first writer wins). Does nothing
/// when the path has no basename or the basename is not valid UTF-8.
fn claim_via_filename(
    rel_str: &str,
    short_to_full: &HashMap<String, &String>,
    out: &mut HashMap<String, PathBuf>,
) {
    let basename = match Path::new(rel_str).file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => return,
    };
    short_to_full
        .iter()
        .filter(|(short, _)| basename.contains(format!("spec_{short}").as_str()))
        .for_each(|(_, full)| {
            // `or_insert_with` leaves an existing entry untouched.
            out.entry((*full).clone())
                .or_insert_with(|| PathBuf::from(rel_str));
        });
}

/// Resolve an inline-marker claim by reading the file (with the
/// binary / size guards described on `scan_spec_links`). Records
/// every `trv-spec:` marker the file contains that matches a wanted
/// spec id; first write wins so already-linked specs aren't
/// overwritten.
fn claim_via_marker(
    abs: &Path,
    rel_str: &str,
    wanted: &std::collections::HashSet<&str>,
    out: &mut HashMap<String, PathBuf>,
) {
    let Ok(meta) = std::fs::metadata(abs) else {
        return;
    };
    if meta.len() > MAX_SCAN_BYTES as u64 {
        return;
    }
    let Ok(bytes) = std::fs::read(abs) else {
        return;
    };
    // Binary sniff: if any of the first SNIFF_BYTES contain NUL, treat
    // as binary and skip.
    let sniff_len = bytes.len().min(SNIFF_BYTES);
    if bytes[..sniff_len].contains(&0u8) {
        return;
    }
    let Ok(text) = std::str::from_utf8(&bytes) else {
        return;
    };
    // Scan every line for markers — a file may host multiple specs.
    for raw in text.lines() {
        let line = raw.trim_start();
        let Some(rest) = strip_comment_prefix(line) else {
            continue;
        };
        let Some(after) = rest.trim_start().strip_prefix("trv-spec:") else {
            continue;
        };
        let id = after
            .trim_start()
            .split(|c: char| c.is_whitespace() || c == '*')
            .next()
            .unwrap_or("")
            .trim_end_matches(|c: char| !c.is_ascii_hexdigit() && c != '-');
        if id.is_empty() {
            continue;
        }
        if wanted.contains(id) {
            out.entry(id.to_string())
                .or_insert_with(|| PathBuf::from(rel_str));
        }
    }
}

/// Hard cap, in bytes, on the size of a file the marker pass will
/// read; `claim_via_marker` skips any file strictly larger than this.
/// Above this size the file is very unlikely to be a generated test
/// file and reading it on every reconciliation refresh would make
/// large repos feel slow.
const MAX_SCAN_BYTES: usize = 512 * 1024;

/// Number of leading bytes the binary detector inspects for a NUL
/// byte. Mirrors `git`'s default — enough to catch real binaries
/// without examining half a file that's just long.
const SNIFF_BYTES: usize = 8 * 1024;

/// Directory names the scanner skips by name, matching the build /
/// dependency conventions every language the tool cares about shares.
///
/// Matching is exact and applies to a directory at any depth. The
/// dot-prefixed entries are also covered by the scanner's general
/// "skip dot-directories except `.travelagent`" rule; they are listed
/// here so the intent stays explicit.
const SCAN_SKIP_DIRS: &[&str] = &[
    ".git",
    "target",
    "node_modules",
    "dist",
    "build",
    "__pycache__",
    ".venv",
    "venv",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
];

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Full spec id used by every fixture in this module.
    const FULL_ID: &str = "deadbeef-cafe-babe-feed-0123456789ab";
    /// First eight characters of `FULL_ID` — the filename-fallback fragment.
    const SHORT: &str = "deadbeef";

    /// Write `contents` to `rel` below `root`, creating any missing
    /// parent directories first.
    fn write_fixture(root: &std::path::Path, rel: &str, contents: &[u8]) {
        let target = root.join(rel);
        let parent = target.parent().expect("fixture path has a parent");
        fs::create_dir_all(parent).unwrap();
        fs::write(&target, contents).unwrap();
    }

    /// Scan `root` for `FULL_ID` only.
    fn scan_for_full_id(
        root: &std::path::Path,
    ) -> std::collections::HashMap<String, std::path::PathBuf> {
        scan_spec_links(root, &[FULL_ID.to_string()])
    }

    /// Assert that the marker parser extracts `FULL_ID` from `body`.
    fn assert_marker_is_full_id(body: &str) {
        assert_eq!(find_marker_spec_id(body).as_deref(), Some(FULL_ID));
    }

    #[test]
    fn sparring_status_wire_names_are_stable() {
        let expected = [
            (SparringStatus::Unlinked, "unlinked"),
            (SparringStatus::Linked, "linked"),
            (SparringStatus::Reconciling, "reconciling"),
        ];
        for (status, token) in expected {
            assert_eq!(status.as_str(), token);
        }
    }

    #[test]
    fn find_marker_accepts_double_slash() {
        assert_marker_is_full_id(
            "// trv-spec: deadbeef-cafe-babe-feed-0123456789ab\nfn test_x() {}\n",
        );
    }

    #[test]
    fn find_marker_accepts_hash_prefix() {
        assert_marker_is_full_id(
            "# trv-spec: deadbeef-cafe-babe-feed-0123456789ab\n\ndef test_x(): pass\n",
        );
    }

    #[test]
    fn find_marker_accepts_double_dash_prefix() {
        assert_marker_is_full_id("-- trv-spec: deadbeef-cafe-babe-feed-0123456789ab\nSELECT 1;\n");
    }

    #[test]
    fn find_marker_accepts_semicolon_prefix() {
        assert_marker_is_full_id("; trv-spec: deadbeef-cafe-babe-feed-0123456789ab\n");
    }

    #[test]
    fn find_marker_tolerates_leading_whitespace() {
        assert_marker_is_full_id(
            "    //   trv-spec:    deadbeef-cafe-babe-feed-0123456789ab   \n",
        );
    }

    #[test]
    fn find_marker_returns_none_without_marker() {
        assert_eq!(find_marker_spec_id("fn test_x() {}\n"), None);
    }

    #[test]
    fn filename_claims_spec_matches_short_prefix() {
        for path in ["tests/spec_deadbeef_addition.rs", "a/b/spec_deadbeef.rs"] {
            assert!(filename_claims_spec(path, FULL_ID));
        }
    }

    #[test]
    fn filename_claims_spec_rejects_mismatched_short_id() {
        let claimed = filename_claims_spec("tests/spec_feedface_addition.rs", FULL_ID);
        assert!(!claimed);
    }

    #[test]
    fn filename_claims_spec_rejects_non_spec_folder() {
        let claimed = filename_claims_spec(".specs/deadbeef.rs", FULL_ID);
        assert!(!claimed);
    }

    #[test]
    fn validate_spec_link_accepts_marker() {
        let verdict = validate_spec_link(
            "tests/whatever.rs",
            "// trv-spec: deadbeef-cafe-babe-feed-0123456789ab\n",
            FULL_ID,
        );
        assert!(verdict.is_ok());
    }

    #[test]
    fn validate_spec_link_accepts_filename_fallback() {
        let verdict =
            validate_spec_link("tests/spec_deadbeef_adds.rs", "fn test_x() {}\n", FULL_ID);
        assert!(verdict.is_ok());
    }

    #[test]
    fn validate_spec_link_rejects_both_missing() {
        let message =
            validate_spec_link("tests/whatever.rs", "fn test_x() {}\n", FULL_ID).unwrap_err();
        // The error has to mention both ways out.
        assert!(message.contains("trv-spec"));
        assert!(message.contains(SHORT));
    }

    #[test]
    fn validate_spec_link_rejects_marker_for_different_spec() {
        let verdict = validate_spec_link(
            "tests/whatever.rs",
            "// trv-spec: feedface-cafe-babe-feed-0123456789ab\n",
            FULL_ID,
        );
        assert!(verdict.is_err());
    }

    #[test]
    fn scan_finds_markered_file() {
        let repo = tempfile::tempdir().expect("tmpdir");
        write_fixture(
            repo.path(),
            "tests/bar.rs",
            b"// trv-spec: deadbeef-cafe-babe-feed-0123456789ab\nfn test_x() {}\n",
        );

        let links = scan_for_full_id(repo.path());
        assert_eq!(links.len(), 1);
        assert_eq!(
            links.get(FULL_ID).unwrap().to_string_lossy(),
            "tests/bar.rs"
        );
    }

    #[test]
    fn scan_finds_filename_fallback() {
        let repo = tempfile::tempdir().expect("tmpdir");
        // The body carries no marker; only the filename claims the spec.
        write_fixture(
            repo.path(),
            "tests/spec_deadbeef_adds.py",
            b"def test_x(): pass\n",
        );

        let links = scan_for_full_id(repo.path());
        assert_eq!(
            links.get(FULL_ID).unwrap().to_string_lossy(),
            "tests/spec_deadbeef_adds.py"
        );
    }

    #[test]
    fn scan_skips_git_and_target_dirs() {
        let repo = tempfile::tempdir().expect("tmpdir");
        for trap in [".git/trap.rs", "target/debug/trap.rs"] {
            write_fixture(
                repo.path(),
                trap,
                b"// trv-spec: deadbeef-cafe-babe-feed-0123456789ab\n",
            );
        }

        let links = scan_for_full_id(repo.path());
        assert!(links.is_empty(), "must not pick up files under .git/target");
    }

    #[test]
    fn scan_skips_binary_files() {
        let repo = tempfile::tempdir().expect("tmpdir");
        // A valid marker line, then a NUL inside the sniff window: the
        // file must be classified as binary and ignored.
        write_fixture(
            repo.path(),
            "embedded.bin",
            b"// trv-spec: deadbeef-cafe-babe-feed-0123456789ab\n\0more\n",
        );

        let links = scan_for_full_id(repo.path());
        assert!(links.is_empty());
    }

    #[test]
    fn scan_is_noop_when_no_specs() {
        let repo = tempfile::tempdir().expect("tmpdir");
        write_fixture(
            repo.path(),
            "stray.rs",
            b"// trv-spec: deadbeef-cafe-babe-feed-0123456789ab\n",
        );

        let links = scan_spec_links(repo.path(), &[]);
        assert!(links.is_empty());
    }

    #[test]
    fn scan_first_match_wins() {
        // Two files claim the same spec via marker. The scanner keeps
        // whichever one the walk reaches first and never overwrites it.
        let repo = tempfile::tempdir().expect("tmpdir");
        for rel in ["a/one.rs", "b/two.rs"] {
            write_fixture(
                repo.path(),
                rel,
                b"// trv-spec: deadbeef-cafe-babe-feed-0123456789ab\n",
            );
        }

        let links = scan_for_full_id(repo.path());
        assert_eq!(links.len(), 1);
        // Walk order is unspecified, so either file is acceptable; what
        // is pinned is that the link is neither duplicated nor dropped.
        let s = links.get(FULL_ID).unwrap().to_string_lossy();
        assert!(s == "a/one.rs" || s == "b/two.rs", "got: {s}");
    }
}