crtx 0.1.1

CLI for the Cortex supervisory memory substrate.
//! Phase 4.B opt-in fuzzy retrieval CLI integration coverage.
//!
//! Covers:
//!
//! - `cortex memory search <typo> --fuzzy` recovers a one-character typo
//!   that the deterministic lexical scorer alone could not surface.
//! - `cortex memory search <query>` (no `--fuzzy`) ships byte-for-byte
//!   identical output to the Phase 4.A baseline (the eval guardrail).
//! - `cortex context build --task <query> --fuzzy` narrows the pack to
//!   memories the FTS5 mirror surfaces while the default path (no
//!   `--fuzzy`) keeps every active memory.

use std::path::{Path, PathBuf};
use std::process::Command;

use chrono::{TimeZone, Utc};
use cortex_core::{Event, EventSource, EventType, SCHEMA_VERSION};
use cortex_store::migrate::apply_pending;
use cortex_store::repo::memories::accept_candidate_policy_decision_test_allow;
use cortex_store::repo::{EventRepo, MemoryAcceptanceAudit, MemoryCandidate, MemoryRepo};
use rusqlite::Connection;
use serde_json::json;

/// Location of the `cortex` executable exercised by these tests.
///
/// The path is baked in at compile time from the `CARGO_BIN_EXE_cortex`
/// environment variable.
fn cortex_bin() -> PathBuf {
    let exe_path: &'static str = env!("CARGO_BIN_EXE_cortex");
    exe_path.into()
}

/// Runs the `cortex` binary with `args`, using `cwd` as the working directory.
///
/// `XDG_DATA_HOME` is set to `cwd/xdg` and `HOME` to `cwd` for the child
/// process. Returns the captured output once the child has finished.
///
/// # Panics
///
/// Panics with "spawn cortex" if the process cannot be run.
fn run_in(cwd: &Path, args: &[&str]) -> std::process::Output {
    let mut cmd = Command::new(cortex_bin());
    cmd.current_dir(cwd);
    cmd.env("XDG_DATA_HOME", cwd.join("xdg"));
    cmd.env("HOME", cwd);
    cmd.args(args);
    cmd.output().expect("spawn cortex")
}

/// Asserts that the finished process in `out` reported exit code `expected`.
///
/// On a mismatch the failure message carries both codes plus the captured
/// stdout and stderr, decoded lossily as UTF-8.
///
/// # Panics
///
/// Panics if the status has no exit code, or if the code differs from
/// `expected`.
fn assert_exit(out: &std::process::Output, expected: i32) {
    let code = out.status.code().expect("process exited via signal");
    // Decode both streams up front so the assertion itself stays short.
    let captured_stdout = String::from_utf8_lossy(&out.stdout);
    let captured_stderr = String::from_utf8_lossy(&out.stderr);
    assert_eq!(
        code, expected,
        "expected exit {expected}, got {code}\nstdout: {}\nstderr: {}",
        captured_stdout, captured_stderr,
    );
}

/// Runs `cortex init` inside `tmp` and returns the database path it prints.
///
/// The path is read from the stdout line that starts with `cortex init: db`:
/// the text after the first `=`, trimmed, up to the first ` (`.
///
/// # Panics
///
/// Panics if the command exits non-zero or the expected line shape is absent.
fn init(tmp: &Path) -> PathBuf {
    let out = run_in(tmp, &["init"]);
    assert_exit(&out, 0);

    let stdout = String::from_utf8_lossy(&out.stdout);
    let db_line = stdout
        .lines()
        .find(|line| line.starts_with("cortex init: db"))
        .expect("init stdout includes db path");

    // Line shape: `<label> = <path> (<status>`; keep only the middle part.
    let (_label, value) = db_line.split_once('=').expect("db line has equals");
    let (path, _status) = value
        .trim()
        .split_once(" (")
        .expect("db line has status suffix");
    path.into()
}

/// Fixed UTC timestamp on 2026-05-12 at 12:00, offset by `second` seconds.
///
/// Panics if the components do not form a single valid timestamp.
fn at(second: u32) -> chrono::DateTime<Utc> {
    let (year, month, day) = (2026, 5, 12);
    let (hour, minute) = (12, 0);
    Utc.with_ymd_and_hms(year, month, day, hour, minute, second)
        .unwrap()
}

/// Makes sure the fixed source event `evt_01ARZ3NDEKTSV4RRFFQ69G5FAV` exists.
///
/// If a lookup by id already finds the event, nothing is written. Otherwise
/// a `ToolResult` event is appended whose timestamps, payload and hash
/// strings are derived from `second`.
///
/// # Panics
///
/// Panics if the id fails to parse, or the lookup or append fails.
fn ensure_source_event(pool: &Connection, second: u32) {
    let events = EventRepo::new(pool);
    let source_id = "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".parse().unwrap();

    let already_present = events
        .get_by_id(&source_id)
        .expect("query source event")
        .is_some();
    if !already_present {
        let source_event = Event {
            id: source_id,
            schema_version: SCHEMA_VERSION,
            observed_at: at(second),
            recorded_at: at(second),
            source: EventSource::Tool {
                name: "fuzzy-test".into(),
            },
            event_type: EventType::ToolResult,
            trace_id: None,
            session_id: Some("fuzzy-test".into()),
            domain_tags: vec!["test".into()],
            payload: json!({"source": "fuzzy-test", "second": second}),
            payload_hash: format!("payload-source-{second}"),
            prev_event_hash: None,
            event_hash: format!("event-source-{second}"),
        };
        events.append(&source_event).expect("append source event");
    }
}

/// Inserts a memory candidate into the database at `db_path` and accepts it.
///
/// Steps, in order: open the SQLite file, apply pending migrations, ensure
/// the shared source event exists, insert a `semantic` candidate with the
/// given `memory_id`, `claim` and `domains`, then accept it with a test
/// audit record and the test-only allow policy decision. The candidate is
/// timestamped at `second`; the audit and acceptance use `second + 1`.
///
/// NOTE(review): the audit id appends `second` to a 25-character stem, while
/// the other ids in this file carry 26 characters after their prefix, so
/// presumably only single-digit `second` values parse — confirm before
/// passing larger values.
///
/// # Panics
///
/// Panics if any id fails to parse or any database step fails.
fn insert_active_memory(
    db_path: &Path,
    memory_id: &str,
    claim: &str,
    domains: &[&str],
    second: u32,
) {
    let conn = Connection::open(db_path).expect("open sqlite");
    apply_pending(&conn).expect("apply migrations");
    ensure_source_event(&conn, second);

    let candidate = MemoryCandidate {
        id: memory_id.parse().unwrap(),
        memory_type: "semantic".into(),
        claim: claim.into(),
        source_episodes_json: json!([]),
        source_events_json: json!(["evt_01ARZ3NDEKTSV4RRFFQ69G5FAV"]),
        domains_json: json!(domains),
        salience_json: json!({"score": 0.7, "validation": 0.6}),
        confidence: 0.85,
        authority: "user".into(),
        applies_when_json: json!([]),
        does_not_apply_when_json: json!([]),
        created_at: at(second),
        updated_at: at(second),
    };
    let memories = MemoryRepo::new(&conn);
    memories
        .insert_candidate(&candidate)
        .expect("insert candidate");

    // Acceptance is stamped one second after the candidate was created.
    let accepted_second = second + 1;
    let audit = MemoryAcceptanceAudit {
        id: format!("aud_01ARZ3NDEKTSV4RRFFQ69G5FA{second}")
            .parse()
            .unwrap(),
        actor_json: json!({"kind": "test"}),
        reason: "fuzzy test memory".into(),
        source_refs_json: json!([candidate.id.to_string()]),
        created_at: at(accepted_second),
    };
    let decision = accept_candidate_policy_decision_test_allow();
    memories
        .accept_candidate(
            &memory_id.parse().unwrap(),
            at(accepted_second),
            &audit,
            &decision,
        )
        .expect("accept candidate");
}

/// `memory search` with a misspelled query: the plain run must print
/// "no matches", the `--fuzzy` run must print the stored memory's id.
#[test]
fn memory_search_fuzzy_recovers_one_char_typo() {
    const TARGET_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1";
    // "retrieval" with one extra `a`.
    const TYPO_QUERY: &str = "retrievaal";

    let tmp = tempfile::tempdir().unwrap();
    let workdir = tmp.path();
    let db_path = init(workdir);
    insert_active_memory(
        &db_path,
        TARGET_ID,
        "deterministic retrieval over memories",
        &["store"],
        1,
    );

    // Expectation for the default path: the misspelled word does not match
    // the stored claim, so the CLI reports no matches.
    let baseline = run_in(workdir, &["memory", "search", TYPO_QUERY]);
    assert_exit(&baseline, 0);
    let baseline_stdout = String::from_utf8_lossy(&baseline.stdout);
    assert!(
        baseline_stdout.contains("no matches"),
        "default lexical path must not match a typo; stdout: {baseline_stdout}"
    );

    // Expectation with --fuzzy: the same misspelled query surfaces the row.
    let fuzzy = run_in(workdir, &["memory", "search", TYPO_QUERY, "--fuzzy"]);
    assert_exit(&fuzzy, 0);
    let fuzzy_stdout = String::from_utf8_lossy(&fuzzy.stdout);
    assert!(
        fuzzy_stdout.contains(TARGET_ID),
        "--fuzzy must surface the one-character-typo target; stdout: {fuzzy_stdout}"
    );
}

/// `memory search` without `--fuzzy`: an exact query finds the stored
/// memory, and running the identical command twice yields identical stdout.
#[test]
fn memory_search_default_path_baseline_is_byte_for_byte_unchanged_with_fuzzy_off() {
    const TARGET_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1";
    let search_args = ["memory", "search", "retrieval"];

    let tmp = tempfile::tempdir().unwrap();
    let workdir = tmp.path();
    let db_path = init(workdir);
    insert_active_memory(
        &db_path,
        TARGET_ID,
        "deterministic retrieval over memories",
        &["store"],
        1,
    );

    // First run: the correctly spelled query must surface the memory.
    let baseline = run_in(workdir, &search_args);
    assert_exit(&baseline, 0);
    let baseline_stdout = String::from_utf8_lossy(&baseline.stdout);
    assert!(
        baseline_stdout.contains(TARGET_ID),
        "default lexical path must match exact-substring query; stdout: {baseline_stdout}"
    );

    // Phase 4.B eval guardrail: the flag is simply omitted both times (the
    // clap default), and the second invocation of the very same command
    // must reproduce the first run's stdout byte for byte.
    let second = run_in(workdir, &search_args);
    assert_exit(&second, 0);
    let second_stdout = String::from_utf8_lossy(&second.stdout);
    assert_eq!(
        baseline_stdout, second_stdout,
        "default lexical retrieval is deterministic across repeated calls"
    );
}

/// `context build` without `--fuzzy`: both stored memories must appear in
/// the JSON output, including the one unrelated to the task text.
#[test]
fn context_build_default_keeps_all_active_memories() {
    const ALPHA_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1";
    const UNRELATED_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA2";

    let tmp = tempfile::tempdir().unwrap();
    let workdir = tmp.path();
    let db_path = init(workdir);
    insert_active_memory(&db_path, ALPHA_ID, "alpha retrieval memory", &["store"], 1);
    insert_active_memory(
        &db_path,
        UNRELATED_ID,
        "unrelated subject lines",
        &["other"],
        3,
    );

    let build_args = ["--json", "context", "build", "--task", "retrieval"];
    let out = run_in(workdir, &build_args);
    assert_exit(&out, 0);
    let stdout = String::from_utf8_lossy(&out.stdout);

    // No --fuzzy means no narrowing: every active memory stays in the pack.
    assert!(
        stdout.contains(ALPHA_ID),
        "default context build must include alpha memory: {stdout}"
    );
    assert!(
        stdout.contains(UNRELATED_ID),
        "default context build must include unrelated memory (no narrowing without --fuzzy): {stdout}"
    );
}

/// `context build --fuzzy`: the memory whose claim contains the task text
/// must appear in the JSON output and the unrelated memory must not.
#[test]
fn context_build_fuzzy_narrows_to_fts5_matches() {
    const MATCHING_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1";
    const UNRELATED_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA2";

    let tmp = tempfile::tempdir().unwrap();
    let workdir = tmp.path();
    let db_path = init(workdir);
    insert_active_memory(
        &db_path,
        MATCHING_ID,
        "alpha retrieval memory",
        &["store"],
        1,
    );
    insert_active_memory(
        &db_path,
        UNRELATED_ID,
        "unrelated subject lines",
        &["other"],
        3,
    );

    let build_args = [
        "--json",
        "context",
        "build",
        "--task",
        "retrieval",
        "--fuzzy",
    ];
    let out = run_in(workdir, &build_args);
    assert_exit(&out, 0);
    let stdout = String::from_utf8_lossy(&out.stdout);

    // With --fuzzy the pack is expected to keep only the matching memory.
    assert!(
        stdout.contains(MATCHING_ID),
        "fuzzy context build must include retrieval-matching memory: {stdout}"
    );
    assert!(
        !stdout.contains(UNRELATED_ID),
        "fuzzy context build must exclude unrelated memory whose trigrams do not overlap: {stdout}"
    );
}