// crtx 0.1.1
//
// CLI for the Cortex supervisory memory substrate.
//! Phase 4.C opt-in semantic reranking CLI integration coverage.
//!
//! Covers:
//!
//! - `cortex memory search <query>` without `--semantic` returns results
//!   unchanged from the Phase 4.B baseline.
//! - `cortex memory search <query> --semantic` returns results with
//!   `sem_score` fields in the JSON envelope.
//! - `cortex memory search <query> --semantic --fuzzy` smoke-tests that
//!   both flags co-operate.
//! - Memories with no stored embedding still appear when `--semantic` is
//!   set: instead of a placeholder `sem_score=0.0`, they carry a value
//!   computed on demand (`compute_or_warm_embedding` warms the store).
//! - JSON envelope shape includes `sem_score` when `--semantic` is set.

use std::path::{Path, PathBuf};
use std::process::Command;

use chrono::{TimeZone, Utc};
use cortex_core::{Event, EventSource, EventType, MemoryId, SCHEMA_VERSION};
use cortex_store::migrate::apply_pending;
use cortex_store::repo::memories::accept_candidate_policy_decision_test_allow;
use cortex_store::repo::{EventRepo, MemoryAcceptanceAudit, MemoryCandidate, MemoryRepo};
use rusqlite::Connection;
use serde_json::json;

fn cortex_bin() -> PathBuf {
    PathBuf::from(env!("CARGO_BIN_EXE_cortex"))
}

/// Runs the `cortex` binary with `args`, using `cwd` as the working directory.
///
/// `HOME` is pointed at `cwd` and `XDG_DATA_HOME` at `cwd/xdg` for the child
/// process. The call blocks until the child finishes and returns its
/// captured output.
///
/// # Panics
///
/// Panics if the process cannot be spawned.
fn run_in(cwd: &Path, args: &[&str]) -> std::process::Output {
    let data_home = cwd.join("xdg");

    let mut cmd = Command::new(cortex_bin());
    cmd.current_dir(cwd);
    cmd.env("HOME", cwd);
    cmd.env("XDG_DATA_HOME", data_home);
    cmd.args(args);

    cmd.output().expect("spawn cortex")
}

/// Asserts that the finished process `out` exited with code `expected`.
///
/// # Panics
///
/// Panics, including the captured stdout and stderr in the message, when:
///
/// - the exit code differs from `expected`, or
/// - the process reported no exit code at all (`ExitStatus::code` returned
///   `None`, e.g. it was terminated by a signal).
fn assert_exit(out: &std::process::Output, expected: i32) {
    let stdout = String::from_utf8_lossy(&out.stdout);
    let stderr = String::from_utf8_lossy(&out.stderr);

    let code = match out.status.code() {
        Some(code) => code,
        // Without an exit code the captured output is the only clue about
        // what went wrong, so surface it together with the raw status.
        None => panic!(
            "process exited via signal ({})\nstdout: {stdout}\nstderr: {stderr}",
            out.status,
        ),
    };

    assert_eq!(
        code, expected,
        "expected exit {expected}, got {code}\nstdout: {stdout}\nstderr: {stderr}",
    );
}

/// Runs `cortex init` inside `tmp` and returns the database path it reports.
///
/// The path is parsed from the first stdout line starting with
/// `cortex init: db`: everything after the first `=`, trimmed, up to the
/// first ` (` that opens the status suffix.
///
/// # Panics
///
/// Panics if `init` does not exit with code 0 or if the expected line shape
/// is not found in stdout.
fn init(tmp: &Path) -> PathBuf {
    let output = run_in(tmp, &["init"]);
    assert_exit(&output, 0);

    let text = String::from_utf8_lossy(&output.stdout);

    let mut found = None;
    for line in text.lines() {
        if line.starts_with("cortex init: db") {
            found = Some(line);
            break;
        }
    }
    let db_line = found.expect("init stdout includes db path");

    let (_label, value) = db_line.split_once('=').expect("db line has equals");
    let (db_path, _status) = value
        .trim()
        .split_once(" (")
        .expect("db line has status suffix");

    PathBuf::from(db_path)
}

/// Fixed UTC test timestamp `2026-05-13 12:00:<second>`.
///
/// # Panics
///
/// Panics (via `unwrap`) when chrono does not yield a single valid instant
/// for the given `second`.
fn at(second: u32) -> chrono::DateTime<Utc> {
    let (year, month, day) = (2026, 5, 13);
    let (hour, minute) = (12, 0);
    Utc.with_ymd_and_hms(year, month, day, hour, minute, second)
        .unwrap()
}

/// Makes sure the shared source event `evt_01ARZ3NDEKTSV4RRFFQ69G5FAV`
/// exists in the event store behind `pool`.
///
/// When the event is already present nothing is written. Otherwise a
/// `ToolResult` event from the `semantic-test` tool is appended, stamped
/// with `at(second)` and carrying placeholder payload/event hashes derived
/// from `second`.
///
/// # Panics
///
/// Panics if the lookup or the append fails.
fn ensure_source_event(pool: &Connection, second: u32) {
    let events = EventRepo::new(pool);
    let source_id = "evt_01ARZ3NDEKTSV4RRFFQ69G5FAV".parse().unwrap();

    let already_present = events
        .get_by_id(&source_id)
        .expect("query source event")
        .is_some();

    if !already_present {
        let source_event = Event {
            id: source_id,
            schema_version: SCHEMA_VERSION,
            observed_at: at(second),
            recorded_at: at(second),
            source: EventSource::Tool {
                name: "semantic-test".into(),
            },
            event_type: EventType::ToolResult,
            trace_id: None,
            session_id: Some("semantic-test".into()),
            domain_tags: vec!["test".into()],
            payload: json!({"source": "semantic-test", "second": second}),
            payload_hash: format!("payload-source-{second}"),
            prev_event_hash: None,
            event_hash: format!("event-source-{second}"),
        };
        events.append(&source_event).expect("append source event");
    }
}

/// Seeds the database at `db_path` with one accepted memory.
///
/// Steps, in order:
///
/// 1. open the SQLite file and apply pending migrations;
/// 2. ensure the shared source event exists;
/// 3. insert a `semantic` memory candidate with the given `memory_id`,
///    `claim` and `domains`, timestamped `at(second)`;
/// 4. accept that candidate at `at(second + 1)` with a test audit record and
///    the test-only "allow" policy decision.
///
/// `second` is also appended to the audit-record id.
///
/// # Panics
///
/// Panics if any id fails to parse or any store operation fails.
fn insert_active_memory(
    db_path: &Path,
    memory_id: &str,
    claim: &str,
    domains: &[&str],
    second: u32,
) {
    let conn = Connection::open(db_path).expect("open sqlite");
    apply_pending(&conn).expect("apply migrations");
    ensure_source_event(&conn, second);

    let memories = MemoryRepo::new(&conn);

    // Candidate row: created and updated at the same instant.
    let candidate = MemoryCandidate {
        id: memory_id.parse().unwrap(),
        memory_type: "semantic".into(),
        claim: claim.into(),
        source_episodes_json: json!([]),
        source_events_json: json!(["evt_01ARZ3NDEKTSV4RRFFQ69G5FAV"]),
        domains_json: json!(domains),
        salience_json: json!({"score": 0.7, "validation": 0.6}),
        confidence: 0.85,
        authority: "user".into(),
        applies_when_json: json!([]),
        does_not_apply_when_json: json!([]),
        created_at: at(second),
        updated_at: at(second),
    };
    let candidate_ref = candidate.id.to_string();
    memories
        .insert_candidate(&candidate)
        .expect("insert candidate");

    // Acceptance happens one second after creation and references the
    // candidate through the audit record's source refs.
    let audit_id = format!("aud_01ARZ3NDEKTSV4RRFFQ69G5FA{second}")
        .parse()
        .unwrap();
    let accepted_at = at(second + 1);
    let audit = MemoryAcceptanceAudit {
        id: audit_id,
        actor_json: json!({"kind": "test"}),
        reason: "semantic test memory".into(),
        source_refs_json: json!([candidate_ref]),
        created_at: accepted_at,
    };

    let accepted_id: MemoryId = memory_id.parse().unwrap();
    let decision = accept_candidate_policy_decision_test_allow();
    memories
        .accept_candidate(&accepted_id, accepted_at, &audit, &decision)
        .expect("accept candidate");
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

/// Plain `memory search` (no `--semantic`) lists the matching memory and
/// prints no `sem=` field.
#[test]
fn search_without_semantic_flag_returns_results_unchanged() {
    const MEMORY_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1";

    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let db = init(root);
    insert_active_memory(
        &db,
        MEMORY_ID,
        "deterministic retrieval over memories",
        &["store"],
        1,
    );

    // Baseline invocation: no semantic flag at all.
    let output = run_in(root, &["memory", "search", "retrieval"]);
    assert_exit(&output, 0);

    let stdout = String::from_utf8_lossy(&output.stdout);

    let lists_memory = stdout.contains(MEMORY_ID);
    assert!(
        lists_memory,
        "default search must return the matching memory; stdout: {stdout}"
    );

    // The human-readable default output carries no semantic score.
    let shows_sem_field = stdout.contains("sem=");
    assert!(
        !shows_sem_field,
        "default output must not include sem= field; stdout: {stdout}"
    );
}

/// `memory search --semantic` lists the matching memory and prints a `sem=`
/// field in the human-readable output.
#[test]
fn search_with_semantic_flag_returns_results_with_sem_score_field() {
    const MEMORY_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1";

    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let db = init(root);
    insert_active_memory(
        &db,
        MEMORY_ID,
        "deterministic retrieval over memories",
        &["store"],
        1,
    );

    let argv = ["memory", "search", "retrieval", "--semantic"];
    let output = run_in(root, &argv);
    assert_exit(&output, 0);

    let stdout = String::from_utf8_lossy(&output.stdout);

    let lists_memory = stdout.contains(MEMORY_ID);
    assert!(
        lists_memory,
        "--semantic search must return the matching memory; stdout: {stdout}"
    );

    let shows_sem_field = stdout.contains("sem=");
    assert!(
        shows_sem_field,
        "--semantic output must include sem= field; stdout: {stdout}"
    );
}

/// Smoke test: `--semantic` and `--fuzzy` together exit with code 0 and
/// still list the matching memory.
#[test]
fn search_with_semantic_and_fuzzy_flags_both_work() {
    const MEMORY_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1";

    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let db = init(root);
    insert_active_memory(
        &db,
        MEMORY_ID,
        "deterministic retrieval over memories",
        &["store"],
        1,
    );

    // Both flags at once: the command must succeed and produce results.
    let argv = ["memory", "search", "retrieval", "--semantic", "--fuzzy"];
    let output = run_in(root, &argv);
    assert_exit(&output, 0);

    let stdout = String::from_utf8_lossy(&output.stdout);
    let lists_memory = stdout.contains(MEMORY_ID);
    assert!(
        lists_memory,
        "--semantic --fuzzy must return the matching memory; stdout: {stdout}"
    );
}

/// A memory inserted without any embedding row must still be returned by
/// `--semantic` search, with a `sem=` field in the output.
///
/// The seeding helper writes no embedding, so a missing pre-computed
/// embedding must not be treated as an error by the CLI.
#[test]
fn search_with_semantic_flag_gracefully_handles_missing_embeddings() {
    const MEMORY_ID: &str = "mem_01ARZ3NDEKTSV4RRFFQ69G5FA2";

    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let db = init(root);
    insert_active_memory(
        &db,
        MEMORY_ID,
        "semantic search without pre-computed embedding",
        &["retrieval"],
        2,
    );

    let argv = ["memory", "search", "semantic", "--semantic"];
    let output = run_in(root, &argv);
    assert_exit(&output, 0);

    let stdout = String::from_utf8_lossy(&output.stdout);

    let lists_memory = stdout.contains(MEMORY_ID);
    assert!(
        lists_memory,
        "memory with no pre-computed embedding must still appear; stdout: {stdout}"
    );

    // A `sem=` field in the output means a semantic score was produced.
    let shows_sem_field = stdout.contains("sem=");
    assert!(
        shows_sem_field,
        "on-demand embedding must produce a sem= field; stdout: {stdout}"
    );
}

/// With `--json` and `--semantic`, stdout is a JSON envelope whose
/// `report.matches` array is non-empty and in which every match carries a
/// finite numeric `sem_score`.
#[test]
fn search_semantic_json_envelope_includes_sem_score() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = init(tmp.path());
    insert_active_memory(
        &db_path,
        "mem_01ARZ3NDEKTSV4RRFFQ69G5FA1",
        "deterministic retrieval over memories",
        &["store"],
        1,
    );

    let out = run_in(
        tmp.path(),
        &["--json", "memory", "search", "retrieval", "--semantic"],
    );
    assert_exit(&out, 0);
    let stdout = String::from_utf8_lossy(&out.stdout);
    let envelope: serde_json::Value =
        serde_json::from_str(&stdout).expect("stdout must be valid JSON");

    let matches = envelope["report"]["matches"]
        .as_array()
        .expect("matches must be an array");
    assert!(
        !matches.is_empty(),
        "semantic search must return at least one match; envelope: {envelope}"
    );

    // The contract is per match, so validate every entry rather than only
    // the first one.
    for entry in matches {
        assert!(
            entry.get("sem_score").is_some(),
            "each match must include a sem_score field when --semantic is set; match: {entry}"
        );
        // sem_score must be a finite number.
        let sem = entry["sem_score"]
            .as_f64()
            .expect("sem_score must be a number");
        assert!(sem.is_finite(), "sem_score must be finite; got {sem}");
    }
}