spool-memory 0.2.3

//! End-to-end integration tests: distill → ledger → retrieval → wakeup.
//!
//! These tests verify the complete knowledge lifecycle: from session
//! transcript extraction through structured memory creation, retrieval
//! scoring, and final injection into wakeup/get output.

use spool::distill::pipeline::{DistillRequest, run as distill_run};
use spool::domain::{
    MatchedProject, MemoryLifecycleState, MemoryRecord, MemoryScope, OutputFormat, RouteInput,
    TargetTool,
};
use spool::engine::scorer::score_lifecycle_candidate;
use spool::engine::selector::select_lifecycle_candidates;
use spool::lifecycle_service::LifecycleService;
use std::collections::HashSet;
use std::fs;
use std::path::PathBuf;
use tempfile::tempdir;

// ─────────────────────────────────────────────────────────────────────
// Test 1: distill pipeline extracts structured records from transcript
// ─────────────────────────────────────────────────────────────────────

#[test]
fn distill_creates_structured_record_from_self_tag() {
    // Isolated workspace: a vault directory, a minimal config that points at
    // it, and a project directory that acts as the session's cwd.
    let workspace = tempdir().unwrap();
    let root = workspace.path();

    let vault_dir = root.join("vault");
    fs::create_dir_all(&vault_dir).unwrap();

    let config_path = root.join("spool.toml");
    let config_body = format!("[vault]\nroot = \"{}\"\n", vault_dir.display());
    fs::write(&config_path, config_body).unwrap();

    let cwd = root.join("my-project");
    fs::create_dir_all(&cwd).unwrap();

    // JSONL transcript, one JSON object per line. The first user message is
    // the self-tag about SQLite; the second is an ordinary follow-up message.
    let transcript_path = root.join("session.jsonl");
    let transcript: String = [
        serde_json::json!({
            "type": "user",
            "message": {"role": "user", "content": "记一下：以后数据库都用 SQLite，部署简单"}
        }),
        serde_json::json!({
            "type": "user",
            "message": {"role": "user", "content": "好的，继续实现 src/db.rs 的查询逻辑"}
        }),
    ]
    .iter()
    .map(|entry| format!("{}\n", entry))
    .collect();
    fs::write(&transcript_path, transcript).unwrap();

    // Distill the transcript; `cwd` is not needed afterwards, so it is moved.
    let report = distill_run(DistillRequest::new(
        config_path.clone(),
        cwd,
        Some(transcript_path),
    ))
    .unwrap();

    // At least one self-tag signal must have been persisted.
    assert!(
        !report.signals_persisted.is_empty(),
        "distill should persist at least one self-tag signal, report: {:?}",
        report
    );

    // Reload the ledger through the lifecycle service and inspect the first
    // wakeup-ready entry.
    let snapshot = LifecycleService::new()
        .load_workbench(&config_path)
        .unwrap();
    assert!(
        !snapshot.wakeup_ready.is_empty(),
        "self-tag should produce an accepted (wakeup-ready) record"
    );
    let record = &snapshot.wakeup_ready[0].record;

    // Either the title or the summary has to mention SQLite (either casing).
    let mentions_sqlite = |text: &str| text.contains("SQLite") || text.contains("sqlite");
    assert!(
        mentions_sqlite(&record.title) || mentions_sqlite(&record.summary),
        "record should mention SQLite. title={}, summary={}",
        record.title, record.summary
    );

    // Self-tags are expected to land directly in the Accepted state.
    assert_eq!(record.state, MemoryLifecycleState::Accepted);

    // Structured fields: triggers must be non-empty ...
    assert!(
        !record.triggers.is_empty(),
        "triggers should be inferred from summary: {:?}",
        record.triggers
    );

    // ... applies_to must carry the cwd's directory name ...
    let names_project = record.applies_to.iter().any(|target| target == "my-project");
    assert!(
        names_project,
        "applies_to should contain project name from cwd: {:?}",
        record.applies_to
    );

    // ... and entities must be non-empty as well.
    assert!(
        !record.entities.is_empty(),
        "entities should be populated: {:?}",
        record.entities
    );
}

// ─────────────────────────────────────────────────────────────────────
// Test 2: structured fields (entities, tags, triggers) improve scoring
// ─────────────────────────────────────────────────────────────────────

#[test]
fn structured_fields_improve_retrieval_ranking() {
    // Turns string literals into the owned lists stored in a record's
    // list-valued fields (entities, tags, triggers, ...).
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|&item| item.to_owned()).collect()
    }

    // Both fixture records share the same manually-authored origin.
    let manual_origin = || spool::domain::MemoryOrigin {
        source_kind: spool::domain::MemorySourceKind::Manual,
        source_ref: "test".to_string(),
    };

    // The query: a database-performance task that touches src/db.rs,
    // evaluated against the matched project "myapp".
    let project = MatchedProject {
        id: "myapp".to_string(),
        name: "myapp".to_string(),
        reason: "test".to_string(),
    };
    let input = RouteInput {
        task: "优化数据库查询性能".to_string(),
        cwd: PathBuf::from("/tmp/repo"),
        files: vec!["src/db.rs".to_string()],
        target: TargetTool::Claude,
        format: OutputFormat::Prompt,
    };

    // Record A: a database decision whose entities, triggers and related
    // files all line up with the query.
    let record_a = MemoryRecord {
        title: "数据库选型决策".to_string(),
        summary: "以后数据库都用 SQLite，部署简单".to_string(),
        memory_type: "decision".to_string(),
        scope: MemoryScope::Project,
        state: MemoryLifecycleState::Accepted,
        origin: manual_origin(),
        project_id: Some("myapp".to_string()),
        user_id: None,
        sensitivity: None,
        entities: owned(&["SQLite", "rusqlite"]),
        tags: owned(&["database", "storage"]),
        triggers: owned(&["数据库", "sqlite"]),
        related_files: owned(&["src/db.rs"]),
        related_records: Vec::new(),
        supersedes: None,
        applies_to: owned(&["myapp"]),
        valid_until: None,
    };

    // Record B: a frontend decision in the same project, unrelated to the
    // database task.
    let record_b = MemoryRecord {
        title: "UI 框架选型".to_string(),
        summary: "前端用 React + shadcn 组件库".to_string(),
        memory_type: "decision".to_string(),
        scope: MemoryScope::Project,
        state: MemoryLifecycleState::Accepted,
        origin: manual_origin(),
        project_id: Some("myapp".to_string()),
        user_id: None,
        sensitivity: None,
        entities: owned(&["React", "shadcn"]),
        tags: owned(&["frontend", "ui"]),
        triggers: owned(&["UI框架", "react"]),
        related_files: owned(&["src/components/App.tsx"]),
        related_records: Vec::new(),
        supersedes: None,
        applies_to: owned(&["myapp"]),
        valid_until: None,
    };

    let candidate_a =
        score_lifecycle_candidate(Some(&project), "rec-a", &record_a, &input, None, None);
    let candidate_b =
        score_lifecycle_candidate(Some(&project), "rec-b", &record_b, &input, None, None);

    // A record that yields no candidate counts as score 0 for the comparison.
    let score_a = candidate_a.as_ref().map_or(0, |c| c.score);
    let score_b = candidate_b.as_ref().map_or(0, |c| c.score);

    assert!(
        score_a > score_b,
        "Record A (database) should rank above Record B (frontend). A={}, B={}",
        score_a,
        score_b
    );

    // At least one scoring reason for Record A must come from a structured
    // field (entity, trigger, or related file).
    let winner = candidate_a.unwrap();
    let structured_markers = ["entity", "trigger", "related_file"];
    let structured_hit = winner.reasons.iter().any(|reason| {
        structured_markers
            .iter()
            .any(|marker| reason.contains(*marker))
    });
    assert!(
        structured_hit,
        "structured fields should contribute to scoring. reasons: {:?}",
        winner.reasons
    );
}

// ─────────────────────────────────────────────────────────────────────
// Test 3: relation expansion finds linked records via related_records
// ─────────────────────────────────────────────────────────────────────

#[test]
fn relation_expansion_finds_linked_records() {
    // Query: a database-query task touching src/db.rs, matched to "myapp".
    let input = RouteInput {
        task: "优化数据库查询".to_string(),
        cwd: PathBuf::from("/tmp/repo"),
        files: vec!["src/db.rs".to_string()],
        target: TargetTool::Claude,
        format: OutputFormat::Prompt,
    };
    let project = MatchedProject {
        id: "myapp".to_string(),
        name: "myapp".to_string(),
        reason: "test".to_string(),
    };

    // Record A: directly matches the query, references Record B
    let record_a = MemoryRecord {
        title: "SQLite 查询优化".to_string(),
        summary: "数据库查询需要加索引".to_string(),
        memory_type: "decision".to_string(),
        scope: MemoryScope::Project,
        state: MemoryLifecycleState::Accepted,
        origin: spool::domain::MemoryOrigin {
            source_kind: spool::domain::MemorySourceKind::Manual,
            source_ref: "test".to_string(),
        },
        project_id: Some("myapp".to_string()),
        user_id: None,
        sensitivity: None,
        entities: vec!["SQLite".to_string()],
        tags: vec!["database".to_string()],
        triggers: vec!["数据库".to_string()],
        related_files: vec!["src/db.rs".to_string()],
        related_records: vec!["rec-b".to_string()],
        supersedes: None,
        applies_to: vec!["myapp".to_string()],
        valid_until: None,
    };

    // Record B: would NOT match the query on its own (different project_id
    // means project scope filter rejects it) but is linked via
    // related_records from Record A. The relation expansion path handles
    // records that scored 0 by creating a minimal candidate with a
    // relation-based score.
    let record_b = MemoryRecord {
        title: "部署流程约束".to_string(),
        summary: "单文件部署，不依赖外部服务".to_string(),
        memory_type: "constraint".to_string(),
        scope: MemoryScope::Project,
        state: MemoryLifecycleState::Accepted,
        origin: spool::domain::MemoryOrigin {
            source_kind: spool::domain::MemorySourceKind::Manual,
            source_ref: "test".to_string(),
        },
        project_id: Some("other-project".to_string()),
        user_id: None,
        sensitivity: None,
        entities: vec!["Docker".to_string()],
        tags: vec!["deployment".to_string()],
        triggers: vec!["部署".to_string()],
        related_files: Vec::new(),
        related_records: Vec::new(),
        supersedes: None,
        applies_to: Vec::new(),
        valid_until: None,
    };

    let records = vec![
        ("rec-a".to_string(), record_a),
        ("rec-b".to_string(), record_b),
    ];

    // The limit of 5 leaves room for both records, so the limit is not what
    // keeps Record B out of the direct selection. Per the note on Record B,
    // its foreign project_id is expected to do that, leaving relation
    // expansion from Record A as the only way it can show up. The
    // "relation-expanded" reason check below is what pins that path.
    let candidates =
        select_lifecycle_candidates(Some(&project), &records, &input, 5, &HashSet::new(), None);

    // Record A should definitely be present (direct match)
    let has_a = candidates.iter().any(|c| c.record_id == "rec-a");
    assert!(has_a, "Record A should be in results (direct match)");

    // Record B should appear via relation expansion from Record A. A single
    // lookup both checks presence and yields the candidate for inspection;
    // on absence it fails with the list of selected record ids.
    let b_candidate = candidates
        .iter()
        .find(|c| c.record_id == "rec-b")
        .unwrap_or_else(|| {
            panic!(
                "Record B should appear via relation expansion. candidates: {:?}",
                candidates.iter().map(|c| &c.record_id).collect::<Vec<_>>()
            )
        });

    // Record B's reasons should mention relation expansion
    assert!(
        b_candidate
            .reasons
            .iter()
            .any(|r| r.contains("relation-expanded")),
        "Record B should be marked as relation-expanded. reasons: {:?}",
        b_candidate.reasons
    );
}

// ─────────────────────────────────────────────────────────────────────
// Test 4: full lifecycle — distill → ledger → retrieval → wakeup
// ─────────────────────────────────────────────────────────────────────

#[test]
fn full_lifecycle_distill_to_retrieval() {
    use assert_cmd::Command;
    use predicates::prelude::*;

    let temp = tempdir().unwrap();
    // Resolve symlinks up front so the paths written into the config agree
    // with the paths seen later (macOS: /var vs /private/var).
    let base = temp.path().canonicalize().unwrap();
    let vault_dir = base.join("vault");
    let repo_dir = base.join("repo");
    fs::create_dir_all(vault_dir.join("10-Projects")).unwrap();
    fs::create_dir_all(&repo_dir).unwrap();

    // Seed the vault with one project note so retrieval has something to scan.
    let project_note =
        "---\nmemory_type: project\nproject_id: spool\n---\n# Spool Project\n\nProject context.\n";
    fs::write(vault_dir.join("10-Projects/project.md"), project_note).unwrap();

    // Config: vault root, output limits, and a single project "spool" whose
    // repo path is the temporary repo directory.
    let config = format!(
        r#"[vault]
root = "{}"

[output]
default_format = "prompt"
max_chars = 12000
max_notes = 8
max_lifecycle = 5

[[projects]]
id = "spool"
name = "spool"
repo_paths = ["{}"]
note_roots = ["10-Projects"]
"#,
        vault_dir.display(),
        repo_dir.display()
    );
    let config_path = base.join("spool.toml");
    fs::write(&config_path, &config).unwrap();

    // Step 1: single-line JSONL transcript holding a self-tag about routing.
    let transcript_path = base.join("session.jsonl");
    let mut transcript = serde_json::json!({
        "type": "user",
        "message": {"role": "user", "content": "记一下：routing 模块必须先匹配 project"}
    })
    .to_string();
    transcript.push('\n');
    fs::write(&transcript_path, transcript).unwrap();

    // Step 2: distill the transcript with the repo directory as cwd.
    let report = distill_run(DistillRequest::new(
        config_path.clone(),
        repo_dir.clone(),
        Some(transcript_path),
    ))
    .unwrap();
    assert!(
        !report.signals_persisted.is_empty(),
        "distill should persist the self-tag"
    );

    // Step 3: the ledger must now expose a wakeup-ready record whose summary
    // carries the self-tag's content.
    let snapshot = LifecycleService::new()
        .load_workbench(&config_path)
        .unwrap();
    assert!(
        !snapshot.wakeup_ready.is_empty(),
        "ledger should have a wakeup-ready record after distill"
    );
    let record = &snapshot.wakeup_ready[0].record;
    let summary_on_topic = ["routing", "project"]
        .iter()
        .any(|needle| record.summary.contains(*needle));
    assert!(
        summary_on_topic,
        "record summary should contain routing or project: {}",
        record.summary
    );

    // Shared pieces for the CLI invocations below.
    let spool = || Command::cargo_bin("spool").unwrap();
    let config_arg = config_path.to_str().unwrap();

    // Step 4: the distilled record must show up in `memory list` under the
    // wakeup-ready view.
    spool()
        .args([
            "memory",
            "list",
            "--config",
            config_arg,
            "--view",
            "wakeup-ready",
        ])
        .assert()
        .success()
        .stdout(predicate::str::contains("routing"));

    // Step 5: add a user-scoped manual record, standing in for a record the
    // `get` command's lifecycle scorer would be able to retrieve.
    spool()
        .args([
            "memory",
            "record-manual",
            "--config",
            config_arg,
            "--title",
            "routing 约束",
            "--summary",
            "routing 模块必须先匹配 project 再扫描",
            "--memory-type",
            "constraint",
            "--scope",
            "user",
            "--source-ref",
            "e2e-test",
        ])
        .assert()
        .success();

    // Step 6: `get` must emit the lifecycle-memory section heading and
    // mention routing (retrieval → injection path).
    spool()
        .args([
            "get",
            "--config",
            config_arg,
            "--task",
            "实现 routing 模块",
            "--cwd",
            repo_dir.to_str().unwrap(),
            "--format",
            "prompt",
        ])
        .assert()
        .success()
        .stdout(predicate::str::contains("记忆（accepted / canonical）"))
        .stdout(predicate::str::contains("routing"));
}