// lantern 0.3.0
//
// Local-first, provenance-aware semantic search for agent activity
// Documentation
use std::fs;

use lantern::ingest::ingest_path;
use lantern::inspect::{InspectOptions, inspect};
use lantern::store::Store;
use tempfile::tempdir;

#[test]
fn empty_store_reports_zero_counts() {
    // Inspecting a store straight after initialization: the report should
    // point at the on-disk locations and show nothing indexed yet.
    let tmp = tempdir().unwrap();
    let store_dir = tmp.path().join("store");
    let store = Store::initialize(&store_dir).unwrap();

    let summary = inspect(&store, InspectOptions::default()).unwrap();

    // Reported locations and on-disk footprint.
    assert!(summary.store_path.ends_with("store"));
    assert!(summary.db_path.ends_with("lantern.db"));
    assert!(summary.db_bytes > 0, "db file should exist after init");

    // Schema and aggregate totals.
    assert_eq!(summary.schema_version, 18);
    assert_eq!(summary.source_count, 0);
    assert_eq!(summary.chunk_count, 0);
    assert_eq!(summary.indexed_bytes, 0);
    assert!(summary.recent_sources.is_empty());

    // With no chunks present, every confidence signal count is zero.
    let signals = &summary.confidence_signals;
    assert_eq!(signals.chunks_with_access, 0);
    assert_eq!(signals.chunks_with_feedback, 0);
    assert_eq!(signals.chunks_with_query_success, 0);
    assert_eq!(signals.chunks_with_decay_checkpoint, 0);
}

#[test]
fn confidence_signals_count_chunks_with_each_non_zero_input() {
    // Untouched chunks contribute zero to all four signal counts; once any
    // confidence input is set on a chunk, only the matching signal advances.
    // Exercising each input independently keeps the aggregate query honest
    // even if the column layout shifts again.
    let root = tempdir().unwrap();
    let mut store = Store::initialize(&root.path().join("store")).unwrap();

    // Four tiny files so that there are four chunks to mark independently
    // (the length assertion on `chunk_ids` below guards this).
    let data = root.path().join("data");
    fs::create_dir_all(&data).unwrap();
    for (name, body) in [
        ("a.md", "alpha"),
        ("b.md", "beta"),
        ("c.md", "gamma"),
        ("d.md", "delta"),
    ] {
        fs::write(data.join(name), body).unwrap();
    }
    ingest_path(&mut store, &data).unwrap();

    // Baseline: freshly ingested chunks carry no confidence inputs at all,
    // including the decay checkpoint that is asserted on after the updates.
    let baseline = inspect(&store, InspectOptions::default()).unwrap();
    assert_eq!(baseline.confidence_signals.chunks_with_access, 0);
    assert_eq!(baseline.confidence_signals.chunks_with_feedback, 0);
    assert_eq!(baseline.confidence_signals.chunks_with_query_success, 0);
    assert_eq!(baseline.confidence_signals.chunks_with_decay_checkpoint, 0);

    let chunk_ids: Vec<String> = {
        let conn = store.conn();
        let mut stmt = conn
            .prepare("SELECT id FROM chunks ORDER BY source_id LIMIT 4")
            .unwrap();
        stmt.query_map([], |row| row.get::<_, String>(0))
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap()
    };
    assert_eq!(chunk_ids.len(), 4);

    {
        let conn = store.conn();
        // Two access-touched chunks (one of which also has positive feedback),
        // one feedback-only chunk with a *negative* score (proves the signal
        // counts non-zero, not just positive), and one query-success-only chunk.
        conn.execute(
            "UPDATE chunks SET access_count = 3 WHERE id = ?1",
            rusqlite::params![chunk_ids[0]],
        )
        .unwrap();
        conn.execute(
            "UPDATE chunks SET access_count = 1, feedback_score = 2 WHERE id = ?1",
            rusqlite::params![chunk_ids[1]],
        )
        .unwrap();
        conn.execute(
            "UPDATE chunks SET feedback_score = -1 WHERE id = ?1",
            rusqlite::params![chunk_ids[2]],
        )
        .unwrap();
        conn.execute(
            "UPDATE chunks SET query_success_count = 5 WHERE id = ?1",
            rusqlite::params![chunk_ids[3]],
        )
        .unwrap();
        // The first chunk additionally gets a decay checkpoint. The timestamp
        // is bound as a parameter rather than written as a `1_700_000_000`
        // SQL literal: digit separators inside SQL numeric literals are only
        // accepted by SQLite 3.46.0 and later, so the literal form fails to
        // parse when the test is linked against an older library.
        conn.execute(
            "UPDATE chunks SET access_decay_at = ?1 WHERE id = ?2",
            rusqlite::params![1_700_000_000_i64, chunk_ids[0]],
        )
        .unwrap();
    }

    let report = inspect(&store, InspectOptions::default()).unwrap();
    assert_eq!(report.confidence_signals.chunks_with_access, 2);
    assert_eq!(report.confidence_signals.chunks_with_feedback, 2);
    assert_eq!(report.confidence_signals.chunks_with_query_success, 1);
    assert_eq!(report.confidence_signals.chunks_with_decay_checkpoint, 1);
}

#[test]
fn populated_store_reports_accurate_counts() {
    // Ingest one long and one short file, then cross-check the aggregate
    // totals in the report against its per-source rows.
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    let long_body = "one paragraph.\n".repeat(200);
    let short_body = "another file with less content\n";
    let expected_bytes = long_body.len() + short_body.len();

    let input_dir = tmp.path().join("data");
    fs::create_dir_all(&input_dir).unwrap();
    fs::write(input_dir.join("a.md"), &long_body).unwrap();
    fs::write(input_dir.join("b.txt"), short_body).unwrap();
    ingest_path(&mut store, &input_dir).unwrap();

    let summary = inspect(&store, InspectOptions::default()).unwrap();
    assert_eq!(summary.source_count, 2);
    assert!(summary.chunk_count >= 2);
    assert_eq!(summary.indexed_bytes as usize, expected_bytes);
    assert_eq!(summary.recent_sources.len(), 2);

    // Every listed source must be fully populated; tally their chunk counts
    // along the way so the sum can be compared with the store-wide total.
    let mut chunks_across_sources: i64 = 0;
    for entry in &summary.recent_sources {
        assert!(!entry.source_id.is_empty());
        assert!(entry.uri.starts_with("file://"));
        assert!(entry.chunks > 0);
        assert!(entry.bytes > 0);
        assert!(entry.ingested_at > 0);
        chunks_across_sources += entry.chunks;
    }
    assert_eq!(chunks_across_sources, summary.chunk_count);
}

#[test]
fn recent_sources_respect_limit() {
    // Five files are ingested but only two recent sources are requested; the
    // overall source count must still reflect all five.
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    let input_dir = tmp.path().join("data");
    fs::create_dir_all(&input_dir).unwrap();
    (0..5).for_each(|i| {
        let file = input_dir.join(format!("f{i}.md"));
        fs::write(file, format!("body {i}")).unwrap();
    });
    ingest_path(&mut store, &input_dir).unwrap();

    let options = InspectOptions { recent_limit: 2 };
    let summary = inspect(&store, options).unwrap();
    assert_eq!(summary.source_count, 5);
    assert_eq!(summary.recent_sources.len(), 2);
}

#[test]
fn recent_sources_sorted_newest_first() {
    let tmp = tempdir().unwrap();
    let store = Store::initialize(&tmp.path().join("store")).unwrap();

    // Insert two source rows by hand with fixed, distinct `ingested_at`
    // values (1000 and 2000) so the expected order does not depend on how
    // finely the wall clock ticks. The connection handle is a temporary that
    // is released at the end of the statement, before `inspect` runs.
    store
        .conn()
        .execute(
            "INSERT INTO sources (id, uri, path, kind, bytes, content_sha256, mtime_unix, ingested_at)
             VALUES ('older',  'file://older',  '/older',  'text/plain', 10, 'sha-o', NULL, 1000),
                    ('newer',  'file://newer',  '/newer',  'text/plain', 10, 'sha-n', NULL, 2000)",
            [],
        )
        .unwrap();

    let summary = inspect(&store, InspectOptions::default()).unwrap();
    assert_eq!(summary.recent_sources.len(), 2);

    // The row with the larger `ingested_at` must be listed first.
    let first_listed = &summary.recent_sources[0];
    let second_listed = &summary.recent_sources[1];
    assert_eq!(first_listed.source_id, "newer");
    assert_eq!(second_listed.source_id, "older");
}