lantern 0.2.2

Local-first, provenance-aware semantic search for agent activity
Documentation
use std::fs;

use lantern::ingest::ingest_path;
use lantern::reindex::reindex;
use lantern::search::{SearchOptions, search};
use lantern::store::Store;
use tempfile::tempdir;

/// Returns the row counts of the `sources`, `chunks`, and `chunks_fts`
/// tables, in that order.
///
/// Panics if any of the count queries fails.
fn counts(store: &Store) -> (i64, i64, i64) {
    let conn = store.conn();
    // Each query yields a single integer column, so one closure serves all
    // three tables.
    let count_rows = |sql: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).unwrap()
    };
    (
        count_rows("SELECT COUNT(*) FROM sources"),
        count_rows("SELECT COUNT(*) FROM chunks"),
        count_rows("SELECT COUNT(*) FROM chunks_fts"),
    )
}

/// Reindexing a freshly initialized store reports schema version 7, zero
/// reindexed chunks, and leaves all three counted tables empty.
#[test]
fn reindex_on_empty_store_reports_zero() {
    let tmp = tempdir().unwrap();
    let store_dir = tmp.path().join("store");
    let mut store = Store::initialize(&store_dir).unwrap();

    let outcome = reindex(&mut store).unwrap();

    assert_eq!(outcome.schema_version, 7);
    assert_eq!(outcome.chunks_reindexed, 0);
    let (sources, chunks, fts) = counts(&store);
    assert_eq!((sources, chunks, fts), (0, 0, 0));
}

/// Ingests two small files, reindexes, and checks that the table counts are
/// unchanged and that searching still finds both documents.
#[test]
fn reindex_preserves_canonical_data_and_rebuilds_search_index() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    // Two fixture files that both contain the search term.
    let corpus = tmp.path().join("data");
    fs::create_dir_all(&corpus).unwrap();
    let fixtures = [
        ("a.md", "needle in markdown"),
        ("b.txt", "another needle in plain text"),
    ];
    for (file_name, contents) in fixtures {
        fs::write(corpus.join(file_name), contents).unwrap();
    }
    ingest_path(&mut store, &corpus).unwrap();

    // Baseline: one source per file, and the FTS row count mirrors chunks.
    let (sources, chunks, fts) = counts(&store);
    assert_eq!(sources, 2);
    assert!(chunks >= 2);
    assert_eq!(chunks, fts);

    let outcome = reindex(&mut store).unwrap();
    assert_eq!(outcome.chunks_reindexed, chunks);

    // The rebuild must leave every count exactly where it was.
    assert_eq!(counts(&store), (sources, chunks, fts));

    // Both documents remain findable after the rebuild.
    let hits = search(&store, "needle", SearchOptions::default()).unwrap();
    assert_eq!(hits.len(), 2);
}

/// Empties `chunks_fts` behind the store's back, confirms search misses, then
/// checks that a reindex makes the document findable again.
#[test]
fn reindex_recovers_from_corrupted_fts_table() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    let corpus = tmp.path().join("data");
    fs::create_dir_all(&corpus).unwrap();
    fs::write(corpus.join("a.md"), "findable sentinel phrase").unwrap();
    ingest_path(&mut store, &corpus).unwrap();

    // Emulate index drift by deleting every FTS row directly.
    store.conn().execute("DELETE FROM chunks_fts", []).unwrap();
    let misses_before_reindex = search(&store, "sentinel", SearchOptions::default())
        .unwrap()
        .is_empty();
    assert!(
        misses_before_reindex,
        "precondition: search should miss before reindex"
    );

    let outcome = reindex(&mut store).unwrap();
    assert!(outcome.chunks_reindexed > 0);

    // The rebuilt index finds the single ingested document again.
    let recovered = search(&store, "sentinel", SearchOptions::default()).unwrap();
    assert_eq!(recovered.len(), 1);
}

/// Ingests a file, reindexes, ingests a second file, and checks that the
/// second file is searchable without running another reindex.
#[test]
fn reindex_keeps_chunk_rowid_alignment_so_triggers_still_work() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    let first_note = tmp.path().join("note.md");
    fs::write(&first_note, "alpha beta gamma").unwrap();
    ingest_path(&mut store, &first_note).unwrap();

    reindex(&mut store).unwrap();

    // Content ingested after the rebuild should reach the FTS index on its
    // own (the insert/delete/update triggers are expected to have survived),
    // so no second manual reindex is issued here.
    let second_note = tmp.path().join("other.md");
    fs::write(&second_note, "delta epsilon zeta").unwrap();
    ingest_path(&mut store, &second_note).unwrap();

    let hits = search(&store, "epsilon", SearchOptions::default()).unwrap();
    assert_eq!(hits.len(), 1);
    let only_hit = &hits[0];
    assert!(only_hit.uri.ends_with("/other.md"));
}