lantern 0.2.3

Local-first, provenance-aware semantic search for agent activity
Documentation
use std::fs;

use lantern::ingest::ingest_path;
use lantern::search::{SearchOptions, search};
use lantern::store::Store;
use tempfile::tempdir;

/// Builds a fresh store populated from `(file name, contents)` pairs.
///
/// Under a new temporary root this creates a `store` directory (passed to
/// `Store::initialize`) and a `data` directory. Each entry of `files` is
/// written into `data`, and the whole `data` directory is then ingested with a
/// single `ingest_path` call.
///
/// Returns the temporary-directory handle together with the store; callers
/// keep the handle bound for the duration of the test. Every step panics on
/// failure.
fn setup_store_with(files: &[(&str, &str)]) -> (tempfile::TempDir, Store) {
    let tmp = tempdir().unwrap();
    let store_dir = tmp.path().join("store");
    let data_dir = tmp.path().join("data");

    let mut store = Store::initialize(&store_dir).unwrap();
    fs::create_dir_all(&data_dir).unwrap();
    files
        .iter()
        .for_each(|(name, body)| fs::write(data_dir.join(name), body).unwrap());
    ingest_path(&mut store, &data_dir).unwrap();

    (tmp, store)
}

/// Searching for "lantern" over two files yields exactly one hit (from
/// `a.md`), and that hit carries its URI, kind, ordinal, byte range, text,
/// highlighted snippet, score and identifiers.
#[test]
fn finds_matching_chunk_with_full_provenance() {
    let corpus = [
        ("a.md", "Lanterns glow in the dark forest."),
        ("b.md", "Rust is a systems programming language."),
    ];
    let (_root, store) = setup_store_with(&corpus);

    let options = SearchOptions {
        limit: 10,
        ..SearchOptions::default()
    };
    let hits = search(&store, "lantern", options).unwrap();
    assert_eq!(hits.len(), 1);
    let hit = &hits[0];

    // Provenance: where the chunk came from and where it sits in its source.
    assert!(hit.uri.starts_with("file://"));
    assert!(hit.uri.ends_with("/a.md"));
    assert_eq!(hit.kind, "text/markdown");
    assert_eq!(hit.ordinal, 0);
    assert_eq!(hit.byte_start, 0);
    assert!(hit.byte_end > 0);

    // Content: the matched text plus a snippet carrying both highlight markers.
    assert!(hit.text.to_lowercase().contains("lantern"));
    for marker in ["<<", ">>"] {
        assert!(hit.snippet.contains(marker));
    }

    assert!(hit.score <= 0.0, "bm25 scores should be <= 0");
    assert!(!hit.chunk_id.is_empty());
    assert!(!hit.source_id.is_empty());
}

/// A two-word query only matches the file that contains both words.
#[test]
fn multiple_tokens_are_and_joined() {
    let corpus = [
        ("a.md", "alpha beta gamma"),
        ("b.md", "alpha only"),
        ("c.md", "beta only"),
    ];
    let (_root, store) = setup_store_with(&corpus);

    let options = SearchOptions {
        limit: 10,
        ..SearchOptions::default()
    };
    let hits = search(&store, "alpha beta", options).unwrap();

    // Only `a.md` holds both "alpha" and "beta".
    assert_eq!(hits.len(), 1);
    assert!(hits[0].uri.ends_with("/a.md"));
}

/// An empty query and a whitespace-only query both succeed with zero hits.
#[test]
fn empty_query_returns_no_results() {
    let (_root, store) = setup_store_with(&[("a.md", "anything goes here")]);

    for blank_query in ["", "   "] {
        let hits = search(&store, blank_query, SearchOptions::default()).unwrap();
        assert!(hits.is_empty());
    }
}

/// A term that appears in no ingested file produces an empty, non-error result.
#[test]
fn unknown_term_returns_no_results() {
    let corpus = [("a.md", "hello world")];
    let (_root, store) = setup_store_with(&corpus);

    let outcome = search(&store, "xyzzyquux", SearchOptions::default());
    assert!(outcome.unwrap().is_empty());
}

/// With one file repeating the query term many times and another mentioning it
/// once, both are returned and the term-dense file is ranked first.
#[test]
fn ranking_prefers_term_dense_chunks() {
    let dense_body = "needle needle needle needle ".repeat(5);
    let (_root, store) = setup_store_with(&[
        ("dense.md", dense_body.as_str()),
        ("sparse.md", "needle once among many other words here"),
    ]);

    let options = SearchOptions {
        limit: 10,
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", options).unwrap();
    assert_eq!(hits.len(), 2);

    let (top, runner_up) = (&hits[0], &hits[1]);
    assert!(
        top.score <= runner_up.score,
        "results should be sorted by bm25 ascending (more relevant first)"
    );
    assert!(top.uri.ends_with("/dense.md"));
}

/// Five files match the query, but `limit: 3` caps the result at three hits.
#[test]
fn limit_caps_result_count() {
    // Owned file names must outlive the borrowed pairs handed to the helper.
    let names: Vec<String> = (0..5).map(|i| format!("f{i}.md")).collect();
    let corpus: Vec<(&str, &str)> = names
        .iter()
        .map(|name| (name.as_str(), "needle in here somewhere"))
        .collect();
    let (_root, store) = setup_store_with(&corpus);

    let options = SearchOptions {
        limit: 3,
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", options).unwrap();
    assert_eq!(hits.len(), 3);
}

/// With markdown, plain-text and JSONL files all containing the query term,
/// a `kind` filter returns only the hit of the requested kind.
#[test]
fn kind_filter_restricts_to_matching_kind() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();
    let data_dir = tmp.path().join("data");
    fs::create_dir_all(&data_dir).unwrap();

    let fixtures = [
        ("note.md", "needle in markdown"),
        ("plain.txt", "needle in plain text"),
        (
            "session.jsonl",
            "{\"role\":\"user\",\"content\":\"needle in jsonl\"}\n",
        ),
    ];
    for (name, body) in fixtures {
        fs::write(data_dir.join(name), body).unwrap();
    }
    ingest_path(&mut store, &data_dir).unwrap();

    // Runs the same "needle" query restricted to a single kind.
    let search_kind = |kind: &'static str| {
        search(
            &store,
            "needle",
            SearchOptions {
                kind: Some(kind.into()),
                ..SearchOptions::default()
            },
        )
        .unwrap()
    };

    let markdown = search_kind("text/markdown");
    assert_eq!(markdown.len(), 1);
    assert_eq!(markdown[0].kind, "text/markdown");
    assert!(markdown[0].uri.ends_with("/note.md"));

    let jsonl = search_kind("application/jsonl");
    assert_eq!(jsonl.len(), 1);
    assert_eq!(jsonl[0].kind, "application/jsonl");
}

/// Of three files that all match the query, `path_contains: "bananas"` keeps
/// only `bananas.md`.
#[test]
fn path_filter_restricts_to_matching_substring() {
    let corpus = [
        ("apples.md", "needle in apples"),
        ("bananas.md", "needle in bananas"),
        ("cherries.md", "needle in cherries"),
    ];
    let (_root, store) = setup_store_with(&corpus);

    let only_bananas = SearchOptions {
        path_contains: Some("bananas".into()),
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", only_bananas).unwrap();

    assert_eq!(hits.len(), 1);
    assert!(hits[0].uri.ends_with("/bananas.md"));
}

/// When both `kind` and `path_contains` are set, only a file satisfying both
/// survives: `notes-alpha.md` is the sole markdown file whose path contains
/// "alpha".
#[test]
fn combined_kind_and_path_filters_intersect() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();
    let data_dir = tmp.path().join("data");
    fs::create_dir_all(&data_dir).unwrap();

    let fixtures = [
        ("notes-alpha.md", "needle one"),
        ("notes-beta.md", "needle two"),
        ("notes-alpha.txt", "needle three"),
    ];
    for (name, body) in fixtures {
        fs::write(data_dir.join(name), body).unwrap();
    }
    ingest_path(&mut store, &data_dir).unwrap();

    let both_filters = SearchOptions {
        kind: Some("text/markdown".into()),
        path_contains: Some("alpha".into()),
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", both_filters).unwrap();

    assert_eq!(hits.len(), 1);
    let only = &hits[0];
    assert_eq!(only.kind, "text/markdown");
    assert!(only.uri.ends_with("/notes-alpha.md"));
}

/// A `kind` filter or a `path_contains` filter that matches no ingested source
/// yields an empty result rather than an error.
#[test]
fn filters_with_no_matching_source_return_empty() {
    let (_root, store) = setup_store_with(&[("a.md", "plenty of needles here")]);

    // Checked in the same order as listed: kind mismatch, then path mismatch.
    let non_matching_filters = [
        SearchOptions {
            kind: Some("application/jsonl".into()),
            ..SearchOptions::default()
        },
        SearchOptions {
            path_contains: Some("does-not-exist".into()),
            ..SearchOptions::default()
        },
    ];

    for options in non_matching_filters {
        let hits = search(&store, "needle", options).unwrap();
        assert!(hits.is_empty());
    }
}

/// Each search increments the stored `access_count` of the returned chunk and
/// refreshes `last_accessed_at` / `access_decay_at`, while the hit itself
/// reports the values from before that search's bump.
#[test]
fn keyword_search_bumps_access_count_and_last_accessed_at() {
    let (_root, store) = setup_store_with(&[("a.md", "needle in haystack")]);

    // Reads `(access_count, last_accessed_at, access_decay_at)` for one chunk
    // directly from the `chunks` table.
    let stored_access = |chunk_id: &str| -> (i64, Option<i64>, Option<i64>) {
        store
            .conn()
            .query_row(
                "SELECT access_count, last_accessed_at, access_decay_at FROM chunks WHERE id = ?1",
                rusqlite::params![chunk_id],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
            )
            .unwrap()
    };

    // First search: the hit shows no prior access; the table shows one.
    let first = search(&store, "needle", SearchOptions::default()).unwrap();
    assert_eq!(first.len(), 1);
    assert_eq!(first[0].access_count, 0);
    assert_eq!(first[0].last_accessed_at, None);

    let (access_count, last_accessed_at, access_decay_at) =
        stored_access(first[0].chunk_id.as_str());
    assert_eq!(access_count, 1);
    assert!(last_accessed_at.is_some());
    assert_eq!(access_decay_at, last_accessed_at);

    // Second search: the hit reflects the first bump; the table shows two.
    let second = search(&store, "needle", SearchOptions::default()).unwrap();
    assert_eq!(second.len(), 1);
    assert_eq!(second[0].access_count, 1);
    assert_eq!(second[0].last_accessed_at, last_accessed_at);

    let (access_count, second_last_accessed_at, second_decay_at) =
        stored_access(second[0].chunk_id.as_str());
    assert_eq!(access_count, 2);
    assert!(second_last_accessed_at.is_some());
    assert!(second_last_accessed_at >= last_accessed_at);
    assert_eq!(second_decay_at, second_last_accessed_at);
}

/// Re-ingesting a file after its contents change makes the old terms
/// unsearchable and the new terms searchable.
#[test]
fn reingest_removes_stale_fts_entries() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();
    let note = tmp.path().join("note.md");

    // First version: "sentinel" is findable.
    fs::write(&note, "original sentinel word").unwrap();
    ingest_path(&mut store, &note).unwrap();
    let initial = search(&store, "sentinel", SearchOptions::default()).unwrap();
    assert_eq!(initial.len(), 1);

    // Overwrite the same file and ingest it again.
    fs::write(&note, "entirely different contents now").unwrap();
    ingest_path(&mut store, &note).unwrap();

    let stale = search(&store, "sentinel", SearchOptions::default()).unwrap();
    assert!(stale.is_empty());

    let fresh = search(&store, "different", SearchOptions::default()).unwrap();
    assert_eq!(fresh.len(), 1);
}