//! Integration tests for `lantern` 0.2.3 — local-first, provenance-aware
//! semantic search for agent activity. See the crate documentation for
//! background on ingestion and the store schema.
use std::fs;

use lantern::ingest::ingest_path;
use lantern::store::Store;
use rusqlite::params;
use tempfile::tempdir;

#[test]
fn ingests_markdown_file_end_to_end() {
    // Isolated store plus a markdown file repeated enough times that the
    // chunker has real work to do.
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    let docs = tmp.path().join("data");
    fs::create_dir_all(&docs).unwrap();
    let md = docs.join("hello.md");
    let body = "# hello\n\nthis is a paragraph.\n\nanother one.\n".repeat(50);
    fs::write(&md, &body).unwrap();

    // The report must describe exactly this one file.
    let report = ingest_path(&mut store, &md).unwrap();
    assert_eq!(report.ingested.len(), 1);
    assert!(report.skipped.is_empty());
    let entry = &report.ingested[0];
    assert!(entry.chunks >= 1);
    assert_eq!(entry.bytes as usize, body.len());
    assert_eq!(entry.kind, "text/markdown");

    // Row counts in the backing store must agree with the report.
    {
        let conn = store.conn();
        let sources: i64 = conn
            .query_row("SELECT COUNT(*) FROM sources", [], |row| row.get(0))
            .unwrap();
        assert_eq!(sources, 1);
        let chunks: i64 = conn
            .query_row("SELECT COUNT(*) FROM chunks", [], |row| row.get(0))
            .unwrap();
        assert_eq!(chunks as usize, entry.chunks);
    }

    // Concatenating chunks in ordinal order must reproduce the input
    // byte-for-byte (lossless chunking).
    let conn = store.conn();
    let mut stmt = conn
        .prepare("SELECT text FROM chunks WHERE source_id = ?1 ORDER BY ordinal")
        .unwrap();
    let rebuilt: String = stmt
        .query_map(params![entry.source_id], |row| row.get::<_, String>(0))
        .unwrap()
        .map(|piece| piece.unwrap())
        .collect();
    assert_eq!(rebuilt, body);
}

#[test]
fn reingest_unchanged_file_is_noop() {
    // Ingesting the same, unmodified file twice must skip it the second
    // time rather than duplicate or re-chunk it.
    let dir = tempdir().unwrap();
    let mut store = Store::initialize(&dir.path().join("store")).unwrap();

    let note = dir.path().join("note.txt");
    fs::write(&note, "hello world").unwrap();

    let initial = ingest_path(&mut store, &note).unwrap();
    assert_eq!(initial.ingested.len(), 1);

    let repeat = ingest_path(&mut store, &note).unwrap();
    assert!(repeat.ingested.is_empty());
    assert_eq!(repeat.skipped.len(), 1);
    let reason = &repeat.skipped[0].reason;
    assert!(reason.contains("unchanged since last ingest"));
}

#[test]
fn reingest_updated_file_replaces_chunks() {
    // Editing a file and re-ingesting must keep the stable source id,
    // update the content hash, and fully replace the stored chunks.
    let dir = tempdir().unwrap();
    let mut store = Store::initialize(&dir.path().join("store")).unwrap();

    let note = dir.path().join("note.txt");
    fs::write(&note, "old content").unwrap();
    let before = ingest_path(&mut store, &note).unwrap();
    let id = before.ingested[0].source_id.clone();
    let sha = before.ingested[0].content_sha256.clone();

    fs::write(&note, "brand new body after edit").unwrap();
    let after = ingest_path(&mut store, &note).unwrap();
    assert_eq!(after.ingested.len(), 1);
    let entry = &after.ingested[0];
    assert_eq!(entry.source_id, id);
    assert_ne!(entry.content_sha256, sha);

    // Only the new body may remain in the chunk table for this source.
    let conn = store.conn();
    let mut stmt = conn
        .prepare("SELECT text FROM chunks WHERE source_id = ?1 ORDER BY ordinal")
        .unwrap();
    let stored: String = stmt
        .query_map(params![id], |row| row.get::<_, String>(0))
        .unwrap()
        .map(|piece| piece.unwrap())
        .collect();
    assert_eq!(stored, "brand new body after edit");
}

#[test]
fn ingests_directory_recursively() {
    // A directory walk must pick up supported files (.md, .txt) at every
    // depth and silently ignore unsupported extensions (.log).
    let dir = tempdir().unwrap();
    let mut store = Store::initialize(&dir.path().join("store")).unwrap();

    let data = dir.path().join("data");
    fs::create_dir_all(data.join("nested")).unwrap();
    for (name, text) in [
        ("a.md", "# a"),
        ("b.txt", "plain b"),
        ("nested/c.md", "# c"),
        ("ignored.log", "should be skipped"),
    ] {
        fs::write(data.join(name), text).unwrap();
    }

    let report = ingest_path(&mut store, &data).unwrap();
    assert_eq!(report.ingested.len(), 3);
    assert!(report.skipped.is_empty());
}

#[test]
fn rejects_non_utf8_file() {
    // Bytes 0xff/0xfe/0xfd can never begin a valid UTF-8 sequence, so the
    // file must be skipped with a UTF-8 complaint rather than ingested.
    let dir = tempdir().unwrap();
    let mut store = Store::initialize(&dir.path().join("store")).unwrap();

    let bad = dir.path().join("bad.txt");
    fs::write(&bad, [0xff, 0xfe, 0xfd]).unwrap();

    let report = ingest_path(&mut store, &bad).unwrap();
    assert!(report.ingested.is_empty());
    assert_eq!(report.skipped.len(), 1);
    let reason = &report.skipped[0].reason;
    assert!(reason.contains("UTF-8"));
}