lantern 0.2.3

Local-first, provenance-aware semantic search for agent activity
Documentation
use std::fs;

use lantern::ingest::{ingest_path, ingest_stdin};
use lantern::show::show;
use lantern::store::Store;
use tempfile::tempdir;

/// Shared fixture: a fresh store with two small files ingested into it.
///
/// Creates a temporary root, initializes a `Store` under `<root>/store`,
/// writes `a.md` and `b.txt` under `<root>/data`, and ingests that directory.
/// Panics (via `unwrap`/`assert_eq!`) if any step fails or if the ingest
/// report does not list exactly two sources.
///
/// Returns the temp-dir handle (so the caller can hold it for the length of
/// the test), the store, and the two source ids in the order the ingest
/// report lists them. Which file each id belongs to is not pinned here.
fn setup_with_two_sources() -> (tempfile::TempDir, Store, String, String) {
    let root = tempdir().unwrap();
    let mut store = Store::initialize(&root.path().join("store")).unwrap();

    // Lay down the two fixture files.
    let data_dir = root.path().join("data");
    fs::create_dir_all(&data_dir).unwrap();
    let markdown_body = "# heading\n\nbody paragraph one.\n\nbody paragraph two.\n";
    fs::write(data_dir.join("a.md"), markdown_body).unwrap();
    fs::write(data_dir.join("b.txt"), "just a small plain text file").unwrap();

    let report = ingest_path(&mut store, &data_dir).unwrap();
    assert_eq!(report.ingested.len(), 2);

    // The length check above guarantees both `next()` calls yield a value.
    let mut source_ids = report.ingested.iter().map(|entry| entry.source_id.clone());
    let first_id: String = source_ids.next().unwrap();
    let second_id: String = source_ids.next().unwrap();
    (root, store, first_id, second_id)
}

/// `show` with a full source id returns that source, with a `file://` URI
/// pointing at one of the fixture files and at least one populated chunk.
#[test]
fn show_by_exact_id_returns_source_and_all_chunks() {
    let (_root, store, id_a, _id_b) = setup_with_two_sources();
    let shown = show(&store, &id_a).unwrap();

    assert_eq!(shown.source_id, id_a);
    assert!(shown.uri.starts_with("file://"));

    // Either fixture file is accepted as the origin of this id.
    let from_markdown = shown.uri.ends_with("/a.md");
    let from_text = shown.uri.ends_with("/b.txt");
    assert!(from_markdown || from_text);

    assert!(!shown.chunks.is_empty());
    // Every chunk must carry an id, a sane byte range, content, and a hash.
    for chunk in shown.chunks.iter() {
        assert!(!chunk.chunk_id.is_empty());
        assert!(chunk.byte_end >= chunk.byte_start);
        assert!(chunk.char_count > 0);
        assert!(!chunk.sha256.is_empty());
        assert!(!chunk.text.is_empty());
    }
}

/// `show` given only the first six characters of an id resolves to the
/// source with the full id.
#[test]
fn show_by_unambiguous_prefix_resolves_to_same_source() {
    let (_root, store, id_a, _id_b) = setup_with_two_sources();

    // Only the leading six characters of the id are handed to `show`.
    let resolved = show(&store, &id_a[..6]).unwrap();

    assert_eq!(resolved.source_id, id_a);
}

/// `show` with an id that was never ingested fails with a
/// "no source matches" error message.
#[test]
fn show_errors_for_unknown_id() {
    let (_root, store, _, _) = setup_with_two_sources();

    let message = show(&store, "deadbeefdeadbeefdeadbeefdeadbeef")
        .unwrap_err()
        .to_string();

    assert!(message.contains("no source matches"));
}

/// `show` rejects both an empty id and a whitespace-only id with a
/// "must not be empty" error message.
#[test]
fn show_errors_for_empty_id() {
    let (_root, store, _, _) = setup_with_two_sources();

    // First the truly empty string, then one made only of spaces.
    for blank_id in ["", "   "] {
        let message = show(&store, blank_id).unwrap_err().to_string();
        assert!(message.contains("must not be empty"));
    }
}

/// `show` with a prefix shared by two source ids fails with an error whose
/// message mentions "ambiguous".
#[test]
fn show_errors_for_ambiguous_prefix() {
    let root = tempdir().unwrap();
    let store = Store::initialize(&root.path().join("store")).unwrap();

    // Insert two rows directly so both ids are known to begin with "abcd1234".
    let seed_sql = "INSERT INTO sources (id, uri, path, kind, bytes, content_sha256, mtime_unix, ingested_at)
             VALUES ('abcd1234aaaa0000abcd1234aaaa0000', 'x://1', NULL, 'text/plain', 1, 's1', NULL, 1),
                    ('abcd1234bbbb0000abcd1234bbbb0000', 'x://2', NULL, 'text/plain', 1, 's2', NULL, 1)";
    store.conn().execute(seed_sql, []).unwrap();

    let message = show(&store, "abcd1234").unwrap_err().to_string();
    assert!(message.contains("ambiguous"));
}

/// For both fixture sources, the chunks concatenate to non-empty text and
/// the last chunk ends exactly at the source's recorded byte size.
///
/// The concatenated text is only checked for being non-empty; it is not
/// compared against the file contents.
#[test]
fn chunks_reassemble_to_original_text_for_plain_files() {
    let (_root, store, id_a, id_b) = setup_with_two_sources();

    for source_id in [&id_a, &id_b] {
        let shown = show(&store, source_id).unwrap();

        // Glue the chunk texts back together in stored order.
        let mut rebuilt = String::new();
        for chunk in shown.chunks.iter() {
            rebuilt.push_str(chunk.text.as_str());
        }
        assert!(!rebuilt.is_empty());

        // The final chunk's end offset must match the source's byte count.
        let final_chunk = shown.chunks.last().unwrap();
        assert_eq!(final_chunk.byte_end as i64, shown.bytes);
    }
}

/// A single JSONL record ingested from stdin yields a first chunk whose
/// role, session id, turn id, tool name, and timestamp match the record.
#[test]
fn show_surfaces_jsonl_chunk_metadata() {
    let root = tempdir().unwrap();
    let mut store = Store::initialize(&root.path().join("store")).unwrap();

    // One JSONL line carrying every metadata field asserted on below.
    let payload = b"{\"role\":\"user\",\"session_id\":\"sess-1\",\"turn_id\":\"turn-1\",\"tool_name\":\"search\",\"timestamp\":1700000000,\"content\":\"hello\"}\n";
    let uri = "stdin://sess-1";
    let kind = Some("application/jsonl");
    let report = ingest_stdin(&mut store, uri, kind, payload).unwrap();

    let shown = show(&store, &report.ingested[0].source_id).unwrap();
    let first_chunk = &shown.chunks[0];

    assert_eq!(first_chunk.role.as_deref(), Some("user"));
    assert_eq!(first_chunk.session_id.as_deref(), Some("sess-1"));
    assert_eq!(first_chunk.turn_id.as_deref(), Some("turn-1"));
    assert_eq!(first_chunk.tool_name.as_deref(), Some("search"));
    assert_eq!(first_chunk.timestamp_unix, Some(1_700_000_000));
}