lantern 0.3.0

Local-first, provenance-aware semantic search for agent activity
Documentation
use std::fs;
use std::process::Command;

use lantern::ingest::{ingest_path, ingest_stdin};
use lantern::show::{ShowOptions, show, show_with_options};
use lantern::store::Store;
use serde_json::Value;
use tempfile::tempdir;

/// Builds a [`Command`] that targets the `lantern` binary.
///
/// The executable path is taken from the `CARGO_BIN_EXE_lantern` environment
/// variable, which `env!` resolves at compile time.
fn lantern() -> Command {
    let binary_path = env!("CARGO_BIN_EXE_lantern");
    Command::new(binary_path)
}

/// Creates a fresh store inside a temporary directory and ingests a `data`
/// directory holding two small files (`a.md` and `b.txt`).
///
/// Returns the temporary directory handle, the store, and the source ids of
/// the first and second entries of the ingest report, in report order.
/// NOTE(review): this helper does not pin which file each id belongs to; that
/// depends on the order `ingest_path` reports them in.
///
/// Panics if any setup step fails or if the report does not contain exactly
/// two ingested sources.
fn setup_with_two_sources() -> (tempfile::TempDir, Store, String, String) {
    let root = tempdir().unwrap();
    let store_path = root.path().join("store");
    let mut store = Store::initialize(&store_path).unwrap();

    let data = root.path().join("data");
    fs::create_dir_all(&data).unwrap();
    let fixtures = [
        (
            "a.md",
            "# heading\n\nbody paragraph one.\n\nbody paragraph two.\n",
        ),
        ("b.txt", "just a small plain text file"),
    ];
    for (file_name, contents) in fixtures {
        fs::write(data.join(file_name), contents).unwrap();
    }

    let report = ingest_path(&mut store, &data).unwrap();
    assert_eq!(report.ingested.len(), 2);

    // The length check above guarantees both `next()` calls yield a value.
    let mut source_ids = report.ingested.iter().map(|entry| entry.source_id.clone());
    let first_id: String = source_ids.next().unwrap();
    let second_id: String = source_ids.next().unwrap();
    (root, store, first_id, second_id)
}

/// Looking a source up by its full id returns that source with a `file://`
/// URI pointing at one of the two fixture files, and at least one chunk whose
/// fields are all populated.
#[test]
fn show_by_exact_id_returns_source_and_all_chunks() {
    let (_root, store, wanted_id, _other_id) = setup_with_two_sources();
    let source = show(&store, &wanted_id).unwrap();

    assert_eq!(source.source_id, wanted_id);
    assert!(source.uri.starts_with("file://"));
    // Either fixture is accepted here: the helper does not fix which file
    // comes first in the ingest report.
    let points_at_fixture = ["/a.md", "/b.txt"]
        .iter()
        .any(|suffix| source.uri.ends_with(*suffix));
    assert!(points_at_fixture);

    assert!(!source.chunks.is_empty());
    for chunk in source.chunks.iter() {
        assert!(!chunk.chunk_id.is_empty());
        assert!(chunk.byte_end >= chunk.byte_start);
        assert!(chunk.char_count > 0);
        assert!(!chunk.sha256.is_empty());
        assert!(!chunk.text.is_empty());
    }
}

/// The first six characters of a source id are enough for `show` to resolve
/// the full source.
#[test]
fn show_by_unambiguous_prefix_resolves_to_same_source() {
    let (_root, store, full_id, _other_id) = setup_with_two_sources();
    let resolved = show(&store, &full_id[..6]).unwrap();
    assert_eq!(resolved.source_id, full_id);
}

/// An id that matches nothing in the store produces a "no source matches"
/// error.
#[test]
fn show_errors_for_unknown_id() {
    let (_root, store, ..) = setup_with_two_sources();
    let message = show(&store, "deadbeefdeadbeefdeadbeefdeadbeef")
        .unwrap_err()
        .to_string();
    assert!(message.contains("no source matches"));
}

/// Both an empty id and a whitespace-only id are rejected with a
/// "must not be empty" error.
#[test]
fn show_errors_for_empty_id() {
    let (_root, store, ..) = setup_with_two_sources();
    for blank_id in ["", "   "] {
        let message = show(&store, blank_id).unwrap_err().to_string();
        assert!(message.contains("must not be empty"));
    }
}

/// A prefix shared by more than one source id is reported as ambiguous.
#[test]
fn show_errors_for_ambiguous_prefix() {
    let root = tempdir().unwrap();
    let store = Store::initialize(&root.path().join("store")).unwrap();

    // Insert two rows straight into `sources`; both ids begin with
    // `abcd1234`, so that prefix cannot identify a single source.
    let seed_sql = "INSERT INTO sources (id, uri, path, kind, bytes, content_sha256, mtime_unix, ingested_at)
             VALUES ('abcd1234aaaa0000abcd1234aaaa0000', 'x://1', NULL, 'text/plain', 1, 's1', NULL, 1),
                    ('abcd1234bbbb0000abcd1234bbbb0000', 'x://2', NULL, 'text/plain', 1, 's2', NULL, 1)";
    store.conn().execute(seed_sql, []).unwrap();

    let message = show(&store, "abcd1234").unwrap_err().to_string();
    assert!(message.contains("ambiguous"));
}

/// For each fixture source, the concatenated chunk text is non-empty and the
/// final chunk ends exactly at the source's recorded byte count.
#[test]
fn chunks_reassemble_to_original_text_for_plain_files() {
    let (_root, store, id_a, id_b) = setup_with_two_sources();
    for source_id in [id_a, id_b].iter() {
        let source = show(&store, source_id).unwrap();

        let mut rebuilt = String::new();
        for chunk in &source.chunks {
            rebuilt.push_str(chunk.text.as_str());
        }
        assert!(!rebuilt.is_empty());

        // The last chunk's end offset must coincide with the source size,
        // i.e. the chunks reach the end of the file.
        let final_chunk = source.chunks.last().unwrap();
        assert_eq!(final_chunk.byte_end as i64, source.bytes);
    }
}

/// Ingesting one JSONL record from stdin exposes its `role`, `session_id`,
/// `turn_id`, `tool_name` and `timestamp` fields on the first chunk returned
/// by `show`.
#[test]
fn show_surfaces_jsonl_chunk_metadata() {
    let root = tempdir().unwrap();
    let mut store = Store::initialize(&root.path().join("store")).unwrap();

    let payload = b"{\"role\":\"user\",\"session_id\":\"sess-1\",\"turn_id\":\"turn-1\",\"tool_name\":\"search\",\"timestamp\":1700000000,\"content\":\"hello\"}\n";
    let report = ingest_stdin(
        &mut store,
        "stdin://sess-1",
        Some("application/jsonl"),
        payload,
    )
    .unwrap();

    let source_id = &report.ingested[0].source_id;
    let source = show(&store, source_id).unwrap();
    let chunk = &source.chunks[0];

    // Each string-valued metadata field is paired with the value taken from
    // the JSON record above.
    let string_fields = [
        (chunk.role.as_deref(), "user"),
        (chunk.session_id.as_deref(), "sess-1"),
        (chunk.turn_id.as_deref(), "turn-1"),
        (chunk.tool_name.as_deref(), "search"),
    ];
    for (actual, expected) in string_fields {
        assert_eq!(actual, Some(expected));
    }
    assert_eq!(chunk.timestamp_unix, Some(1_700_000_000));
}

/// With `with_entities: Some(3)`, `show_with_options` attaches exactly three
/// entities to the single chunk, in the asserted order (domain, email,
/// filepath). Plain `show` leaves `entities` unset.
#[test]
fn show_with_options_loads_chunk_entities_and_respects_limit() {
    let root = tempdir().unwrap();
    let mut store = Store::initialize(&root.path().join("store")).unwrap();
    let payload =
        b"ping @alice at alice@example.com about `src/main.rs` via https://example.com/docs #memory\n";
    let report = ingest_stdin(&mut store, "stdin://entities", Some("text/plain"), payload).unwrap();
    let source_id = &report.ingested[0].source_id;

    let options = ShowOptions {
        with_entities: Some(3),
    };
    let source = show_with_options(&store, source_id, options).unwrap();

    assert_eq!(source.chunks.len(), 1);
    let entities = source.chunks[0]
        .entities
        .as_ref()
        .expect("entities requested");

    // (kind, value) pairs in the order the entities are expected to appear.
    let expected = [
        ("domain", "example.com"),
        ("email", "alice@example.com"),
        ("filepath", "src/main.rs"),
    ];
    assert_eq!(entities.len(), expected.len());
    for (entity, (kind, value)) in entities.iter().zip(expected) {
        assert_eq!(entity.kind.as_str(), kind);
        assert_eq!(entity.value, value);
    }

    // Without the option, no entity list is loaded at all.
    let plain = show(&store, source_id).unwrap();
    assert!(plain.chunks[0].entities.is_none());
}

/// Running `lantern show <id> --store <dir> --show-entities 2 --format json`
/// succeeds and prints JSON whose first chunk carries exactly two entities
/// (domain, then email).
#[test]
fn show_json_cli_surfaces_entities_when_requested() {
    let root = tempdir().unwrap();
    let store_dir = root.path().join("store");
    let mut store = Store::initialize(&store_dir).unwrap();
    let payload =
        b"ping @alice at alice@example.com about `src/main.rs` via https://example.com/docs #memory\n";
    let report = ingest_stdin(&mut store, "stdin://entities", Some("text/plain"), payload).unwrap();
    let source_id = &report.ingested[0].source_id;

    // Argument order matches: show <id> --store <dir> --show-entities 2 --format json
    let mut command = lantern();
    command
        .arg("show")
        .arg(source_id)
        .arg("--store")
        .arg(&store_dir)
        .args(["--show-entities", "2", "--format", "json"]);
    let output = command.output().unwrap();
    assert!(output.status.success(), "{:?}", output);

    let parsed: Value = serde_json::from_slice(&output.stdout).unwrap();
    let entities = parsed["chunks"][0]["entities"].as_array().unwrap();

    // (kind, value) pairs in the order the CLI is expected to emit them.
    let expected = [("domain", "example.com"), ("email", "alice@example.com")];
    assert_eq!(entities.len(), expected.len());
    for (entity, (kind, value)) in entities.iter().zip(expected) {
        assert_eq!(entity["kind"], kind);
        assert_eq!(entity["value"], value);
    }
}