//! lantern 0.2.3
//!
//! Local-first, provenance-aware semantic search for agent activity.
//! Documentation
use std::fs;

use lantern::export::{ExportFilter, export, write_json};
use lantern::ingest::{ingest_path, ingest_stdin};
use lantern::store::Store;
use serde_json::Value;
use tempfile::tempdir;

/// Builds a temporary store populated from the given `(file name, body)`
/// pairs: writes each fixture under a scratch `data` directory, then ingests
/// the whole directory.
///
/// Returns the tempdir guard (which keeps the files alive) plus the store.
fn setup_store_with(files: &[(&str, &str)]) -> (tempfile::TempDir, Store) {
    let root = tempdir().unwrap();
    let store_path = root.path().join("store");
    let mut store = Store::initialize(&store_path).unwrap();

    let data_dir = root.path().join("data");
    fs::create_dir_all(&data_dir).unwrap();
    for (name, body) in files.iter() {
        fs::write(data_dir.join(name), body).unwrap();
    }
    ingest_path(&mut store, &data_dir).unwrap();

    (root, store)
}

#[test]
fn empty_store_exports_empty_sources_list() {
    // A freshly initialized store has nothing to export.
    let tmp = tempdir().unwrap();
    let store = Store::initialize(&tmp.path().join("store")).unwrap();

    let dump = export(&store, &ExportFilter::default()).unwrap();

    assert_eq!(dump.schema_version, 9);
    assert!(dump.sources.is_empty());
    // The default filter carries no constraints, and that is echoed back.
    assert!(dump.filter.path.is_none());
    assert!(dump.filter.query.is_none());
    // Export timestamp should be a real (positive) unix time.
    assert!(dump.exported_at > 0);
}

#[test]
fn full_export_includes_all_sources_and_ordered_chunks() {
    // One body long enough to split into several chunks, plus a short one.
    let long_body = "alpha beta gamma delta. ".repeat(200);
    let (_root, store) =
        setup_store_with(&[("a.md", long_body.as_str()), ("b.txt", "short content")]);

    let dump = export(&store, &ExportFilter::default()).unwrap();
    assert_eq!(dump.sources.len(), 2);

    for src in &dump.sources {
        // Per-source invariants: identity, uri scheme, hash, non-empty chunks.
        assert!(!src.source_id.is_empty());
        assert!(src.uri.starts_with("file://"));
        assert!(!src.content_sha256.is_empty());
        assert!(!src.chunks.is_empty());

        // Chunk ordinals must be strictly increasing.
        for adjacent in src.chunks.windows(2) {
            assert!(adjacent[0].ordinal < adjacent[1].ordinal);
        }

        // Concatenating chunk texts in order must rebuild the original body.
        let rebuilt: String = src.chunks.iter().map(|c| c.text.as_str()).collect();
        let other = src.uri.as_str();
        if other.ends_with("/a.md") {
            assert_eq!(rebuilt, long_body);
        } else if other.ends_with("/b.txt") {
            assert_eq!(rebuilt, "short content");
        } else {
            panic!("unexpected uri {other}");
        }
    }
}

#[test]
fn path_filter_narrows_results() {
    let (_root, store) = setup_store_with(&[
        ("apples.md", "apple content"),
        ("bananas.md", "banana content"),
        ("cherries.md", "cherry content"),
    ]);

    // Filter on a path substring only; no text query.
    let filter = ExportFilter {
        path_contains: Some("bananas".into()),
        query: None,
    };
    let dump = export(&store, &filter).unwrap();

    // Exactly the matching file survives, and the filter is echoed back.
    assert_eq!(dump.sources.len(), 1);
    assert!(dump.sources[0].uri.ends_with("/bananas.md"));
    assert_eq!(dump.filter.path.as_deref(), Some("bananas"));
}

#[test]
fn query_filter_narrows_to_matching_sources() {
    let (_root, store) = setup_store_with(&[
        ("a.md", "Lanterns glow softly."),
        ("b.md", "Rust is a systems programming language."),
        ("c.md", "Nothing related here."),
    ]);

    // Text query only; no path constraint.
    let filter = ExportFilter {
        path_contains: None,
        query: Some("lantern".into()),
    };
    let dump = export(&store, &filter).unwrap();

    // Only the source whose text matches the query remains.
    assert_eq!(dump.sources.len(), 1);
    assert!(dump.sources[0].uri.ends_with("/a.md"));
    assert_eq!(dump.filter.query.as_deref(), Some("lantern"));
}

#[test]
fn combined_filters_intersect() {
    let (_root, store) = setup_store_with(&[
        ("alpha.md", "needle here"),
        ("beta.md", "needle here too"),
        ("alpha-only.md", "nothing special"),
    ]);

    // Both constraints set: a source must satisfy path AND query to survive.
    // "beta.md" matches only the query; "alpha-only.md" matches only the path.
    let filter = ExportFilter {
        path_contains: Some("alpha".into()),
        query: Some("needle".into()),
    };
    let dump = export(&store, &filter).unwrap();

    assert_eq!(dump.sources.len(), 1);
    assert!(dump.sources[0].uri.ends_with("/alpha.md"));
}

#[test]
fn query_with_no_meaningful_tokens_yields_empty() {
    let (_root, store) = setup_store_with(&[("a.md", "some text")]);

    // A query of pure punctuation tokenizes to nothing, so nothing matches.
    let filter = ExportFilter {
        path_contains: None,
        query: Some("!!!".into()),
    };
    let dump = export(&store, &filter).unwrap();

    assert!(dump.sources.is_empty());
}

#[test]
fn write_json_produces_valid_json_file() {
    let tmp = tempdir().unwrap();
    let store = Store::initialize(&tmp.path().join("store")).unwrap();
    let dump = export(&store, &ExportFilter::default()).unwrap();

    let target = tmp.path().join("dump.json");
    write_json(&dump, Some(&target)).unwrap();

    let contents = fs::read_to_string(&target).unwrap();
    // Output is newline-terminated and must round-trip through a JSON parser.
    assert!(contents.ends_with('\n'));
    let value: Value = serde_json::from_str(&contents).unwrap();
    assert_eq!(value["schema_version"], 9);
    assert!(value["sources"].is_array());
}

#[test]
fn jsonl_export_includes_chunk_metadata() {
    let root = tempdir().unwrap();
    let mut store = Store::initialize(&root.path().join("store")).unwrap();

    // One JSONL record carrying every metadata field a chunk can hold.
    let payload = b"{\"role\":\"assistant\",\"session_id\":\"sess-9\",\"turn_id\":\"turn-2\",\"tool_name\":\"search\",\"timestamp\":1700000001,\"content\":\"tool output\"}\n";
    let report = ingest_stdin(
        &mut store,
        "stdin://sess-9",
        Some("application/jsonl"),
        payload,
    )
    .unwrap();
    assert_eq!(report.ingested.len(), 1);

    // The metadata must survive ingest -> store -> export intact.
    let dump = export(&store, &ExportFilter::default()).unwrap();
    let first_chunk = &dump.sources[0].chunks[0];
    assert_eq!(first_chunk.role.as_deref(), Some("assistant"));
    assert_eq!(first_chunk.session_id.as_deref(), Some("sess-9"));
    assert_eq!(first_chunk.turn_id.as_deref(), Some("turn-2"));
    assert_eq!(first_chunk.tool_name.as_deref(), Some("search"));
    assert_eq!(first_chunk.timestamp_unix, Some(1_700_000_001));
}