use lantern::ingest::{StdinIngestOptions, ingest_stdin, ingest_stdin_with};
use lantern::search::{SearchOptions, search};
use lantern::store::Store;
use tempfile::tempdir;
/// Build a fresh, empty store inside a temporary directory.
///
/// The `TempDir` guard is returned alongside the store so the backing
/// directory outlives the test; dropping it deletes everything.
fn new_store() -> (tempfile::TempDir, Store) {
    let dir = tempdir().unwrap();
    let store_path = dir.path().join("store");
    let store = Store::initialize(&store_path).unwrap();
    (dir, store)
}
/// Blank or whitespace-only URIs must be rejected with a message that
/// points the user at the `--uri` flag.
#[test]
fn empty_uri_is_rejected() {
    let (_root, mut store) = new_store();
    for bad_uri in ["", " "] {
        let err = ingest_stdin(&mut store, bad_uri, None, b"hello").unwrap_err();
        assert!(err.to_string().contains("non-empty --uri"));
    }
}
/// A plain-text stdin ingest yields exactly one source with the expected
/// metadata (uri, inferred `text/plain` kind, no filesystem path, one
/// chunk) and the content is immediately searchable.
#[test]
fn basic_plain_text_ingest_is_searchable() {
    let (_root, mut store) = new_store();
    // Bind the payload once so the byte-count assertion below is derived
    // from it rather than duplicated as a magic number (the original
    // hard-coded `30`, which would silently desync if the fixture text
    // were ever edited).
    let payload = b"lantern notes arrive via stdin";
    let report = ingest_stdin(&mut store, "stdin://note-1", None, payload).unwrap();
    assert_eq!(report.ingested.len(), 1);
    let src = &report.ingested[0];
    assert_eq!(src.uri, "stdin://note-1");
    assert_eq!(src.kind, "text/plain");
    assert!(src.path.is_none(), "stdin has no filesystem path");
    assert_eq!(src.chunks, 1);
    // Cast to usize so the assertion is robust to the store's integer width.
    assert_eq!(src.bytes as usize, payload.len());
    let hits = search(&store, "stdin", SearchOptions::default()).unwrap();
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].uri, "stdin://note-1");
    assert_eq!(hits[0].kind, "text/plain");
}
/// Declaring `application/jsonl` routes the payload through the
/// transcript extractor: each JSONL line becomes its own chunk, and hits
/// carry the role-prefixed transcript text.
#[test]
fn jsonl_kind_routes_through_transcript_extractor() {
    let (_root, mut store) = new_store();
    let payload = concat!(
        "{\"role\":\"user\",\"content\":\"ask about needles\"}\n",
        "{\"role\":\"assistant\",\"content\":\"needles are in the haystack\"}\n",
    );
    let report = ingest_stdin(
        &mut store,
        "stdin://session-42",
        Some("application/jsonl"),
        payload.as_bytes(),
    )
    .unwrap();
    assert_eq!(report.ingested.len(), 1);
    let src = &report.ingested[0];
    assert_eq!(src.kind, "application/jsonl");
    assert_eq!(src.chunks, 2);
    // Restrict the search to the jsonl kind to prove kind filtering works.
    let jsonl_only = SearchOptions {
        kind: Some("application/jsonl".into()),
        ..SearchOptions::default()
    };
    let hits = search(&store, "haystack", jsonl_only).unwrap();
    assert_eq!(hits.len(), 1);
    assert!(hits[0].text.contains("[assistant]"));
}
/// Re-ingesting byte-identical content under the same URI is skipped
/// (reported in `skipped` with an "unchanged" reason) rather than
/// re-indexed.
#[test]
fn reingesting_same_content_is_a_noop() {
    let (_root, mut store) = new_store();
    let body = b"hello world";
    let first = ingest_stdin(&mut store, "stdin://foo", None, body).unwrap();
    assert_eq!(first.ingested.len(), 1);
    let second = ingest_stdin(&mut store, "stdin://foo", None, body).unwrap();
    assert!(second.ingested.is_empty());
    assert_eq!(second.skipped.len(), 1);
    let reason = &second.skipped[0].reason;
    assert!(reason.contains("unchanged since last ingest"));
}
/// Ingesting new content under an existing URI keeps the same source id
/// but replaces its chunks: the old text stops matching and the new text
/// starts matching.
#[test]
fn reingesting_changed_content_replaces_chunks() {
    let (_root, mut store) = new_store();
    // Small helper: how many hits does a default search for `term` return?
    let hit_count = |store: &Store, term: &str| {
        search(store, term, SearchOptions::default()).unwrap().len()
    };
    let first = ingest_stdin(&mut store, "stdin://foo", None, b"old payload sentinel").unwrap();
    let original_id = first.ingested[0].source_id.clone();
    assert_eq!(hit_count(&store, "sentinel"), 1);
    let second = ingest_stdin(
        &mut store,
        "stdin://foo",
        None,
        b"replacement payload marker",
    )
    .unwrap();
    assert_eq!(second.ingested.len(), 1);
    // Same logical source: the id must be stable across the overwrite.
    assert_eq!(second.ingested[0].source_id, original_id);
    // Old chunks gone, new chunks searchable.
    assert_eq!(hit_count(&store, "sentinel"), 0);
    assert_eq!(hit_count(&store, "marker"), 1);
}
/// Binary (non-UTF-8) stdin payloads are not indexed; they land in the
/// skipped list with a reason mentioning UTF-8.
#[test]
fn non_utf8_stdin_is_reported_as_skipped() {
    let (_root, mut store) = new_store();
    let garbage: &[u8] = &[0xff, 0xfe, 0xfd];
    let report = ingest_stdin(&mut store, "stdin://binary", None, garbage).unwrap();
    assert!(report.ingested.is_empty());
    assert_eq!(report.skipped.len(), 1);
    assert!(report.skipped[0].reason.contains("UTF-8"));
}
/// Dedup is keyed by URI, not content: the same bytes ingested under two
/// different URIs produce two source rows.
#[test]
fn different_uris_produce_distinct_sources_even_for_same_payload() {
    let (_root, mut store) = new_store();
    for uri in ["stdin://a", "stdin://b"] {
        ingest_stdin(&mut store, uri, None, b"shared body").unwrap();
    }
    let count: i64 = store
        .conn()
        .query_row("SELECT COUNT(*) FROM sources", [], |r| r.get(0))
        .unwrap();
    assert_eq!(count, 2);
}
/// With `append: true`, each batch ingested under the same base URI is
/// stored as its own source: distinct source ids, unique derived URIs
/// (`base#...`), and both batches independently searchable.
#[test]
fn append_mode_preserves_each_batch_as_its_own_source() {
    let (_root, mut store) = new_store();
    let opts = StdinIngestOptions { append: true };
    let first = ingest_stdin_with(
        &mut store,
        "stdin://session-7",
        None,
        b"first batch body",
        &opts,
    )
    .unwrap();
    let second = ingest_stdin_with(
        &mut store,
        "stdin://session-7",
        None,
        b"second batch body",
        &opts,
    )
    .unwrap();
    assert_eq!(first.ingested.len(), 1);
    assert_eq!(second.ingested.len(), 1);
    assert_ne!(
        first.ingested[0].source_id, second.ingested[0].source_id,
        "append mode must produce distinct source ids per batch"
    );
    // Read back every stored URI and check the batch-uri derivation.
    let mut stmt = store
        .conn()
        .prepare("SELECT uri FROM sources ORDER BY uri")
        .unwrap();
    let uris: Vec<String> = stmt
        .query_map([], |row| row.get::<_, String>(0))
        .unwrap()
        .collect::<Result<_, _>>()
        .unwrap();
    assert_eq!(uris.len(), 2);
    assert_ne!(uris[0], uris[1], "batch uris must be unique");
    for uri in &uris {
        assert!(
            uri.starts_with("stdin://session-7#"),
            "expected batch uri to derive from base, got {uri}"
        );
    }
    // Each batch's body must remain searchable on its own.
    for term in ["first", "second"] {
        let hits = search(&store, term, SearchOptions::default()).unwrap();
        assert_eq!(hits.len(), 1);
    }
}
/// With `append: true` and an `application/jsonl` kind, each batch is
/// persisted as its own source, and per-turn transcript metadata (role,
/// session_id, turn_id, tool_name, timestamp) survives into the
/// `chunks` table across batches.
#[test]
fn append_mode_preserves_jsonl_metadata_across_batches() {
    let (_root, mut store) = new_store();
    let opts = StdinIngestOptions { append: true };
    // Batch one: a single user turn.
    // NOTE: raw byte-string fixtures — continuation lines stay at column 0
    // because their bytes (including the trailing newline) are the payload.
    let batch_one = br#"{"session_id":"sess-9","turn_id":"turn-1","role":"user","timestamp":1700000100,"content":"where are the notes"}
"#;
    // Batch two: an assistant turn, then a tool-call turn (no "role"
    // field — only "tool_name").
    let batch_two = br#"{"session_id":"sess-9","turn_id":"turn-2","role":"assistant","timestamp":1700000200,"content":"in the store"}
{"session_id":"sess-9","turn_id":"turn-3","tool_name":"grep","timestamp":1700000201,"content":"grep results"}
"#;
    ingest_stdin_with(
        &mut store,
        "stdin://sess-9",
        Some("application/jsonl"),
        batch_one,
        &opts,
    )
    .unwrap();
    ingest_stdin_with(
        &mut store,
        "stdin://sess-9",
        Some("application/jsonl"),
        batch_two,
        &opts,
    )
    .unwrap();
    // Append mode: two ingests under one base URI -> two source rows.
    let source_count: i64 = store
        .conn()
        .query_row("SELECT COUNT(*) FROM sources", [], |r| r.get(0))
        .unwrap();
    assert_eq!(source_count, 2, "each batch must persist as its own source");
    // Read the per-chunk metadata back, ordered chronologically by the
    // turn timestamps embedded in the fixtures.
    let mut stmt = store
        .conn()
        .prepare(
            "SELECT role, session_id, turn_id, tool_name, timestamp_unix
FROM chunks ORDER BY timestamp_unix",
        )
        .unwrap();
    // Tuple layout mirrors the SELECT column order:
    // (role, session_id, turn_id, tool_name, timestamp_unix).
    let rows: Vec<(
        Option<String>,
        Option<String>,
        Option<String>,
        Option<String>,
        Option<i64>,
    )> = stmt
        .query_map([], |row| {
            Ok((
                row.get(0)?,
                row.get(1)?,
                row.get(2)?,
                row.get(3)?,
                row.get(4)?,
            ))
        })
        .unwrap()
        .map(Result::unwrap)
        .collect();
    // Three turns total across the two batches.
    assert_eq!(rows.len(), 3);
    // Turn 1 (batch one): user message with full metadata.
    assert_eq!(rows[0].0.as_deref(), Some("user"));
    assert_eq!(rows[0].1.as_deref(), Some("sess-9"));
    assert_eq!(rows[0].2.as_deref(), Some("turn-1"));
    assert_eq!(rows[0].4, Some(1_700_000_100));
    // Turn 2 (batch two): assistant reply.
    assert_eq!(rows[1].0.as_deref(), Some("assistant"));
    assert_eq!(rows[1].2.as_deref(), Some("turn-2"));
    assert_eq!(rows[1].4, Some(1_700_000_200));
    // Turn 3 (batch two): tool call — role is NULL, tool_name is kept.
    assert_eq!(rows[2].0, None);
    assert_eq!(rows[2].2.as_deref(), Some("turn-3"));
    assert_eq!(rows[2].3.as_deref(), Some("grep"));
    assert_eq!(rows[2].4, Some(1_700_000_201));
}
/// Append mode never dedups: replaying byte-identical content still
/// creates a brand-new source on every ingest.
#[test]
fn append_mode_does_not_dedup_identical_payloads() {
    let (_root, mut store) = new_store();
    let opts = StdinIngestOptions { append: true };
    let body = b"same body";
    let first = ingest_stdin_with(&mut store, "stdin://replay", None, body, &opts).unwrap();
    assert_eq!(first.ingested.len(), 1);
    let second = ingest_stdin_with(&mut store, "stdin://replay", None, body, &opts).unwrap();
    assert_eq!(
        second.ingested.len(),
        1,
        "append mode must not treat identical content as unchanged"
    );
    let count: i64 = store
        .conn()
        .query_row("SELECT COUNT(*) FROM sources", [], |r| r.get(0))
        .unwrap();
    assert_eq!(count, 2);
}
/// Without `append`, a second ingest under the same URI overwrites the
/// first in place, leaving exactly one source row.
#[test]
fn default_mode_still_overwrites_same_uri() {
    let (_root, mut store) = new_store();
    let opts = StdinIngestOptions::default();
    for body in [b"first".as_slice(), b"second".as_slice()] {
        ingest_stdin_with(&mut store, "stdin://once", None, body, &opts).unwrap();
    }
    let count: i64 = store
        .conn()
        .query_row("SELECT COUNT(*) FROM sources", [], |r| r.get(0))
        .unwrap();
    assert_eq!(count, 1);
}