use std::fs;
use lantern::ingest::ingest_path;
use lantern::search::{SearchOptions, search};
use lantern::store::Store;
use rusqlite::params;
use tempfile::tempdir;
// Shared JSONL transcript fixture: 6 physical lines, of which exactly 3 are
// expected to yield chunks (user line, assistant line, bare-"text" line).
// The byte layout matters — chunk_byte_ranges_match_their_source_line slices
// TRANSCRIPT directly by the stored byte offsets — so do not edit the content.
const TRANSCRIPT: &str = concat!(
// 1: plain user message with string content — extractable.
"{\"role\":\"user\",\"content\":\"Where should I keep the lantern?\"}\n",
// 2: assistant message whose content is an array of text blocks — the
// ingester is expected to join the blocks into one chunk.
"{\"role\":\"assistant\",\"content\":[",
"{\"type\":\"text\",\"text\":\"Hang it near the door.\"},",
"{\"type\":\"text\",\"text\":\"It should catch the evening breeze.\"}",
"]}\n",
// 3: blank line — skipped.
"\n",
// 4: invalid JSON — must be skipped, not abort ingestion.
"{\"malformed line that should be skipped\n",
// 5: tool message with empty content — nothing to index.
"{\"role\":\"tool\",\"content\":\"\"}\n",
// 6: alternate schema using a top-level "text" field — extractable.
"{\"text\":\"rust is a systems language\"}\n",
);
/// Writes the shared TRANSCRIPT fixture to a temp JSONL file, ingests it into
/// a fresh store, and hands back everything the caller must keep alive: the
/// temp dir (dropping it deletes the files), the store, and the file path.
fn ingest_transcript() -> (tempfile::TempDir, Store, std::path::PathBuf) {
    let dir = tempdir().unwrap();
    let transcript_path = dir.path().join("session.jsonl");
    fs::write(&transcript_path, TRANSCRIPT).unwrap();
    let mut store = Store::initialize(&dir.path().join("store")).unwrap();
    ingest_path(&mut store, &transcript_path).unwrap();
    (dir, store, transcript_path)
}
/// Ingesting the fixture registers a single JSONL source and produces one
/// chunk per extractable line (3 of the 6 lines carry indexable text).
#[test]
fn jsonl_ingest_produces_one_chunk_per_extractable_line() {
    let (_root, store, _file) = ingest_transcript();
    let conn = store.conn();
    let source_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM sources", [], |r| r.get(0))
        .unwrap();
    let chunk_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
        .unwrap();
    let kind: String = conn
        .query_row("SELECT kind FROM sources", [], |r| r.get(0))
        .unwrap();
    assert_eq!(source_count, 1);
    assert_eq!(chunk_count, 3, "3 extractable lines out of 6");
    assert_eq!(kind, "application/jsonl");
}
/// Chunk text keeps its `[role]` prefix, and multi-block assistant content is
/// joined into a single searchable chunk.
#[test]
fn role_prefix_is_preserved_and_searchable() {
    let (_root, store, _file) = ingest_transcript();

    // The user line is its own chunk, prefixed with its role.
    let from_user = search(&store, "lantern", SearchOptions::default()).unwrap();
    assert_eq!(from_user.len(), 1);
    assert!(from_user[0].text.starts_with("[user]"));

    // Both text blocks of the assistant message land in one chunk.
    let from_assistant = search(&store, "breeze", SearchOptions::default()).unwrap();
    assert_eq!(from_assistant.len(), 1);
    let chunk_text = &from_assistant[0].text;
    assert!(chunk_text.contains("[assistant]"));
    assert!(chunk_text.contains("Hang it near the door"));
    assert!(
        chunk_text.contains("evening breeze"),
        "multi-block content should be joined"
    );
}
/// A JSONL line with only a top-level "text" field (no role/content) is still
/// indexed, and its chunk text is the raw field value with no role prefix.
#[test]
fn alternate_text_field_lines_are_indexed() {
    let (_root, store, _file) = ingest_transcript();
    let results = search(&store, "systems", SearchOptions::default()).unwrap();
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].text, "rust is a systems language");
}
/// Every stored (byte_start, byte_end) range slices TRANSCRIPT back to a
/// complete JSONL line: non-empty, newline-terminated, starting with '{'.
#[test]
fn chunk_byte_ranges_match_their_source_line() {
    let (_root, store, _file) = ingest_transcript();
    let conn = store.conn();
    let source_id: String = conn
        .query_row("SELECT id FROM sources", [], |r| r.get(0))
        .unwrap();
    let mut stmt = conn
        .prepare(
            "SELECT byte_start, byte_end FROM chunks
             WHERE source_id = ?1 ORDER BY ordinal",
        )
        .unwrap();
    let ranges: Vec<(i64, i64)> = stmt
        .query_map(params![source_id], |row| Ok((row.get(0)?, row.get(1)?)))
        .unwrap()
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
    for &(start, end) in &ranges {
        assert!(end > start);
        // Offsets index into the fixture text itself, since the ingested
        // file was written verbatim from TRANSCRIPT.
        let line = &TRANSCRIPT[start as usize..end as usize];
        assert!(
            line.ends_with('\n'),
            "each chunk maps to a full JSONL line"
        );
        assert!(line.trim_start().starts_with('{'));
    }
}
/// Pointing ingest_path at a directory picks up both markdown and JSONL
/// files, and search hits report the correct source URI and kind.
#[test]
fn ingests_directory_mixing_jsonl_and_markdown() {
    let root = tempdir().unwrap();
    let corpus = root.path().join("data");
    fs::create_dir_all(&corpus).unwrap();
    fs::write(
        corpus.join("session.jsonl"),
        "{\"role\":\"user\",\"content\":\"needle in jsonl\"}\n",
    )
    .unwrap();
    fs::write(corpus.join("note.md"), "# markdown\n\nSome body.\n").unwrap();

    let mut store = Store::initialize(&root.path().join("store")).unwrap();
    let report = ingest_path(&mut store, &corpus).unwrap();
    assert_eq!(report.ingested.len(), 2);

    // The needle lives only in the JSONL file, so the single hit must be
    // attributed to it.
    let hits = search(&store, "needle", SearchOptions::default()).unwrap();
    assert_eq!(hits.len(), 1);
    assert!(hits[0].uri.ends_with("/session.jsonl"));
    assert_eq!(hits[0].kind, "application/jsonl");
}
/// A JSONL file whose lines parse but carry no extractable text is still
/// recorded as a source — it just yields zero chunks rather than an error.
#[test]
fn empty_or_non_transcript_jsonl_ingests_with_no_chunks() {
    let root = tempdir().unwrap();
    let jsonl = root.path().join("empty.jsonl");
    fs::write(&jsonl, "{\"timestamp\":1234}\n{\"foo\":\"bar\"}\n").unwrap();

    let mut store = Store::initialize(&root.path().join("store")).unwrap();
    let report = ingest_path(&mut store, &jsonl).unwrap();
    assert_eq!(report.ingested.len(), 1);
    assert_eq!(report.ingested[0].chunks, 0);

    let conn = store.conn();
    let chunk_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
        .unwrap();
    assert_eq!(chunk_count, 0);
}