use std::fs;
use lantern::ingest::ingest_path;
use lantern::store::Store;
use rusqlite::params;
use tempfile::tempdir;
/// End-to-end check: a markdown file is ingested into exactly one source
/// row, split into one or more chunks, and the stored chunk texts — read
/// back in ordinal order — reassemble to the original file content.
#[test]
fn ingests_markdown_file_end_to_end() {
    let tmp = tempdir().unwrap();
    let store_path = tmp.path().join("store");
    let mut store = Store::initialize(&store_path).unwrap();

    // Repeat the body enough times that chunking has real work to do.
    let docs = tmp.path().join("data");
    fs::create_dir_all(&docs).unwrap();
    let md_file = docs.join("hello.md");
    let body = "# hello\n\nthis is a paragraph.\n\nanother one.\n".repeat(50);
    fs::write(&md_file, &body).unwrap();

    let report = ingest_path(&mut store, &md_file).unwrap();
    assert_eq!(report.ingested.len(), 1);
    assert!(report.skipped.is_empty());

    let entry = &report.ingested[0];
    assert!(entry.chunks >= 1);
    assert_eq!(entry.bytes as usize, body.len());
    assert_eq!(entry.kind, "text/markdown");

    // Row counts in the database must agree with the report.
    let (n_sources, n_chunks) = {
        let conn = store.conn();
        let sources = conn
            .query_row("SELECT COUNT(*) FROM sources", [], |row| row.get::<_, i64>(0))
            .unwrap();
        let chunks = conn
            .query_row("SELECT COUNT(*) FROM chunks", [], |row| row.get::<_, i64>(0))
            .unwrap();
        (sources, chunks)
    };
    assert_eq!(n_sources, 1);
    assert_eq!(n_chunks as usize, entry.chunks);

    // Concatenating chunks by ordinal must reproduce the file exactly.
    let rebuilt: String = {
        let conn = store.conn();
        let mut stmt = conn
            .prepare("SELECT text FROM chunks WHERE source_id = ?1 ORDER BY ordinal")
            .unwrap();
        let texts = stmt
            .query_map(params![entry.source_id], |row| row.get::<_, String>(0))
            .unwrap();
        texts.collect::<Result<Vec<_>, _>>().unwrap().join("")
    };
    assert_eq!(rebuilt, body);
}
/// Ingesting the same unmodified file twice must not re-ingest it: the
/// second pass ingests nothing and reports the file as skipped with an
/// explanatory reason.
#[test]
fn reingest_unchanged_file_is_noop() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();
    let note = tmp.path().join("note.txt");
    fs::write(&note, "hello world").unwrap();

    // First pass actually ingests the file.
    let initial = ingest_path(&mut store, &note).unwrap();
    assert_eq!(initial.ingested.len(), 1);

    // Second pass: nothing ingested, one skip with the noop reason.
    let rerun = ingest_path(&mut store, &note).unwrap();
    assert!(rerun.ingested.is_empty());
    assert_eq!(rerun.skipped.len(), 1);
    let reason = &rerun.skipped[0].reason;
    assert!(reason.contains("unchanged since last ingest"));
}
/// Editing a file and re-ingesting keeps the same source id but records a
/// new content hash, and the stored chunks for that id contain only the
/// new body (the old chunks were replaced, not appended to).
#[test]
fn reingest_updated_file_replaces_chunks() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();
    let note = tmp.path().join("note.txt");

    fs::write(&note, "old content").unwrap();
    let before = ingest_path(&mut store, &note).unwrap();
    let id = before.ingested[0].source_id.clone();
    let sha = before.ingested[0].content_sha256.clone();

    fs::write(&note, "brand new body after edit").unwrap();
    let after = ingest_path(&mut store, &note).unwrap();
    assert_eq!(after.ingested.len(), 1);

    // Same logical source, different content fingerprint.
    let entry = &after.ingested[0];
    assert_eq!(entry.source_id, id);
    assert_ne!(entry.content_sha256, sha);

    // The chunks stored under the original id reassemble to the new body.
    let stored: Vec<String> = {
        let conn = store.conn();
        let mut stmt = conn
            .prepare("SELECT text FROM chunks WHERE source_id = ?1 ORDER BY ordinal")
            .unwrap();
        let rows = stmt
            .query_map(params![id], |row| row.get::<_, String>(0))
            .unwrap();
        rows.collect::<Result<Vec<_>, _>>().unwrap()
    };
    assert_eq!(stored.join(""), "brand new body after edit");
}
/// A directory path is walked recursively: the three .md/.txt files
/// (including one in a nested subdirectory) are ingested, while the .log
/// file appears in neither `ingested` nor `skipped`.
#[test]
fn ingests_directory_recursively() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    let data = tmp.path().join("data");
    fs::create_dir_all(data.join("nested")).unwrap();
    for (name, body) in [
        ("a.md", "# a"),
        ("b.txt", "plain b"),
        ("nested/c.md", "# c"),
        ("ignored.log", "should be skipped"),
    ] {
        fs::write(data.join(name), body).unwrap();
    }

    let report = ingest_path(&mut store, &data).unwrap();
    assert_eq!(report.ingested.len(), 3);
    assert!(report.skipped.is_empty());
}
/// A file containing invalid UTF-8 is not ingested; it is reported as
/// skipped with a reason that mentions UTF-8.
#[test]
fn rejects_non_utf8_file() {
    let tmp = tempdir().unwrap();
    let mut store = Store::initialize(&tmp.path().join("store")).unwrap();

    // 0xFF/0xFE/0xFD can never occur in well-formed UTF-8.
    let bad = tmp.path().join("bad.txt");
    fs::write(&bad, [0xff, 0xfe, 0xfd]).unwrap();

    let report = ingest_path(&mut store, &bad).unwrap();
    assert!(report.ingested.is_empty());
    assert_eq!(report.skipped.len(), 1);
    assert!(report.skipped[0].reason.contains("UTF-8"));
}