use std::fs;
use lantern::ingest::ingest_path;
use lantern::search::{SearchOptions, search};
use lantern::store::Store;
use tempfile::tempdir;
/// Builds a throwaway store pre-populated with the given files.
///
/// Creates a temporary directory, initializes a `Store` under its `store`
/// subdirectory, writes every `(name, body)` pair into a sibling `data`
/// directory, and ingests that directory into the store.
///
/// The `TempDir` handle is returned together with the store so the caller
/// controls how long the temporary directory stays alive.
///
/// # Panics
///
/// Panics if any filesystem, store-initialization, or ingest step fails.
fn setup_store_with(files: &[(&str, &str)]) -> (tempfile::TempDir, Store) {
    let workspace = tempdir().unwrap();
    let store_dir = workspace.path().join("store");
    let data_dir = workspace.path().join("data");

    let mut store = Store::initialize(&store_dir).unwrap();
    fs::create_dir_all(&data_dir).unwrap();
    files
        .iter()
        .for_each(|(name, body)| fs::write(data_dir.join(name), body).unwrap());
    ingest_path(&mut store, &data_dir).unwrap();

    (workspace, store)
}
/// A single-term query over two files returns exactly one hit, and that hit
/// carries its URI, kind, ordinal, byte range, text, highlighted snippet,
/// score, and identifiers.
#[test]
fn finds_matching_chunk_with_full_provenance() {
    let corpus = [
        ("a.md", "Lanterns glow in the dark forest."),
        ("b.md", "Rust is a systems programming language."),
    ];
    let (_root, store) = setup_store_with(&corpus);

    let options = SearchOptions {
        limit: 10,
        ..SearchOptions::default()
    };
    let hits = search(&store, "lantern", options).unwrap();
    assert_eq!(hits.len(), 1);

    let only = &hits[0];

    // Where the chunk came from.
    assert!(only.uri.starts_with("file://"));
    assert!(only.uri.ends_with("/a.md"));
    assert_eq!(only.kind, "text/markdown");
    assert!(!only.chunk_id.is_empty());
    assert!(!only.source_id.is_empty());

    // Position of the chunk inside its source.
    assert_eq!(only.ordinal, 0);
    assert_eq!(only.byte_start, 0);
    assert!(only.byte_end > 0);

    // Matched content, the `<<` / `>>` markers in the snippet, and the score.
    assert!(only.text.to_lowercase().contains("lantern"));
    for marker in ["<<", ">>"] {
        assert!(only.snippet.contains(marker));
    }
    assert!(only.score <= 0.0, "bm25 scores should be <= 0");
}
/// A two-word query only matches the file containing both words; files with
/// just one of the words are not returned.
#[test]
fn multiple_tokens_are_and_joined() {
    let corpus = [
        ("a.md", "alpha beta gamma"),
        ("b.md", "alpha only"),
        ("c.md", "beta only"),
    ];
    let (_root, store) = setup_store_with(&corpus);

    let options = SearchOptions {
        limit: 10,
        ..SearchOptions::default()
    };
    let hits = search(&store, "alpha beta", options).unwrap();

    assert_eq!(hits.len(), 1);
    assert!(hits[0].uri.ends_with("/a.md"));
}
/// Both an empty query and a whitespace-only query succeed and yield no hits.
#[test]
fn empty_query_returns_no_results() {
    let (_root, store) = setup_store_with(&[("a.md", "anything goes here")]);

    for blank_query in ["", " "] {
        let hits = search(&store, blank_query, SearchOptions::default()).unwrap();
        assert!(hits.is_empty());
    }
}
/// A term that appears in no ingested file succeeds with an empty result.
#[test]
fn unknown_term_returns_no_results() {
    let (_root, store) = setup_store_with(&[("a.md", "hello world")]);

    let options = SearchOptions::default();
    assert!(search(&store, "xyzzyquux", options).unwrap().is_empty());
}
/// When two files match, the one that repeats the term many times is ranked
/// ahead of the one that mentions it once, and scores are non-decreasing in
/// result order.
#[test]
fn ranking_prefers_term_dense_chunks() {
    let dense_body = "needle needle needle needle ".repeat(5);
    let (_root, store) = setup_store_with(&[
        ("dense.md", dense_body.as_str()),
        ("sparse.md", "needle once among many other words here"),
    ]);

    let options = SearchOptions {
        limit: 10,
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", options).unwrap();
    assert_eq!(hits.len(), 2);

    let (top, runner_up) = (&hits[0], &hits[1]);
    assert!(
        top.score <= runner_up.score,
        "results should be sorted by bm25 ascending (more relevant first)"
    );
    assert!(top.uri.ends_with("/dense.md"));
}
/// With five matching files and `limit: 3`, exactly three hits come back.
#[test]
fn limit_caps_result_count() {
    // Owned file names must outlive the borrowed `(&str, &str)` pairs below.
    let names: Vec<String> = (0..5).map(|i| format!("f{i}.md")).collect();
    let corpus: Vec<(&str, &str)> = names
        .iter()
        .map(|name| (name.as_str(), "needle in here somewhere"))
        .collect();
    let (_root, store) = setup_store_with(&corpus);

    let options = SearchOptions {
        limit: 3,
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", options).unwrap();

    assert_eq!(hits.len(), 3);
}
/// With a markdown, a plain-text, and a JSONL file all containing the term,
/// a `kind` filter returns only the hit of the requested kind.
#[test]
fn kind_filter_restricts_to_matching_kind() {
    let workspace = tempdir().unwrap();
    let store_dir = workspace.path().join("store");
    let data_dir = workspace.path().join("data");

    let mut store = Store::initialize(&store_dir).unwrap();
    fs::create_dir_all(&data_dir).unwrap();

    let fixtures = [
        ("note.md", "needle in markdown"),
        ("plain.txt", "needle in plain text"),
        (
            "session.jsonl",
            "{\"role\":\"user\",\"content\":\"needle in jsonl\"}\n",
        ),
    ];
    for (name, body) in fixtures {
        fs::write(data_dir.join(name), body).unwrap();
    }
    ingest_path(&mut store, &data_dir).unwrap();

    // Same query each time; only the kind filter varies.
    let search_kind = |kind: &'static str| {
        let options = SearchOptions {
            kind: Some(kind.into()),
            ..SearchOptions::default()
        };
        search(&store, "needle", options).unwrap()
    };

    let markdown = search_kind("text/markdown");
    assert_eq!(markdown.len(), 1);
    assert_eq!(markdown[0].kind, "text/markdown");
    assert!(markdown[0].uri.ends_with("/note.md"));

    let jsonl = search_kind("application/jsonl");
    assert_eq!(jsonl.len(), 1);
    assert_eq!(jsonl[0].kind, "application/jsonl");
}
/// With three files matching the term, `path_contains` narrows the result to
/// the one file whose URI contains the given substring.
#[test]
fn path_filter_restricts_to_matching_substring() {
    let corpus = [
        ("apples.md", "needle in apples"),
        ("bananas.md", "needle in bananas"),
        ("cherries.md", "needle in cherries"),
    ];
    let (_root, store) = setup_store_with(&corpus);

    let options = SearchOptions {
        path_contains: Some("bananas".into()),
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", options).unwrap();

    assert_eq!(hits.len(), 1);
    assert!(hits[0].uri.ends_with("/bananas.md"));
}
/// When both `kind` and `path_contains` are set, only the file satisfying
/// both filters is returned.
#[test]
fn combined_kind_and_path_filters_intersect() {
    let workspace = tempdir().unwrap();
    let store_dir = workspace.path().join("store");
    let data_dir = workspace.path().join("data");

    let mut store = Store::initialize(&store_dir).unwrap();
    fs::create_dir_all(&data_dir).unwrap();

    // Two files share the "alpha" name and two share the `.md` extension;
    // only `notes-alpha.md` has both.
    let fixtures = [
        ("notes-alpha.md", "needle one"),
        ("notes-beta.md", "needle two"),
        ("notes-alpha.txt", "needle three"),
    ];
    for (name, body) in fixtures {
        fs::write(data_dir.join(name), body).unwrap();
    }
    ingest_path(&mut store, &data_dir).unwrap();

    let options = SearchOptions {
        kind: Some("text/markdown".into()),
        path_contains: Some("alpha".into()),
        ..SearchOptions::default()
    };
    let hits = search(&store, "needle", options).unwrap();

    assert_eq!(hits.len(), 1);
    let only = &hits[0];
    assert_eq!(only.kind, "text/markdown");
    assert!(only.uri.ends_with("/notes-alpha.md"));
}
/// A `kind` filter or a `path_contains` filter that matches no ingested
/// source yields an empty result rather than an error.
#[test]
fn filters_with_no_matching_source_return_empty() {
    let (_root, store) = setup_store_with(&[("a.md", "plenty of needles here")]);

    let wrong_kind = SearchOptions {
        kind: Some("application/jsonl".into()),
        ..SearchOptions::default()
    };
    assert!(search(&store, "needle", wrong_kind).unwrap().is_empty());

    let wrong_path = SearchOptions {
        path_contains: Some("does-not-exist".into()),
        ..SearchOptions::default()
    };
    assert!(search(&store, "needle", wrong_path).unwrap().is_empty());
}
/// Each search increments the stored access counter of the returned chunk and
/// refreshes its access timestamps.
///
/// The hit handed back by a search carries the values from before that
/// search's own bump; the updated values are checked by reading the `chunks`
/// row directly.
#[test]
fn keyword_search_bumps_access_count_and_last_accessed_at() {
    const ACCESS_SQL: &str =
        "SELECT access_count, last_accessed_at, access_decay_at FROM chunks WHERE id = ?1";

    let (_root, store) = setup_store_with(&[("a.md", "needle in haystack")]);

    // Reads (access_count, last_accessed_at, access_decay_at) for one chunk.
    let read_access = |chunk_id: &str| -> (i64, Option<i64>, Option<i64>) {
        store
            .conn()
            .query_row(ACCESS_SQL, rusqlite::params![chunk_id], |row| {
                Ok((row.get(0)?, row.get(1)?, row.get(2)?))
            })
            .unwrap()
    };

    // First search: the hit shows untouched counters, the row shows one access.
    let first_hits = search(&store, "needle", SearchOptions::default()).unwrap();
    assert_eq!(first_hits.len(), 1);
    assert_eq!(first_hits[0].access_count, 0);
    assert_eq!(first_hits[0].last_accessed_at, None);

    let (first_count, first_accessed_at, first_decay_at) =
        read_access(first_hits[0].chunk_id.as_str());
    assert_eq!(first_count, 1);
    assert!(first_accessed_at.is_some());
    assert_eq!(first_decay_at, first_accessed_at);

    // Second search: the hit shows the first bump, the row shows two accesses.
    let second_hits = search(&store, "needle", SearchOptions::default()).unwrap();
    assert_eq!(second_hits.len(), 1);
    assert_eq!(second_hits[0].access_count, 1);
    assert_eq!(second_hits[0].last_accessed_at, first_accessed_at);

    let (second_count, second_accessed_at, second_decay_at) =
        read_access(second_hits[0].chunk_id.as_str());
    assert_eq!(second_count, 2);
    assert!(second_accessed_at.is_some());
    assert!(second_accessed_at >= first_accessed_at);
    assert_eq!(second_decay_at, second_accessed_at);
}
/// After a file is rewritten and ingested again, a word that only appeared in
/// the old contents no longer matches, while a word from the new contents
/// does.
#[test]
fn reingest_removes_stale_fts_entries() {
    let workspace = tempdir().unwrap();
    let mut store = Store::initialize(&workspace.path().join("store")).unwrap();
    let note = workspace.path().join("note.md");

    // First version of the file.
    fs::write(&note, "original sentinel word").unwrap();
    ingest_path(&mut store, &note).unwrap();
    let before = search(&store, "sentinel", SearchOptions::default()).unwrap();
    assert_eq!(before.len(), 1);

    // Overwrite the same path and ingest it again.
    fs::write(&note, "entirely different contents now").unwrap();
    ingest_path(&mut store, &note).unwrap();

    let stale = search(&store, "sentinel", SearchOptions::default()).unwrap();
    assert!(stale.is_empty());

    let fresh = search(&store, "different", SearchOptions::default()).unwrap();
    assert_eq!(fresh.len(), 1);
}