use std::fs;
use lantern::embed::{EmbedOptions, MockEmbeddingBackend, embed_missing_with};
use lantern::ingest::ingest_path;
use lantern::search::{SearchOptions, SemanticOptions, hybrid_search_with, search};
use lantern::store::Store;
use rusqlite::params;
use tempfile::tempdir;
/// Builds a scratch store pre-populated with the given `(file name, contents)` pairs.
///
/// A temporary directory is created, a `Store` is initialized in its `store/`
/// subdirectory, each file is written into its `data/` subdirectory, and `data/` is
/// then ingested into the store. The `TempDir` handle is returned together with the
/// store so the caller decides how long the directory is kept around.
///
/// # Panics
/// Panics if any filesystem, store-initialization or ingest step fails.
fn setup_store_with(files: &[(&str, &str)]) -> (tempfile::TempDir, Store) {
    let root = tempdir().unwrap();
    let store_dir = root.path().join("store");
    let data_dir = root.path().join("data");

    let mut store = Store::initialize(&store_dir).unwrap();
    fs::create_dir_all(&data_dir).unwrap();
    files
        .iter()
        .for_each(|(name, body)| fs::write(data_dir.join(name), body).unwrap());
    ingest_path(&mut store, &data_dir).unwrap();

    (root, store)
}
/// A `min_confidence` floor must remove low-confidence matches from keyword search.
///
/// Two documents both match "lantern". One is then stamped as current and just
/// accessed, the other as dating from unix second 1 and never accessed. With a floor
/// of 0.5 only `fresh.md` is expected to remain.
#[test]
fn min_confidence_drops_low_confidence_hits() {
    let (_root, store) = setup_store_with(&[
        ("fresh.md", "Lanterns glow in the dark forest."),
        ("stale.md", "Lanterns are useful in old mines as well."),
    ]);

    // Sanity check: with no floor, both documents come back.
    let unfiltered = search(&store, "lantern", SearchOptions::default()).unwrap();
    assert_eq!(unfiltered.len(), 2, "both chunks should match on 'lantern'");

    // Fetches the connection anew for every statement rather than holding it
    // across the `search` calls.
    let run_sql = |sql: &str| {
        store.conn().execute(sql, []).unwrap();
    };

    // Make the fresh document look brand new and just accessed.
    run_sql(
        "UPDATE chunks\n\
         SET timestamp_unix = unixepoch('now'),\n\
         last_accessed_at = unixepoch('now'),\n\
         access_count = 0\n\
         WHERE source_id = (SELECT id FROM sources WHERE uri LIKE '%fresh.md')",
    );
    // Make the stale document look ancient and never accessed.
    run_sql(
        "UPDATE chunks\n\
         SET timestamp_unix = 1,\n\
         last_accessed_at = NULL,\n\
         access_count = 0\n\
         WHERE source_id = (SELECT id FROM sources WHERE uri LIKE '%stale.md')",
    );

    let with_floor = SearchOptions {
        min_confidence: Some(0.5),
        ..SearchOptions::default()
    };
    let filtered = search(&store, "lantern", with_floor).unwrap();

    assert_eq!(
        filtered.len(),
        1,
        "stale chunk should be dropped by a 0.5 floor; got {filtered:#?}"
    );
    let survivor = &filtered[0];
    assert!(survivor.uri.ends_with("/fresh.md"));
    assert!(survivor.confidence >= 0.5);
}
/// A chunk removed by the `min_confidence` floor must not be recorded as retrieved.
///
/// The only chunk in the store is stamped as ancient and never accessed, then searched
/// for with a floor of 0.99. The search is expected to return nothing and to leave the
/// chunk's `access_count` and `last_accessed_at` untouched.
#[test]
fn filtered_hits_do_not_count_as_retrievals() {
    // Applies to every chunk; the store holds a single document.
    const MAKE_STALE: &str = "UPDATE chunks\n\
         SET timestamp_unix = 1,\n\
         last_accessed_at = NULL,\n\
         access_count = 0,\n\
         access_decay_at = NULL";

    let (_root, store) = setup_store_with(&[("stale.md", "Lanterns are ancient history.")]);
    store.conn().execute(MAKE_STALE, []).unwrap();

    let chunk_id = store
        .conn()
        .query_row("SELECT id FROM chunks LIMIT 1", [], |row| {
            row.get::<_, String>(0)
        })
        .unwrap();

    let strict_floor = SearchOptions {
        min_confidence: Some(0.99),
        ..SearchOptions::default()
    };
    let hits = search(&store, "lantern", strict_floor).unwrap();
    assert!(hits.is_empty(), "floor of 0.99 should drop the stale chunk");

    // Re-read the access bookkeeping after the search.
    let (access_count, last_accessed_at) = store
        .conn()
        .query_row(
            "SELECT access_count, last_accessed_at FROM chunks WHERE id = ?1",
            params![chunk_id],
            |row| Ok((row.get::<_, i64>(0)?, row.get::<_, Option<i64>>(1)?)),
        )
        .unwrap();
    assert_eq!(
        access_count, 0,
        "filtered-out chunks should not count as retrievals"
    );
    assert_eq!(
        last_accessed_at, None,
        "filtered-out chunks should not record a retrieval timestamp"
    );
}
/// Hybrid search must not update access bookkeeping for chunks its floor removes.
///
/// Two documents are ingested and embedded with the mock backend. One is stamped as
/// current, the other as ancient and never accessed. After a hybrid search with a 0.5
/// floor, no returned hit may be `stale.md` or sit below the floor, and the stale
/// chunk's `access_count` and `last_accessed_at` must be unchanged.
#[test]
fn hybrid_search_does_not_bump_filtered_hits() {
    const MOCK_MODEL: &str = "mock-embed-test";
    const MOCK_URL: &str = "http://mock";

    let (_root, mut store) = setup_store_with(&[
        ("fresh.md", "Lanterns glow in the dark forest."),
        ("stale.md", "Lanterns are useful in old mines as well."),
    ]);

    // Embed every chunk with the mock backend.
    let backend = MockEmbeddingBackend::new(64);
    let embed_opts = EmbedOptions {
        model: MOCK_MODEL.to_string(),
        ollama_url: MOCK_URL.to_string(),
        limit: None,
    };
    embed_missing_with(&mut store, &embed_opts, &backend).unwrap();

    // Fetches the connection anew for every statement rather than holding it
    // across the search call.
    let run_sql = |sql: &str| {
        store.conn().execute(sql, []).unwrap();
    };
    // Fresh document: current timestamp, just accessed, counters cleared.
    run_sql(
        "UPDATE chunks\n\
         SET timestamp_unix = unixepoch('now'),\n\
         last_accessed_at = unixepoch('now'),\n\
         access_count = 0,\n\
         access_decay_at = NULL\n\
         WHERE source_id = (SELECT id FROM sources WHERE uri LIKE '%fresh.md')",
    );
    // Stale document: ancient timestamp, never accessed, counters cleared.
    run_sql(
        "UPDATE chunks\n\
         SET timestamp_unix = 1,\n\
         last_accessed_at = NULL,\n\
         access_count = 0,\n\
         access_decay_at = NULL\n\
         WHERE source_id = (SELECT id FROM sources WHERE uri LIKE '%stale.md')",
    );

    let stale_chunk_id = store
        .conn()
        .query_row(
            "SELECT id FROM chunks WHERE source_id = (SELECT id FROM sources WHERE uri LIKE '%stale.md')",
            [],
            |row| row.get::<_, String>(0),
        )
        .unwrap();

    let opts = SemanticOptions {
        limit: 10,
        kind: None,
        path_contains: None,
        session_id: None,
        model: MOCK_MODEL.to_string(),
        ollama_url: MOCK_URL.to_string(),
        instruction: None,
        min_confidence: Some(0.5),
    };
    let hits = hybrid_search_with(&store, "lantern", &opts, &backend).unwrap();

    let stale_returned = hits.iter().any(|hit| hit.uri.ends_with("/stale.md"));
    assert!(
        !stale_returned,
        "stale chunk must be dropped by the 0.5 floor; got {hits:#?}"
    );
    let below_floor = hits.iter().any(|hit| !(hit.confidence >= 0.5));
    assert!(
        !below_floor,
        "every survivor must clear the floor; got {hits:#?}"
    );

    // The stale chunk's bookkeeping must be exactly as the UPDATE above left it.
    let (access_count, last_accessed_at) = store
        .conn()
        .query_row(
            "SELECT access_count, last_accessed_at FROM chunks WHERE id = ?1",
            params![stale_chunk_id],
            |row| Ok((row.get::<_, i64>(0)?, row.get::<_, Option<i64>>(1)?)),
        )
        .unwrap();
    assert_eq!(
        access_count, 0,
        "filtered-out chunk must not be bumped by the inner keyword/semantic passes"
    );
    assert_eq!(
        last_accessed_at, None,
        "filtered-out chunk must not record a retrieval timestamp"
    );
}
/// With no `min_confidence` floor, a returned hit is still recorded as a retrieval.
///
/// After one default-options search over a single document, that chunk's
/// `access_count` must be 1 and its `last_accessed_at` must be set.
#[test]
fn no_floor_preserves_existing_bump_behavior() {
    let (_root, store) = setup_store_with(&[("a.md", "Lanterns glow in the dark.")]);

    let hits = search(&store, "lantern", SearchOptions::default()).unwrap();
    assert_eq!(hits.len(), 1);
    let chunk_id = hits[0].chunk_id.clone();

    // Read back the bookkeeping columns for the chunk that was just returned.
    let (access_count, last_accessed_at) = store
        .conn()
        .query_row(
            "SELECT access_count, last_accessed_at FROM chunks WHERE id = ?1",
            params![chunk_id],
            |row| Ok((row.get::<_, i64>(0)?, row.get::<_, Option<i64>>(1)?)),
        )
        .unwrap();

    assert_eq!(access_count, 1);
    assert!(last_accessed_at.is_some());
}