#![cfg(test)]
use crate::extras::dirge_paths::ProjectPaths;
use crate::extras::memory_db::{MemoryKind, SqliteMemoryStore};
/// One evaluation fixture: a memory to store plus the queries that are
/// expected to retrieve it.
struct MemoryProbe {
/// Text stored as the memory entry; also the exact string a search result
/// must equal to count as a hit.
content: &'static str,
/// Kind passed along with the content when the entry is added to the store.
kind: MemoryKind,
/// Queries selected by `pairs(.., false)`; in the seed corpus these reuse
/// words that appear in `content`.
lexical_queries: &'static [&'static str],
/// Queries selected by `pairs(.., true)`; in the seed corpus these are
/// paraphrases of `content` rather than quotations from it.
semantic_queries: &'static [&'static str],
}
fn seed_corpus() -> Vec<MemoryProbe> {
vec![
MemoryProbe {
content: "build the project with cargo build --bin dirge",
kind: MemoryKind::Procedural,
lexical_queries: &["cargo build", "build the project"],
semantic_queries: &["compile the executable"],
},
MemoryProbe {
content: "run the test suite with cargo test --bin dirge",
kind: MemoryKind::Procedural,
lexical_queries: &["cargo test", "run the test suite"],
semantic_queries: &["execute unit checks"],
},
MemoryProbe {
content: "the project pins its MSRV in rust-toolchain.toml",
kind: MemoryKind::Semantic,
lexical_queries: &["MSRV rust toolchain"],
semantic_queries: &["minimum supported language baseline"],
},
MemoryProbe {
content: "format all code with cargo fmt before committing",
kind: MemoryKind::Procedural,
lexical_queries: &["cargo fmt", "format code before committing"],
semantic_queries: &["tidy whitespace and indentation"],
},
MemoryProbe {
content: "long-term memory persists in SQLite at .dirge/sessions/state.db",
kind: MemoryKind::Semantic,
lexical_queries: &["SQLite memory persists", "state.db"],
semantic_queries: &["where recollections are saved"],
},
MemoryProbe {
content: "the main agent loop lives in src/agent/agent_loop.rs",
kind: MemoryKind::Semantic,
lexical_queries: &["agent loop", "agent_loop.rs"],
semantic_queries: &["primary control cycle location"],
},
MemoryProbe {
content: "secrets are redacted before FTS indexing of messages",
kind: MemoryKind::Semantic,
lexical_queries: &["redacted FTS indexing", "secrets"],
semantic_queries: &["credentials scrubbed from search"],
},
MemoryProbe {
content: "use bd beads for issue tracking not markdown TODO lists",
kind: MemoryKind::Procedural,
lexical_queries: &["beads issue tracking", "markdown TODO"],
semantic_queries: &["how to file a ticket"],
},
]
}
/// Flattens the corpus into `(query, expected content)` pairs.
///
/// With `semantic == true` the paraphrased queries are used; otherwise the
/// lexical ones. Pairs appear in corpus order, and within one probe in the
/// order its queries are listed.
fn pairs(corpus: &[MemoryProbe], semantic: bool) -> Vec<(&'static str, &'static str)> {
    let mut flattened = Vec::new();
    for probe in corpus {
        let queries = match semantic {
            true => probe.semantic_queries,
            false => probe.lexical_queries,
        };
        flattened.extend(queries.iter().map(|query| (*query, probe.content)));
    }
    flattened
}
/// Outcome of one Recall@k evaluation run.
struct RecallReport {
    /// Cut-off used when checking the ranked results.
    k: usize,
    /// Number of `(query, target)` pairs evaluated.
    total: usize,
    /// Number of pairs whose target appeared within the first `k` results.
    hits: usize,
    /// The `(query, target)` pairs that were not recovered.
    misses: Vec<(String, String)>,
}

impl RecallReport {
    /// Fraction of evaluated pairs that were hits, in `0.0..=1.0`.
    ///
    /// An empty evaluation (`total == 0`) reports `0.0` rather than dividing
    /// by zero.
    fn recall(&self) -> f64 {
        match self.total {
            0 => 0.0,
            evaluated => self.hits as f64 / evaluated as f64,
        }
    }
}
/// Scores a retriever with Recall@k.
///
/// `search` is called once per pair with the query and must return result
/// contents in rank order. A pair counts as a hit when one of the first `k`
/// results equals the target string exactly; every other pair is recorded in
/// the report's `misses`.
fn recall_at_k(
    search: impl Fn(&str) -> Vec<String>,
    pairs: &[(&'static str, &'static str)],
    k: usize,
) -> RecallReport {
    let mut misses = Vec::new();
    for &(query, target) in pairs {
        let ranked = search(query);
        let recovered = ranked
            .iter()
            .take(k)
            .any(|candidate| candidate.as_str() == target);
        if !recovered {
            misses.push((query.to_string(), target.to_string()));
        }
    }
    let total = pairs.len();
    RecallReport {
        k,
        total,
        // Every pair is either a hit or a recorded miss.
        hits: total - misses.len(),
        misses,
    }
}
/// Creates a fresh scratch project directory under the system temp dir and
/// returns it together with a `ProjectPaths` built from it.
///
/// The directory name combines the process id, the current time in
/// nanoseconds and a process-wide sequence number. The sequence number keeps
/// names distinct when tests on parallel threads of the same process read the
/// same clock value (possible on platforms with a coarse clock); without it,
/// one caller's `remove_dir_all` could wipe another caller's directory.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch or if the
/// directory cannot be created.
fn temp_project() -> (ProjectPaths, std::path::PathBuf) {
    static NEXT_SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("system clock must not be set before the Unix epoch")
        .as_nanos();
    // Relaxed is sufficient: only the uniqueness of the returned value
    // matters, not its ordering relative to other memory operations.
    let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let dir = std::env::temp_dir().join(format!(
        "dirge-retrieval-eval-{}-{}-{}",
        std::process::id(),
        nanos,
        seq,
    ));
    // Best effort: clear any stale directory left behind under the same name.
    let _ = std::fs::remove_dir_all(&dir);
    // NOTE(review): the `.git` subdirectory presumably marks the directory as
    // a project root for `ProjectPaths` — confirm against its implementation.
    std::fs::create_dir_all(dir.join(".git")).unwrap();
    (ProjectPaths::new(&dir), dir)
}
/// Loads a `SqliteMemoryStore` in a fresh temp project and adds every corpus
/// entry to it under the `"memory"` label.
///
/// Returns the store together with the temp directory so the caller can
/// remove it afterwards. Panics if loading the store or adding an entry fails.
fn seeded_bm25_search(corpus: &[MemoryProbe]) -> (SqliteMemoryStore, std::path::PathBuf) {
    let (paths, dir) = temp_project();
    let store = SqliteMemoryStore::load(&paths).unwrap();
    corpus.iter().for_each(|probe| {
        store
            .add_entry("memory", probe.content, Some(probe.kind))
            .unwrap();
    });
    (store, dir)
}
/// Runs `query` against the store and returns the `content` string of each
/// result, in the order the store returned them.
///
/// A response without a `results` array yields an empty list, and results
/// whose `content` is not a string are skipped. Panics if the search itself
/// fails.
fn ranked_contents(store: &SqliteMemoryStore, query: &str) -> Vec<String> {
    let response = store.search_entries(query).unwrap();
    match response["results"].as_array() {
        Some(rows) => rows
            .iter()
            .filter_map(|row| row["content"].as_str())
            .map(str::to_string)
            .collect(),
        None => Vec::new(),
    }
}
/// Sanity check on the harness itself: a retriever that always returns the
/// registered target for a query must score a recall of exactly 1.0.
#[test]
fn harness_credits_a_perfect_retriever() {
    let corpus = seed_corpus();
    let lexical_pairs = pairs(&corpus, false);
    // Oracle: looks the answer up in the corpus instead of searching.
    let oracle = |query: &str| vec![target_for(&corpus, query).to_string()];
    let outcome = recall_at_k(oracle, &lexical_pairs, 5);
    assert_eq!(outcome.recall(), 1.0, "perfect retriever must score 1.0");
    assert!(outcome.misses.is_empty());
}
/// Sanity check on the harness itself: a retriever that never returns
/// anything must score 0.0 and have every pair reported as a miss.
#[test]
fn harness_flags_a_blind_retriever() {
    let corpus = seed_corpus();
    let lexical_pairs = pairs(&corpus, false);
    let blind = |_query: &str| Vec::<String>::new();
    let outcome = recall_at_k(blind, &lexical_pairs, 5);
    assert_eq!(outcome.recall(), 0.0);
    assert_eq!(outcome.misses.len(), lexical_pairs.len());
}
/// Baseline: searching the seeded store with the lexical queries must reach a
/// Recall@5 of at least 0.85.
#[test]
fn bm25_baseline_recovers_lexical_queries() {
    let corpus = seed_corpus();
    let (store, dir) = seeded_bm25_search(&corpus);
    let lexical = pairs(&corpus, false);
    let report = recall_at_k(|q| ranked_contents(&store, q), &lexical, 5);
    // Tear down before asserting so a failed assertion does not leak the temp
    // directory. The store is dropped first so nothing it may still hold open
    // inside the directory gets in the way of the removal.
    drop(store);
    let _ = std::fs::remove_dir_all(&dir);
    assert!(
        report.recall() >= 0.85,
        "BM25 lexical Recall@{} = {:.2} (baseline); misses: {:?}",
        report.k,
        report.recall(),
        report.misses,
    );
}
/// Documents the gap this harness exists to measure: on the same store, the
/// paraphrased queries must score strictly lower Recall@5 than the lexical
/// ones, and the report must list the concrete paraphrase misses.
#[test]
fn bm25_has_a_paraphrase_gap() {
    let corpus = seed_corpus();
    let (store, dir) = seeded_bm25_search(&corpus);
    let lexical = recall_at_k(|q| ranked_contents(&store, q), &pairs(&corpus, false), 5);
    let semantic = recall_at_k(|q| ranked_contents(&store, q), &pairs(&corpus, true), 5);
    // Tear down before asserting so a failed assertion does not leak the temp
    // directory. The store is dropped first so nothing it may still hold open
    // inside the directory gets in the way of the removal.
    drop(store);
    let _ = std::fs::remove_dir_all(&dir);
    assert!(
        semantic.recall() < lexical.recall(),
        "paraphrase recall ({:.2}) must trail lexical ({:.2}) — the gap hybrid closes",
        semantic.recall(),
        lexical.recall(),
    );
    assert!(
        !semantic.misses.is_empty(),
        "the harness must surface concrete paraphrase misses for diagnosis",
    );
}
/// Returns the content of the first probe that lists `query` among its
/// lexical or semantic queries.
///
/// # Panics
///
/// Panics if no probe in `corpus` registers `query`.
fn target_for<'a>(corpus: &'a [MemoryProbe], query: &str) -> &'a str {
    for probe in corpus {
        let registered = probe
            .lexical_queries
            .iter()
            .chain(probe.semantic_queries.iter())
            .any(|known| *known == query);
        if registered {
            return probe.content;
        }
    }
    panic!("query {query:?} is not registered in the corpus")
}
/// Deterministic stand-in embedder for tests: maps hand-picked words onto
/// eight concept dimensions so a paraphrase and its target share a dimension.
struct ConceptEmbedder;

impl ConceptEmbedder {
    /// Returns the concept dimension (`0..8`) for `word`, compared
    /// case-insensitively, or `None` when the word is not in the vocabulary.
    fn concept(word: &str) -> Option<usize> {
        // The position of a group in this table is its concept dimension.
        const VOCABULARY: [&[&str]; 8] = [
            &["build", "compile", "executable"],
            &["test", "tests", "suite", "unit", "checks"],
            &["msrv", "toolchain", "minimum", "supported", "language", "baseline"],
            &["format", "fmt", "tidy", "whitespace", "indentation"],
            &["memory", "persists", "sqlite", "recollections", "saved"],
            &["loop", "control", "cycle", "primary"],
            &["secrets", "redacted", "credentials", "scrubbed"],
            &["beads", "issue", "tracking", "ticket"],
        ];
        let needle = word.to_lowercase();
        VOCABULARY
            .iter()
            .position(|group| group.iter().any(|known| *known == needle.as_str()))
    }
}
impl crate::extras::memory_hybrid::Embedder for ConceptEmbedder {
    /// Embeds each text as an 8-dimensional vector of concept counts.
    ///
    /// Texts are split on non-alphanumeric characters and every word with a
    /// known concept increments that dimension. A text with no recognised
    /// word yields `None` instead of an all-zero vector.
    fn embed(&self, texts: &[String]) -> Vec<Option<Vec<f32>>> {
        let mut embeddings = Vec::with_capacity(texts.len());
        for text in texts {
            let mut counts = vec![0.0f32; 8];
            let mut recognised = false;
            let dimensions = text
                .split(|c: char| !c.is_alphanumeric())
                .filter_map(Self::concept);
            for dimension in dimensions {
                counts[dimension] += 1.0;
                recognised = true;
            }
            embeddings.push(if recognised { Some(counts) } else { None });
        }
        embeddings
    }
}
/// Runs `query` through a `MemoryProvider` and returns the `content` string
/// of each result, in the order the provider returned them.
///
/// A response without a `results` array yields an empty list, and results
/// whose `content` is not a string are skipped. Panics if the search itself
/// fails.
fn provider_ranked_contents(
    provider: &dyn crate::extras::memory_provider::MemoryProvider,
    query: &str,
) -> Vec<String> {
    let response = provider.search(query).unwrap();
    match response["results"].as_array() {
        Some(rows) => rows
            .iter()
            .filter_map(|row| row["content"].as_str())
            .map(str::to_string)
            .collect(),
        None => Vec::new(),
    }
}
/// On the paraphrased queries, the hybrid provider (store plus
/// `ConceptEmbedder`) must score strictly higher Recall@5 than searching the
/// store directly, and must reach at least 0.85.
#[test]
fn hybrid_beats_bm25_on_the_paraphrase_corpus() {
    use std::sync::Arc;
    let corpus = seed_corpus();
    let (paths, dir) = temp_project();
    let store = Arc::new(SqliteMemoryStore::load(&paths).unwrap());
    for p in &corpus {
        store.add_entry("memory", p.content, Some(p.kind)).unwrap();
    }
    let semantic = pairs(&corpus, true);
    let bm25 = recall_at_k(|q| ranked_contents(&store, q), &semantic, 5);
    let hybrid = crate::extras::memory_hybrid::HybridMemoryProvider::new(
        store.clone(),
        Arc::new(ConceptEmbedder),
    );
    let hybrid_report = recall_at_k(|q| provider_ranked_contents(&hybrid, q), &semantic, 5);
    // Tear down before asserting so a failed assertion does not leak the temp
    // directory. The provider holds a clone of the store's `Arc`, so both are
    // dropped first to release the store before the directory is removed.
    drop(hybrid);
    drop(store);
    let _ = std::fs::remove_dir_all(&dir);
    assert!(
        hybrid_report.recall() > bm25.recall(),
        "hybrid paraphrase Recall@5 ({:.2}) must beat BM25 ({:.2}); hybrid misses: {:?}",
        hybrid_report.recall(),
        bm25.recall(),
        hybrid_report.misses,
    );
    assert!(
        hybrid_report.recall() >= 0.85,
        "hybrid should recover the paraphrases given embedder signal: {:.2}",
        hybrid_report.recall(),
    );
}