use std::sync::Arc;
use serde_json::{Value, json};
use super::compression::{
PROTECT_HEAD_DEFAULT, PROTECT_TAIL_DEFAULT, SummarizeFn, build_summary_prompt,
compute_compress_window, estimate_messages_tokens, summary_budget,
};
/// A fact deliberately planted in a synthetic session so that its survival
/// through summarization can be checked afterwards.
///
/// Both fields are `&'static str`, so the type is cheap to copy and compare;
/// the derives make it usable in `assert_eq!` and in `{:?}` diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct PlantedFact {
    /// Human-readable category of the fact (for example `"file path"`).
    pub kind: &'static str,
    /// The exact text that is looked up verbatim when recall is scored.
    pub needle: &'static str,
}
/// Returns the fixed set of facts that the recall evaluation plants in a session.
///
/// The list holds six entries, one per category: a file path, a code location,
/// an error message, a config value, an identifier and a numeric value. The
/// order of the returned vector matches the order of the table below.
pub(crate) fn seed_facts() -> Vec<PlantedFact> {
    // (kind, needle) pairs; kept as a flat table so adding a fact is a one-line change.
    const SEEDS: [(&str, &str); 6] = [
        ("file path", "src/widgets/aurora_panel.rs"),
        ("code location", "render_frame at line 287"),
        (
            "error message",
            "index out of bounds: the len is 4 but the index is 9",
        ),
        ("config value", "AURORA_MAX_RETRIES=7"),
        ("identifier", "tok_9Q2x7Lp4dF"),
        ("numeric value", "timeout of 4500ms"),
    ];

    SEEDS
        .iter()
        .map(|&(kind, needle)| PlantedFact { kind, needle })
        .collect()
}
/// Builds a synthetic chat transcript with `facts` embedded in its middle.
///
/// Layout of the returned messages, in order:
/// 1. a system prompt and the opening user request,
/// 2. four assistant/user filler exchanges,
/// 3. one assistant/user exchange per fact, the assistant turn quoting the
///    fact's `kind` and `needle` verbatim,
/// 4. four more assistant/user filler exchanges,
/// 5. a final user request.
///
/// Each message is a JSON object with string `role` and `content` fields.
// NOTE(review): the filler on both sides presumably keeps the facts inside the
// compaction window rather than the protected head/tail — confirm against
// `compute_compress_window`.
pub(crate) fn session_with_facts(facts: &[PlantedFact]) -> Vec<Value> {
    // Single place that decides how a turn is encoded.
    let turn = |role: &str, content: String| json!({"role": role, "content": content});

    let mut session = vec![
        turn("system", "you are dirge, a coding agent".to_owned()),
        turn("user", "fix the flaky aurora panel render".to_owned()),
    ];

    // Leading filler exchanges.
    session.extend((0..4).flat_map(|step| {
        [
            turn("assistant", format!("looking into it (step {step})")),
            turn("user", format!("ok, continue {step}")),
        ]
    }));

    // One exchange per planted fact; the needle is quoted verbatim.
    session.extend(facts.iter().flat_map(|fact| {
        [
            turn(
                "assistant",
                format!(
                    "noted ({}): {} — keep this for later",
                    fact.kind, fact.needle
                ),
            ),
            turn("user", "got it, keep going".to_owned()),
        ]
    }));

    // Trailing filler exchanges.
    session.extend((0..4).flat_map(|step| {
        [
            turn("assistant", format!("almost there (step {step})")),
            turn("user", format!("keep going {step}")),
        ]
    }));

    session.push(turn("user", "now write the regression test".to_owned()));
    session
}
/// Outcome of checking which planted facts appear verbatim in a piece of text.
///
/// `Debug` and `PartialEq` are derived so a report can be shown in assertion
/// failure output and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct RecallReport {
    /// Number of facts that were checked.
    pub total: usize,
    /// Number of facts whose needle was found.
    pub survived: usize,
    /// `(kind, needle)` of every fact whose needle was not found.
    pub dropped: Vec<(&'static str, &'static str)>,
}

impl RecallReport {
    /// Returns `true` when no fact was dropped, i.e. `dropped` is empty.
    pub fn all_survived(&self) -> bool {
        self.dropped.is_empty()
    }
}
/// Scores how many of `facts` appear verbatim in `text`.
///
/// A fact counts as surviving only if its `needle` is an exact substring of
/// `text`; anything else is listed in `dropped` as `(kind, needle)`, in the
/// same order as `facts`. `total` is `facts.len()` and `survived` is `total`
/// minus the number of dropped facts.
pub(crate) fn score_recall(text: &str, facts: &[PlantedFact]) -> RecallReport {
    let mut dropped = Vec::new();
    for fact in facts {
        let found = text.contains(fact.needle);
        if !found {
            dropped.push((fact.kind, fact.needle));
        }
    }

    let total = facts.len();
    let survived = total - dropped.len();
    RecallReport {
        total,
        survived,
        dropped,
    }
}
/// Runs the recall evaluation end to end against `summarize`.
///
/// Steps: build the seeded session, pick the compaction window with the
/// default head/tail protection, build the summary prompt for that slice,
/// await the summarizer, then score the returned summary against the seeded
/// facts.
///
/// A summarizer error is not propagated: the summary falls back to its default
/// (empty) value, so every seeded fact is reported as dropped.
///
/// # Panics
///
/// Slicing the session panics if the computed window is out of bounds or
/// inverted.
pub(crate) async fn run_recall_eval(summarize: SummarizeFn) -> RecallReport {
    let planted = seed_facts();
    let session = session_with_facts(&planted);

    let (lo, hi) = compute_compress_window(&session, PROTECT_HEAD_DEFAULT, PROTECT_TAIL_DEFAULT);
    let window = &session[lo..hi];

    let budget = summary_budget(estimate_messages_tokens(window));
    let prompt = build_summary_prompt(window, budget, None, None);

    // Failure to summarize is scored as an empty summary rather than surfaced.
    let summary = summarize(prompt).await.unwrap_or_default();
    score_recall(&summary, &planted)
}
#[cfg(test)]
mod tests {
use super::*;
/// Every seeded needle must appear verbatim in the prompt built from the
/// compaction window; otherwise the summarizer never sees the fact.
#[test]
fn planted_facts_reach_the_summarizer() {
    let planted = seed_facts();
    let session = session_with_facts(&planted);

    let (lo, hi) = compute_compress_window(&session, PROTECT_HEAD_DEFAULT, PROTECT_TAIL_DEFAULT);
    assert!(
        lo < hi,
        "session must produce a non-empty compaction window"
    );

    let window = &session[lo..hi];
    let budget = summary_budget(estimate_messages_tokens(window));
    let prompt = build_summary_prompt(window, budget, None, None);

    // Score the prompt itself, not a summary: this isolates the prompt builder.
    let outcome = score_recall(&prompt, &planted);
    assert!(
        outcome.all_survived(),
        "facts dropped before reaching the summarizer: {:?}",
        outcome.dropped
    );
}
/// Builds a transcript in which an original task is completed and then
/// superseded by follow-up work.
///
/// Order of the returned messages: system prompt and opening request, four
/// filler exchanges, the conversion request and its completion report, the
/// follow-up about the hanging integration test, four more filler exchanges,
/// and a closing user question.
// NOTE(review): "443" presumably refers to an issue number — confirm.
fn session_443_task_supersession() -> Vec<Value> {
    // Single place that decides how a turn is encoded.
    let turn = |role: &str, content: &str| json!({"role": role, "content": content});

    let mut session = vec![
        turn("system", "you are dirge, a coding agent"),
        turn("user", "let's work on the chat server"),
    ];

    // Leading filler exchanges.
    session.extend((0..4).flat_map(|step| {
        [
            turn("assistant", &format!("on it (step {step})")),
            turn("user", &format!("ok, continue {step}")),
        ]
    }));

    // Original task, followed by the signal that it is done.
    session.extend([
        turn(
            "user",
            "Convert the TCP chat server from tokio to stdlib and add an integration test",
        ),
        turn("assistant", "starting the conversion"),
        turn("user", "go ahead"),
        turn(
            "assistant",
            "stdlib conversion complete — no tokio remains; cargo build passes",
        ),
        turn("user", "great, what now"),
    ]);

    // Follow-up work that replaces the original task as the live one.
    session.extend([
        turn(
            "user",
            "the integration test hangs — debugging the race in the accept loop",
        ),
        turn("assistant", "looking at the accept loop"),
        turn("user", "keep going"),
    ]);

    // Trailing filler exchanges.
    session.extend((0..4).flat_map(|step| {
        [
            turn("assistant", &format!("still on it (step {step})")),
            turn("user", &format!("keep going {step}")),
        ]
    }));

    session.push(turn("user", "so where does the race actually come from?"));
    session
}
/// Both the "original task is done" signal and the "follow-up is live" signal
/// must be present in the prompt handed to the summarizer.
#[test]
fn task_supersession_signal_reaches_the_summarizer() {
    let session = session_443_task_supersession();

    let (lo, hi) = compute_compress_window(&session, PROTECT_HEAD_DEFAULT, PROTECT_TAIL_DEFAULT);
    assert!(
        lo < hi,
        "session must produce a non-empty compaction window"
    );

    let window = &session[lo..hi];
    let budget = summary_budget(estimate_messages_tokens(window));
    let prompt = build_summary_prompt(window, budget, None, None);

    let completion_phrases = ["stdlib conversion complete", "no tokio remains"];
    assert!(
        completion_phrases
            .iter()
            .all(|phrase| prompt.contains(*phrase)),
        "completion signal (original task DONE) must reach the summarizer prompt"
    );

    let follow_up_phrases = ["integration test hangs", "debugging the race"];
    assert!(
        follow_up_phrases
            .iter()
            .all(|phrase| prompt.contains(*phrase)),
        "follow-up signal (live work) must reach the summarizer prompt"
    );
}
/// A summary that paraphrases instead of quoting must be scored as lossy, and
/// the verbatim error string must be among the dropped facts.
#[test]
fn scorer_flags_a_lossy_summary() {
    let planted = seed_facts();

    // Paraphrase only: none of the exact needles is quoted.
    let paraphrased = concat!(
        "## Active Task\nwrite a regression test\n\n",
        "## Critical Context\nThe agent fixed a panic in the panel ",
        "widget and tuned a retry config and a timeout.",
    );

    let outcome = score_recall(paraphrased, &planted);
    assert!(
        outcome.survived < outcome.total,
        "a paraphrased summary must lose facts; survived {}/{}",
        outcome.survived,
        outcome.total
    );

    let error_message_dropped = outcome
        .dropped
        .iter()
        .any(|entry| entry.0 == "error message");
    assert!(
        error_message_dropped,
        "the verbatim error string should be among the dropped: {:?}",
        outcome.dropped
    );
}
/// A summarizer that echoes every seeded fact verbatim must get full credit
/// from the end-to-end evaluation.
#[tokio::test]
async fn eval_credits_a_faithful_summarizer() {
    // Ignores the prompt and lists each seeded fact as a bullet.
    let echo_all: SummarizeFn = Arc::new(|_prompt: String| {
        let mut bullets = Vec::new();
        for fact in seed_facts() {
            bullets.push(format!("- {}: {}", fact.kind, fact.needle));
        }
        let body = bullets.join("\n");
        Box::pin(async move { Ok(format!("## Critical Context\n{body}")) })
    });

    let outcome = run_recall_eval(echo_all).await;
    assert!(
        outcome.all_survived(),
        "faithful summarizer should preserve all facts: {:?}",
        outcome.dropped
    );
}
/// A summarizer that only paraphrases must be flagged by the end-to-end
/// evaluation, with zero seeded facts surviving.
#[tokio::test]
async fn eval_catches_a_lossy_summarizer() {
    // Canned paraphrase that quotes none of the seeded needles.
    const PARAPHRASE: &str = concat!(
        "## Active Task\nwrite the regression test\n\n",
        "## Remaining Work\nthe agent investigated a rendering bug and ",
        "adjusted some configuration.",
    );

    let paraphraser: SummarizeFn = Arc::new(|_prompt: String| {
        Box::pin(async move { Ok(PARAPHRASE.to_owned()) })
    });

    let outcome = run_recall_eval(paraphraser).await;
    assert!(
        !outcome.all_survived(),
        "a paraphrasing summarizer must be flagged"
    );
    assert_eq!(outcome.survived, 0, "this summary keeps none of the needles");
}
}