use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use super::message::{LoopMessage, UserMessage};
/// Shared handle to an asynchronous critic callback.
///
/// The callback receives the fully rendered prompt as an owned `String` and returns a boxed,
/// pinned `Send` future that resolves to the critic's raw text response, or to an `anyhow`
/// error if the call failed. The closure itself is `Send + Sync` and wrapped in an `Arc`.
pub type CriticFn = Arc<
dyn Fn(String) -> Pin<Box<dyn Future<Output = anyhow::Result<String>> + Send>> + Send + Sync,
>;
/// Prefix at the very start of the follow-up user message that `run_critic` emits when the
/// critic reports the work as incomplete.
pub const CRITIC_TAG: &str = "[critic]";
/// Role description and hard rules for the critic model: judge completeness only, respect the
/// assistant's own constraints, ignore compaction/reference-only blocks, and pass when unsure.
///
/// This text contains no `VERDICT:` response-format instructions; those live in the private
/// `CRITIC_FORMAT` constant, which `build_prompt` puts at the top of the prompt.
///
/// NOTE(review): nothing in this file embeds the preamble into the prompt — presumably the
/// caller supplies it separately (e.g. as a system prompt); confirm against the call site.
pub const CRITIC_PREAMBLE: &str = "\
You are a code-review critic for an autonomous coding agent. You are given the instructions and \
constraints the assistant operates under, plus a transcript of what it just did to satisfy the \
user's request. Judge ONLY whether the task is actually complete and correct within those \
constraints — not style.\n\
\n\
Hard rules:\n\
- RESPECT the assistant's instructions. NEVER flag the absence of an action the instructions \
forbid or defer (e.g. if it was told not to push/commit/deploy, do NOT ask it to). Treat anything \
the instructions place out of scope as correctly omitted.\n\
- Block only on CONCRETE, in-scope incompleteness with evidence (e.g. the user asked for X and X \
is missing; a change was made but never built/tested when verification was expected).\n\
- A block marked `[CONTEXT COMPACTION — REFERENCE ONLY]` (or a `## Active Task` lifted from one) \
describes ALREADY-COMPLETED prior work — never treat it as an outstanding requirement. Judge only \
the latest request and the transcript.\n\
- Do NOT invent new requirements, scope, or \"nice to haves\". If you are unsure, PASS — a false \
block wastes a whole turn.";
/// Response-format instructions that `build_prompt` places at the top of every prompt.
///
/// The critic is asked to put `VERDICT: COMPLETE` or `VERDICT: INCOMPLETE` on the first line;
/// `parse_verdict` inspects that first line to decide whether a follow-up is needed.
const CRITIC_FORMAT: &str = "\
Respond in EXACTLY this format and nothing else:\n\
On the first line, either `VERDICT: COMPLETE` or `VERDICT: INCOMPLETE`.\n\
If INCOMPLETE, follow with a short bullet list of the specific, concrete, in-scope issues to fix.";
/// Maximum number of characters of the agent's rules that `build_prompt` embeds in the prompt;
/// when rules are truncated, only this many leading characters are kept, followed by a
/// truncation note.
const MAX_RULES_CHARS: usize = 16_000;
/// Returns the portion of `rules` that precedes the first compaction marker, with trailing
/// whitespace removed. When the marker does not occur, `rules` is returned unchanged.
fn strip_compaction_summary(rules: &str) -> &str {
    rules
        .split_once(crate::agent::compression::COMPACTION_MARKER)
        .map_or(rules, |(before_marker, _summary)| before_marker.trim_end())
}
/// Renders the user-side prompt sent to the critic.
///
/// The prompt consists of the response-format instructions, then the assistant's `rules`
/// (instructions and constraints), then the `transcript` of what the assistant did.
///
/// `rules` is prepared before embedding:
/// - everything from the compaction marker onward is dropped and the rest is trimmed;
/// - if nothing remains, a placeholder line is used instead of an empty block;
/// - if more than `MAX_RULES_CHARS` characters remain, only the first `MAX_RULES_CHARS`
///   characters are kept and a truncation note is appended.
///
/// `transcript` is embedded verbatim.
pub fn build_prompt(rules: &str, transcript: &str) -> String {
    let rules = strip_compaction_summary(rules).trim();
    let rules_block = if rules.is_empty() {
        "(no special constraints provided)".to_string()
    } else {
        // The cap is expressed in characters, so both the "is it too long?" decision and the
        // cut point must be measured in characters. Comparing the byte length (`str::len`)
        // would flag multi-byte text as truncated even when nothing is removed.
        // `nth(MAX_RULES_CHARS)` yields the byte offset of the first character past the cap,
        // which is always a valid slice boundary.
        match rules.char_indices().nth(MAX_RULES_CHARS) {
            Some((cut, _)) => format!("{}\n…(instructions truncated)", &rules[..cut]),
            None => rules.to_string(),
        }
    };
    format!(
        "{CRITIC_FORMAT}\n\n\
         --- assistant instructions & constraints (judge within these; never demand a \
         forbidden/out-of-scope action) ---\n{rules_block}\n--- end instructions ---\n\n\
         --- transcript ---\n{transcript}\n--- end transcript ---"
    )
}
/// Extracts the critic's list of issues from its raw `response`.
///
/// Returns `Some(issues)` only when the first non-blank line of the response contains
/// `INCOMPLETE` (case-insensitive). The issues are everything after the first line, trimmed;
/// if nothing follows the verdict line, the whole trimmed response is returned instead.
///
/// Returns `None` for an empty/blank response and for any response whose first line does not
/// say `INCOMPLETE` — anything unrecognized is treated as a pass.
pub fn parse_verdict(response: &str) -> Option<String> {
    let body = response.trim();

    // A blank response has no lines at all, so `?` covers the empty case.
    let verdict_line = body.lines().map(str::trim).find(|line| !line.is_empty())?;
    if !verdict_line.to_ascii_uppercase().contains("INCOMPLETE") {
        return None;
    }

    // Prefer the text after the verdict line; fall back to the full response when the critic
    // gave a bare verdict with no details.
    let issues = match body.split_once('\n') {
        Some((_, tail)) if !tail.trim().is_empty() => tail.trim(),
        _ => body,
    };
    Some(issues.to_owned())
}
pub async fn run_critic(critic: &CriticFn, rules: &str, transcript: &str) -> Vec<LoopMessage> {
let prompt = build_prompt(rules, transcript);
let response = match critic(prompt).await {
Ok(r) => r,
Err(e) => {
tracing::warn!(target: "dirge::critic", error = %e, "critic call failed; finalizing without it");
return Vec::new();
}
};
match parse_verdict(&response) {
Some(issues) => vec![LoopMessage::User(UserMessage {
content: format!(
"{CRITIC_TAG} A review of your work found it may not be done yet. Address these \
before reporting complete, or explain why they don't apply (e.g. they're out of \
scope or something you were told not to do):\n{issues}"
),
})],
None => Vec::new(),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A COMPLETE verdict, in either letter case, produces no follow-up.
    #[test]
    fn parse_complete_returns_none() {
        for reply in ["VERDICT: COMPLETE", "verdict: complete\n(looks good)"] {
            assert!(parse_verdict(reply).is_none());
        }
    }

    /// An INCOMPLETE verdict returns the bullet list that follows it.
    #[test]
    fn parse_incomplete_returns_issues() {
        let issues =
            parse_verdict("VERDICT: INCOMPLETE\n- missing test\n- error path unhandled")
                .expect("should be incomplete");
        assert!(issues.contains("missing test"));
        assert!(issues.contains("error path"));
    }

    /// Blank or unrecognized responses are treated as a pass.
    #[test]
    fn parse_empty_or_ambiguous_fails_open() {
        for reply in ["", " \n ", "I think it's probably fine?"] {
            assert!(parse_verdict(reply).is_none());
        }
    }

    /// The prompt carries the format instructions, the transcript, and the rules.
    #[test]
    fn prompt_embeds_transcript_format_and_rules() {
        let prompt = build_prompt(
            "RULE: never push to remote.",
            "user asked X; assistant edited foo.rs",
        );
        for needle in ["VERDICT: COMPLETE", "VERDICT: INCOMPLETE", "edited foo.rs"] {
            assert!(prompt.contains(needle));
        }
        assert!(
            prompt.contains("never push to remote"),
            "rules must be embedded"
        );
        let lowered = prompt.to_lowercase();
        assert!(
            lowered.contains("forbidden") || lowered.contains("out-of-scope"),
            "prompt must instruct the critic to respect constraints",
        );
    }

    /// Empty rules are replaced by an explicit placeholder.
    #[test]
    fn empty_rules_render_a_placeholder_not_blank() {
        let prompt = build_prompt("", "did stuff");
        assert!(prompt.contains("no special constraints"));
    }

    /// Everything from the compaction marker onward is removed from the rules.
    #[test]
    fn build_prompt_drops_the_compaction_summary_from_rules() {
        use crate::agent::compression::COMPACTION_MARKER;

        let rules_with_summary = format!(
            "RULE: never push to remote.\n\n{} \
             ## Active Task\nFinish Phase 3: wire the Janet loader and add tests.",
            COMPACTION_MARKER,
        );
        let prompt = build_prompt(&rules_with_summary, "user asked X; assistant edited foo.rs");
        assert!(
            prompt.contains("never push to remote"),
            "real rules must survive"
        );
        let leaked = ["Active Task", "Phase 3", "Janet"]
            .iter()
            .any(|fragment| prompt.contains(*fragment));
        assert!(
            !leaked,
            "the compaction summary must be stripped from the critic's rules",
        );
        assert!(
            !prompt.contains(COMPACTION_MARKER),
            "the compaction marker itself must be stripped",
        );
    }

    /// The preamble mentions reference-only / compaction blocks.
    #[test]
    fn preamble_discounts_reference_only_blocks() {
        let preamble = CRITIC_PREAMBLE.to_ascii_lowercase();
        let mentions_reference_blocks =
            preamble.contains("reference") || preamble.contains("compaction");
        assert!(
            mentions_reference_blocks,
            "preamble must tell the critic to ignore reference-only/compaction blocks",
        );
    }

    /// Oversized rules are truncated and flagged as such.
    #[test]
    fn build_prompt_caps_large_rules() {
        let oversized = "x".repeat(MAX_RULES_CHARS + 5_000);
        let prompt = build_prompt(&oversized, "t");
        assert!(prompt.contains("instructions truncated"));
        assert!(prompt.len() < MAX_RULES_CHARS + 4_000);
    }

    /// The preamble names the role, keeps the format out, and keeps the key guidance in.
    #[test]
    fn preamble_is_calibrated_and_constraint_aware() {
        let preamble = CRITIC_PREAMBLE.to_ascii_lowercase();
        assert!(preamble.contains("critic"), "preamble must name the role");
        assert!(!preamble.contains("summarizer"));
        // The response format belongs to the prompt, not to the preamble.
        assert!(!CRITIC_PREAMBLE.contains("VERDICT:"));
        assert!(build_prompt("", "t").contains("VERDICT:"));
        assert!(
            preamble.contains("respect"),
            "must say to respect instructions"
        );
        assert!(
            preamble.contains("never flag the absence") || preamble.contains("forbid"),
            "must forbid demanding disallowed actions",
        );
        assert!(
            preamble.contains("unsure"),
            "must keep the fail-open guidance"
        );
    }

    /// An INCOMPLETE verdict becomes exactly one tagged user message.
    #[tokio::test]
    async fn run_critic_injects_followup_when_incomplete() {
        let critic: CriticFn = Arc::new(|_prompt| {
            Box::pin(async { Ok("VERDICT: INCOMPLETE\n- the test was never run".to_string()) })
        });
        let messages = run_critic(&critic, "rules", "did stuff").await;
        assert_eq!(messages.len(), 1);
        let content = match messages.first() {
            Some(LoopMessage::User(user)) => &user.content,
            _ => panic!("expected user message"),
        };
        assert!(content.starts_with(CRITIC_TAG));
        assert!(content.contains("test was never run"));
    }

    /// The rules given to `run_critic` show up in the prompt the critic receives.
    #[tokio::test]
    async fn run_critic_passes_rules_into_prompt() {
        use std::sync::Mutex;

        let captured: Arc<Mutex<String>> = Arc::new(Mutex::new(String::new()));
        let sink = Arc::clone(&captured);
        let critic: CriticFn = Arc::new(move |prompt: String| {
            *sink.lock().unwrap() = prompt;
            Box::pin(async { Ok("VERDICT: COMPLETE".to_string()) })
        });
        let _ = run_critic(&critic, "RULE: do not deploy", "did stuff").await;
        assert!(
            captured.lock().unwrap().contains("do not deploy"),
            "the agent's constraints must reach the critic prompt",
        );
    }

    /// A COMPLETE verdict yields no messages.
    #[tokio::test]
    async fn run_critic_silent_when_complete() {
        let critic: CriticFn =
            Arc::new(|_p| Box::pin(async { Ok("VERDICT: COMPLETE".to_string()) }));
        let messages = run_critic(&critic, "rules", "did stuff").await;
        assert!(messages.is_empty());
    }

    /// A failing critic call yields no messages rather than an error.
    #[tokio::test]
    async fn run_critic_fails_open_on_error() {
        let critic: CriticFn = Arc::new(|_p| Box::pin(async { anyhow::bail!("provider down") }));
        let messages = run_critic(&critic, "rules", "did stuff").await;
        assert!(
            messages.is_empty(),
            "a critic error must not block finalization"
        );
    }
}