use std::time::Duration;
use super::extract::Pair;
use crate::agent_cli::{AgentKind, GateResult, dispatch_gate};
use difflore_core::cloud::session_mined::{
SessionMinedCandidate, SessionMinedCandidateArgs,
};
/// Hard cap, in characters, on the prompt produced by `build_prompt`.
const PROMPT_MAX_CHARS: usize = 30_000;
/// Maximum number of existing rules listed in the prompt; extra rules are ignored.
const MAX_EXISTING_RULES_IN_PROMPT: usize = 24;
/// Duration handed to `dispatch_gate` by `run_gate` (presumably the time limit for
/// the agent CLI call — confirm against `dispatch_gate`).
const GATE_TIMEOUT: Duration = Duration::from_secs(90);
/// Summary of an already-known rule, listed in the prompt's "EXISTING RULES" section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExistingRule {
/// Identifier rendered in front of the title; the prompt asks for it back on MERGE.
pub rule_id: String,
/// Rule title; trimmed and capped to 120 characters when rendered into the prompt.
pub title: String,
/// Excerpt of the rule body; trimmed and capped to 200 characters when rendered,
/// and omitted from the prompt line when empty after trimming.
pub body_snippet: String,
}
/// Borrowed inputs for a single `run_gate` call.
#[derive(Debug, Clone)]
pub struct GateArgs<'a> {
/// Copied into the candidate built for a KEEP verdict.
pub session_id: &'a str,
/// Copied into the candidate built for a KEEP verdict.
pub source_repo: &'a str,
/// Conversation pairs rendered into the prompt; `run_gate` rejects an empty slice.
pub pairs: &'a [Pair],
/// Rules listed in the prompt so the agent can avoid duplicates or pick a MERGE target.
pub existing_rules: &'a [ExistingRule],
/// Copied into the candidate built for a KEEP verdict.
pub gate_model: &'a str,
/// Used to pick the `AgentKind`; unrecognised names fall back to `AgentKind::ClaudeCode`.
pub client_name: &'a str,
/// Copied into the candidate built for a KEEP verdict (presumably epoch milliseconds — confirm).
pub ts_ms: i64,
}
/// Outcome of the gate, derived from the agent's JSON answer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GateVerdict {
/// The session yielded a new rule; `candidate` was built via `SessionMinedCandidate::try_new`.
Keep { candidate: SessionMinedCandidate },
/// The session refines an existing rule; `updated_body` is the `body` field of the answer.
Merge { rule_id: String, updated_body: String },
/// Nothing to record; `reason` defaults to "no reason given" when the agent supplied none.
Skip { reason: String },
}
/// Failures that `run_gate` can report.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum GateError {
/// `GateArgs::pairs` was empty; returned before the agent is dispatched.
#[error("session-mine gate received no conversation pairs")]
EmptyInput,
/// The dispatch result was flagged as errored; `message` is the reported error text
/// or a fixed placeholder when that text is empty.
#[error("session-mine gate dispatch failed: {message}")]
Dispatch { message: String },
/// The agent output could not be interpreted. `raw` holds at most 400 characters of
/// the output (it is empty for the unknown-verdict case) and is not part of the
/// `Display` message.
#[error("session-mine gate parse failed: {reason}")]
ParseFailure { reason: String, raw: String },
/// The answer parsed, but required fields were missing or candidate construction failed.
#[error("session-mine gate produced invalid candidate: {reason}")]
InvalidCandidate { reason: String },
}
/// Runs the session-mining gate: builds the prompt, dispatches it to the agent CLI
/// and converts the agent's JSON answer into a [`GateVerdict`].
///
/// # Errors
///
/// * [`GateError::EmptyInput`] when `args.pairs` is empty (nothing is dispatched).
/// * [`GateError::Dispatch`] when the dispatch result is flagged as errored.
/// * [`GateError::ParseFailure`] / [`GateError::InvalidCandidate`] when the agent
///   output cannot be turned into a verdict.
pub async fn run_gate(args: GateArgs<'_>) -> Result<GateVerdict, GateError> {
    if args.pairs.is_empty() {
        return Err(GateError::EmptyInput);
    }

    // Unknown client names fall back to the Claude Code agent.
    let agent = AgentKind::from_client_name(args.client_name).unwrap_or(AgentKind::ClaudeCode);
    let prompt = build_prompt(args.pairs, args.existing_rules);
    let outcome: GateResult = dispatch_gate(agent, &prompt, GATE_TIMEOUT).await;

    if !outcome.errored {
        let parsed = parse_gate_json(&outcome.stdout)?;
        return parsed_to_verdict(parsed, &args);
    }

    // An errored dispatch always surfaces with a non-empty message.
    let message = Some(outcome.error_message)
        .filter(|text| !text.is_empty())
        .unwrap_or_else(|| "agent CLI reported error with no message".to_owned());
    Err(GateError::Dispatch { message })
}
/// Fields extracted from the agent's JSON answer by `parse_gate_json`.
/// String values are stored trimmed; absent, non-string or blank values become `None`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GateJson {
/// Trimmed, non-empty verdict text exactly as the agent wrote it (case not normalised).
verdict: String,
/// Target rule id; required by `parsed_to_verdict` for MERGE.
rule_id: Option<String>,
/// Rule title; required by `parsed_to_verdict` for KEEP.
title: Option<String>,
/// Rule body; required by `parsed_to_verdict` for KEEP and MERGE.
body: Option<String>,
/// Trimmed, non-empty string entries of the `file_patterns` array (empty when absent).
file_patterns: Vec<String>,
/// Justification text; used for SKIP.
reason: Option<String>,
}
/// Builds the prompt sent to the gate agent.
///
/// Sections, in order: librarian instructions, an "EXISTING RULES" list (at most
/// `MAX_EXISTING_RULES_IN_PROMPT` entries, titles capped to 120 characters and
/// snippets to 200), the "SESSION" transcript, then the decision criteria and the
/// JSON response contract.
///
/// The transcript budget is `PROMPT_MAX_CHARS` minus what has been written so far
/// minus a 1_200-character reserve (presumably headroom for the trailing sections).
/// The longest run of the most recent pairs that fits the budget is kept, in
/// chronological order. If even the newest pair does not fit, that pair alone is
/// included, truncated to the budget. The finished prompt is finally hard-capped to
/// `PROMPT_MAX_CHARS` characters.
fn build_prompt(pairs: &[Pair], existing_rules: &[ExistingRule]) -> String {
    let render_pair = |pair: &Pair| {
        format!(
            "USER: {}\nASSISTANT: {}\n",
            pair.user_prompt.trim(),
            pair.assistant_text.trim(),
        )
    };

    let mut out = String::with_capacity(PROMPT_MAX_CHARS / 2);
    out.push_str(
        "You are a code-review-rules librarian. Decide whether the following short session \
         contains a reusable, transferable rule about how to write code in this team's repo.\n\n",
    );

    out.push_str("EXISTING RULES (do not duplicate):\n");
    if existing_rules.is_empty() {
        out.push_str("- (none yet)\n");
    } else {
        for rule in existing_rules.iter().take(MAX_EXISTING_RULES_IN_PROMPT) {
            let title = truncate_chars(rule.title.trim(), 120);
            let snippet_short = truncate_chars(rule.body_snippet.trim(), 200);
            out.push_str("- ");
            out.push_str(rule.rule_id.trim());
            out.push_str(": ");
            out.push_str(&title);
            if !snippet_short.is_empty() {
                out.push_str(" — ");
                out.push_str(&snippet_short);
            }
            out.push('\n');
        }
    }
    out.push('\n');

    let body_budget = PROMPT_MAX_CHARS.saturating_sub(out.chars().count() + 1_200);
    let mut session_block = String::new();

    // Walk from the newest pair backwards and stop at the first pair that would
    // overflow the budget. Every rendered pair is non-empty, so this selects exactly
    // the suffix that "drop the oldest until it fits" would, but renders and measures
    // each pair at most once instead of re-counting the whole transcript per drop.
    let mut kept_newest_first: Vec<String> = Vec::new();
    let mut used_chars = 0_usize;
    for pair in pairs.iter().rev() {
        let rendered = render_pair(pair);
        let rendered_len = rendered.chars().count();
        if used_chars + rendered_len > body_budget {
            if kept_newest_first.is_empty() {
                // Not even the newest pair fits: keep its beginning rather than nothing.
                session_block.push_str(&truncate_chars(&rendered, body_budget));
            }
            break;
        }
        used_chars += rendered_len;
        kept_newest_first.push(rendered);
    }
    // Restore chronological order.
    for rendered in kept_newest_first.iter().rev() {
        session_block.push_str(rendered);
    }

    out.push_str("SESSION (prompt + final assistant text only — tool calls stripped):\n");
    out.push_str(&session_block);
    out.push('\n');
    out.push_str(
        "DECISION CRITERIA:\n\
         - KEEP if the activity contains a non-obvious, reusable rule (file_pattern + behavior).\n\
         - MERGE <existing-id> if it strengthens or refines an existing rule.\n\
         - SKIP if it's one-off / generic / obvious / already covered.\n\
         \n\
         RESPOND WITH STRICT JSON (no prose, no markdown fence):\n\
         { \"verdict\": \"KEEP\" | \"MERGE\" | \"SKIP\",\n\
         \"rule_id\": \"<existing id if MERGE, else null>\",\n\
         \"title\": \"<≤120 chars, only if KEEP/MERGE>\",\n\
         \"body\": \"<≤2000 chars rule body, only if KEEP/MERGE>\",\n\
         \"file_patterns\": [\"<glob1>\", \"...\"],\n\
         \"reason\": \"<short justification, only if SKIP>\" }\n",
    );

    // Last-resort cap in case the sections above still overshoot.
    if out.chars().count() > PROMPT_MAX_CHARS {
        out = truncate_chars(&out, PROMPT_MAX_CHARS);
    }
    out
}
/// Extracts the gate answer from raw agent output.
///
/// The output is trimmed, an enclosing Markdown fence is removed, the first balanced
/// `{…}` span is located and parsed as JSON, and the known fields are pulled out.
/// String fields are trimmed and blank values are treated as missing.
///
/// # Errors
///
/// Returns [`GateError::ParseFailure`] when no JSON object is found, the JSON is
/// invalid, or `verdict` is missing/blank. `raw` in the error holds at most 400
/// characters of the agent output.
fn parse_gate_json(raw: &str) -> Result<GateJson, GateError> {
    // All failures share the same bounded excerpt of the raw output.
    let failure = |reason: String| GateError::ParseFailure {
        reason,
        raw: truncate_chars(raw, 400),
    };

    let unfenced = strip_markdown_fence(raw.trim());
    let json_text = match locate_json_object(&unfenced) {
        Some(text) => text,
        None => return Err(failure("no JSON object found in agent output".to_owned())),
    };
    let doc: serde_json::Value = match serde_json::from_str(&json_text) {
        Ok(doc) => doc,
        Err(e) => return Err(failure(format!("invalid JSON: {e}"))),
    };

    let verdict = match optional_string(&doc, "verdict") {
        Some(verdict) => verdict,
        None => return Err(failure("missing 'verdict' field".to_owned())),
    };

    // Non-string and blank entries are skipped; a missing or non-array field yields an empty list.
    let mut file_patterns: Vec<String> = Vec::new();
    if let Some(entries) = doc.get("file_patterns").and_then(serde_json::Value::as_array) {
        for entry in entries {
            if let Some(glob) = entry.as_str().map(str::trim) {
                if !glob.is_empty() {
                    file_patterns.push(glob.to_owned());
                }
            }
        }
    }

    Ok(GateJson {
        verdict,
        rule_id: optional_string(&doc, "rule_id"),
        title: optional_string(&doc, "title"),
        body: optional_string(&doc, "body"),
        file_patterns,
        reason: optional_string(&doc, "reason"),
    })
}
fn optional_string(value: &serde_json::Value, key: &str) -> Option<String> {
value
.get(key)
.and_then(serde_json::Value::as_str)
.map(str::trim)
.filter(|s| !s.is_empty())
.map(str::to_owned)
}
/// Removes a Markdown code fence (three backticks, optionally followed by a language
/// tag) wrapped around agent output.
///
/// The input is trimmed first. When it does not start with a fence it is returned
/// trimmed and otherwise untouched. For a multi-line fence the remainder of the
/// opening line (the language tag) is dropped; a trailing closing fence, if present,
/// is removed together with the whitespace before it.
///
/// A fence with no newline after the opening backticks (a single-line fenced reply)
/// keeps its content instead of being reduced to an empty string, so a JSON object
/// on that line remains available to the caller.
fn strip_markdown_fence(s: &str) -> String {
    let trimmed = s.trim();
    let rest = match trimmed.strip_prefix("```") {
        Some(rest) => rest,
        None => return trimmed.to_owned(),
    };

    // Multi-line fence: skip the opening line. Single-line fence: there is no
    // separate info line, so everything after the backticks is payload.
    let content = rest
        .split_once('\n')
        .map_or(rest, |(_info_line, body)| body)
        .trim_end();

    content
        .strip_suffix("```")
        .unwrap_or(content)
        .trim_end()
        .to_owned()
}
/// Returns the first balanced `{…}` span of `s`, starting at the first `{`.
///
/// Braces inside double-quoted strings are ignored and backslash escapes inside
/// strings are honoured. Returns `None` when there is no `{` or the braces opened
/// at the first `{` never balance. The span is not validated as JSON.
fn locate_json_object(s: &str) -> Option<String> {
    let text = s.trim();
    let open = text.find('{')?;

    let mut nesting: i32 = 0;
    let mut inside_string = false;
    let mut escaped = false;

    // Scanning bytes is safe: the delimiters are ASCII and never occur inside a
    // multi-byte UTF-8 sequence, so both slice ends fall on char boundaries.
    for (rel, byte) in text.as_bytes()[open..].iter().copied().enumerate() {
        if inside_string {
            match byte {
                _ if escaped => escaped = false,
                b'\\' => escaped = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        if byte == b'"' {
            inside_string = true;
        } else if byte == b'{' {
            nesting += 1;
        } else if byte == b'}' {
            nesting -= 1;
            if nesting == 0 {
                let close = open + rel;
                return Some(text[open..=close].to_owned());
            }
        }
    }
    None
}
fn parsed_to_verdict(parsed: GateJson, args: &GateArgs<'_>) -> Result<GateVerdict, GateError> {
let verdict_uc = parsed.verdict.to_ascii_uppercase();
match verdict_uc.as_str() {
"KEEP" => {
let title = parsed
.title
.ok_or_else(|| GateError::InvalidCandidate {
reason: "KEEP verdict missing title".to_owned(),
})?;
let body = parsed
.body
.ok_or_else(|| GateError::InvalidCandidate {
reason: "KEEP verdict missing body".to_owned(),
})?;
if parsed.file_patterns.is_empty() {
return Err(GateError::InvalidCandidate {
reason: "KEEP verdict missing file_patterns".to_owned(),
});
}
let candidate = SessionMinedCandidate::try_new(SessionMinedCandidateArgs {
session_id: args.session_id.to_owned(),
ts_ms: args.ts_ms,
source_repo: args.source_repo.to_owned(),
title,
body,
file_patterns: parsed.file_patterns,
gate_model: args.gate_model.to_owned(),
gate_verdict: "KEEP".to_owned(),
})
.map_err(|e| GateError::InvalidCandidate {
reason: e.to_string(),
})?;
Ok(GateVerdict::Keep { candidate })
}
"MERGE" => {
let rule_id = parsed
.rule_id
.ok_or_else(|| GateError::InvalidCandidate {
reason: "MERGE verdict missing rule_id".to_owned(),
})?;
let updated_body =
parsed.body.ok_or_else(|| GateError::InvalidCandidate {
reason: "MERGE verdict missing body".to_owned(),
})?;
Ok(GateVerdict::Merge {
rule_id,
updated_body,
})
}
"SKIP" => Ok(GateVerdict::Skip {
reason: parsed.reason.unwrap_or_else(|| "no reason given".to_owned()),
}),
other => Err(GateError::ParseFailure {
reason: format!("unknown verdict '{other}'"),
raw: String::new(),
}),
}
}
/// Limits `s` to at most `max_chars` characters (Unicode scalar values, not bytes).
///
/// * `max_chars == 0` yields an empty string.
/// * Input that already fits is returned unchanged.
/// * Longer input keeps its first `max_chars - 1` characters followed by `…`, so
///   the result is exactly `max_chars` characters long.
///
/// Only the first `max_chars + 1` characters are inspected, so truncating a very
/// large input to a small limit does not scan the whole input.
fn truncate_chars(s: &str, max_chars: usize) -> String {
    if max_chars == 0 {
        return String::new();
    }

    let mut indices = s.char_indices();
    // `nth(max_chars - 1)` is the last character that would fit; a further `next()`
    // succeeding means the input is too long. Tuple fields evaluate left to right.
    match (indices.nth(max_chars - 1), indices.next()) {
        (Some((cut, _)), Some(_)) => {
            // `cut` is the byte offset of the character replaced by the ellipsis.
            let mut out = String::with_capacity(cut + '…'.len_utf8());
            out.push_str(&s[..cut]);
            out.push('…');
            out
        }
        _ => s.to_owned(),
    }
}
// Unit tests for prompt construction, agent-output parsing and verdict conversion.
// Only `run_gate`'s empty-input path is exercised end to end; that path returns
// before the agent is dispatched.
#[cfg(test)]
mod tests {
use super::*;
// Builds a conversation pair from string slices.
fn pair(user: &str, assistant: &str) -> Pair {
Pair {
user_prompt: user.to_owned(),
assistant_text: assistant.to_owned(),
}
}
// Builds `GateArgs` with fixed session metadata around the given pairs and rules.
fn args<'a>(
pairs: &'a [Pair],
existing: &'a [ExistingRule],
) -> GateArgs<'a> {
GateArgs {
session_id: "sess_test",
source_repo: "owner/repo",
pairs,
existing_rules: existing,
gate_model: "claude-code:gate",
client_name: "claude-code",
ts_ms: 1_714_000_000_000,
}
}
// The prompt lists each existing rule as "<id>: <title>" and contains every section header.
#[test]
fn prompt_includes_existing_rules_section_with_ids_and_titles() {
let rules = vec![
ExistingRule {
rule_id: "rule-1".to_owned(),
title: "Prefer typed deserialization".to_owned(),
body_snippet: "Use serde structs instead of Value::as_str.".to_owned(),
},
ExistingRule {
rule_id: "rule-2".to_owned(),
title: "Hard-deny dbg!".to_owned(),
body_snippet: "Workspace forbids debug macros in committed code."
.to_owned(),
},
];
let pairs = vec![pair("hi", "hello")];
let prompt = build_prompt(&pairs, &rules);
assert!(prompt.contains("EXISTING RULES"), "section header present");
assert!(prompt.contains("rule-1: Prefer typed deserialization"));
assert!(prompt.contains("rule-2: Hard-deny dbg!"));
assert!(prompt.contains("SESSION ("));
assert!(prompt.contains("DECISION CRITERIA"));
assert!(prompt.contains("STRICT JSON"));
}
// With no existing rules the section shows an explicit placeholder line.
#[test]
fn prompt_uses_none_yet_placeholder_when_no_existing_rules() {
let pairs = vec![pair("u", "a")];
let prompt = build_prompt(&pairs, &[]);
assert!(prompt.contains("- (none yet)"), "explicit no-rules marker");
}
// Pairs appear in input order with USER:/ASSISTANT: labels.
#[test]
fn prompt_renders_pairs_in_order_with_user_assistant_labels() {
let pairs = vec![pair("first q", "first a"), pair("second q", "second a")];
let prompt = build_prompt(&pairs, &[]);
let first_idx = prompt.find("first q").expect("first q present");
let second_idx = prompt.find("second q").expect("second q present");
assert!(first_idx < second_idx, "pairs in chronological order");
assert!(prompt.contains("USER: first q"));
assert!(prompt.contains("ASSISTANT: first a"));
}
// An oversized session stays within the cap by dropping the oldest pairs first.
#[test]
fn prompt_drops_oldest_pairs_when_over_budget() {
let mut pairs: Vec<Pair> = Vec::new();
for i in 0..400 {
let body = "x".repeat(200);
pairs.push(pair(&format!("user-{i}"), &body));
}
let prompt = build_prompt(&pairs, &[]);
assert!(prompt.chars().count() <= PROMPT_MAX_CHARS);
assert!(prompt.contains("user-399"));
// The trailing newline keeps "user-0" from matching ids such as "user-10".
assert!(!prompt.contains("user-0\n"));
}
// Only the first MAX_EXISTING_RULES_IN_PROMPT rules are listed.
#[test]
fn prompt_caps_existing_rules_at_max_for_budget() {
let mut rules = Vec::new();
for i in 0..(MAX_EXISTING_RULES_IN_PROMPT + 10) {
rules.push(ExistingRule {
rule_id: format!("rule-{i}"),
title: format!("title-{i}"),
body_snippet: format!("snippet-{i}"),
});
}
let pairs = vec![pair("u", "a")];
let prompt = build_prompt(&pairs, &rules);
assert!(prompt.contains("rule-0:"));
assert!(prompt.contains(&format!(
"rule-{}:",
MAX_EXISTING_RULES_IN_PROMPT - 1
)));
assert!(!prompt.contains(&format!("rule-{MAX_EXISTING_RULES_IN_PROMPT}:")));
}
// A minimal KEEP answer parses; the absent rule_id becomes None.
#[test]
fn parse_keep_minimal_shape() {
let raw = r#"{"verdict":"KEEP","title":"Always validate","body":"Validate before enqueue.","file_patterns":["src/**/*.rs"]}"#;
let parsed = parse_gate_json(raw).expect("parses");
assert_eq!(parsed.verdict, "KEEP");
assert_eq!(parsed.title.as_deref(), Some("Always validate"));
assert_eq!(parsed.body.as_deref(), Some("Validate before enqueue."));
assert_eq!(parsed.file_patterns, vec!["src/**/*.rs"]);
assert!(parsed.rule_id.is_none());
}
// A MERGE answer keeps its rule_id and body.
#[test]
fn parse_merge_shape_carries_rule_id() {
let raw = r#"{"verdict":"MERGE","rule_id":"rule-7","title":"Refine X","body":"Updated body","file_patterns":[]}"#;
let parsed = parse_gate_json(raw).expect("parses");
assert_eq!(parsed.verdict, "MERGE");
assert_eq!(parsed.rule_id.as_deref(), Some("rule-7"));
assert_eq!(parsed.body.as_deref(), Some("Updated body"));
}
// A SKIP answer keeps its reason.
#[test]
fn parse_skip_shape_carries_reason() {
let raw = r#"{"verdict":"SKIP","reason":"one-off bug fix"}"#;
let parsed = parse_gate_json(raw).expect("parses");
assert_eq!(parsed.verdict, "SKIP");
assert_eq!(parsed.reason.as_deref(), Some("one-off bug fix"));
}
// Output wrapped in a fence with a "json" language tag still parses.
#[test]
fn parse_tolerates_markdown_json_fence() {
let raw = "```json\n{\"verdict\":\"SKIP\",\"reason\":\"covered\"}\n```";
let parsed = parse_gate_json(raw).expect("parses through fence");
assert_eq!(parsed.verdict, "SKIP");
assert_eq!(parsed.reason.as_deref(), Some("covered"));
}
// Output wrapped in a fence without a language tag still parses.
#[test]
fn parse_tolerates_plain_markdown_fence() {
let raw = "```\n{\"verdict\":\"SKIP\",\"reason\":\"x\"}\n```";
let parsed = parse_gate_json(raw).expect("parses");
assert_eq!(parsed.verdict, "SKIP");
}
// Prose before and after the JSON object is ignored.
#[test]
fn parse_tolerates_prose_before_json() {
let raw =
"Sure, here's my answer:\n{\"verdict\":\"SKIP\",\"reason\":\"too narrow\"}\nThanks!";
let parsed = parse_gate_json(raw).expect("parses through prose");
assert_eq!(parsed.verdict, "SKIP");
assert_eq!(parsed.reason.as_deref(), Some("too narrow"));
}
// A JSON null in an optional field is treated like a missing field.
#[test]
fn parse_treats_null_optional_fields_as_none() {
let raw =
r#"{"verdict":"KEEP","rule_id":null,"title":"T","body":"B","file_patterns":["a.rs"]}"#;
let parsed = parse_gate_json(raw).expect("parses");
assert!(parsed.rule_id.is_none());
assert_eq!(parsed.title.as_deref(), Some("T"));
}
// Output without any JSON object fails with the "no JSON object" diagnostic.
#[test]
fn parse_rejects_malformed_payload_with_clean_error() {
let raw = "this is not JSON at all";
let err = parse_gate_json(raw).unwrap_err();
match err {
GateError::ParseFailure { reason, .. } => {
assert!(
reason.contains("no JSON object"),
"expected 'no JSON object' diagnostic, got: {reason}"
);
}
other => panic!("expected ParseFailure, got {other:?}"),
}
}
// A JSON object without a verdict fails with a diagnostic naming the field.
#[test]
fn parse_rejects_missing_verdict_field() {
let raw = r#"{"title":"T","body":"B"}"#;
let err = parse_gate_json(raw).unwrap_err();
match err {
GateError::ParseFailure { reason, .. } => {
assert!(reason.contains("verdict"), "diagnostic mentions verdict: {reason}");
}
other => panic!("expected ParseFailure, got {other:?}"),
}
}
// An object whose braces never close is a parse failure.
#[test]
fn parse_rejects_invalid_json_body() {
let raw = r#"{"verdict":"KEEP""#;
let err = parse_gate_json(raw).unwrap_err();
assert!(matches!(err, GateError::ParseFailure { .. }));
}
// KEEP copies the session metadata from the args into the candidate.
#[test]
fn keep_verdict_builds_session_mined_candidate() {
let parsed = GateJson {
verdict: "KEEP".to_owned(),
rule_id: None,
title: Some("Validate before enqueue".to_owned()),
body: Some("Session-mined candidates must validate before reaching the outbox.".to_owned()),
file_patterns: vec!["crates/**/*.rs".to_owned()],
reason: None,
};
let pairs = vec![pair("u", "a")];
let a = args(&pairs, &[]);
let verdict = parsed_to_verdict(parsed, &a).expect("verdict");
match verdict {
GateVerdict::Keep { candidate } => {
assert_eq!(candidate.source_repo, "owner/repo");
assert_eq!(candidate.session_id, "sess_test");
assert_eq!(candidate.gate_verdict, "KEEP");
assert_eq!(candidate.gate_model, "claude-code:gate");
assert!(candidate.requires_human_approval);
assert_eq!(candidate.file_patterns, vec!["crates/**/*.rs"]);
}
other => panic!("expected Keep, got {other:?}"),
}
}
// MERGE carries the rule id and body; the title is not part of the verdict.
#[test]
fn merge_verdict_carries_rule_id_and_updated_body() {
let parsed = GateJson {
verdict: "MERGE".to_owned(),
rule_id: Some("rule-42".to_owned()),
title: Some("Extended".to_owned()),
body: Some("Refined body".to_owned()),
file_patterns: vec![],
reason: None,
};
let pairs = vec![pair("u", "a")];
let a = args(&pairs, &[]);
let verdict = parsed_to_verdict(parsed, &a).expect("verdict");
assert_eq!(
verdict,
GateVerdict::Merge {
rule_id: "rule-42".to_owned(),
updated_body: "Refined body".to_owned(),
}
);
}
// SKIP without a reason falls back to the default text.
#[test]
fn skip_verdict_falls_back_to_default_reason() {
let parsed = GateJson {
verdict: "SKIP".to_owned(),
rule_id: None,
title: None,
body: None,
file_patterns: vec![],
reason: None,
};
let pairs = vec![pair("u", "a")];
let a = args(&pairs, &[]);
let verdict = parsed_to_verdict(parsed, &a).expect("verdict");
assert_eq!(
verdict,
GateVerdict::Skip {
reason: "no reason given".to_owned(),
}
);
}
// KEEP without a title is rejected (only the missing-title case is exercised here).
#[test]
fn keep_missing_title_or_body_is_invalid_candidate() {
let parsed = GateJson {
verdict: "KEEP".to_owned(),
rule_id: None,
title: None,
body: Some("body".to_owned()),
file_patterns: vec!["a.rs".to_owned()],
reason: None,
};
let pairs = vec![pair("u", "a")];
let a = args(&pairs, &[]);
let err = parsed_to_verdict(parsed, &a).unwrap_err();
assert!(matches!(err, GateError::InvalidCandidate { .. }));
}
// KEEP without file patterns is rejected with a diagnostic naming the field.
#[test]
fn keep_with_empty_file_patterns_is_invalid_candidate() {
let parsed = GateJson {
verdict: "KEEP".to_owned(),
rule_id: None,
title: Some("T".to_owned()),
body: Some("B".to_owned()),
file_patterns: vec![],
reason: None,
};
let pairs = vec![pair("u", "a")];
let a = args(&pairs, &[]);
let err = parsed_to_verdict(parsed, &a).unwrap_err();
match err {
GateError::InvalidCandidate { reason } => {
assert!(reason.contains("file_patterns"));
}
other => panic!("expected InvalidCandidate, got {other:?}"),
}
}
// Verdict text other than KEEP/MERGE/SKIP is a parse failure.
#[test]
fn unknown_verdict_string_is_parse_failure() {
let parsed = GateJson {
verdict: "REJECT".to_owned(),
rule_id: None,
title: None,
body: None,
file_patterns: vec![],
reason: None,
};
let pairs = vec![pair("u", "a")];
let a = args(&pairs, &[]);
let err = parsed_to_verdict(parsed, &a).unwrap_err();
assert!(matches!(err, GateError::ParseFailure { .. }));
}
// An empty pair list is rejected by the guard at the top of `run_gate`.
#[tokio::test]
async fn run_gate_rejects_empty_input_without_spawning() {
let a = args(&[], &[]);
let err = run_gate(a).await.unwrap_err();
assert_eq!(err, GateError::EmptyInput);
}
// `ExistingRule` supports Clone and equality comparison.
#[test]
fn existing_rule_shape_clones_and_compares_cheaply() {
let r = ExistingRule {
rule_id: "rule-1".to_owned(),
title: "Prefer typed parse".to_owned(),
body_snippet: "..".to_owned(),
};
assert_eq!(r.clone(), r);
}
}