use std::collections::BTreeMap;
use std::fmt;
use std::fs;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::agent_events::{AgentEvent, PersistedAgentEvent, ToolCallErrorCategory, ToolCallStatus};
use crate::value::VmError;
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingSeverity {
Info,
Warn,
Error,
}
impl FindingSeverity {
pub fn as_str(self) -> &'static str {
match self {
Self::Info => "info",
Self::Warn => "warn",
Self::Error => "error",
}
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingCategory {
ExtraModelCall,
InvalidStructuredOutput,
RepeatedRead,
BadWait,
UnsafeAttemptedAction,
SkippedVerification,
MissingApproval,
NonMinimalToolUsage,
MissingStateStep,
StateOutOfOrder,
IncompleteTranscript,
ForbiddenAction,
}
impl FindingCategory {
pub fn as_str(self) -> &'static str {
match self {
Self::ExtraModelCall => "extra_model_call",
Self::InvalidStructuredOutput => "invalid_structured_output",
Self::RepeatedRead => "repeated_read",
Self::BadWait => "bad_wait",
Self::UnsafeAttemptedAction => "unsafe_attempted_action",
Self::SkippedVerification => "skipped_verification",
Self::MissingApproval => "missing_approval",
Self::NonMinimalToolUsage => "non_minimal_tool_usage",
Self::MissingStateStep => "missing_state_step",
Self::StateOutOfOrder => "state_out_of_order",
Self::IncompleteTranscript => "incomplete_transcript",
Self::ForbiddenAction => "forbidden_action",
}
}
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AuditFinding {
pub category: FindingCategory,
pub severity: FindingSeverity,
pub message: String,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub event_indices: Vec<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub state_step: Option<String>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<String>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct StateTransition {
pub step: String,
pub event_index: u64,
pub triggered_by: String,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(default)]
pub struct ToolPattern {
pub name: Option<String>,
pub glob: Option<String>,
}
impl ToolPattern {
pub fn matches(&self, tool: &str) -> bool {
let needle = tool.to_lowercase();
if let Some(name) = &self.name {
return name.eq_ignore_ascii_case(tool);
}
if let Some(glob) = &self.glob {
return glob_match(&glob.to_lowercase(), &needle);
}
false
}
}
fn glob_match(pattern: &str, value: &str) -> bool {
if !pattern.contains('*') {
return pattern == value;
}
let parts: Vec<&str> = pattern.split('*').collect();
let mut cursor = 0usize;
let last = parts.len().saturating_sub(1);
for (i, part) in parts.iter().enumerate() {
if part.is_empty() {
if i == 0 || i == last {
continue;
}
continue;
}
if i == 0 && !pattern.starts_with('*') {
if !value[cursor..].starts_with(part) {
return false;
}
cursor += part.len();
continue;
}
if i == last && !pattern.ends_with('*') {
return value[cursor..].ends_with(part);
}
match value[cursor..].find(part) {
Some(idx) => cursor += idx + part.len(),
None => return false,
}
}
pattern.ends_with('*') || cursor == value.len()
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(default)]
pub struct GoldenStateStep {
pub step: String,
pub tools: Vec<ToolPattern>,
pub plan_fields: Vec<String>,
pub events: Vec<String>,
pub required: bool,
#[serde(default)]
pub approval_gate: bool,
#[serde(default)]
pub verifier: bool,
#[serde(default)]
pub merge_action: bool,
}
#[derive(Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq)]
#[serde(default)]
pub struct MergeCaptainGolden {
#[serde(rename = "_type")]
pub type_name: String,
pub scenario: String,
pub description: Option<String>,
pub max_model_calls: Option<u64>,
pub max_tool_calls: Option<u64>,
pub max_repeat: Option<u32>,
pub require_approval_for: Vec<ToolPattern>,
pub forbidden_actions: Vec<ToolPattern>,
pub state_steps: Vec<GoldenStateStep>,
}
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
pub struct AuditReport {
pub scenario: Option<String>,
pub source_path: Option<String>,
pub session_ids: Vec<String>,
pub event_count: u64,
pub model_call_count: u64,
pub tool_call_count: u64,
pub findings: Vec<AuditFinding>,
pub state_transitions: Vec<StateTransition>,
pub pass: bool,
}
impl AuditReport {
pub fn error_findings(&self) -> usize {
self.findings
.iter()
.filter(|f| f.severity == FindingSeverity::Error)
.count()
}
pub fn warn_findings(&self) -> usize {
self.findings
.iter()
.filter(|f| f.severity == FindingSeverity::Warn)
.count()
}
}
impl fmt::Display for AuditReport {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(
f,
"{} scenario={} events={} tool_calls={} model_calls={}",
if self.pass { "PASS" } else { "FAIL" },
self.scenario.as_deref().unwrap_or("<none>"),
self.event_count,
self.tool_call_count,
self.model_call_count
)?;
if let Some(path) = &self.source_path {
writeln!(f, " transcript: {}", path)?;
}
if !self.state_transitions.is_empty() {
writeln!(f, " state transitions:")?;
for t in &self.state_transitions {
writeln!(
f,
" [{}] {} <- {}",
t.event_index, t.step, t.triggered_by
)?;
}
}
if self.findings.is_empty() {
writeln!(f, " findings: none")?;
} else {
writeln!(f, " findings ({}):", self.findings.len())?;
for finding in &self.findings {
let step = finding
.state_step
.as_deref()
.map(|s| format!(" step={}", s))
.unwrap_or_default();
let tools = if finding.tools.is_empty() {
String::new()
} else {
format!(" tools={}", finding.tools.join(","))
};
let events = if finding.event_indices.is_empty() {
String::new()
} else {
format!(
" events=[{}]",
finding
.event_indices
.iter()
.map(u64::to_string)
.collect::<Vec<_>>()
.join(",")
)
};
writeln!(
f,
" [{}] {}: {}{}{}{}",
finding.severity.as_str(),
finding.category.as_str(),
finding.message,
step,
tools,
events
)?;
}
}
Ok(())
}
}
#[derive(Clone, Debug)]
pub struct LoadedTranscript {
pub source_path: PathBuf,
pub events: Vec<PersistedAgentEvent>,
}
pub fn load_transcript_jsonl(path: &Path) -> Result<LoadedTranscript, VmError> {
let metadata = fs::metadata(path).map_err(|e| {
VmError::Runtime(format!("failed to stat transcript {}: {e}", path.display()))
})?;
let mut events = Vec::new();
if metadata.is_dir() {
let mut files: Vec<PathBuf> = fs::read_dir(path)
.map_err(|e| {
VmError::Runtime(format!(
"failed to read transcript directory {}: {e}",
path.display()
))
})?
.filter_map(|entry| entry.ok())
.map(|entry| entry.path())
.filter(|p| {
p.file_name()
.and_then(|n| n.to_str())
.map(|name| {
name.starts_with("event_log")
&& p.extension().and_then(|e| e.to_str()) == Some("jsonl")
})
.unwrap_or(false)
})
.collect();
files.sort();
if files.is_empty() {
return Err(VmError::Runtime(format!(
"no event_log*.jsonl files under {}",
path.display()
)));
}
for file in &files {
events.extend(read_jsonl_file(file)?);
}
} else {
events.extend(read_jsonl_file(path)?);
}
events.sort_by_key(|e| e.index);
Ok(LoadedTranscript {
source_path: path.to_path_buf(),
events,
})
}
fn read_jsonl_file(path: &Path) -> Result<Vec<PersistedAgentEvent>, VmError> {
let file = fs::File::open(path).map_err(|e| {
VmError::Runtime(format!("failed to open transcript {}: {e}", path.display()))
})?;
let reader = BufReader::new(file);
let mut events = Vec::new();
for (line_no, line) in reader.lines().enumerate() {
let line = line.map_err(|e| {
VmError::Runtime(format!(
"failed to read line {} of {}: {e}",
line_no + 1,
path.display()
))
})?;
let trimmed = line.trim();
if trimmed.is_empty() {
continue;
}
let event: PersistedAgentEvent = serde_json::from_str(trimmed).map_err(|e| {
VmError::Runtime(format!(
"failed to parse line {} of {} as PersistedAgentEvent: {e}",
line_no + 1,
path.display()
))
})?;
events.push(event);
}
Ok(events)
}
pub fn load_merge_captain_golden(path: &Path) -> Result<MergeCaptainGolden, VmError> {
let bytes = fs::read(path).map_err(|e| {
VmError::Runtime(format!(
"failed to read merge_captain golden {}: {e}",
path.display()
))
})?;
let golden: MergeCaptainGolden = serde_json::from_slice(&bytes).map_err(|e| {
VmError::Runtime(format!(
"failed to parse merge_captain golden {}: {e}",
path.display()
))
})?;
Ok(golden)
}
fn default_state_steps() -> Vec<GoldenStateStep> {
vec![
GoldenStateStep {
step: "intake".into(),
tools: vec![ToolPattern {
glob: Some("*pull_request*".into()),
..Default::default()
}],
plan_fields: vec!["pr_number".into()],
events: vec!["plan".into()],
..Default::default()
},
GoldenStateStep {
step: "verify_checks".into(),
tools: vec![
ToolPattern {
glob: Some("*check*".into()),
..Default::default()
},
ToolPattern {
glob: Some("*ci*".into()),
..Default::default()
},
ToolPattern {
glob: Some("*workflow_run*".into()),
..Default::default()
},
],
verifier: true,
..Default::default()
},
GoldenStateStep {
step: "decide_risk".into(),
plan_fields: vec!["review_risk".into()],
events: vec!["plan".into()],
..Default::default()
},
GoldenStateStep {
step: "approval_gate".into(),
plan_fields: vec!["approval_required".into()],
events: vec!["handoff".into(), "feedback_injected".into()],
approval_gate: true,
..Default::default()
},
GoldenStateStep {
step: "merge_or_handoff".into(),
tools: vec![
ToolPattern {
glob: Some("*merge*".into()),
..Default::default()
},
ToolPattern {
glob: Some("*label*".into()),
..Default::default()
},
],
events: vec!["handoff".into()],
merge_action: true,
..Default::default()
},
]
}
fn is_default_write_tool(name: &str) -> bool {
let lower = name.to_lowercase();
lower.contains("merge")
|| lower.contains("write_file")
|| lower.contains("create_pull")
|| lower.contains("delete")
|| lower.contains("force_push")
|| lower.contains("apply_patch")
|| lower.contains("set_label")
|| lower.contains("post_comment")
|| lower.contains("approve")
}
fn is_wait_tool(name: &str) -> bool {
let lower = name.to_lowercase();
lower.contains("sleep") || lower.contains("wait") || lower.contains("poll")
}
pub fn audit_transcript(
events: &[PersistedAgentEvent],
golden: Option<&MergeCaptainGolden>,
) -> AuditReport {
let scenario = golden.map(|g| g.scenario.clone());
let mut session_ids: Vec<String> = Vec::new();
let mut model_calls: u64 = 0;
let mut tool_calls: u64 = 0;
let mut findings: Vec<AuditFinding> = Vec::new();
let mut transitions: Vec<StateTransition> = Vec::new();
let state_steps_owned: Vec<GoldenStateStep> = match golden {
Some(g) if !g.state_steps.is_empty() => g.state_steps.clone(),
_ => default_state_steps(),
};
let max_repeat = golden.and_then(|g| g.max_repeat).unwrap_or(1);
let mut last_tool_call: BTreeMap<String, (String, String, Vec<u64>)> = BTreeMap::new();
let mut pending_approvals: Vec<u64> = Vec::new();
let mut verifier_fired: bool = false;
let mut steps_seen: Vec<String> = Vec::new();
let mut last_index: u64 = 0;
let mut saw_terminal: bool = false;
for env in events {
last_index = env.index;
let event = &env.event;
let session = event.session_id().to_string();
if !session_ids.contains(&session) {
session_ids.push(session.clone());
}
match event {
AgentEvent::AgentMessageChunk { .. } | AgentEvent::AgentThoughtChunk { .. } => {
}
AgentEvent::TurnStart { .. } => {
model_calls += 1;
}
AgentEvent::TurnEnd { .. } => {
saw_terminal = true;
}
AgentEvent::BudgetExhausted { .. } => {
saw_terminal = true;
findings.push(AuditFinding {
category: FindingCategory::ExtraModelCall,
severity: FindingSeverity::Error,
message: "loop hit max_iterations without resolving".into(),
event_indices: vec![env.index],
state_step: None,
tools: vec![],
});
}
AgentEvent::LoopStuck { .. } => {
saw_terminal = true;
findings.push(AuditFinding {
category: FindingCategory::ExtraModelCall,
severity: FindingSeverity::Error,
message: "loop stuck on consecutive text-only turns".into(),
event_indices: vec![env.index],
state_step: None,
tools: vec![],
});
}
AgentEvent::Handoff { .. } => {
saw_terminal = true;
if !pending_approvals.is_empty() {
pending_approvals.clear();
}
check_state_transition(
&state_steps_owned,
StepTrigger::Event("handoff"),
env.index,
"handoff",
&mut transitions,
&mut steps_seen,
&mut findings,
&mut pending_approvals,
&mut verifier_fired,
);
}
AgentEvent::FeedbackInjected { kind, .. } => {
if kind.eq_ignore_ascii_case("approval") || kind.eq_ignore_ascii_case("approved") {
pending_approvals.clear();
}
check_state_transition(
&state_steps_owned,
StepTrigger::Event("feedback_injected"),
env.index,
"feedback_injected",
&mut transitions,
&mut steps_seen,
&mut findings,
&mut pending_approvals,
&mut verifier_fired,
);
}
AgentEvent::Plan { plan, .. } => {
check_plan_transitions(
&state_steps_owned,
plan,
env.index,
&mut transitions,
&mut steps_seen,
&mut findings,
&mut pending_approvals,
&mut verifier_fired,
);
if let Some(approval) = plan
.get("approval_required")
.and_then(serde_json::Value::as_bool)
{
if approval {
pending_approvals.push(env.index);
}
}
if !plan.is_object() {
findings.push(AuditFinding {
category: FindingCategory::InvalidStructuredOutput,
severity: FindingSeverity::Error,
message: "Plan event payload was not a JSON object".into(),
event_indices: vec![env.index],
state_step: None,
tools: vec![],
});
}
}
AgentEvent::ToolCall {
tool_name,
raw_input,
status,
..
} => {
tool_calls += 1;
let arg_hash = canonical_json(raw_input);
match last_tool_call.get_mut(&session) {
Some(entry) if entry.0 == *tool_name && entry.1 == arg_hash => {
entry.2.push(env.index);
if (entry.2.len() as u32) > max_repeat {
let indices = entry.2.clone();
findings.push(AuditFinding {
category: FindingCategory::RepeatedRead,
severity: FindingSeverity::Error,
message: format!(
"tool `{}` called {} times consecutively with identical args",
tool_name,
indices.len()
),
event_indices: indices,
state_step: None,
tools: vec![tool_name.clone()],
});
*entry = (tool_name.clone(), arg_hash.clone(), vec![env.index]);
}
}
_ => {
last_tool_call.insert(
session.clone(),
(tool_name.clone(), arg_hash.clone(), vec![env.index]),
);
}
}
if is_wait_tool(tool_name) {
let indicates_progress = raw_input
.as_object()
.map(|obj| {
obj.contains_key("until")
|| obj.contains_key("condition")
|| obj.contains_key("subscription_id")
})
.unwrap_or(false);
if !indicates_progress {
findings.push(AuditFinding {
category: FindingCategory::BadWait,
severity: FindingSeverity::Warn,
message: format!(
"wait/poll tool `{}` invoked without progress predicate (until/condition/subscription_id)",
tool_name
),
event_indices: vec![env.index],
state_step: None,
tools: vec![tool_name.clone()],
});
}
}
let needs_approval_match = match golden {
Some(g) if !g.require_approval_for.is_empty() => {
g.require_approval_for.iter().any(|p| p.matches(tool_name))
}
_ => is_default_write_tool(tool_name),
};
if needs_approval_match
&& pending_approvals.is_empty()
&& !already_approved(&steps_seen, &state_steps_owned)
{
findings.push(AuditFinding {
category: FindingCategory::UnsafeAttemptedAction,
severity: FindingSeverity::Error,
message: format!(
"tool `{}` requires prior approval gate, but none observed",
tool_name
),
event_indices: vec![env.index],
state_step: None,
tools: vec![tool_name.clone()],
});
}
if let Some(g) = golden {
if g.forbidden_actions.iter().any(|p| p.matches(tool_name)) {
findings.push(AuditFinding {
category: FindingCategory::ForbiddenAction,
severity: FindingSeverity::Error,
message: format!(
"tool `{}` is forbidden in scenario `{}`",
tool_name, g.scenario
),
event_indices: vec![env.index],
state_step: None,
tools: vec![tool_name.clone()],
});
}
}
check_state_transition(
&state_steps_owned,
StepTrigger::Tool(tool_name),
env.index,
tool_name,
&mut transitions,
&mut steps_seen,
&mut findings,
&mut pending_approvals,
&mut verifier_fired,
);
let _ = status;
}
AgentEvent::ToolCallUpdate {
status,
error,
error_category,
tool_name,
..
} => {
if matches!(status, ToolCallStatus::Failed) {
if let Some(category) = error_category {
if matches!(category, ToolCallErrorCategory::SchemaValidation) {
findings.push(AuditFinding {
category: FindingCategory::InvalidStructuredOutput,
severity: FindingSeverity::Error,
message: format!(
"tool `{}` failed schema validation: {}",
tool_name,
error.clone().unwrap_or_default()
),
event_indices: vec![env.index],
state_step: None,
tools: vec![tool_name.clone()],
});
}
}
}
}
_ => {
}
}
}
if !pending_approvals.is_empty() {
findings.push(AuditFinding {
category: FindingCategory::MissingApproval,
severity: FindingSeverity::Error,
message: format!(
"{} plan(s) declared approval_required: true with no following approval gate",
pending_approvals.len()
),
event_indices: pending_approvals.clone(),
state_step: Some("approval_gate".into()),
tools: vec![],
});
}
if !events.is_empty() && !saw_terminal {
findings.push(AuditFinding {
category: FindingCategory::IncompleteTranscript,
severity: FindingSeverity::Warn,
message:
"transcript ended without a TurnEnd / Handoff / BudgetExhausted / LoopStuck event"
.into(),
event_indices: vec![last_index],
state_step: None,
tools: vec![],
});
}
for step in &state_steps_owned {
if step.required && !steps_seen.iter().any(|s| s == &step.step) {
findings.push(AuditFinding {
category: FindingCategory::MissingStateStep,
severity: FindingSeverity::Error,
message: format!("required state step `{}` was never reached", step.step),
event_indices: vec![],
state_step: Some(step.step.clone()),
tools: vec![],
});
}
}
let order: BTreeMap<&str, usize> = state_steps_owned
.iter()
.enumerate()
.map(|(i, s)| (s.step.as_str(), i))
.collect();
let mut highest: usize = 0;
let mut last_step: Option<&str> = None;
for step in &steps_seen {
if let Some(idx) = order.get(step.as_str()) {
if *idx + 1 < highest && last_step != Some(step.as_str()) {
findings.push(AuditFinding {
category: FindingCategory::StateOutOfOrder,
severity: FindingSeverity::Warn,
message: format!("state step `{}` fired after a later step", step),
event_indices: vec![],
state_step: Some(step.clone()),
tools: vec![],
});
}
if *idx > highest {
highest = *idx;
}
last_step = Some(step.as_str());
}
}
if let Some(g) = golden {
if let Some(max) = g.max_tool_calls {
if tool_calls > max {
findings.push(AuditFinding {
category: FindingCategory::NonMinimalToolUsage,
severity: FindingSeverity::Error,
message: format!(
"tool calls ({}) exceeded scenario budget ({})",
tool_calls, max
),
event_indices: vec![],
state_step: None,
tools: vec![],
});
}
}
if let Some(max) = g.max_model_calls {
if model_calls > max {
findings.push(AuditFinding {
category: FindingCategory::ExtraModelCall,
severity: FindingSeverity::Error,
message: format!(
"model calls ({}) exceeded scenario budget ({})",
model_calls, max
),
event_indices: vec![],
state_step: None,
tools: vec![],
});
}
}
}
let pass = findings
.iter()
.all(|f| f.severity != FindingSeverity::Error);
AuditReport {
scenario,
source_path: None,
session_ids,
event_count: events.len() as u64,
model_call_count: model_calls,
tool_call_count: tool_calls,
findings,
state_transitions: transitions,
pass,
}
}
enum StepTrigger<'a> {
Tool(&'a str),
Event(&'a str),
}
#[allow(clippy::too_many_arguments)]
fn check_state_transition(
steps: &[GoldenStateStep],
trigger: StepTrigger,
event_index: u64,
triggered_by: &str,
transitions: &mut Vec<StateTransition>,
steps_seen: &mut Vec<String>,
findings: &mut Vec<AuditFinding>,
pending_approvals: &mut Vec<u64>,
verifier_fired: &mut bool,
) {
for step in steps {
let matched = match &trigger {
StepTrigger::Tool(name) => step.tools.iter().any(|p| p.matches(name)),
StepTrigger::Event(name) => step.events.iter().any(|e| e.eq_ignore_ascii_case(name)),
};
if !matched {
continue;
}
record_step(
step,
event_index,
triggered_by,
transitions,
steps_seen,
findings,
pending_approvals,
verifier_fired,
);
}
}
#[allow(clippy::too_many_arguments)]
fn check_plan_transitions(
steps: &[GoldenStateStep],
plan: &serde_json::Value,
event_index: u64,
transitions: &mut Vec<StateTransition>,
steps_seen: &mut Vec<String>,
findings: &mut Vec<AuditFinding>,
pending_approvals: &mut Vec<u64>,
verifier_fired: &mut bool,
) {
let obj = match plan.as_object() {
Some(o) => o,
None => return,
};
for step in steps {
let plan_match = step.plan_fields.iter().any(|f| obj.contains_key(f));
let event_match = step.events.iter().any(|e| e.eq_ignore_ascii_case("plan"));
if !(plan_match || (event_match && step.plan_fields.is_empty())) {
continue;
}
if !plan_match && !event_match {
continue;
}
record_step(
step,
event_index,
"plan",
transitions,
steps_seen,
findings,
pending_approvals,
verifier_fired,
);
}
}
#[allow(clippy::too_many_arguments)]
fn record_step(
step: &GoldenStateStep,
event_index: u64,
triggered_by: &str,
transitions: &mut Vec<StateTransition>,
steps_seen: &mut Vec<String>,
findings: &mut Vec<AuditFinding>,
pending_approvals: &mut Vec<u64>,
verifier_fired: &mut bool,
) {
transitions.push(StateTransition {
step: step.step.clone(),
event_index,
triggered_by: triggered_by.to_string(),
});
if !steps_seen.contains(&step.step) {
steps_seen.push(step.step.clone());
}
if step.approval_gate {
pending_approvals.clear();
}
if step.verifier {
*verifier_fired = true;
}
if step.merge_action && !*verifier_fired {
findings.push(AuditFinding {
category: FindingCategory::SkippedVerification,
severity: FindingSeverity::Error,
message: format!(
"merge action `{}` reached without a preceding verifier step",
step.step
),
event_indices: vec![event_index],
state_step: Some(step.step.clone()),
tools: vec![],
});
}
}
fn already_approved(steps_seen: &[String], steps: &[GoldenStateStep]) -> bool {
steps
.iter()
.filter(|s| s.approval_gate)
.any(|s| steps_seen.contains(&s.step))
}
fn canonical_json(value: &serde_json::Value) -> String {
serde_json::to_string(value).unwrap_or_default()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::agent_events::{AgentEvent, PersistedAgentEvent, ToolCallStatus};
use serde_json::json;
fn env(index: u64, event: AgentEvent) -> PersistedAgentEvent {
PersistedAgentEvent {
index,
emitted_at_ms: 0,
frame_depth: None,
event,
}
}
fn turn_start(index: u64, session: &str, iter: usize) -> PersistedAgentEvent {
env(
index,
AgentEvent::TurnStart {
session_id: session.into(),
iteration: iter,
},
)
}
fn turn_end(index: u64, session: &str, iter: usize) -> PersistedAgentEvent {
env(
index,
AgentEvent::TurnEnd {
session_id: session.into(),
iteration: iter,
turn_info: serde_json::Value::Null,
},
)
}
fn tool_call(
index: u64,
session: &str,
tool: &str,
args: serde_json::Value,
) -> PersistedAgentEvent {
env(
index,
AgentEvent::ToolCall {
session_id: session.into(),
tool_call_id: format!("call_{}", index),
tool_name: tool.into(),
kind: None,
status: ToolCallStatus::Pending,
raw_input: args,
parsing: None,
audit: None,
},
)
}
fn plan(index: u64, session: &str, plan: serde_json::Value) -> PersistedAgentEvent {
env(
index,
AgentEvent::Plan {
session_id: session.into(),
plan,
},
)
}
fn handoff(index: u64, session: &str) -> PersistedAgentEvent {
env(
index,
AgentEvent::Handoff {
session_id: session.into(),
artifact_id: format!("artifact_{index}"),
handoff: Box::new(crate::orchestration::HandoffArtifact::default()),
},
)
}
#[test]
fn pass_minimal_green_pr_default_rules() {
let events = vec![
turn_start(1, "s", 1),
tool_call(2, "s", "fetch_pull_request", json!({"number": 1})),
tool_call(3, "s", "list_checks", json!({"pr": 1})),
plan(
4,
"s",
json!({
"review_risk": "low",
"approval_required": false,
"pr_number": 1,
}),
),
turn_end(5, "s", 1),
];
let report = audit_transcript(&events, None);
assert!(report.pass, "report: {}", report);
assert_eq!(report.tool_call_count, 2);
assert_eq!(report.model_call_count, 1);
assert!(
report.findings.is_empty(),
"findings: {:?}",
report.findings
);
}
#[test]
fn flags_repeated_reads_with_default_threshold() {
let events = vec![
turn_start(1, "s", 1),
tool_call(2, "s", "list_checks", json!({"pr": 1})),
tool_call(3, "s", "list_checks", json!({"pr": 1})),
tool_call(4, "s", "list_checks", json!({"pr": 1})),
turn_end(5, "s", 1),
];
let report = audit_transcript(&events, None);
assert!(!report.pass);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::RepeatedRead));
}
#[test]
fn flags_unsafe_action_without_approval() {
let events = vec![
turn_start(1, "s", 1),
tool_call(2, "s", "merge_pull_request", json!({"number": 1})),
turn_end(3, "s", 1),
];
let report = audit_transcript(&events, None);
assert!(!report.pass);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::UnsafeAttemptedAction));
}
#[test]
fn flags_missing_approval_after_required_plan() {
let events = vec![
turn_start(1, "s", 1),
plan(
2,
"s",
json!({"approval_required": true, "review_risk": "high"}),
),
turn_end(3, "s", 1),
];
let report = audit_transcript(&events, None);
assert!(!report.pass);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::MissingApproval));
}
#[test]
fn handoff_satisfies_pending_approval() {
let events = vec![
turn_start(1, "s", 1),
plan(
2,
"s",
json!({"approval_required": true, "review_risk": "high"}),
),
handoff(3, "s"),
];
let report = audit_transcript(&events, None);
assert!(
!report
.findings
.iter()
.any(|f| f.category == FindingCategory::MissingApproval),
"findings: {:?}",
report.findings
);
}
#[test]
fn flags_skipped_verification_when_merge_runs_without_verifier() {
let golden = MergeCaptainGolden {
type_name: "merge_captain_golden".into(),
scenario: "test".into(),
state_steps: vec![
GoldenStateStep {
step: "verify".into(),
tools: vec![ToolPattern {
glob: Some("*list_checks*".into()),
..Default::default()
}],
verifier: true,
..Default::default()
},
GoldenStateStep {
step: "approve".into(),
events: vec!["feedback_injected".into()],
approval_gate: true,
..Default::default()
},
GoldenStateStep {
step: "merge".into(),
tools: vec![ToolPattern {
glob: Some("*merge*".into()),
..Default::default()
}],
merge_action: true,
required: true,
..Default::default()
},
],
..Default::default()
};
let events = vec![
turn_start(1, "s", 1),
env(
2,
AgentEvent::FeedbackInjected {
session_id: "s".into(),
kind: "approval".into(),
content: "ok".into(),
},
),
tool_call(3, "s", "merge_pull_request", json!({"number": 1})),
turn_end(4, "s", 1),
];
let report = audit_transcript(&events, Some(&golden));
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::SkippedVerification));
}
#[test]
fn flags_extra_model_calls_against_golden() {
let golden = MergeCaptainGolden {
type_name: "merge_captain_golden".into(),
scenario: "test".into(),
max_model_calls: Some(1),
..Default::default()
};
let events = vec![
turn_start(1, "s", 1),
turn_end(2, "s", 1),
turn_start(3, "s", 2),
turn_end(4, "s", 2),
];
let report = audit_transcript(&events, Some(&golden));
assert!(!report.pass);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::ExtraModelCall));
}
#[test]
fn flags_non_minimal_tool_usage() {
let golden = MergeCaptainGolden {
type_name: "merge_captain_golden".into(),
scenario: "test".into(),
max_tool_calls: Some(1),
..Default::default()
};
let events = vec![
turn_start(1, "s", 1),
tool_call(2, "s", "list_checks", json!({"a": 1})),
tool_call(3, "s", "list_threads", json!({"a": 2})),
turn_end(4, "s", 1),
];
let report = audit_transcript(&events, Some(&golden));
assert!(!report.pass);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::NonMinimalToolUsage));
}
#[test]
fn flags_invalid_structured_output_from_failed_tool_update() {
let events = vec![
turn_start(1, "s", 1),
tool_call(2, "s", "list_checks", json!({"a": 1})),
env(
3,
AgentEvent::ToolCallUpdate {
session_id: "s".into(),
tool_call_id: "call_2".into(),
tool_name: "list_checks".into(),
status: ToolCallStatus::Failed,
raw_output: None,
error: Some("missing required field".into()),
duration_ms: None,
execution_duration_ms: None,
error_category: Some(ToolCallErrorCategory::SchemaValidation),
executor: None,
parsing: None,
raw_input: None,
raw_input_partial: None,
audit: None,
},
),
turn_end(4, "s", 1),
];
let report = audit_transcript(&events, None);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::InvalidStructuredOutput));
}
#[test]
fn flags_forbidden_action() {
let golden = MergeCaptainGolden {
type_name: "merge_captain_golden".into(),
scenario: "test".into(),
forbidden_actions: vec![ToolPattern {
glob: Some("*force_push*".into()),
..Default::default()
}],
..Default::default()
};
let events = vec![
turn_start(1, "s", 1),
env(
2,
AgentEvent::FeedbackInjected {
session_id: "s".into(),
kind: "approval".into(),
content: "ok".into(),
},
),
tool_call(3, "s", "force_push", json!({"branch": "main"})),
turn_end(4, "s", 1),
];
let report = audit_transcript(&events, Some(&golden));
assert!(!report.pass);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::ForbiddenAction));
}
#[test]
fn missing_required_state_step() {
let golden = MergeCaptainGolden {
type_name: "merge_captain_golden".into(),
scenario: "test".into(),
state_steps: vec![GoldenStateStep {
step: "verify".into(),
tools: vec![ToolPattern {
glob: Some("*list_checks*".into()),
..Default::default()
}],
required: true,
verifier: true,
..Default::default()
}],
..Default::default()
};
let events = vec![turn_start(1, "s", 1), turn_end(2, "s", 1)];
let report = audit_transcript(&events, Some(&golden));
assert!(!report.pass);
assert!(report
.findings
.iter()
.any(|f| f.category == FindingCategory::MissingStateStep));
}
#[test]
fn glob_matching_basic_cases() {
let p = ToolPattern {
glob: Some("*merge*".into()),
..Default::default()
};
assert!(p.matches("gh_merge_pr"));
assert!(p.matches("MERGE"));
assert!(!p.matches("approve"));
let prefix = ToolPattern {
glob: Some("gh_*".into()),
..Default::default()
};
assert!(prefix.matches("gh_pr_list"));
assert!(!prefix.matches("git_pr_list"));
let suffix = ToolPattern {
glob: Some("*_merge".into()),
..Default::default()
};
assert!(suffix.matches("force_merge"));
assert!(!suffix.matches("merge_force"));
let exact = ToolPattern {
name: Some("read_file".into()),
..Default::default()
};
assert!(exact.matches("read_file"));
assert!(!exact.matches("read_files"));
}
#[test]
fn round_trip_report_serialization() {
let events = vec![
turn_start(1, "s", 1),
tool_call(2, "s", "list_checks", json!({"pr": 1})),
turn_end(3, "s", 1),
];
let report = audit_transcript(&events, None);
let json = serde_json::to_string(&report).expect("serialize");
let parsed: AuditReport = serde_json::from_str(&json).expect("deserialize");
assert_eq!(parsed.pass, report.pass);
assert_eq!(parsed.event_count, report.event_count);
}
#[test]
fn loads_jsonl_transcript_from_file() {
use std::io::Write;
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("event_log.jsonl");
let mut file = fs::File::create(&path).expect("create");
for env in [turn_start(1, "s", 1), turn_end(2, "s", 1)] {
let line = serde_json::to_string(&env).expect("ser");
writeln!(file, "{}", line).expect("write");
}
drop(file);
let loaded = load_transcript_jsonl(&path).expect("load");
assert_eq!(loaded.events.len(), 2);
}
#[test]
fn loads_jsonl_transcript_from_directory() {
use std::io::Write;
let dir = tempfile::tempdir().expect("tempdir");
let path1 = dir.path().join("event_log.jsonl");
let path2 = dir.path().join("event_log-000001.jsonl");
{
let mut file = fs::File::create(&path1).expect("create");
writeln!(
file,
"{}",
serde_json::to_string(&turn_start(1, "s", 1)).unwrap()
)
.unwrap();
}
{
let mut file = fs::File::create(&path2).expect("create");
writeln!(
file,
"{}",
serde_json::to_string(&turn_end(2, "s", 1)).unwrap()
)
.unwrap();
}
let loaded = load_transcript_jsonl(dir.path()).expect("load");
assert_eq!(loaded.events.len(), 2);
assert_eq!(loaded.events[0].index, 1);
assert_eq!(loaded.events[1].index, 2);
}
}