use std::time::{SystemTime, UNIX_EPOCH};
use serde_json::json;
use crate::eval::dataset::{EvalTestCase, TestNode};
use crate::{
AutomationAgentMcpPolicy, AutomationAgentProfile, AutomationAgentToolPolicy,
AutomationExecutionPolicy, AutomationFlowNode, AutomationFlowOutputContract,
AutomationFlowSpec, AutomationOutputValidatorKind, AutomationV2Schedule,
AutomationV2ScheduleType, AutomationV2Spec, AutomationV2Status, RoutineMisfirePolicy,
};
/// Agent id assigned to the single worker agent attached to every eval spec.
pub const EVAL_AGENT_ID: &str = "eval-agent-1";
/// Creator/trigger identifier stamped on specs produced by the eval runner.
pub const EVAL_TRIGGER_TYPE: &str = "eval_runner";
/// Time budget (ms) granted per repair iteration when sizing node timeouts.
pub const PER_REPAIR_TIMEOUT_MS: u64 = 60_000;
/// Floor (ms) applied to any computed per-node timeout.
pub const MIN_NODE_TIMEOUT_MS: u64 = 60_000;
/// Repair-iteration count used when neither config nor expected output sets one.
pub const DEFAULT_MAX_REPAIR_ITERATIONS: u32 = 3;
pub fn test_case_to_spec(case: &EvalTestCase) -> AutomationV2Spec {
let max_repair = effective_max_repair_iterations(case);
let nodes = case
.automation_spec
.nodes
.iter()
.map(|n| map_node(n, max_repair))
.collect();
let now = current_time_ms();
AutomationV2Spec {
automation_id: format!("eval-{}", case.id),
name: if case.automation_spec.name.is_empty() {
format!("eval/{}", case.id)
} else {
case.automation_spec.name.clone()
},
description: Some(case.description.clone()),
status: AutomationV2Status::Active,
schedule: AutomationV2Schedule {
schedule_type: AutomationV2ScheduleType::Manual,
cron_expression: None,
interval_seconds: None,
timezone: "UTC".to_string(),
misfire_policy: RoutineMisfirePolicy::Skip,
},
knowledge: tandem_orchestrator::KnowledgeBinding::default(),
agents: vec![default_agent()],
flow: AutomationFlowSpec { nodes },
execution: AutomationExecutionPolicy {
profile: None,
max_parallel_agents: Some(1),
max_total_runtime_ms: Some(max_repair as u64 * PER_REPAIR_TIMEOUT_MS * 4),
max_total_tool_calls: None,
max_total_tokens: None,
max_total_cost_usd: None,
},
output_targets: Vec::new(),
created_at_ms: now,
updated_at_ms: now,
creator_id: EVAL_TRIGGER_TYPE.to_string(),
workspace_root: None,
metadata: None,
next_fire_at_ms: None,
last_fired_at_ms: None,
scope_policy: None,
watch_conditions: Vec::new(),
handoff_config: None,
}
}
/// Build an [`AutomationFlowNode`] for one dataset node, attaching the
/// validator and contract-kind label inferred from its declared node type.
fn map_node(node: &TestNode, max_repair: u32) -> AutomationFlowNode {
    // Non-empty contract text from the dataset becomes summary guidance.
    let summary_guidance =
        (!node.output_contract.is_empty()).then(|| node.output_contract.clone());

    let contract = AutomationFlowOutputContract {
        kind: contract_kind_for_node_type(&node.node_type).to_string(),
        validator: Some(validator_for_node_type(&node.node_type)),
        enforcement: None,
        schema: None,
        summary_guidance,
    };

    AutomationFlowNode {
        knowledge: tandem_orchestrator::KnowledgeBinding::default(),
        node_id: node.id.clone(),
        agent_id: EVAL_AGENT_ID.to_string(),
        objective: node.objective.clone(),
        depends_on: Vec::new(),
        input_refs: Vec::new(),
        output_contract: Some(contract),
        tool_policy: None,
        mcp_policy: None,
        // `max_attempts` counts the first attempt; `retries` only re-runs.
        retry_policy: Some(json!({
            "max_attempts": max_repair,
            "retries": max_repair.saturating_sub(1),
        })),
        timeout_ms: Some(node_timeout_ms(max_repair)),
        max_tool_calls: None,
        stage_kind: None,
        gate: None,
        metadata: None,
    }
}
/// Profile for the single eval worker agent shared by every generated spec.
/// Tool and MCP policies carry empty lists and no approval policy is set.
fn default_agent() -> AutomationAgentProfile {
    let tool_policy = AutomationAgentToolPolicy {
        allowlist: Vec::new(),
        denylist: Vec::new(),
    };
    let mcp_policy = AutomationAgentMcpPolicy {
        allowed_servers: Vec::new(),
        allowed_tools: None,
    };
    AutomationAgentProfile {
        agent_id: EVAL_AGENT_ID.to_string(),
        template_id: None,
        display_name: "Eval Worker".to_string(),
        avatar_url: None,
        model_policy: None,
        skills: Vec::new(),
        tool_policy,
        mcp_policy,
        approval_policy: None,
    }
}
/// Map a dataset node type (matched case-insensitively) onto the validator
/// family responsible for checking that node's output. Unrecognized types
/// fall back to [`AutomationOutputValidatorKind::GenericArtifact`].
pub fn validator_for_node_type(node_type: &str) -> AutomationOutputValidatorKind {
    // Alias table: each validator family with the node-type spellings it owns.
    let families: [(&[&str], AutomationOutputValidatorKind); 5] = [
        (
            &["research", "research_synthesis", "web_research", "report"],
            AutomationOutputValidatorKind::ResearchBrief,
        ),
        (
            &["code", "code_generation", "code_patch", "patch"],
            AutomationOutputValidatorKind::CodePatch,
        ),
        (
            &["review", "review_decision", "decision", "approval"],
            AutomationOutputValidatorKind::ReviewDecision,
        ),
        (
            &["generation", "summarization", "structured", "structured_json", "json"],
            AutomationOutputValidatorKind::StructuredJson,
        ),
        (
            &["standup", "standup_update"],
            AutomationOutputValidatorKind::StandupUpdate,
        ),
    ];

    let normalized = node_type.to_ascii_lowercase();
    families
        .iter()
        .find(|(aliases, _)| aliases.contains(&normalized.as_str()))
        .map(|(_, kind)| *kind)
        .unwrap_or(AutomationOutputValidatorKind::GenericArtifact)
}
pub fn contract_kind_for_node_type(node_type: &str) -> &'static str {
match validator_for_node_type(node_type) {
AutomationOutputValidatorKind::ResearchBrief => "report",
AutomationOutputValidatorKind::CodePatch => "code",
AutomationOutputValidatorKind::ReviewDecision => "decision",
AutomationOutputValidatorKind::StructuredJson => "structured",
AutomationOutputValidatorKind::GenericArtifact => "artifact",
AutomationOutputValidatorKind::StandupUpdate => "standup",
}
}
/// Resolve the repair-iteration budget for a test case.
///
/// Precedence: `automation_spec.config["max_repair_iterations"]`, then
/// `expected_output.max_repair_iterations`, then
/// [`DEFAULT_MAX_REPAIR_ITERATIONS`]. The result is clamped to at least 1 so
/// every node gets one attempt.
fn effective_max_repair_iterations(case: &EvalTestCase) -> u32 {
    let from_config = case
        .automation_spec
        .config
        .get("max_repair_iterations")
        .and_then(|v| v.as_u64())
        // Reject values that do not fit in u32 instead of silently
        // truncating them (`n as u32` kept only the low 32 bits, turning
        // e.g. 2^32 + 5 into 5); out-of-range config falls through to the
        // expected-output value or the default.
        .and_then(|n| u32::try_from(n).ok());
    from_config
        .or(case.expected_output.max_repair_iterations)
        .unwrap_or(DEFAULT_MAX_REPAIR_ITERATIONS)
        .max(1)
}
/// Per-node timeout: one repair budget per iteration, never below the floor.
fn node_timeout_ms(max_repair: u32) -> u64 {
    let scaled = u64::from(max_repair) * PER_REPAIR_TIMEOUT_MS;
    if scaled < MIN_NODE_TIMEOUT_MS {
        MIN_NODE_TIMEOUT_MS
    } else {
        scaled
    }
}
/// Milliseconds since the Unix epoch, saturated to 0 if the system clock
/// reads earlier than the epoch (matching `unwrap_or_default()` semantics).
fn current_time_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        // Pre-epoch clock: report 0 rather than panic.
        Err(_) => 0,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::eval::dataset::{
        ArtifactStatus, AutomationSpecTest, EvalExpectedOutput, EvalTestCase, MetricTolerance,
        TestNode,
    };
    use std::collections::HashMap;

    /// Build a minimal enabled test case with the given id and nodes.
    /// Expected output pins `max_repair_iterations` to `Some(2)` and the
    /// automation config map starts empty, so tests can override either side.
    fn make_case(id: &str, nodes: Vec<TestNode>) -> EvalTestCase {
        EvalTestCase {
            id: id.to_string(),
            description: format!("desc for {}", id),
            priority: 1,
            automation_spec: AutomationSpecTest {
                name: format!("automation-{}", id),
                nodes,
                validators: vec!["contract".to_string()],
                config: HashMap::new(),
            },
            expected_output: EvalExpectedOutput {
                artifact_status: ArtifactStatus::Completed,
                required_validators: vec!["contract".to_string()],
                optional_validators: Vec::new(),
                unmet_requirements_acceptable: false,
                max_repair_iterations: Some(2),
                output_format: "json".to_string(),
                quality_indicators: Vec::new(),
            },
            enabled: true,
            tags: vec!["test".to_string()],
            metric_tolerance: MetricTolerance::default(),
        }
    }

    /// Build a node whose objective and contract text derive from its type.
    fn make_node(id: &str, node_type: &str) -> TestNode {
        TestNode {
            id: id.to_string(),
            node_type: node_type.to_string(),
            objective: format!("Do {}", node_type),
            output_contract: format!("Produce a {} output", node_type),
        }
    }

    // Every node type appearing in the eval dataset must map to a concrete
    // validator family; unknown types fall back to GenericArtifact.
    #[test]
    fn validator_mapping_covers_all_eval_dataset_node_types() {
        assert_eq!(
            validator_for_node_type("research"),
            AutomationOutputValidatorKind::ResearchBrief
        );
        assert_eq!(
            validator_for_node_type("research_synthesis"),
            AutomationOutputValidatorKind::ResearchBrief
        );
        assert_eq!(
            validator_for_node_type("code"),
            AutomationOutputValidatorKind::CodePatch
        );
        assert_eq!(
            validator_for_node_type("generation"),
            AutomationOutputValidatorKind::StructuredJson
        );
        assert_eq!(
            validator_for_node_type("summarization"),
            AutomationOutputValidatorKind::StructuredJson
        );
        assert_eq!(
            validator_for_node_type("review"),
            AutomationOutputValidatorKind::ReviewDecision
        );
        assert_eq!(
            validator_for_node_type("standup_update"),
            AutomationOutputValidatorKind::StandupUpdate
        );
        assert_eq!(
            validator_for_node_type("totally-new-thing"),
            AutomationOutputValidatorKind::GenericArtifact
        );
    }

    // Matching is ASCII-case-insensitive (inputs are lowercased first).
    #[test]
    fn node_type_matching_is_case_insensitive() {
        assert_eq!(
            validator_for_node_type("Research"),
            AutomationOutputValidatorKind::ResearchBrief
        );
        assert_eq!(
            validator_for_node_type("CODE_GENERATION"),
            AutomationOutputValidatorKind::CodePatch
        );
    }

    // The contract `kind` label must stay in lockstep with the validator
    // family chosen for the same node type.
    #[test]
    fn contract_kind_matches_validator_family() {
        assert_eq!(contract_kind_for_node_type("research"), "report");
        assert_eq!(contract_kind_for_node_type("code"), "code");
        assert_eq!(contract_kind_for_node_type("review"), "decision");
        assert_eq!(contract_kind_for_node_type("generation"), "structured");
        assert_eq!(contract_kind_for_node_type("unknown_type"), "artifact");
    }

    // End-to-end shape check for a one-node case: id prefixing, single agent,
    // Active status, Manual schedule, and a fully-populated output contract.
    #[test]
    fn produces_valid_spec_with_single_node() {
        let case = make_case("ev_001", vec![make_node("n1", "research")]);
        let spec = test_case_to_spec(&case);
        assert_eq!(spec.automation_id, "eval-ev_001");
        assert_eq!(spec.flow.nodes.len(), 1);
        assert_eq!(spec.agents.len(), 1);
        assert_eq!(spec.agents[0].agent_id, EVAL_AGENT_ID);
        assert!(matches!(spec.status, AutomationV2Status::Active));
        assert!(matches!(
            spec.schedule.schedule_type,
            AutomationV2ScheduleType::Manual
        ));
        let node = &spec.flow.nodes[0];
        assert_eq!(node.node_id, "n1");
        assert_eq!(node.agent_id, EVAL_AGENT_ID);
        let contract = node.output_contract.as_ref().expect("contract present");
        assert_eq!(
            contract.validator,
            Some(AutomationOutputValidatorKind::ResearchBrief)
        );
        assert_eq!(contract.kind, "report");
        assert_eq!(
            contract.summary_guidance.as_deref(),
            Some("Produce a research output")
        );
    }

    // Multi-node case: validators follow node order and no implicit
    // dependencies are introduced between nodes.
    #[test]
    fn produces_valid_spec_with_multiple_nodes() {
        let case = make_case(
            "ev_002",
            vec![
                make_node("step1", "research"),
                make_node("step2", "code"),
                make_node("step3", "summarization"),
            ],
        );
        let spec = test_case_to_spec(&case);
        assert_eq!(spec.flow.nodes.len(), 3);
        let validators: Vec<_> = spec
            .flow
            .nodes
            .iter()
            .map(|n| n.output_contract.as_ref().unwrap().validator.unwrap())
            .collect();
        assert_eq!(
            validators,
            vec![
                AutomationOutputValidatorKind::ResearchBrief,
                AutomationOutputValidatorKind::CodePatch,
                AutomationOutputValidatorKind::StructuredJson,
            ]
        );
        for node in &spec.flow.nodes {
            assert!(node.depends_on.is_empty());
        }
    }

    // Config-level `max_repair_iterations` (5) takes precedence over the
    // expected-output value (2): retry policy and timeout both scale from 5.
    #[test]
    fn config_max_repair_overrides_expected_output() {
        let mut case = make_case("ev_003", vec![make_node("n1", "research")]);
        case.automation_spec
            .config
            .insert("max_repair_iterations".to_string(), serde_json::json!(5));
        case.expected_output.max_repair_iterations = Some(2);
        let spec = test_case_to_spec(&case);
        let retry = spec.flow.nodes[0]
            .retry_policy
            .as_ref()
            .expect("retry_policy present");
        assert_eq!(retry["max_attempts"], 5);
        assert_eq!(retry["retries"], 4);
        assert_eq!(
            spec.flow.nodes[0].timeout_ms,
            Some(5 * PER_REPAIR_TIMEOUT_MS)
        );
    }

    // With no config entry, the expected-output value (2 from make_case)
    // drives retry policy and timeout.
    #[test]
    fn falls_back_to_expected_output_when_config_missing() {
        let case = make_case("ev_004", vec![make_node("n1", "research")]);
        let spec = test_case_to_spec(&case);
        let retry = spec.flow.nodes[0].retry_policy.as_ref().unwrap();
        assert_eq!(retry["max_attempts"], 2);
        assert_eq!(retry["retries"], 1);
        assert_eq!(
            spec.flow.nodes[0].timeout_ms,
            Some(2 * PER_REPAIR_TIMEOUT_MS)
        );
    }

    // With neither config nor expected output set, the module default applies.
    #[test]
    fn falls_back_to_default_when_neither_specified() {
        let mut case = make_case("ev_005", vec![make_node("n1", "research")]);
        case.expected_output.max_repair_iterations = None;
        let spec = test_case_to_spec(&case);
        let retry = spec.flow.nodes[0].retry_policy.as_ref().unwrap();
        assert_eq!(retry["max_attempts"], DEFAULT_MAX_REPAIR_ITERATIONS);
    }

    // With max_repair == 1 the computed timeout equals the floor
    // (1 * PER_REPAIR_TIMEOUT_MS == MIN_NODE_TIMEOUT_MS).
    #[test]
    fn timeout_floor_applies_when_max_repair_is_one() {
        let mut case = make_case("ev_006", vec![make_node("n1", "research")]);
        case.expected_output.max_repair_iterations = Some(1);
        let spec = test_case_to_spec(&case);
        assert_eq!(spec.flow.nodes[0].timeout_ms, Some(MIN_NODE_TIMEOUT_MS));
    }

    // An empty output_contract string must produce no summary guidance
    // rather than Some("").
    #[test]
    fn empty_objective_contract_yields_none_summary_guidance() {
        let case = make_case(
            "ev_007",
            vec![TestNode {
                id: "n1".to_string(),
                node_type: "research".to_string(),
                objective: "Investigate".to_string(),
                output_contract: String::new(),
            }],
        );
        let spec = test_case_to_spec(&case);
        let contract = spec.flow.nodes[0].output_contract.as_ref().unwrap();
        assert_eq!(contract.summary_guidance, None);
    }

    // An empty automation name falls back to the "eval/<id>" form.
    #[test]
    fn empty_automation_name_falls_back_to_eval_id() {
        let mut case = make_case("ev_008", vec![make_node("n1", "research")]);
        case.automation_spec.name = String::new();
        let spec = test_case_to_spec(&case);
        assert_eq!(spec.name, "eval/ev_008");
    }

    // Execution policy pins one parallel agent, a positive runtime cap,
    // and no execution profile.
    #[test]
    fn execution_policy_has_single_agent_and_runtime_cap() {
        let case = make_case("ev_009", vec![make_node("n1", "research")]);
        let spec = test_case_to_spec(&case);
        assert_eq!(spec.execution.max_parallel_agents, Some(1));
        assert!(spec.execution.max_total_runtime_ms.unwrap() > 0);
        assert_eq!(spec.execution.profile, None);
    }
}