use agentforge_core::{
AgentFile, DifficultyTier, ExpectedToolCall, Result, Scenario, ScenarioExpected, ScenarioInput,
ScenarioSource, ToolDefinition,
};
use chrono::Utc;
use uuid::Uuid;
/// Builds exactly `count` evaluation scenarios derived from the agent definition.
///
/// Scenarios are produced in a fixed priority order, stopping as soon as
/// `count` is reached:
///
/// 1. one happy-path scenario per declared tool,
/// 2. one scenario per string entry of the output schema's `required` array,
/// 3. multi-tool scenarios (only when the agent declares at least one tool),
/// 4. generic question scenarios as the final filler.
///
/// This function itself never produces an `Err`; the `Result` wrapper is part
/// of the public signature.
pub fn generate_schema_derived_scenarios(
    agent: &AgentFile,
    count: usize,
    agent_id: Uuid,
) -> Result<Vec<Scenario>> {
    // Phase 1: happy paths, capped at `count`.
    let mut out: Vec<Scenario> = agent
        .tools
        .iter()
        .take(count)
        .map(|tool| happy_path_for_tool(agent, tool, agent_id))
        .collect();

    // Phase 2: required output fields. Non-string entries are skipped and do
    // not consume any of the remaining budget.
    let required_fields = agent
        .output_schema
        .as_ref()
        .and_then(|schema| schema.get("required"))
        .and_then(|required| required.as_array());
    if let Some(fields) = required_fields {
        let budget = count - out.len();
        out.extend(
            fields
                .iter()
                .filter_map(|field| field.as_str())
                .take(budget)
                .map(|field_name| output_field_scenario(agent, field_name, agent_id)),
        );
    }

    // Phase 3: multi-tool scenarios, seeded 42, 43, ... so consecutive
    // scenarios rotate through different tool pairs.
    if !agent.tools.is_empty() {
        let budget = count - out.len();
        out.extend(
            (42u64..)
                .take(budget)
                .map(|seed| multi_tool_scenario(agent, agent_id, seed)),
        );
    }

    // Phase 4: generic filler; each scenario receives its own position in the
    // output as the message index.
    let first_generic = out.len();
    out.extend((first_generic..count).map(|index| generic_task_scenario(agent, agent_id, index)));

    Ok(out)
}
/// Creates an easy scenario whose only expected (and required) tool call is `tool`.
///
/// The user message comes from `realistic_user_message`; the tool's parameter
/// schema is attached as the expected argument schema, and the scenario is
/// tagged `happy_path` plus `tool:<name>`.
fn happy_path_for_tool(agent: &AgentFile, tool: &ToolDefinition, agent_id: Uuid) -> Scenario {
    let input = ScenarioInput {
        user_message: realistic_user_message(tool, agent),
        conversation_history: Vec::new(),
        context: None,
    };

    let sole_call = ExpectedToolCall {
        tool_name: tool.name.clone(),
        required: true,
        argument_schema: Some(tool.parameters.clone()),
    };
    let expected = ScenarioExpected {
        tool_calls: vec![sole_call],
        output_schema: agent.output_schema.clone(),
        pass_criteria: String::from(
            "The agent should understand the user's request, select the most appropriate tool(s), \
             call them with valid and complete arguments, and provide a clear, actionable response.",
        ),
        min_turns: Some(1),
        max_turns: Some(5),
    };

    let domain = agent
        .eval_hints
        .as_ref()
        .and_then(|hints| hints.domain.clone());
    let tags = vec![String::from("happy_path"), format!("tool:{}", tool.name)];

    Scenario {
        id: Uuid::new_v4(),
        agent_id,
        input,
        expected,
        difficulty: DifficultyTier::Easy,
        domain,
        source: ScenarioSource::SchemaDerived,
        tags,
        created_at: Utc::now(),
    }
}
/// Returns a short human-readable label for the agent's domain.
///
/// The label is the leading clause of the `description` entry in the agent's
/// metadata: everything before the first `,`, `.` or `;`, trimmed. When the
/// description has no such separator, at most its first 80 bytes are used,
/// rounded down to a UTF-8 character boundary.
///
/// Falls back to the agent's name when there is no metadata, no string
/// `description`, or the resulting label is 5 bytes or shorter.
fn domain_label(agent: &AgentFile) -> String {
    agent
        .metadata
        .as_ref()
        .and_then(|m| m.get("description"))
        .and_then(|v| v.as_str())
        .map(|d| {
            let end = d.find([',', '.', ';']).unwrap_or_else(|| {
                // Byte 80 may fall inside a multi-byte character; slicing there
                // would panic, so back off to the nearest boundary. Index 0 is
                // always a boundary, so the loop terminates.
                let mut cut = d.len().min(80);
                while !d.is_char_boundary(cut) {
                    cut -= 1;
                }
                cut
            });
            d[..end].trim().to_string()
        })
        .filter(|s| s.len() > 5)
        .unwrap_or_else(|| agent.name.clone())
}
/// Produces a natural-sounding user request that should lead the agent to use `tool`.
///
/// The tool's capability is read from the `x-copilot-capability` entry of its
/// parameters (falling back to the tool name) and matched, case-insensitively
/// and in this order, against: a `github` prefix, codebase search, edit/write,
/// execute/run/terminal, read/file, web/fetch/browse and context/docs. For the
/// first five categories, agents whose name looks CI/CD-related get a concrete
/// GitHub Actions prompt; every other case gets a prompt phrased around the
/// agent's domain label.
///
/// When no category matches, the first 60 characters of the tool description
/// are used if it is longer than 20 bytes and does not start with
/// "Copilot capability"; otherwise a generic request is returned.
fn realistic_user_message(tool: &ToolDefinition, agent: &AgentFile) -> String {
    /// Reports whether a lower-cased agent name suggests a CI/CD focus.
    ///
    /// "action" and "workflow" match as substrings (covering plurals), but the
    /// short markers "ci", "cd" and "cicd" must be whole words: as substrings
    /// they also occur inside unrelated names such as "social" or "pricing".
    fn looks_like_ci_agent(name_lower: &str) -> bool {
        name_lower.contains("action")
            || name_lower.contains("workflow")
            || name_lower
                .split(|c: char| !c.is_alphanumeric())
                .any(|word| matches!(word, "ci" | "cd" | "cicd"))
    }

    let capability = tool
        .parameters
        .get("x-copilot-capability")
        .and_then(|v| v.as_str())
        .unwrap_or(tool.name.as_str());
    let cap = capability.to_lowercase();
    let agent_lower = agent.name.to_lowercase();
    let domain = domain_label(agent);
    let is_ci_agent = looks_like_ci_agent(&agent_lower);

    if cap.starts_with("github") {
        if is_ci_agent {
            "Create a secure GitHub Actions CI workflow for a Node.js project. \
             It should trigger on pull requests, run the test suite, upload coverage \
             reports, and block merges on failure. Pin every action reference to a \
             full commit SHA and apply least-privilege permissions throughout."
                .to_string()
        } else {
            format!(
                "I need help with a GitHub repository task related to {domain}. \
                 Please help me configure or update the relevant GitHub settings."
            )
        }
    } else if cap.contains("search") && cap.contains("codebase") {
        if is_ci_agent {
            "Search the repository for all GitHub Actions workflow files under \
             .github/workflows/. List each file and flag any that use mutable action \
             references (like @main or @v3) instead of pinned commit SHAs."
                .to_string()
        } else {
            format!(
                "Search the codebase for all {domain} configuration files and flag \
                 anything that looks outdated or inconsistent with best practices."
            )
        }
    } else if cap.contains("edit") || cap.contains("write") {
        if is_ci_agent {
            "Update the workflow at .github/workflows/ci.yml to pin all action \
             references to their full commit SHAs, restrict permissions to the \
             minimum required, and add a concurrency group to cancel stale PR runs."
                .to_string()
        } else {
            format!(
                "Update the {domain} configuration files to follow current best \
                 practices. Please make the necessary edits."
            )
        }
    } else if cap.contains("execute") || cap.contains("run") || cap.contains("terminal") {
        if is_ci_agent {
            "Run actionlint on all workflow files in this repository to validate \
             their syntax and logic. Report any errors or warnings found."
                .to_string()
        } else {
            format!(
                "Run the standard validation and lint checks for {domain}. \
                 Report any issues found."
            )
        }
    } else if cap.contains("read") || cap.contains("file") {
        if is_ci_agent {
            "Read the file .github/workflows/ci.yml and give me a detailed review: \
             what does it do, are the permissions correct, are actions pinned, and \
             what improvements would you recommend?"
                .to_string()
        } else {
            format!(
                "Read the current {domain} configuration and summarize what it does, \
                 highlighting any potential issues or improvements."
            )
        }
    } else if cap.contains("web") || cap.contains("fetch") || cap.contains("browse") {
        format!(
            "Look up the latest official guidance on {domain} and give me a concise \
             summary of the key recommendations I should follow."
        )
    } else if cap.contains("context") || cap.contains("docs") {
        format!(
            "What are the current best practices for {domain}? Please look up the \
             relevant documentation and give me a concise, actionable summary."
        )
    } else if tool.description.len() > 20 && !tool.description.starts_with("Copilot capability") {
        // `chars().take(60)` truncates on character boundaries, so this is
        // safe for non-ASCII descriptions.
        format!(
            "I need to accomplish a task involving: {}. Please help me complete it \
             as part of my work on {domain}.",
            tool.description.chars().take(60).collect::<String>()
        )
    } else {
        format!("I need assistance with a {domain} task. Please help me get started.")
    }
}
/// Produces a multi-step user request intended to exercise several tools in one task.
///
/// Agents whose name looks CI/CD-related get a pipeline-hardening request,
/// agents whose name contains "security" or "safe" get a repository security
/// audit, and every other agent gets a generic discover/review/improve/verify
/// request phrased around its domain label.
fn composite_task_message(agent: &AgentFile) -> String {
    /// Reports whether a lower-cased agent name suggests a CI/CD focus.
    ///
    /// "action" and "workflow" match as substrings (covering plurals), but the
    /// short markers "ci", "cd" and "cicd" must be whole words: as substrings
    /// they also occur inside unrelated names such as "social" or "pricing".
    fn looks_like_ci_agent(name_lower: &str) -> bool {
        name_lower.contains("action")
            || name_lower.contains("workflow")
            || name_lower
                .split(|c: char| !c.is_alphanumeric())
                .any(|word| matches!(word, "ci" | "cd" | "cicd"))
    }

    let agent_lower = agent.name.to_lowercase();
    let domain = domain_label(agent);

    if looks_like_ci_agent(&agent_lower) {
        "Set up a complete, secure CI/CD pipeline for my project. Search for any \
         existing workflow files, review them for security issues (mutable action \
         tags, overly broad permissions, hardcoded secrets), update them to follow \
         current security best practices, and finally run actionlint to validate \
         the result."
            .to_string()
    } else if agent_lower.contains("security") || agent_lower.contains("safe") {
        "Audit the security posture of this repository. Read the relevant \
         configuration files, identify any vulnerabilities or misconfigurations, \
         and apply the necessary fixes."
            .to_string()
    } else {
        format!(
            "Help me complete a multi-step {domain} project task: first discover \
             the relevant files, review their current state, then apply any needed \
             improvements and verify the result."
        )
    }
}
/// Creates an easy scenario checking that the agent's output contains `field_name`.
///
/// No tool calls are expected; the agent's full output schema is attached and
/// the scenario is tagged `output_field` plus `field:<name>`.
fn output_field_scenario(agent: &AgentFile, field_name: &str, agent_id: Uuid) -> Scenario {
    let input = ScenarioInput {
        user_message: format!(
            "Complete a task that requires the agent to produce a '{field_name}' field in its output."
        ),
        conversation_history: Vec::new(),
        context: None,
    };

    let expected = ScenarioExpected {
        tool_calls: Vec::new(),
        output_schema: agent.output_schema.clone(),
        pass_criteria: format!(
            "The agent's output must include a valid '{field_name}' field matching the schema."
        ),
        min_turns: Some(1),
        max_turns: Some(3),
    };

    let domain = agent
        .eval_hints
        .as_ref()
        .and_then(|hints| hints.domain.clone());
    let tags = vec![String::from("output_field"), format!("field:{field_name}")];

    Scenario {
        id: Uuid::new_v4(),
        agent_id,
        input,
        expected,
        difficulty: DifficultyTier::Easy,
        domain,
        source: ScenarioSource::SchemaDerived,
        tags,
        created_at: Utc::now(),
    }
}
/// Creates a medium-difficulty scenario that expects up to two tools to be used.
///
/// `seed` selects two tools at consecutive positions (wrapping around the tool
/// list). The first is a required call; the second is added as an optional
/// call unless it has the same name as the first, which is the case when the
/// agent declares a single tool.
///
/// # Panics
///
/// Panics if `agent.tools` is empty, because the tool index is computed modulo
/// the tool count.
fn multi_tool_scenario(agent: &AgentFile, agent_id: Uuid, seed: u64) -> Scenario {
    let tool_count = agent.tools.len();
    let base = seed as usize;
    let primary = &agent.tools[base % tool_count];
    let secondary = &agent.tools[(base + 1) % tool_count];

    let mut tool_calls = vec![ExpectedToolCall {
        tool_name: primary.name.clone(),
        required: true,
        argument_schema: Some(primary.parameters.clone()),
    }];
    if primary.name != secondary.name {
        tool_calls.push(ExpectedToolCall {
            tool_name: secondary.name.clone(),
            required: false,
            argument_schema: Some(secondary.parameters.clone()),
        });
    }

    let input = ScenarioInput {
        user_message: composite_task_message(agent),
        conversation_history: Vec::new(),
        context: None,
    };
    let expected = ScenarioExpected {
        tool_calls,
        output_schema: agent.output_schema.clone(),
        pass_criteria: String::from(
            "Agent should call the appropriate tools in a logical order and provide a complete response.",
        ),
        min_turns: Some(1),
        max_turns: Some(6),
    };
    let domain = agent
        .eval_hints
        .as_ref()
        .and_then(|hints| hints.domain.clone());

    Scenario {
        id: Uuid::new_v4(),
        agent_id,
        input,
        expected,
        difficulty: DifficultyTier::Medium,
        domain,
        source: ScenarioSource::SchemaDerived,
        tags: vec![String::from("multi_tool")],
        created_at: Utc::now(),
    }
}
/// Creates an easy, tool-free question scenario used as filler.
///
/// `index` selects one of five canned questions, wrapping around. Agents whose
/// name looks CI/CD-related get GitHub Actions security questions; all others
/// get domain-neutral questions. The pass criteria mention the agent's domain
/// label.
fn generic_task_scenario(agent: &AgentFile, agent_id: Uuid, index: usize) -> Scenario {
    /// Reports whether a lower-cased agent name suggests a CI/CD focus.
    ///
    /// "action" and "workflow" match as substrings (covering plurals), but the
    /// short markers "ci", "cd" and "cicd" must be whole words: as substrings
    /// they also occur inside unrelated names such as "social" or "pricing".
    fn looks_like_ci_agent(name_lower: &str) -> bool {
        name_lower.contains("action")
            || name_lower.contains("workflow")
            || name_lower
                .split(|c: char| !c.is_alphanumeric())
                .any(|word| matches!(word, "ci" | "cd" | "cicd"))
    }

    let agent_lower = agent.name.to_lowercase();
    let domain = domain_label(agent);
    let messages: &[&str] = if looks_like_ci_agent(&agent_lower) {
        &[
            "What security risks should I be aware of when setting up GitHub Actions \
             for a public repository?",
            "Walk me through migrating from mutable action version tags like @v3 to \
             pinned commit SHAs across all my workflows.",
            "Explain how OIDC works with GitHub Actions and when I should use it \
             instead of long-lived credentials.",
            "What is the recommended way to handle deployment secrets in GitHub \
             Actions workflows to avoid accidental exposure?",
            "How should I structure my workflow permissions to follow the principle \
             of least privilege?",
        ]
    } else {
        &[
            "What best practices should I follow for this type of configuration?",
            "Help me understand the key concepts I need to know to work effectively \
             with this tool.",
            "What are the most common mistakes people make and how can I avoid them?",
            "Give me a step-by-step guide for getting started with the basics.",
            "What should I check first when troubleshooting unexpected behavior?",
        ]
    };
    // Both lists are non-empty, so the modulo never divides by zero.
    let msg = messages[index % messages.len()];

    Scenario {
        id: Uuid::new_v4(),
        agent_id,
        input: ScenarioInput {
            user_message: msg.to_string(),
            conversation_history: vec![],
            context: None,
        },
        expected: ScenarioExpected {
            tool_calls: vec![],
            output_schema: agent.output_schema.clone(),
            pass_criteria: format!(
                "The agent should provide a helpful, accurate, and actionable response \
                 that draws on its expertise in {domain}."
            ),
            min_turns: Some(1),
            max_turns: Some(5),
        },
        difficulty: DifficultyTier::Easy,
        domain: agent.eval_hints.as_ref().and_then(|h| h.domain.clone()),
        source: ScenarioSource::SchemaDerived,
        tags: vec!["generic".to_string()],
        created_at: Utc::now(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use agentforge_core::{ModelConfig, ModelProvider};

    /// Builds a minimal agent with two tools and an output schema that has a
    /// single required field.
    fn make_agent_with_tools() -> AgentFile {
        AgentFile {
            agentforge_schema_version: "1".to_string(),
            name: "test-agent".to_string(),
            version: "1.0.0".to_string(),
            model: ModelConfig {
                provider: ModelProvider::Openai,
                model_id: "gpt-4o".to_string(),
                temperature: None,
                max_tokens: None,
                top_p: None,
            },
            system_prompt: "You are helpful.".to_string(),
            tools: vec![
                ToolDefinition {
                    name: "tool_a".to_string(),
                    description: "Tool A".to_string(),
                    parameters: serde_json::json!({"type": "object", "properties": {}}),
                },
                ToolDefinition {
                    name: "tool_b".to_string(),
                    description: "Tool B".to_string(),
                    parameters: serde_json::json!({"type": "object", "properties": {}}),
                },
            ],
            output_schema: Some(serde_json::json!({
                "type": "object",
                "properties": {"response": {"type": "string"}},
                "required": ["response"]
            })),
            constraints: vec![],
            eval_hints: None,
            metadata: None,
        }
    }

    #[test]
    fn generates_at_least_one_per_tool() {
        let agent = make_agent_with_tools();
        let id = Uuid::new_v4();
        let scenarios = generate_schema_derived_scenarios(&agent, 5, id).unwrap();
        assert_eq!(scenarios.len(), 5);
        // Every declared tool must have a scenario carrying its exact
        // `tool:<name>` tag, not just the first tool.
        for tool in &agent.tools {
            let tag = format!("tool:{}", tool.name);
            assert!(
                scenarios.iter().any(|s| s.tags.contains(&tag)),
                "no scenario tagged {tag}"
            );
        }
    }

    #[test]
    fn generates_exact_count() {
        let agent = make_agent_with_tools();
        let id = Uuid::new_v4();
        // 0 covers the empty-request boundary.
        for n in [0, 1, 5, 10, 20] {
            let s = generate_schema_derived_scenarios(&agent, n, id).unwrap();
            assert_eq!(s.len(), n, "Expected {n} scenarios");
        }
    }

    #[test]
    fn all_scenarios_have_unique_ids() {
        let agent = make_agent_with_tools();
        let id = Uuid::new_v4();
        let scenarios = generate_schema_derived_scenarios(&agent, 10, id).unwrap();
        let ids: std::collections::HashSet<_> = scenarios.iter().map(|s| s.id).collect();
        assert_eq!(ids.len(), 10);
    }
}