use serde::{Deserialize, Serialize};
use std::path::Path;
/// A collection of task definitions together with optional suite-level metadata.
///
/// Instances can be loaded from a JSON file with [`TaskSuite::from_file`], which
/// also runs [`TaskSuite::validate`] on the parsed value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaskSuite {
/// Tasks contained in the suite. `validate` rejects an empty list and
/// duplicate task names.
pub tasks: Vec<TaskDefinition>,
/// Descriptive metadata; falls back to `SuiteMetadata::default()` (all
/// fields `None`) when the key is missing from the input.
#[serde(default)]
pub metadata: SuiteMetadata,
}
impl TaskSuite {
    /// Reads the JSON file at `path`, deserializes it into a suite and validates it.
    ///
    /// # Errors
    ///
    /// * [`TaskDefinitionError::Io`] if the file cannot be read.
    /// * [`TaskDefinitionError::Json`] if the contents do not deserialize.
    /// * Any error produced by [`TaskSuite::validate`].
    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, TaskDefinitionError> {
        let raw = std::fs::read_to_string(path.as_ref())?;
        let parsed: Self = serde_json::from_str(&raw)?;
        parsed.validate().map(|()| parsed)
    }

    /// Validates the suite as a whole.
    ///
    /// Checks run in this order, stopping at the first failure:
    /// 1. the suite contains at least one task;
    /// 2. every task passes its own `validate`, in declaration order;
    /// 3. no two tasks share the same name.
    ///
    /// # Errors
    ///
    /// Returns [`TaskDefinitionError::Validation`] for an empty suite or a
    /// duplicate name, or the first error reported by a task.
    pub fn validate(&self) -> Result<(), TaskDefinitionError> {
        if self.tasks.is_empty() {
            return Err(TaskDefinitionError::Validation(
                "Task suite must contain at least one task".to_string(),
            ));
        }

        // Per-task checks all run before the duplicate scan, so an invalid task
        // is reported in preference to a duplicated name.
        self.tasks.iter().try_for_each(|task| task.validate())?;

        // `insert` returns false for a value already present, so the first
        // task whose name fails to insert is the first duplicate.
        let mut seen = std::collections::HashSet::new();
        match self
            .tasks
            .iter()
            .find(|task| !seen.insert(task.name.as_str()))
        {
            Some(duplicate) => Err(TaskDefinitionError::Validation(format!(
                "Duplicate task name: '{}'",
                duplicate.name
            ))),
            None => Ok(()),
        }
    }

    /// Returns references to the tasks whose `complexity` equals `complexity`
    /// exactly, in suite order.
    pub fn filter_by_complexity(&self, complexity: &str) -> Vec<&TaskDefinition> {
        let mut matching = Vec::new();
        for task in &self.tasks {
            if task.complexity == complexity {
                matching.push(task);
            }
        }
        matching
    }

    /// Returns references to the tasks that carry a tag equal to `tag`
    /// (exact, case-sensitive comparison), in suite order.
    pub fn filter_by_tag(&self, tag: &str) -> Vec<&TaskDefinition> {
        let mut matching = Vec::new();
        for task in &self.tasks {
            let has_tag = task.tags.iter().any(|candidate| candidate.as_str() == tag);
            if has_tag {
                matching.push(task);
            }
        }
        matching
    }
}
/// Optional descriptive information attached to a [`TaskSuite`].
///
/// Every field is an `Option`, and the derived `Default` leaves them all `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SuiteMetadata {
/// Name of the suite, if provided.
pub name: Option<String>,
/// Free-form description of the suite, if provided.
pub description: Option<String>,
/// Version string of the suite, if provided. Stored as-is; no format is
/// enforced in this file.
pub version: Option<String>,
}
/// A single task entry of a [`TaskSuite`].
///
/// `name`, `prompt_file`, `completion_promise` and `verification` carry no
/// serde default and must be present in serialized input; the remaining fields
/// fall back to the defaults noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaskDefinition {
/// Task identifier. `validate` requires it to be non-empty and made only of
/// alphanumeric characters, `-` or `_`; `TaskSuite::validate` additionally
/// requires it to be unique within the suite.
pub name: String,
/// Prompt file reference; `validate` requires it to be non-empty.
/// NOTE(review): presumably a path to the prompt text — how it is resolved is
/// not visible in this file.
pub prompt_file: String,
/// Completion marker; `validate` requires it to be non-empty.
/// NOTE(review): presumably the text that signals the task is finished —
/// confirm with the consumer of this type.
pub completion_promise: String,
/// How the task result is verified; `validate` requires a non-empty command.
pub verification: Verification,
/// Optional human-readable description.
#[serde(default)]
pub description: Option<String>,
/// Complexity label; defaults to `"medium"`. `validate` accepts only
/// `simple`, `medium` or `complex`.
#[serde(default = "default_complexity")]
pub complexity: String,
/// Iteration limit; defaults to `100`.
#[serde(default = "default_max_iterations")]
pub max_iterations: u32,
/// Expected iteration count, used as the baseline by `iteration_delta`.
#[serde(default)]
pub expected_iterations: Option<u32>,
/// Timeout in seconds; defaults to `300`.
#[serde(default = "default_timeout_seconds")]
pub timeout_seconds: u64,
/// Optional setup information; defaults to an empty `TaskSetup`.
#[serde(default)]
pub setup: TaskSetup,
/// Free-form tags, matched exactly by `TaskSuite::filter_by_tag`.
#[serde(default)]
pub tags: Vec<String>,
}
/// Default value for `TaskDefinition::complexity`: the string `"medium"`.
fn default_complexity() -> String {
    String::from("medium")
}
/// Default value for `TaskDefinition::max_iterations`: `100`.
fn default_max_iterations() -> u32 {
    const DEFAULT_MAX_ITERATIONS: u32 = 100;
    DEFAULT_MAX_ITERATIONS
}
/// Default value for `TaskDefinition::timeout_seconds`: `300` (five minutes).
fn default_timeout_seconds() -> u64 {
    const FIVE_MINUTES_IN_SECONDS: u64 = 5 * 60;
    FIVE_MINUTES_IN_SECONDS
}
impl TaskDefinition {
    /// Starts a [`TaskDefinitionBuilder`] seeded with the three required string
    /// fields.
    ///
    /// All other fields start at the builder's defaults and can be overridden
    /// through its chained setters before calling `build`.
    pub fn builder(
        name: impl Into<String>,
        prompt_file: impl Into<String>,
        completion_promise: impl Into<String>,
    ) -> TaskDefinitionBuilder {
        TaskDefinitionBuilder::new(name, prompt_file, completion_promise)
    }

    /// Checks that this definition is internally consistent.
    ///
    /// Checks run in this order and stop at the first failure:
    /// 1. `name` is non-empty;
    /// 2. `name` contains only alphanumeric characters, `-` or `_`;
    /// 3. `prompt_file` is non-empty;
    /// 4. `completion_promise` is non-empty;
    /// 5. `verification.command` is non-empty;
    /// 6. `complexity` is one of `simple`, `medium`, `complex`.
    ///
    /// # Errors
    ///
    /// Returns [`TaskDefinitionError::MissingField`] (carrying the field name)
    /// for an empty required field and [`TaskDefinitionError::Validation`] for
    /// an invalid name or complexity.
    pub fn validate(&self) -> Result<(), TaskDefinitionError> {
        if self.name.is_empty() {
            return Err(TaskDefinitionError::MissingField("name".to_string()));
        }
        if !self
            .name
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
        {
            return Err(TaskDefinitionError::Validation(format!(
                "Task name '{}' contains invalid characters. Use alphanumeric, hyphens, or underscores only.",
                self.name
            )));
        }
        if self.prompt_file.is_empty() {
            return Err(TaskDefinitionError::MissingField("prompt_file".to_string()));
        }
        if self.completion_promise.is_empty() {
            return Err(TaskDefinitionError::MissingField(
                "completion_promise".to_string(),
            ));
        }
        if self.verification.command.is_empty() {
            return Err(TaskDefinitionError::MissingField(
                "verification.command".to_string(),
            ));
        }
        if !["simple", "medium", "complex"].contains(&self.complexity.as_str()) {
            return Err(TaskDefinitionError::Validation(format!(
                "Invalid complexity '{}'. Must be one of: simple, medium, complex",
                self.complexity
            )));
        }
        Ok(())
    }

    /// Returns `actual - expected_iterations`, or `None` when no expectation
    /// is set.
    ///
    /// A negative result means fewer iterations than expected were used, a
    /// positive one means more. The difference of two `u32` values does not
    /// always fit in an `i32`, so the subtraction is carried out in `i64` and
    /// the result is saturated to `i32::MIN..=i32::MAX` instead of wrapping or
    /// overflowing.
    pub fn iteration_delta(&self, actual: u32) -> Option<i32> {
        self.expected_iterations.map(|expected| {
            let delta = i64::from(actual) - i64::from(expected);
            // The clamp guarantees the value fits, so the cast is lossless.
            delta.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
        })
    }
}
/// Fluent builder for [`TaskDefinition`].
///
/// Holds one value per `TaskDefinition` field. Create it with
/// [`TaskDefinitionBuilder::new`] (or `TaskDefinition::builder`), adjust it with
/// the chained setters and finish with `build`. Building performs no
/// validation; call `TaskDefinition::validate` on the result if needed.
///
/// `Debug` and `Clone` are derived so a partially configured builder can be
/// logged or reused as a template for several tasks.
#[derive(Debug, Clone)]
pub struct TaskDefinitionBuilder {
    /// Value for `TaskDefinition::name`.
    name: String,
    /// Value for `TaskDefinition::prompt_file`.
    prompt_file: String,
    /// Value for `TaskDefinition::completion_promise`.
    completion_promise: String,
    /// Value for `TaskDefinition::verification`.
    verification: Verification,
    /// Value for `TaskDefinition::description`.
    description: Option<String>,
    /// Value for `TaskDefinition::complexity`.
    complexity: String,
    /// Value for `TaskDefinition::max_iterations`.
    max_iterations: u32,
    /// Value for `TaskDefinition::expected_iterations`.
    expected_iterations: Option<u32>,
    /// Value for `TaskDefinition::timeout_seconds`.
    timeout_seconds: u64,
    /// Value for `TaskDefinition::setup`.
    setup: TaskSetup,
    /// Value for `TaskDefinition::tags`.
    tags: Vec<String>,
}
impl TaskDefinitionBuilder {
pub fn new(
name: impl Into<String>,
prompt_file: impl Into<String>,
completion_promise: impl Into<String>,
) -> Self {
Self {
name: name.into(),
prompt_file: prompt_file.into(),
completion_promise: completion_promise.into(),
verification: Verification::default(),
description: None,
complexity: default_complexity(),
max_iterations: default_max_iterations(),
expected_iterations: None,
timeout_seconds: default_timeout_seconds(),
setup: TaskSetup::default(),
tags: Vec::new(),
}
}
pub fn verification_command(mut self, command: impl Into<String>) -> Self {
self.verification.command = command.into();
self
}
pub fn verification_exit_code(mut self, code: i32) -> Self {
self.verification.success_exit_code = code;
self
}
pub fn verification(mut self, verification: Verification) -> Self {
self.verification = verification;
self
}
pub fn description(mut self, description: impl Into<String>) -> Self {
self.description = Some(description.into());
self
}
pub fn complexity(mut self, complexity: impl Into<String>) -> Self {
self.complexity = complexity.into();
self
}
pub fn max_iterations(mut self, max: u32) -> Self {
self.max_iterations = max;
self
}
pub fn expected_iterations(mut self, expected: u32) -> Self {
self.expected_iterations = Some(expected);
self
}
pub fn timeout_seconds(mut self, seconds: u64) -> Self {
self.timeout_seconds = seconds;
self
}
pub fn setup(mut self, setup: TaskSetup) -> Self {
self.setup = setup;
self
}
pub fn setup_script(mut self, script: impl Into<String>) -> Self {
self.setup.script = Some(script.into());
self
}
pub fn setup_files(mut self, files: Vec<String>) -> Self {
self.setup.files = files;
self
}
pub fn tags(mut self, tags: Vec<String>) -> Self {
self.tags = tags;
self
}
pub fn tag(mut self, tag: impl Into<String>) -> Self {
self.tags.push(tag.into());
self
}
pub fn build(self) -> TaskDefinition {
TaskDefinition {
name: self.name,
prompt_file: self.prompt_file,
completion_promise: self.completion_promise,
verification: self.verification,
description: self.description,
complexity: self.complexity,
max_iterations: self.max_iterations,
expected_iterations: self.expected_iterations,
timeout_seconds: self.timeout_seconds,
setup: self.setup,
tags: self.tags,
}
}
}
/// Describes how a task's result is verified.
///
/// Both fields are optional in serialized input. The derived `Default` yields
/// an empty command and exit code `0`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Verification {
/// Verification command; defaults to an empty string, which
/// `TaskDefinition::validate` rejects.
/// NOTE(review): presumably executed by a task runner — execution is not
/// visible in this file.
#[serde(default)]
pub command: String,
/// Exit code regarded as success; defaults to `0`.
/// NOTE(review): presumably compared with the command's exit status —
/// confirm against the runner.
#[serde(default)]
pub success_exit_code: i32,
}
impl Verification {
    /// Creates a verification for `command` with `success_exit_code` set to `0`.
    pub fn new(command: impl Into<String>) -> Self {
        Self::expect_failure(command, 0)
    }

    /// Creates a verification for `command` whose `success_exit_code` is
    /// `exit_code`.
    pub fn expect_failure(command: impl Into<String>, exit_code: i32) -> Self {
        let command = command.into();
        Self {
            command,
            success_exit_code: exit_code,
        }
    }
}
/// Optional setup information for a task.
///
/// Both fields are optional in serialized input; the derived `Default` has no
/// script and an empty file list.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TaskSetup {
/// Setup script, if any.
/// NOTE(review): whether this is a path or inline script text is not
/// visible in this file — confirm with the consumer.
#[serde(default)]
pub script: Option<String>,
/// Files associated with the setup; empty by default.
/// NOTE(review): presumably files to provide before the task runs —
/// confirm with the consumer.
#[serde(default)]
pub files: Vec<String>,
}
impl TaskSetup {
    /// Returns `true` when a script is set or at least one file is listed.
    pub fn has_setup(&self) -> bool {
        let nothing_configured = self.script.is_none() && self.files.is_empty();
        !nothing_configured
    }
}
/// Errors produced while loading or validating task definitions and suites.
#[derive(Debug, thiserror::Error)]
pub enum TaskDefinitionError {
/// An I/O failure; created via `?` from `std::io::Error`
/// (e.g. in `TaskSuite::from_file`).
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// A JSON deserialization failure; created via `?` from `serde_json::Error`.
#[error("JSON parse error: {0}")]
Json(#[from] serde_json::Error),
/// A required field is empty; the payload is the field name
/// (for example `"prompt_file"` or `"verification.command"`).
#[error("Missing required field: {0}")]
MissingField(String),
/// A value is present but violates a rule; the payload is a
/// human-readable explanation.
#[error("Validation error: {0}")]
Validation(String),
}
// Unit tests for the builder, validation rules, JSON parsing, suite filters
// and the small helper constructors defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// The builder stores every value passed through its setters.
#[test]
fn test_task_definition_builder() {
let task = TaskDefinition::builder("hello-world", "tasks/hello.md", "TASK_COMPLETE")
.verification_command("python hello.py | grep -q 'Hello, World!'")
.description("Create a hello world script")
.complexity("simple")
.max_iterations(5)
.expected_iterations(1)
.tag("python")
.build();
assert_eq!(task.name, "hello-world");
assert_eq!(task.prompt_file, "tasks/hello.md");
assert_eq!(task.completion_promise, "TASK_COMPLETE");
assert!(task.verification.command.contains("Hello, World!"));
assert_eq!(task.complexity, "simple");
assert_eq!(task.max_iterations, 5);
assert_eq!(task.expected_iterations, Some(1));
assert!(task.tags.contains(&"python".to_string()));
}
// Fields that are not set explicitly take the documented defaults.
#[test]
fn test_task_definition_defaults() {
let task = TaskDefinition::builder("test", "prompt.md", "DONE")
.verification_command("echo ok")
.build();
assert_eq!(task.complexity, "medium");
assert_eq!(task.max_iterations, 100);
assert_eq!(task.timeout_seconds, 300);
assert!(task.expected_iterations.is_none());
assert!(task.tags.is_empty());
}
// A minimal, well-formed task passes validation.
#[test]
fn test_task_validation_valid() {
let task = TaskDefinition::builder("valid-task", "prompt.md", "DONE")
.verification_command("echo ok")
.build();
assert!(task.validate().is_ok());
}
// Spaces and punctuation in the name are rejected as a Validation error.
#[test]
fn test_task_validation_invalid_name() {
let task = TaskDefinition::builder("invalid task name!", "prompt.md", "DONE")
.verification_command("echo ok")
.build();
let err = task.validate().unwrap_err();
assert!(matches!(err, TaskDefinitionError::Validation(_)));
}
// An empty prompt file is reported as a missing "prompt_file" field.
#[test]
fn test_task_validation_empty_prompt() {
let task = TaskDefinition::builder("test", "", "DONE")
.verification_command("echo ok")
.build();
let err = task.validate().unwrap_err();
assert!(matches!(err, TaskDefinitionError::MissingField(f) if f == "prompt_file"));
}
// Without a verification command the default (empty) command is rejected.
#[test]
fn test_task_validation_empty_verification() {
let task = TaskDefinition::builder("test", "prompt.md", "DONE").build();
let err = task.validate().unwrap_err();
assert!(matches!(err, TaskDefinitionError::MissingField(f) if f == "verification.command"));
}
// Complexity values outside simple/medium/complex are rejected.
#[test]
fn test_task_validation_invalid_complexity() {
let task = TaskDefinition::builder("test", "prompt.md", "DONE")
.verification_command("echo ok")
.complexity("invalid")
.build();
let err = task.validate().unwrap_err();
assert!(matches!(err, TaskDefinitionError::Validation(_)));
}
// The delta is actual minus expected: negative below, positive above, zero when equal.
#[test]
fn test_iteration_delta() {
let task = TaskDefinition::builder("test", "prompt.md", "DONE")
.verification_command("echo ok")
.expected_iterations(5)
.build();
assert_eq!(task.iteration_delta(3), Some(-2));
assert_eq!(task.iteration_delta(7), Some(2));
assert_eq!(task.iteration_delta(5), Some(0));
}
// With no expected iteration count there is no delta.
#[test]
fn test_iteration_delta_no_expected() {
let task = TaskDefinition::builder("test", "prompt.md", "DONE")
.verification_command("echo ok")
.build();
assert!(task.iteration_delta(5).is_none());
}
// A two-task suite with metadata deserializes from JSON, including optional
// fields (description, setup, tags) on the second task.
#[test]
fn test_task_suite_parse() {
let json = r#"{
"tasks": [
{
"name": "hello-world",
"prompt_file": "tasks/hello/PROMPT.md",
"completion_promise": "TASK_COMPLETE",
"verification": {
"command": "python hello.py | grep -q 'Hello, World!'"
},
"complexity": "simple",
"max_iterations": 5,
"expected_iterations": 1
},
{
"name": "fizzbuzz-tdd",
"description": "Implement FizzBuzz with TDD",
"prompt_file": "tasks/fizzbuzz/PROMPT.md",
"completion_promise": "TESTS_PASSING",
"verification": {
"command": "pytest test_fizzbuzz.py -v"
},
"complexity": "medium",
"max_iterations": 15,
"expected_iterations": 5,
"setup": {
"files": ["test_fizzbuzz.py"]
},
"tags": ["python", "tdd"]
}
],
"metadata": {
"name": "Ralph Benchmark Suite",
"version": "1.0.0"
}
}"#;
let suite: TaskSuite = serde_json::from_str(json).unwrap();
assert_eq!(suite.tasks.len(), 2);
let hello = &suite.tasks[0];
assert_eq!(hello.name, "hello-world");
assert_eq!(hello.complexity, "simple");
assert_eq!(hello.max_iterations, 5);
assert_eq!(hello.expected_iterations, Some(1));
let fizzbuzz = &suite.tasks[1];
assert_eq!(fizzbuzz.name, "fizzbuzz-tdd");
assert!(fizzbuzz.description.is_some());
assert_eq!(fizzbuzz.setup.files.len(), 1);
assert!(fizzbuzz.tags.contains(&"tdd".to_string()));
assert_eq!(
suite.metadata.name,
Some("Ralph Benchmark Suite".to_string())
);
}
// A suite without tasks fails validation.
#[test]
fn test_task_suite_validation_empty() {
let suite = TaskSuite {
tasks: vec![],
metadata: SuiteMetadata::default(),
};
let err = suite.validate().unwrap_err();
assert!(matches!(err, TaskDefinitionError::Validation(_)));
}
// Two tasks with the same name fail validation with a duplicate-name message.
#[test]
fn test_task_suite_validation_duplicates() {
let task = TaskDefinition::builder("duplicate", "prompt.md", "DONE")
.verification_command("echo ok")
.build();
let suite = TaskSuite {
tasks: vec![task.clone(), task],
metadata: SuiteMetadata::default(),
};
let err = suite.validate().unwrap_err();
assert!(err.to_string().contains("Duplicate task name"));
}
// Filtering by complexity returns only the tasks with that exact label.
#[test]
fn test_filter_by_complexity() {
let json = r#"{
"tasks": [
{"name": "t1", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "simple"},
{"name": "t2", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "medium"},
{"name": "t3", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "simple"}
]
}"#;
let suite: TaskSuite = serde_json::from_str(json).unwrap();
let simple = suite.filter_by_complexity("simple");
assert_eq!(simple.len(), 2);
assert!(simple.iter().all(|t| t.complexity == "simple"));
}
// Filtering by tag returns every task that lists the tag.
#[test]
fn test_filter_by_tag() {
let json = r#"{
"tasks": [
{"name": "t1", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["python", "testing"]},
{"name": "t2", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["rust"]},
{"name": "t3", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["python"]}
]
}"#;
let suite: TaskSuite = serde_json::from_str(json).unwrap();
let python = suite.filter_by_tag("python");
assert_eq!(python.len(), 2);
}
// has_setup is false for the default value and true when either a script or
// a file is present.
#[test]
fn test_setup_has_setup() {
let empty = TaskSetup::default();
assert!(!empty.has_setup());
let with_script = TaskSetup {
script: Some("setup.sh".to_string()),
files: vec![],
};
assert!(with_script.has_setup());
let with_files = TaskSetup {
script: None,
files: vec!["file.py".to_string()],
};
assert!(with_files.has_setup());
}
// Verification::new stores the command and uses exit code 0.
#[test]
fn test_verification_new() {
let v = Verification::new("pytest tests/");
assert_eq!(v.command, "pytest tests/");
assert_eq!(v.success_exit_code, 0);
}
// Verification::expect_failure stores the supplied exit code.
#[test]
fn test_verification_expect_failure() {
let v = Verification::expect_failure("false", 1);
assert_eq!(v.command, "false");
assert_eq!(v.success_exit_code, 1);
}
}