1use serde::{Deserialize, Serialize};
24use std::path::Path;
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct TaskSuite {
32 pub tasks: Vec<TaskDefinition>,
34
35 #[serde(default)]
37 pub metadata: SuiteMetadata,
38}
39
40impl TaskSuite {
41 pub fn from_file(path: impl AsRef<Path>) -> Result<Self, TaskDefinitionError> {
43 let path_ref = path.as_ref();
44 let content = std::fs::read_to_string(path_ref)?;
45 let suite: Self = serde_json::from_str(&content)?;
46 suite.validate()?;
47 Ok(suite)
48 }
49
50 pub fn validate(&self) -> Result<(), TaskDefinitionError> {
52 if self.tasks.is_empty() {
53 return Err(TaskDefinitionError::Validation(
54 "Task suite must contain at least one task".to_string(),
55 ));
56 }
57
58 for task in &self.tasks {
59 task.validate()?;
60 }
61
62 let mut names = std::collections::HashSet::new();
64 for task in &self.tasks {
65 if !names.insert(&task.name) {
66 return Err(TaskDefinitionError::Validation(format!(
67 "Duplicate task name: '{}'",
68 task.name
69 )));
70 }
71 }
72
73 Ok(())
74 }
75
76 pub fn filter_by_complexity(&self, complexity: &str) -> Vec<&TaskDefinition> {
78 self.tasks
79 .iter()
80 .filter(|t| t.complexity == complexity)
81 .collect()
82 }
83
84 pub fn filter_by_tag(&self, tag: &str) -> Vec<&TaskDefinition> {
86 self.tasks
87 .iter()
88 .filter(|t| t.tags.iter().any(|t| t == tag))
89 .collect()
90 }
91}
92
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
95pub struct SuiteMetadata {
96 pub name: Option<String>,
98
99 pub description: Option<String>,
101
102 pub version: Option<String>,
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct TaskDefinition {
112 pub name: String,
119
120 pub prompt_file: String,
124
125 pub completion_promise: String,
129
130 pub verification: Verification,
132
133 #[serde(default)]
138 pub description: Option<String>,
139
140 #[serde(default = "default_complexity")]
144 pub complexity: String,
145
146 #[serde(default = "default_max_iterations")]
150 pub max_iterations: u32,
151
152 #[serde(default)]
156 pub expected_iterations: Option<u32>,
157
158 #[serde(default = "default_timeout_seconds")]
160 pub timeout_seconds: u64,
161
162 #[serde(default)]
164 pub setup: TaskSetup,
165
166 #[serde(default)]
168 pub tags: Vec<String>,
169}
170
/// Complexity assigned to a task when none is specified.
fn default_complexity() -> String {
    String::from("medium")
}
174
/// Iteration limit assigned to a task when none is specified.
fn default_max_iterations() -> u32 {
    100_u32
}
178
/// Timeout, in seconds, assigned to a task when none is specified.
fn default_timeout_seconds() -> u64 {
    // Five minutes.
    5 * 60
}
182
183impl TaskDefinition {
184 pub fn builder(
186 name: impl Into<String>,
187 prompt_file: impl Into<String>,
188 completion_promise: impl Into<String>,
189 ) -> TaskDefinitionBuilder {
190 TaskDefinitionBuilder::new(name, prompt_file, completion_promise)
191 }
192
193 pub fn validate(&self) -> Result<(), TaskDefinitionError> {
195 if self.name.is_empty() {
197 return Err(TaskDefinitionError::MissingField("name".to_string()));
198 }
199
200 if !self
201 .name
202 .chars()
203 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
204 {
205 return Err(TaskDefinitionError::Validation(format!(
206 "Task name '{}' contains invalid characters. Use alphanumeric, hyphens, or underscores only.",
207 self.name
208 )));
209 }
210
211 if self.prompt_file.is_empty() {
213 return Err(TaskDefinitionError::MissingField("prompt_file".to_string()));
214 }
215
216 if self.completion_promise.is_empty() {
218 return Err(TaskDefinitionError::MissingField(
219 "completion_promise".to_string(),
220 ));
221 }
222
223 if self.verification.command.is_empty() {
225 return Err(TaskDefinitionError::MissingField(
226 "verification.command".to_string(),
227 ));
228 }
229
230 if !["simple", "medium", "complex"].contains(&self.complexity.as_str()) {
232 return Err(TaskDefinitionError::Validation(format!(
233 "Invalid complexity '{}'. Must be one of: simple, medium, complex",
234 self.complexity
235 )));
236 }
237
238 Ok(())
239 }
240
241 pub fn iteration_delta(&self, actual: u32) -> Option<i32> {
245 self.expected_iterations
246 .map(|expected| actual as i32 - expected as i32)
247 }
248}
249
250pub struct TaskDefinitionBuilder {
252 name: String,
253 prompt_file: String,
254 completion_promise: String,
255 verification: Verification,
256 description: Option<String>,
257 complexity: String,
258 max_iterations: u32,
259 expected_iterations: Option<u32>,
260 timeout_seconds: u64,
261 setup: TaskSetup,
262 tags: Vec<String>,
263}
264
265impl TaskDefinitionBuilder {
266 pub fn new(
268 name: impl Into<String>,
269 prompt_file: impl Into<String>,
270 completion_promise: impl Into<String>,
271 ) -> Self {
272 Self {
273 name: name.into(),
274 prompt_file: prompt_file.into(),
275 completion_promise: completion_promise.into(),
276 verification: Verification::default(),
277 description: None,
278 complexity: default_complexity(),
279 max_iterations: default_max_iterations(),
280 expected_iterations: None,
281 timeout_seconds: default_timeout_seconds(),
282 setup: TaskSetup::default(),
283 tags: Vec::new(),
284 }
285 }
286
287 pub fn verification_command(mut self, command: impl Into<String>) -> Self {
289 self.verification.command = command.into();
290 self
291 }
292
293 pub fn verification_exit_code(mut self, code: i32) -> Self {
295 self.verification.success_exit_code = code;
296 self
297 }
298
299 pub fn verification(mut self, verification: Verification) -> Self {
301 self.verification = verification;
302 self
303 }
304
305 pub fn description(mut self, description: impl Into<String>) -> Self {
307 self.description = Some(description.into());
308 self
309 }
310
311 pub fn complexity(mut self, complexity: impl Into<String>) -> Self {
313 self.complexity = complexity.into();
314 self
315 }
316
317 pub fn max_iterations(mut self, max: u32) -> Self {
319 self.max_iterations = max;
320 self
321 }
322
323 pub fn expected_iterations(mut self, expected: u32) -> Self {
325 self.expected_iterations = Some(expected);
326 self
327 }
328
329 pub fn timeout_seconds(mut self, seconds: u64) -> Self {
331 self.timeout_seconds = seconds;
332 self
333 }
334
335 pub fn setup(mut self, setup: TaskSetup) -> Self {
337 self.setup = setup;
338 self
339 }
340
341 pub fn setup_script(mut self, script: impl Into<String>) -> Self {
343 self.setup.script = Some(script.into());
344 self
345 }
346
347 pub fn setup_files(mut self, files: Vec<String>) -> Self {
349 self.setup.files = files;
350 self
351 }
352
353 pub fn tags(mut self, tags: Vec<String>) -> Self {
355 self.tags = tags;
356 self
357 }
358
359 pub fn tag(mut self, tag: impl Into<String>) -> Self {
361 self.tags.push(tag.into());
362 self
363 }
364
365 pub fn build(self) -> TaskDefinition {
367 TaskDefinition {
368 name: self.name,
369 prompt_file: self.prompt_file,
370 completion_promise: self.completion_promise,
371 verification: self.verification,
372 description: self.description,
373 complexity: self.complexity,
374 max_iterations: self.max_iterations,
375 expected_iterations: self.expected_iterations,
376 timeout_seconds: self.timeout_seconds,
377 setup: self.setup,
378 tags: self.tags,
379 }
380 }
381}
382
383#[derive(Debug, Clone, Default, Serialize, Deserialize)]
385pub struct Verification {
386 #[serde(default)]
390 pub command: String,
391
392 #[serde(default)]
394 pub success_exit_code: i32,
395}
396
397impl Verification {
398 pub fn new(command: impl Into<String>) -> Self {
400 Self {
401 command: command.into(),
402 success_exit_code: 0,
403 }
404 }
405
406 pub fn expect_failure(command: impl Into<String>, exit_code: i32) -> Self {
408 Self {
409 command: command.into(),
410 success_exit_code: exit_code,
411 }
412 }
413}
414
415#[derive(Debug, Clone, Default, Serialize, Deserialize)]
417pub struct TaskSetup {
418 #[serde(default)]
422 pub script: Option<String>,
423
424 #[serde(default)]
428 pub files: Vec<String>,
429}
430
431impl TaskSetup {
432 pub fn has_setup(&self) -> bool {
434 self.script.is_some() || !self.files.is_empty()
435 }
436}
437
438#[derive(Debug, thiserror::Error)]
440pub enum TaskDefinitionError {
441 #[error("IO error: {0}")]
443 Io(#[from] std::io::Error),
444
445 #[error("JSON parse error: {0}")]
447 Json(#[from] serde_json::Error),
448
449 #[error("Missing required field: {0}")]
451 MissingField(String),
452
453 #[error("Validation error: {0}")]
455 Validation(String),
456}
457
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder pre-filled with the placeholder prompt, promise, and the
    /// trivially succeeding verification command shared by most tests.
    fn echo_task(name: &str) -> TaskDefinitionBuilder {
        TaskDefinition::builder(name, "prompt.md", "DONE").verification_command("echo ok")
    }

    /// Deserializes a suite from inline JSON, panicking if it does not parse.
    fn parse_suite(json: &str) -> TaskSuite {
        serde_json::from_str(json).unwrap()
    }

    // ---- builder -------------------------------------------------------

    #[test]
    fn test_task_definition_builder() {
        let built = TaskDefinition::builder("hello-world", "tasks/hello.md", "TASK_COMPLETE")
            .verification_command("python hello.py | grep -q 'Hello, World!'")
            .description("Create a hello world script")
            .complexity("simple")
            .max_iterations(5)
            .expected_iterations(1)
            .tag("python")
            .build();

        assert_eq!(built.name, "hello-world");
        assert_eq!(built.prompt_file, "tasks/hello.md");
        assert_eq!(built.completion_promise, "TASK_COMPLETE");
        assert!(built.verification.command.contains("Hello, World!"));
        assert_eq!(built.complexity, "simple");
        assert_eq!(built.max_iterations, 5);
        assert_eq!(built.expected_iterations, Some(1));
        assert!(built.tags.iter().any(|tag| tag == "python"));
    }

    #[test]
    fn test_task_definition_defaults() {
        let built = echo_task("test").build();

        assert_eq!(built.complexity, "medium");
        assert_eq!(built.max_iterations, 100);
        assert_eq!(built.timeout_seconds, 300);
        assert_eq!(built.expected_iterations, None);
        assert_eq!(built.tags.len(), 0);
    }

    // ---- task validation -----------------------------------------------

    #[test]
    fn test_task_validation_valid() {
        let outcome = echo_task("valid-task").build().validate();
        assert!(outcome.is_ok());
    }

    #[test]
    fn test_task_validation_invalid_name() {
        let outcome = echo_task("invalid task name!").build().validate();
        assert!(matches!(outcome, Err(TaskDefinitionError::Validation(_))));
    }

    #[test]
    fn test_task_validation_empty_prompt() {
        let outcome = TaskDefinition::builder("test", "", "DONE")
            .verification_command("echo ok")
            .build()
            .validate();

        match outcome {
            Err(TaskDefinitionError::MissingField(field)) => assert_eq!(field, "prompt_file"),
            other => panic!("expected MissingField(prompt_file), got {:?}", other),
        }
    }

    #[test]
    fn test_task_validation_empty_verification() {
        // No verification command is configured on purpose.
        let outcome = TaskDefinition::builder("test", "prompt.md", "DONE")
            .build()
            .validate();

        match outcome {
            Err(TaskDefinitionError::MissingField(field)) => {
                assert_eq!(field, "verification.command")
            }
            other => panic!(
                "expected MissingField(verification.command), got {:?}",
                other
            ),
        }
    }

    #[test]
    fn test_task_validation_invalid_complexity() {
        let outcome = echo_task("test").complexity("invalid").build().validate();
        assert!(matches!(outcome, Err(TaskDefinitionError::Validation(_))));
    }

    // ---- iteration delta -----------------------------------------------

    #[test]
    fn test_iteration_delta() {
        let task = echo_task("test").expected_iterations(5).build();

        // (actual iterations, expected delta): under, over, and exactly on target.
        let cases: &[(u32, i32)] = &[(3, -2), (7, 2), (5, 0)];
        for &(actual, delta) in cases {
            assert_eq!(task.iteration_delta(actual), Some(delta));
        }
    }

    #[test]
    fn test_iteration_delta_no_expected() {
        let task = echo_task("test").build();
        assert_eq!(task.iteration_delta(5), None);
    }

    // ---- suite parsing and validation ------------------------------------

    #[test]
    fn test_task_suite_parse() {
        let suite = parse_suite(
            r#"{
                "tasks": [
                    {
                        "name": "hello-world",
                        "prompt_file": "tasks/hello/PROMPT.md",
                        "completion_promise": "TASK_COMPLETE",
                        "verification": {
                            "command": "python hello.py | grep -q 'Hello, World!'"
                        },
                        "complexity": "simple",
                        "max_iterations": 5,
                        "expected_iterations": 1
                    },
                    {
                        "name": "fizzbuzz-tdd",
                        "description": "Implement FizzBuzz with TDD",
                        "prompt_file": "tasks/fizzbuzz/PROMPT.md",
                        "completion_promise": "TESTS_PASSING",
                        "verification": {
                            "command": "pytest test_fizzbuzz.py -v"
                        },
                        "complexity": "medium",
                        "max_iterations": 15,
                        "expected_iterations": 5,
                        "setup": {
                            "files": ["test_fizzbuzz.py"]
                        },
                        "tags": ["python", "tdd"]
                    }
                ],
                "metadata": {
                    "name": "Ralph Benchmark Suite",
                    "version": "1.0.0"
                }
            }"#,
        );
        assert_eq!(suite.tasks.len(), 2);

        let first = &suite.tasks[0];
        assert_eq!(first.name, "hello-world");
        assert_eq!(first.complexity, "simple");
        assert_eq!(first.max_iterations, 5);
        assert_eq!(first.expected_iterations, Some(1));

        let second = &suite.tasks[1];
        assert_eq!(second.name, "fizzbuzz-tdd");
        assert!(second.description.is_some());
        assert_eq!(second.setup.files.len(), 1);
        assert!(second.tags.iter().any(|tag| tag == "tdd"));

        assert_eq!(
            suite.metadata.name.as_deref(),
            Some("Ralph Benchmark Suite")
        );
    }

    #[test]
    fn test_task_suite_validation_empty() {
        let empty_suite = TaskSuite {
            tasks: Vec::new(),
            metadata: SuiteMetadata::default(),
        };

        assert!(matches!(
            empty_suite.validate(),
            Err(TaskDefinitionError::Validation(_))
        ));
    }

    #[test]
    fn test_task_suite_validation_duplicates() {
        let original = echo_task("duplicate").build();
        let twin = original.clone();

        let suite = TaskSuite {
            tasks: vec![twin, original],
            metadata: SuiteMetadata::default(),
        };

        let message = suite.validate().unwrap_err().to_string();
        assert!(message.contains("Duplicate task name"));
    }

    // ---- filters ---------------------------------------------------------

    #[test]
    fn test_filter_by_complexity() {
        let suite = parse_suite(
            r#"{
                "tasks": [
                    {"name": "t1", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "simple"},
                    {"name": "t2", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "medium"},
                    {"name": "t3", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "simple"}
                ]
            }"#,
        );

        let simple_tasks = suite.filter_by_complexity("simple");
        assert_eq!(simple_tasks.len(), 2);
        for task in &simple_tasks {
            assert_eq!(task.complexity, "simple");
        }
    }

    #[test]
    fn test_filter_by_tag() {
        let suite = parse_suite(
            r#"{
                "tasks": [
                    {"name": "t1", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["python", "testing"]},
                    {"name": "t2", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["rust"]},
                    {"name": "t3", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["python"]}
                ]
            }"#,
        );

        assert_eq!(suite.filter_by_tag("python").len(), 2);
    }

    // ---- setup and verification helpers ----------------------------------

    #[test]
    fn test_setup_has_setup() {
        assert!(!TaskSetup::default().has_setup());

        let script_only = TaskSetup {
            script: Some("setup.sh".to_string()),
            files: vec![],
        };
        assert!(script_only.has_setup());

        let files_only = TaskSetup {
            script: None,
            files: vec!["file.py".to_string()],
        };
        assert!(files_only.has_setup());
    }

    #[test]
    fn test_verification_new() {
        let Verification {
            command,
            success_exit_code,
        } = Verification::new("pytest tests/");

        assert_eq!(command, "pytest tests/");
        assert_eq!(success_exit_code, 0);
    }

    #[test]
    fn test_verification_expect_failure() {
        let Verification {
            command,
            success_exit_code,
        } = Verification::expect_failure("false", 1);

        assert_eq!(command, "false");
        assert_eq!(success_exit_code, 1);
    }
}