1use serde::{Deserialize, Serialize};
24use std::path::Path;
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct TaskSuite {
32 pub tasks: Vec<TaskDefinition>,
34
35 #[serde(default)]
37 pub metadata: SuiteMetadata,
38}
39
40impl TaskSuite {
41 pub fn from_file(path: impl AsRef<Path>) -> Result<Self, TaskDefinitionError> {
43 let path_ref = path.as_ref();
44 let content = std::fs::read_to_string(path_ref)?;
45 let suite: Self = serde_json::from_str(&content)?;
46 suite.validate()?;
47 Ok(suite)
48 }
49
50 pub fn validate(&self) -> Result<(), TaskDefinitionError> {
52 if self.tasks.is_empty() {
53 return Err(TaskDefinitionError::Validation(
54 "Task suite must contain at least one task".to_string(),
55 ));
56 }
57
58 for task in &self.tasks {
59 task.validate()?;
60 }
61
62 let mut names = std::collections::HashSet::new();
64 for task in &self.tasks {
65 if !names.insert(&task.name) {
66 return Err(TaskDefinitionError::Validation(format!(
67 "Duplicate task name: '{}'",
68 task.name
69 )));
70 }
71 }
72
73 Ok(())
74 }
75
76 pub fn filter_by_complexity(&self, complexity: &str) -> Vec<&TaskDefinition> {
78 self.tasks
79 .iter()
80 .filter(|t| t.complexity == complexity)
81 .collect()
82 }
83
84 pub fn filter_by_tag(&self, tag: &str) -> Vec<&TaskDefinition> {
86 self.tasks
87 .iter()
88 .filter(|t| t.tags.iter().any(|t| t == tag))
89 .collect()
90 }
91}
92
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
95pub struct SuiteMetadata {
96 pub name: Option<String>,
98
99 pub description: Option<String>,
101
102 pub version: Option<String>,
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct TaskDefinition {
112 pub name: String,
120
121 pub prompt_file: String,
125
126 pub completion_promise: String,
130
131 pub verification: Verification,
133
134 #[serde(default)]
140 pub description: Option<String>,
141
142 #[serde(default = "default_complexity")]
146 pub complexity: String,
147
148 #[serde(default = "default_max_iterations")]
152 pub max_iterations: u32,
153
154 #[serde(default)]
158 pub expected_iterations: Option<u32>,
159
160 #[serde(default = "default_timeout_seconds")]
162 pub timeout_seconds: u64,
163
164 #[serde(default)]
166 pub setup: TaskSetup,
167
168 #[serde(default)]
170 pub tags: Vec<String>,
171}
172
/// Complexity label used when a task does not specify one.
fn default_complexity() -> String {
    String::from("medium")
}
176
/// Iteration limit used when a task does not specify one.
fn default_max_iterations() -> u32 {
    const FALLBACK_MAX_ITERATIONS: u32 = 100;
    FALLBACK_MAX_ITERATIONS
}
180
/// Timeout, in seconds, used when a task does not specify one.
fn default_timeout_seconds() -> u64 {
    const FALLBACK_TIMEOUT_SECONDS: u64 = 300;
    FALLBACK_TIMEOUT_SECONDS
}
184
185impl TaskDefinition {
186 pub fn builder(
188 name: impl Into<String>,
189 prompt_file: impl Into<String>,
190 completion_promise: impl Into<String>,
191 ) -> TaskDefinitionBuilder {
192 TaskDefinitionBuilder::new(name, prompt_file, completion_promise)
193 }
194
195 pub fn validate(&self) -> Result<(), TaskDefinitionError> {
197 if self.name.is_empty() {
199 return Err(TaskDefinitionError::MissingField("name".to_string()));
200 }
201
202 if !self
203 .name
204 .chars()
205 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
206 {
207 return Err(TaskDefinitionError::Validation(format!(
208 "Task name '{}' contains invalid characters. Use alphanumeric, hyphens, or underscores only.",
209 self.name
210 )));
211 }
212
213 if self.prompt_file.is_empty() {
215 return Err(TaskDefinitionError::MissingField("prompt_file".to_string()));
216 }
217
218 if self.completion_promise.is_empty() {
220 return Err(TaskDefinitionError::MissingField(
221 "completion_promise".to_string(),
222 ));
223 }
224
225 if self.verification.command.is_empty() {
227 return Err(TaskDefinitionError::MissingField(
228 "verification.command".to_string(),
229 ));
230 }
231
232 if !["simple", "medium", "complex"].contains(&self.complexity.as_str()) {
234 return Err(TaskDefinitionError::Validation(format!(
235 "Invalid complexity '{}'. Must be one of: simple, medium, complex",
236 self.complexity
237 )));
238 }
239
240 Ok(())
241 }
242
243 pub fn iteration_delta(&self, actual: u32) -> Option<i32> {
247 self.expected_iterations
248 .map(|expected| actual as i32 - expected as i32)
249 }
250}
251
252pub struct TaskDefinitionBuilder {
254 name: String,
255 prompt_file: String,
256 completion_promise: String,
257 verification: Verification,
258 description: Option<String>,
259 complexity: String,
260 max_iterations: u32,
261 expected_iterations: Option<u32>,
262 timeout_seconds: u64,
263 setup: TaskSetup,
264 tags: Vec<String>,
265}
266
267impl TaskDefinitionBuilder {
268 pub fn new(
270 name: impl Into<String>,
271 prompt_file: impl Into<String>,
272 completion_promise: impl Into<String>,
273 ) -> Self {
274 Self {
275 name: name.into(),
276 prompt_file: prompt_file.into(),
277 completion_promise: completion_promise.into(),
278 verification: Verification::default(),
279 description: None,
280 complexity: default_complexity(),
281 max_iterations: default_max_iterations(),
282 expected_iterations: None,
283 timeout_seconds: default_timeout_seconds(),
284 setup: TaskSetup::default(),
285 tags: Vec::new(),
286 }
287 }
288
289 pub fn verification_command(mut self, command: impl Into<String>) -> Self {
291 self.verification.command = command.into();
292 self
293 }
294
295 pub fn verification_exit_code(mut self, code: i32) -> Self {
297 self.verification.success_exit_code = code;
298 self
299 }
300
301 pub fn verification(mut self, verification: Verification) -> Self {
303 self.verification = verification;
304 self
305 }
306
307 pub fn description(mut self, description: impl Into<String>) -> Self {
309 self.description = Some(description.into());
310 self
311 }
312
313 pub fn complexity(mut self, complexity: impl Into<String>) -> Self {
315 self.complexity = complexity.into();
316 self
317 }
318
319 pub fn max_iterations(mut self, max: u32) -> Self {
321 self.max_iterations = max;
322 self
323 }
324
325 pub fn expected_iterations(mut self, expected: u32) -> Self {
327 self.expected_iterations = Some(expected);
328 self
329 }
330
331 pub fn timeout_seconds(mut self, seconds: u64) -> Self {
333 self.timeout_seconds = seconds;
334 self
335 }
336
337 pub fn setup(mut self, setup: TaskSetup) -> Self {
339 self.setup = setup;
340 self
341 }
342
343 pub fn setup_script(mut self, script: impl Into<String>) -> Self {
345 self.setup.script = Some(script.into());
346 self
347 }
348
349 pub fn setup_files(mut self, files: Vec<String>) -> Self {
351 self.setup.files = files;
352 self
353 }
354
355 pub fn tags(mut self, tags: Vec<String>) -> Self {
357 self.tags = tags;
358 self
359 }
360
361 pub fn tag(mut self, tag: impl Into<String>) -> Self {
363 self.tags.push(tag.into());
364 self
365 }
366
367 pub fn build(self) -> TaskDefinition {
369 TaskDefinition {
370 name: self.name,
371 prompt_file: self.prompt_file,
372 completion_promise: self.completion_promise,
373 verification: self.verification,
374 description: self.description,
375 complexity: self.complexity,
376 max_iterations: self.max_iterations,
377 expected_iterations: self.expected_iterations,
378 timeout_seconds: self.timeout_seconds,
379 setup: self.setup,
380 tags: self.tags,
381 }
382 }
383}
384
385#[derive(Debug, Clone, Default, Serialize, Deserialize)]
387pub struct Verification {
388 #[serde(default)]
392 pub command: String,
393
394 #[serde(default)]
396 pub success_exit_code: i32,
397}
398
399impl Verification {
400 pub fn new(command: impl Into<String>) -> Self {
402 Self {
403 command: command.into(),
404 success_exit_code: 0,
405 }
406 }
407
408 pub fn expect_failure(command: impl Into<String>, exit_code: i32) -> Self {
410 Self {
411 command: command.into(),
412 success_exit_code: exit_code,
413 }
414 }
415}
416
417#[derive(Debug, Clone, Default, Serialize, Deserialize)]
419pub struct TaskSetup {
420 #[serde(default)]
424 pub script: Option<String>,
425
426 #[serde(default)]
430 pub files: Vec<String>,
431}
432
433impl TaskSetup {
434 pub fn has_setup(&self) -> bool {
436 self.script.is_some() || !self.files.is_empty()
437 }
438}
439
440#[derive(Debug, thiserror::Error)]
442pub enum TaskDefinitionError {
443 #[error("IO error: {0}")]
445 Io(#[from] std::io::Error),
446
447 #[error("JSON parse error: {0}")]
449 Json(#[from] serde_json::Error),
450
451 #[error("Missing required field: {0}")]
453 MissingField(String),
454
455 #[error("Validation error: {0}")]
457 Validation(String),
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder for a task named `name` that uses `prompt.md`, the `DONE`
    /// completion promise, and the `echo ok` verification command.
    fn echo_task(name: &str) -> TaskDefinitionBuilder {
        TaskDefinition::builder(name, "prompt.md", "DONE").verification_command("echo ok")
    }

    /// Parses `json` as a suite, panicking when it is not valid.
    fn parse_suite(json: &str) -> TaskSuite {
        serde_json::from_str(json).unwrap()
    }

    #[test]
    fn test_task_definition_builder() {
        let built = TaskDefinition::builder("hello-world", "tasks/hello.md", "TASK_COMPLETE")
            .verification_command("python hello.py | grep -q 'Hello, World!'")
            .description("Create a hello world script")
            .complexity("simple")
            .max_iterations(5)
            .expected_iterations(1)
            .tag("python")
            .build();

        assert_eq!(built.name, "hello-world");
        assert_eq!(built.prompt_file, "tasks/hello.md");
        assert_eq!(built.completion_promise, "TASK_COMPLETE");
        assert!(built.verification.command.contains("Hello, World!"));
        assert_eq!(built.complexity, "simple");
        assert_eq!(built.max_iterations, 5);
        assert_eq!(built.expected_iterations, Some(1));
        assert!(built.tags.iter().any(|tag| tag == "python"));
    }

    #[test]
    fn test_task_definition_defaults() {
        let built = echo_task("test").build();

        assert_eq!(built.complexity, "medium");
        assert_eq!(built.max_iterations, 100);
        assert_eq!(built.timeout_seconds, 300);
        assert_eq!(built.expected_iterations, None);
        assert_eq!(built.tags.len(), 0);
    }

    #[test]
    fn test_task_validation_valid() {
        let outcome = echo_task("valid-task").build().validate();

        assert!(outcome.is_ok());
    }

    #[test]
    fn test_task_validation_invalid_name() {
        let outcome = echo_task("invalid task name!").build().validate();

        assert!(matches!(
            outcome.unwrap_err(),
            TaskDefinitionError::Validation(_)
        ));
    }

    #[test]
    fn test_task_validation_empty_prompt() {
        let built = TaskDefinition::builder("test", "", "DONE")
            .verification_command("echo ok")
            .build();

        match built.validate() {
            Err(TaskDefinitionError::MissingField(field)) => assert_eq!(field, "prompt_file"),
            other => panic!("expected MissingField for prompt_file, got {:?}", other),
        }
    }

    #[test]
    fn test_task_validation_empty_verification() {
        // No verification command is set, so the builder's empty default remains.
        let built = TaskDefinition::builder("test", "prompt.md", "DONE").build();

        match built.validate() {
            Err(TaskDefinitionError::MissingField(field)) => {
                assert_eq!(field, "verification.command")
            }
            other => panic!(
                "expected MissingField for verification.command, got {:?}",
                other
            ),
        }
    }

    #[test]
    fn test_task_validation_invalid_complexity() {
        let outcome = echo_task("test").complexity("invalid").build().validate();

        assert!(matches!(
            outcome.unwrap_err(),
            TaskDefinitionError::Validation(_)
        ));
    }

    #[test]
    fn test_iteration_delta() {
        let built = echo_task("test").expected_iterations(5).build();

        // (actual iterations, expected delta): below, above, and equal to the expectation.
        for (actual, delta) in [(3, -2), (7, 2), (5, 0)] {
            assert_eq!(built.iteration_delta(actual), Some(delta));
        }
    }

    #[test]
    fn test_iteration_delta_no_expected() {
        let built = echo_task("test").build();

        assert_eq!(built.iteration_delta(5), None);
    }

    #[test]
    fn test_task_suite_parse() {
        let suite = parse_suite(
            r#"{
            "tasks": [
                {
                    "name": "hello-world",
                    "prompt_file": "tasks/hello/PROMPT.md",
                    "completion_promise": "TASK_COMPLETE",
                    "verification": {
                        "command": "python hello.py | grep -q 'Hello, World!'"
                    },
                    "complexity": "simple",
                    "max_iterations": 5,
                    "expected_iterations": 1
                },
                {
                    "name": "fizzbuzz-tdd",
                    "description": "Implement FizzBuzz with TDD",
                    "prompt_file": "tasks/fizzbuzz/PROMPT.md",
                    "completion_promise": "TESTS_PASSING",
                    "verification": {
                        "command": "pytest test_fizzbuzz.py -v"
                    },
                    "complexity": "medium",
                    "max_iterations": 15,
                    "expected_iterations": 5,
                    "setup": {
                        "files": ["test_fizzbuzz.py"]
                    },
                    "tags": ["python", "tdd"]
                }
            ],
            "metadata": {
                "name": "Ralph Benchmark Suite",
                "version": "1.0.0"
            }
        }"#,
        );
        assert_eq!(suite.tasks.len(), 2);

        let first = &suite.tasks[0];
        assert_eq!(first.name, "hello-world");
        assert_eq!(first.complexity, "simple");
        assert_eq!(first.max_iterations, 5);
        assert_eq!(first.expected_iterations, Some(1));

        let second = &suite.tasks[1];
        assert_eq!(second.name, "fizzbuzz-tdd");
        assert!(second.description.is_some());
        assert_eq!(second.setup.files.len(), 1);
        assert!(second.tags.iter().any(|tag| tag == "tdd"));

        assert_eq!(
            suite.metadata.name.as_deref(),
            Some("Ralph Benchmark Suite")
        );
    }

    #[test]
    fn test_task_suite_validation_empty() {
        let suite = TaskSuite {
            tasks: Vec::new(),
            metadata: SuiteMetadata::default(),
        };

        assert!(matches!(
            suite.validate(),
            Err(TaskDefinitionError::Validation(_))
        ));
    }

    #[test]
    fn test_task_suite_validation_duplicates() {
        let suite = TaskSuite {
            tasks: vec![
                echo_task("duplicate").build(),
                echo_task("duplicate").build(),
            ],
            metadata: SuiteMetadata::default(),
        };

        let message = suite.validate().unwrap_err().to_string();
        assert!(message.contains("Duplicate task name"));
    }

    #[test]
    fn test_filter_by_complexity() {
        let suite = parse_suite(
            r#"{
            "tasks": [
                {"name": "t1", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "simple"},
                {"name": "t2", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "medium"},
                {"name": "t3", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "complexity": "simple"}
            ]
        }"#,
        );

        let simple_tasks = suite.filter_by_complexity("simple");
        assert_eq!(simple_tasks.len(), 2);
        for task in &simple_tasks {
            assert_eq!(task.complexity, "simple");
        }
    }

    #[test]
    fn test_filter_by_tag() {
        let suite = parse_suite(
            r#"{
            "tasks": [
                {"name": "t1", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["python", "testing"]},
                {"name": "t2", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["rust"]},
                {"name": "t3", "prompt_file": "p.md", "completion_promise": "DONE", "verification": {"command": "echo ok"}, "tags": ["python"]}
            ]
        }"#,
        );

        assert_eq!(suite.filter_by_tag("python").len(), 2);
    }

    #[test]
    fn test_setup_has_setup() {
        assert!(!TaskSetup::default().has_setup());

        let script_only = TaskSetup {
            script: Some("setup.sh".to_string()),
            files: Vec::new(),
        };
        assert!(script_only.has_setup());

        let files_only = TaskSetup {
            script: None,
            files: vec!["file.py".to_string()],
        };
        assert!(files_only.has_setup());
    }

    #[test]
    fn test_verification_new() {
        let Verification {
            command,
            success_exit_code,
        } = Verification::new("pytest tests/");

        assert_eq!(command, "pytest tests/");
        assert_eq!(success_exit_code, 0);
    }

    #[test]
    fn test_verification_expect_failure() {
        let Verification {
            command,
            success_exit_code,
        } = Verification::expect_failure("false", 1);

        assert_eq!(command, "false");
        assert_eq!(success_exit_code, 1);
    }
}