use crate::task_definition::{TaskDefinition, Verification};
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
/// Strategy deciding what happens to a benchmark workspace once a task has finished.
///
/// The derived [`Default`] is [`CleanupPolicy::OnSuccess`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CleanupPolicy {
    /// Keep workspaces on disk and prune older ones, retaining the given number.
    Rotate(usize),
    /// Remove the workspace only when the run was reported as successful.
    #[default]
    OnSuccess,
    /// Remove the workspace regardless of the outcome.
    Always,
    /// Leave the workspace on disk.
    Never,
}

impl CleanupPolicy {
    /// Builds a policy from its textual name, compared case-insensitively.
    ///
    /// Recognised names are `"rotate"`, `"on_success"`, `"always"` and `"never"`.
    /// For `"rotate"`, `keep_last_n` supplies the retention count and falls back to 5
    /// when it is `None`; it is ignored for every other name. Any unrecognised name
    /// yields [`CleanupPolicy::OnSuccess`].
    pub fn from_str(s: &str, keep_last_n: Option<usize>) -> Self {
        let normalized = s.to_lowercase();
        match normalized.as_str() {
            "rotate" => Self::Rotate(keep_last_n.unwrap_or(5)),
            "always" => Self::Always,
            "never" => Self::Never,
            // "on_success" and every unknown name lead to the same policy.
            _ => Self::OnSuccess,
        }
    }
}
/// Handle to one on-disk benchmark workspace directory.
#[derive(Debug)]
pub struct TaskWorkspace {
/// Directory that holds the workspace contents.
path: PathBuf,
/// Name of the task, copied from the task definition when the workspace is created.
task_name: String,
/// Millisecond timestamp taken at creation; also the numeric suffix of the directory name.
created_at: u64,
/// Set once `cleanup` has completed, which turns later `cleanup` calls into no-ops.
cleaned_up: bool,
}
impl TaskWorkspace {
pub fn create(task: &TaskDefinition, base_dir: &Path) -> Result<Self, WorkspaceError> {
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_millis() as u64)
.unwrap_or(0);
let dir_name = format!("ralph-bench-{}-{}", task.name, timestamp);
let path = base_dir.join(&dir_name);
fs::create_dir_all(&path)?;
let agent_dir = path.join(".ralph").join("agent");
fs::create_dir_all(&agent_dir)?;
fs::write(agent_dir.join("scratchpad.md"), "")?;
let git_output = Command::new("git")
.args(["init", "--initial-branch=main"])
.current_dir(&path)
.output()?;
if !git_output.status.success() {
let stderr = String::from_utf8_lossy(&git_output.stderr);
return Err(WorkspaceError::GitInit(stderr.to_string()));
}
Command::new("git")
.args(["config", "user.email", "benchmark@ralph.local"])
.current_dir(&path)
.output()?;
Command::new("git")
.args(["config", "user.name", "Ralph Benchmark"])
.current_dir(&path)
.output()?;
Ok(Self {
path,
task_name: task.name.clone(),
created_at: timestamp,
cleaned_up: false,
})
}
pub fn path(&self) -> &Path {
&self.path
}
pub fn task_name(&self) -> &str {
&self.task_name
}
pub fn created_at(&self) -> u64 {
self.created_at
}
pub fn setup(&self, task: &TaskDefinition, tasks_dir: &Path) -> Result<(), WorkspaceError> {
let prompt_src = tasks_dir.join(&task.prompt_file);
let prompt_dst = self.path.join("PROMPT.md");
if prompt_src.exists() {
fs::copy(&prompt_src, &prompt_dst)?;
} else {
return Err(WorkspaceError::MissingFile(
prompt_src.to_string_lossy().to_string(),
));
}
for file in &task.setup.files {
let src = tasks_dir.join(file);
let dst = self.path.join(file);
if let Some(parent) = dst.parent() {
fs::create_dir_all(parent)?;
}
if src.exists() {
if src.is_dir() {
copy_dir_recursive(&src, &dst)?;
} else {
fs::copy(&src, &dst)?;
}
} else {
return Err(WorkspaceError::MissingFile(
src.to_string_lossy().to_string(),
));
}
}
if let Some(script) = &task.setup.script {
let script_path = tasks_dir.join(script);
if script_path.exists() {
let script_dst = self.path.join(script);
if let Some(parent) = script_dst.parent() {
fs::create_dir_all(parent)?;
}
fs::copy(&script_path, &script_dst)?;
let output = Command::new("bash")
.arg(&script_dst)
.current_dir(&self.path)
.output()?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Err(WorkspaceError::SetupScript(stderr.to_string()));
}
}
}
Command::new("git")
.args(["add", "-A"])
.current_dir(&self.path)
.output()?;
let commit_output = Command::new("git")
.args(["commit", "-m", "Initial benchmark setup", "--allow-empty"])
.current_dir(&self.path)
.output()?;
if !commit_output.status.success() {
tracing::debug!(
"Initial commit warning: {}",
String::from_utf8_lossy(&commit_output.stderr)
);
}
Ok(())
}
pub fn cleanup(&mut self) -> Result<(), WorkspaceError> {
if self.cleaned_up {
return Ok(());
}
if self.path.exists() {
fs::remove_dir_all(&self.path)?;
}
self.cleaned_up = true;
Ok(())
}
pub fn is_cleaned_up(&self) -> bool {
self.cleaned_up
}
}
impl Drop for TaskWorkspace {
    /// Logs, at debug level, when a handle is dropped while its directory is still on
    /// disk. Nothing is deleted here; removal only happens through `cleanup`.
    fn drop(&mut self) {
        let already_gone = self.cleaned_up || !self.path.exists();
        if already_gone {
            return;
        }
        tracing::debug!(
            "Workspace {} not cleaned up, path retained: {}",
            self.task_name,
            self.path.display()
        );
    }
}
/// Outcome of running a task's verification command.
#[derive(Debug, Clone)]
pub struct VerificationResult {
    /// Whether the verification counted as successful.
    pub passed: bool,
    /// Exit code reported for the command.
    pub exit_code: i32,
    /// Exit code that was required for success.
    pub expected_exit_code: i32,
    /// Captured standard output of the command.
    pub stdout: String,
    /// Captured standard error of the command.
    pub stderr: String,
}

impl VerificationResult {
    /// Renders a one-line, human-readable verdict.
    ///
    /// A passing result reads `PASSED (exit code N)`; a failing one reads
    /// `FAILED (exit code N, expected M)`.
    pub fn summary(&self) -> String {
        let Self {
            passed,
            exit_code,
            expected_exit_code,
            ..
        } = self;
        if !*passed {
            return format!(
                "FAILED (exit code {}, expected {})",
                exit_code, expected_exit_code
            );
        }
        format!("PASSED (exit code {})", exit_code)
    }
}
impl TaskWorkspace {
    /// Runs the task's verification command inside the workspace and reports the outcome.
    ///
    /// The command is executed as `bash -c <command>` with the workspace as working
    /// directory, and its stdout and stderr are captured. The result passes when the
    /// exit code equals `verification.success_exit_code`. When the process reports no
    /// exit code, `-1` is used in its place.
    ///
    /// An empty command is treated as an immediate pass (exit code 0, expected 0, empty
    /// output) and nothing is executed.
    ///
    /// # Errors
    ///
    /// Returns [`WorkspaceError::Verification`] when `bash` cannot be executed.
    pub fn run_verification(
        &self,
        verification: &Verification,
    ) -> Result<VerificationResult, WorkspaceError> {
        let command = verification.command.as_str();
        if command.is_empty() {
            return Ok(VerificationResult {
                passed: true,
                exit_code: 0,
                expected_exit_code: 0,
                stdout: String::new(),
                stderr: String::new(),
            });
        }

        tracing::debug!(
            "Running verification in {}: {}",
            self.path.display(),
            verification.command
        );

        let mut shell = Command::new("bash");
        shell.arg("-c").arg(command).current_dir(&self.path);
        let output = match shell.output() {
            Ok(output) => output,
            Err(e) => {
                return Err(WorkspaceError::Verification(format!(
                    "Failed to execute: {}",
                    e
                )));
            }
        };

        let expected_exit_code = verification.success_exit_code;
        let exit_code = output.status.code().unwrap_or(-1);
        let passed = exit_code == expected_exit_code;

        tracing::debug!(
            "Verification result: {} (exit code {}, expected {})",
            if passed { "PASSED" } else { "FAILED" },
            exit_code,
            verification.success_exit_code
        );

        Ok(VerificationResult {
            passed,
            exit_code,
            expected_exit_code,
            stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
            stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
        })
    }
}
/// Creates benchmark workspaces under one base directory and applies a cleanup policy to them.
#[derive(Debug)]
pub struct WorkspaceManager {
/// Directory under which workspace directories are created and scanned.
base_dir: PathBuf,
/// Policy consulted by `apply_cleanup`.
policy: CleanupPolicy,
}
impl WorkspaceManager {
    /// Creates a manager that places workspaces under `base_dir` and cleans them up
    /// according to `policy`. The directory is not touched until a workspace is created.
    pub fn new(base_dir: impl Into<PathBuf>, policy: CleanupPolicy) -> Self {
        Self {
            base_dir: base_dir.into(),
            policy,
        }
    }

    /// Returns the directory under which workspaces are created.
    pub fn base_dir(&self) -> &Path {
        &self.base_dir
    }

    /// Returns the cleanup policy this manager applies.
    pub fn policy(&self) -> CleanupPolicy {
        self.policy
    }

    /// Creates a new workspace for `task` under the manager's base directory.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`TaskWorkspace::create`].
    pub fn create_workspace(&self, task: &TaskDefinition) -> Result<TaskWorkspace, WorkspaceError> {
        TaskWorkspace::create(task, &self.base_dir)
    }

    /// Applies the configured policy to `workspace` after a run.
    ///
    /// Returns `Ok(true)` when the workspace directory was removed and `Ok(false)` when
    /// it was retained:
    /// - `Always` removes it;
    /// - `OnSuccess` removes it only when `success` is `true`;
    /// - `Never` retains it;
    /// - `Rotate(n)` prunes old workspaces under the base directory, keeping the `n`
    ///   newest. If that pruning removed this workspace's own directory (for example
    ///   with `n == 0`), the handle is marked cleaned up and `true` is returned.
    ///
    /// # Errors
    ///
    /// Returns [`WorkspaceError::Io`] when removing or scanning directories fails.
    pub fn apply_cleanup(
        &self,
        workspace: &mut TaskWorkspace,
        success: bool,
    ) -> Result<bool, WorkspaceError> {
        match self.policy {
            CleanupPolicy::Always => {
                workspace.cleanup()?;
                Ok(true)
            }
            CleanupPolicy::OnSuccess if success => {
                workspace.cleanup()?;
                Ok(true)
            }
            CleanupPolicy::OnSuccess | CleanupPolicy::Never => Ok(false),
            CleanupPolicy::Rotate(keep_last_n) => {
                self.rotate_workspaces(keep_last_n)?;
                if workspace.path().exists() {
                    Ok(false)
                } else {
                    // Rotation deleted this workspace too; keep the handle's state and
                    // the return value in line with what is actually on disk.
                    workspace.cleanup()?;
                    Ok(true)
                }
            }
        }
    }

    /// Removes all but the `keep_last_n` newest workspaces under the base directory.
    ///
    /// Only directories whose name starts with `ralph-bench-` and ends in a numeric
    /// suffix are considered; they are ranked by that suffix, largest first. Directories
    /// without a parseable suffix are never removed. A missing base directory is not an
    /// error.
    ///
    /// # Errors
    ///
    /// Returns [`WorkspaceError::Io`] when the scan or a removal fails; removal stops at
    /// the first failure.
    pub fn rotate_workspaces(&self, keep_last_n: usize) -> Result<(), WorkspaceError> {
        // `list_workspaces` already yields newest-first order, so the scan is shared
        // instead of being repeated here.
        let stale = self
            .list_workspaces()?
            .into_iter()
            .filter(|info| info.timestamp.is_some())
            .skip(keep_last_n);
        for info in stale {
            tracing::debug!("Rotating old workspace: {}", info.path.display());
            fs::remove_dir_all(&info.path)?;
        }
        Ok(())
    }

    /// Lists the workspace directories found directly under the base directory.
    ///
    /// An entry is included when it is a directory whose UTF-8 name starts with
    /// `ralph-bench-`. The result is ordered newest first by the numeric suffix of the
    /// name; entries whose suffix cannot be parsed come last. A missing base directory
    /// yields an empty list.
    ///
    /// # Errors
    ///
    /// Returns [`WorkspaceError::Io`] when reading the base directory fails.
    pub fn list_workspaces(&self) -> Result<Vec<WorkspaceInfo>, WorkspaceError> {
        if !self.base_dir.exists() {
            return Ok(Vec::new());
        }
        let mut workspaces = Vec::new();
        for entry in fs::read_dir(&self.base_dir)? {
            let path = entry?.path();
            if !path.is_dir() {
                continue;
            }
            let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
                continue;
            };
            if !name.starts_with("ralph-bench-") {
                continue;
            }
            let timestamp = extract_timestamp(name);
            let task_name = extract_task_name(name);
            workspaces.push(WorkspaceInfo {
                path,
                task_name,
                timestamp,
            });
        }
        // `None` compares below every `Some`, so reversing puts unparseable names last.
        workspaces.sort_by_key(|info| std::cmp::Reverse(info.timestamp));
        Ok(workspaces)
    }
}
/// Description of one workspace directory, as produced by `WorkspaceManager::list_workspaces`.
#[derive(Debug, Clone)]
pub struct WorkspaceInfo {
/// Path of the workspace directory.
pub path: PathBuf,
/// Task name parsed from the directory name; `None` when it could not be parsed.
pub task_name: Option<String>,
/// Numeric suffix parsed from the directory name; `None` when it could not be parsed.
pub timestamp: Option<u64>,
}
/// Errors produced while creating, preparing, verifying or removing workspaces.
#[derive(Debug, thiserror::Error)]
pub enum WorkspaceError {
/// An underlying I/O operation failed; `#[from]` lets `?` convert `io::Error` into this variant.
#[error("IO error: {0}")]
Io(#[from] io::Error),
/// `git init` exited unsuccessfully; the payload is git's stderr output.
#[error("Git init failed: {0}")]
GitInit(String),
/// A file required for setup does not exist; the payload is the path that was looked up.
#[error("Missing required file: {0}")]
MissingFile(String),
/// The setup script exited unsuccessfully; the payload is the script's stderr output.
#[error("Setup script failed: {0}")]
SetupScript(String),
/// The verification command could not be executed; the payload describes the failure.
#[error("Verification failed: {0}")]
Verification(String),
}
/// Copies the directory tree rooted at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first; every entry of `src` is then
/// copied under the same file name, descending into sub-directories. Copying stops at
/// the first I/O error, which is returned.
fn copy_dir_recursive(src: &Path, dst: &Path) -> io::Result<()> {
    fs::create_dir_all(dst)?;
    for item in fs::read_dir(src)? {
        let item = item?;
        let from = item.path();
        let to = dst.join(item.file_name());
        if !from.is_dir() {
            fs::copy(&from, &to)?;
            continue;
        }
        copy_dir_recursive(&from, &to)?;
    }
    Ok(())
}
/// Parses the text after the last `-` in `dir_name` as a `u64`.
///
/// When `dir_name` contains no `-`, the whole name is parsed instead. Returns `None`
/// when that text is not a valid `u64`.
fn extract_timestamp(dir_name: &str) -> Option<u64> {
    let tail = match dir_name.rsplit_once('-') {
        Some((_, after_dash)) => after_dash,
        None => dir_name,
    };
    tail.parse::<u64>().ok()
}
/// Recovers the task name from a workspace directory name.
///
/// The `ralph-bench-` prefix is removed and everything before the last remaining `-`
/// is returned. Returns `None` when the prefix is absent or no `-` follows it. The
/// trailing segment is not checked for being numeric.
fn extract_task_name(dir_name: &str) -> Option<String> {
    let (task, _suffix) = dir_name.strip_prefix("ralph-bench-")?.rsplit_once('-')?;
    Some(task.to_owned())
}
// Unit tests for workspace creation, setup, cleanup policies, rotation, listing,
// the directory-name helpers and verification. Each test works inside its own
// temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
// Builds a minimal task whose prompt lives at `tasks/test/PROMPT.md` and whose
// verification command is `echo ok`.
fn make_test_task(name: &str) -> TaskDefinition {
TaskDefinition::builder(name, "tasks/test/PROMPT.md", "DONE")
.verification_command("echo ok")
.build()
}
// Creating a workspace yields the directory, a git repository and the agent scratchpad.
#[test]
fn test_workspace_create() {
let temp_dir = TempDir::new().unwrap();
let task = make_test_task("hello-world");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
assert!(workspace.path().exists());
assert!(workspace.path().join(".git").exists());
assert!(workspace.path().join(".ralph/agent").exists());
assert!(workspace.path().join(".ralph/agent/scratchpad.md").exists());
assert_eq!(workspace.task_name(), "hello-world");
}
// Cleanup removes the directory, flags the handle, and may be called a second time.
#[test]
fn test_workspace_cleanup() {
let temp_dir = TempDir::new().unwrap();
let task = make_test_task("cleanup-test");
let mut workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
let path = workspace.path().to_path_buf();
assert!(path.exists());
assert!(!workspace.is_cleaned_up());
workspace.cleanup().unwrap();
assert!(!path.exists());
assert!(workspace.is_cleaned_up());
// Second call must succeed without doing anything.
workspace.cleanup().unwrap();
}
// Setup copies the task's prompt file to `PROMPT.md` in the workspace.
#[test]
fn test_workspace_setup_with_prompt() {
let temp_dir = TempDir::new().unwrap();
let tasks_dir = TempDir::new().unwrap();
let prompt_dir = tasks_dir.path().join("tasks/test");
fs::create_dir_all(&prompt_dir).unwrap();
fs::write(
prompt_dir.join("PROMPT.md"),
"# Test Prompt\n\nDo something.",
)
.unwrap();
let task = make_test_task("setup-test");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
workspace.setup(&task, tasks_dir.path()).unwrap();
let prompt_dst = workspace.path().join("PROMPT.md");
assert!(prompt_dst.exists());
assert!(
fs::read_to_string(&prompt_dst)
.unwrap()
.contains("Test Prompt")
);
}
// Setup copies the listed setup files into the workspace.
#[test]
fn test_workspace_setup_with_files() {
let temp_dir = TempDir::new().unwrap();
let tasks_dir = TempDir::new().unwrap();
let prompt_dir = tasks_dir.path().join("tasks/test");
fs::create_dir_all(&prompt_dir).unwrap();
fs::write(prompt_dir.join("PROMPT.md"), "# Test").unwrap();
fs::write(tasks_dir.path().join("helper.py"), "# helper").unwrap();
let task = TaskDefinition::builder("setup-files-test", "tasks/test/PROMPT.md", "DONE")
.verification_command("echo ok")
.setup_files(vec!["helper.py".to_string()])
.build();
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
workspace.setup(&task, tasks_dir.path()).unwrap();
assert!(workspace.path().join("helper.py").exists());
}
// Setup fails with `MissingFile` when the prompt file does not exist.
#[test]
fn test_workspace_setup_missing_prompt() {
let temp_dir = TempDir::new().unwrap();
let tasks_dir = TempDir::new().unwrap();
let task = make_test_task("missing-prompt");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
let result = workspace.setup(&task, tasks_dir.path());
assert!(matches!(result, Err(WorkspaceError::MissingFile(_))));
}
// Policy names are matched case-insensitively; unknown names fall back to `OnSuccess`
// and `rotate` defaults to keeping 5.
#[test]
fn test_cleanup_policy_from_str() {
assert_eq!(
CleanupPolicy::from_str("rotate", Some(10)),
CleanupPolicy::Rotate(10)
);
assert_eq!(
CleanupPolicy::from_str("rotate", None),
CleanupPolicy::Rotate(5)
);
assert_eq!(
CleanupPolicy::from_str("on_success", None),
CleanupPolicy::OnSuccess
);
assert_eq!(
CleanupPolicy::from_str("always", None),
CleanupPolicy::Always
);
assert_eq!(CleanupPolicy::from_str("never", None), CleanupPolicy::Never);
assert_eq!(
CleanupPolicy::from_str("ROTATE", Some(3)),
CleanupPolicy::Rotate(3)
);
assert_eq!(
CleanupPolicy::from_str("unknown", None),
CleanupPolicy::OnSuccess
);
}
// The timestamp is the numeric text after the last dash; non-numeric suffixes give `None`.
#[test]
fn test_extract_timestamp() {
assert_eq!(
extract_timestamp("ralph-bench-hello-world-1704067200000"),
Some(1_704_067_200_000)
);
assert_eq!(
extract_timestamp("ralph-bench-fizz-buzz-tdd-1704067300000"),
Some(1_704_067_300_000)
);
assert_eq!(extract_timestamp("ralph-bench-invalid"), None);
assert_eq!(extract_timestamp("other-dir"), None);
}
// The task name is what lies between the `ralph-bench-` prefix and the last dash.
#[test]
fn test_extract_task_name() {
assert_eq!(
extract_task_name("ralph-bench-hello-world-1704067200000"),
Some("hello-world".to_string())
);
assert_eq!(
extract_task_name("ralph-bench-simple-1704067200000"),
Some("simple".to_string())
);
}
// Rotation with a limit of 2 removes the oldest of three workspaces.
#[test]
fn test_workspace_manager_rotate() {
let temp_dir = TempDir::new().unwrap();
let manager = WorkspaceManager::new(temp_dir.path(), CleanupPolicy::Rotate(2));
let task = make_test_task("rotate-test");
let ws1 = manager.create_workspace(&task).unwrap();
// Sleeps keep the millisecond timestamps in the directory names distinct.
std::thread::sleep(std::time::Duration::from_millis(10));
let ws2 = manager.create_workspace(&task).unwrap();
std::thread::sleep(std::time::Duration::from_millis(10));
let ws3 = manager.create_workspace(&task).unwrap();
assert!(ws1.path().exists());
assert!(ws2.path().exists());
assert!(ws3.path().exists());
manager.rotate_workspaces(2).unwrap();
assert!(!ws1.path().exists());
assert!(ws2.path().exists());
assert!(ws3.path().exists());
}
// The `Always` policy removes the workspace and reports it as cleaned.
#[test]
fn test_workspace_manager_apply_cleanup_always() {
let temp_dir = TempDir::new().unwrap();
let manager = WorkspaceManager::new(temp_dir.path(), CleanupPolicy::Always);
let task = make_test_task("always-cleanup");
let mut workspace = manager.create_workspace(&task).unwrap();
let path = workspace.path().to_path_buf();
assert!(path.exists());
let cleaned = manager.apply_cleanup(&mut workspace, true).unwrap();
assert!(cleaned);
assert!(!path.exists());
}
// The `OnSuccess` policy removes the workspace after success and keeps it after failure.
#[test]
fn test_workspace_manager_apply_cleanup_on_success() {
let temp_dir = TempDir::new().unwrap();
let manager = WorkspaceManager::new(temp_dir.path(), CleanupPolicy::OnSuccess);
let task = make_test_task("on-success-cleanup");
let mut ws_success = manager.create_workspace(&task).unwrap();
let path_success = ws_success.path().to_path_buf();
let cleaned = manager.apply_cleanup(&mut ws_success, true).unwrap();
assert!(cleaned);
assert!(!path_success.exists());
let mut ws_failure = manager.create_workspace(&task).unwrap();
let path_failure = ws_failure.path().to_path_buf();
let cleaned = manager.apply_cleanup(&mut ws_failure, false).unwrap();
assert!(!cleaned);
assert!(path_failure.exists());
}
// Listing returns every workspace, newest first.
#[test]
fn test_workspace_manager_list_workspaces() {
let temp_dir = TempDir::new().unwrap();
let manager = WorkspaceManager::new(temp_dir.path(), CleanupPolicy::Never);
let task1 = make_test_task("list-test-a");
let task2 = make_test_task("list-test-b");
let _ws1 = manager.create_workspace(&task1).unwrap();
std::thread::sleep(std::time::Duration::from_millis(10));
let _ws2 = manager.create_workspace(&task2).unwrap();
let list = manager.list_workspaces().unwrap();
assert_eq!(list.len(), 2);
assert!(list[0].timestamp > list[1].timestamp);
}
// Recursive copy reproduces files at the top level and inside sub-directories.
#[test]
fn test_copy_dir_recursive() {
let temp_dir = TempDir::new().unwrap();
let src = temp_dir.path().join("src");
let dst = temp_dir.path().join("dst");
fs::create_dir_all(src.join("subdir")).unwrap();
fs::write(src.join("file1.txt"), "content1").unwrap();
fs::write(src.join("subdir/file2.txt"), "content2").unwrap();
copy_dir_recursive(&src, &dst).unwrap();
assert!(dst.join("file1.txt").exists());
assert!(dst.join("subdir/file2.txt").exists());
assert_eq!(
fs::read_to_string(dst.join("file1.txt")).unwrap(),
"content1"
);
assert_eq!(
fs::read_to_string(dst.join("subdir/file2.txt")).unwrap(),
"content2"
);
}
// A verification command that exits with the expected code passes; it runs inside
// the workspace, so it can read files written there.
#[test]
fn test_run_verification_success() {
let temp_dir = TempDir::new().unwrap();
let task = make_test_task("verify-success");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
fs::write(workspace.path().join("hello.txt"), "Hello, World!").unwrap();
let verification = Verification {
command: "cat hello.txt | grep -q 'Hello, World!'".to_string(),
success_exit_code: 0,
};
let result = workspace.run_verification(&verification).unwrap();
assert!(result.passed);
assert_eq!(result.exit_code, 0);
assert_eq!(result.expected_exit_code, 0);
}
// A command with a non-matching exit code is reported as failed, not as an error.
#[test]
fn test_run_verification_failure() {
let temp_dir = TempDir::new().unwrap();
let task = make_test_task("verify-failure");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
let verification = Verification {
command: "cat nonexistent.txt".to_string(),
success_exit_code: 0,
};
let result = workspace.run_verification(&verification).unwrap();
assert!(!result.passed);
assert_ne!(result.exit_code, 0);
}
// A non-zero expected exit code is honoured.
#[test]
fn test_run_verification_custom_exit_code() {
let temp_dir = TempDir::new().unwrap();
let task = make_test_task("verify-custom-exit");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
let verification = Verification {
command: "exit 42".to_string(),
success_exit_code: 42,
};
let result = workspace.run_verification(&verification).unwrap();
assert!(result.passed);
assert_eq!(result.exit_code, 42);
assert_eq!(result.expected_exit_code, 42);
}
// An empty verification command counts as passed.
#[test]
fn test_run_verification_empty_command() {
let temp_dir = TempDir::new().unwrap();
let task = make_test_task("verify-empty");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
let verification = Verification {
command: String::new(),
success_exit_code: 0,
};
let result = workspace.run_verification(&verification).unwrap();
assert!(result.passed);
}
// Both stdout and stderr of the verification command are captured.
#[test]
fn test_run_verification_captures_output() {
let temp_dir = TempDir::new().unwrap();
let task = make_test_task("verify-capture");
let workspace = TaskWorkspace::create(&task, temp_dir.path()).unwrap();
let verification = Verification {
command: "echo 'stdout message' && echo 'stderr message' >&2".to_string(),
success_exit_code: 0,
};
let result = workspace.run_verification(&verification).unwrap();
assert!(result.passed);
assert!(result.stdout.contains("stdout message"));
assert!(result.stderr.contains("stderr message"));
}
// The summary text differs between passed and failed results.
#[test]
fn test_verification_result_summary() {
let passed_result = VerificationResult {
passed: true,
exit_code: 0,
expected_exit_code: 0,
stdout: String::new(),
stderr: String::new(),
};
assert_eq!(passed_result.summary(), "PASSED (exit code 0)");
let failed_result = VerificationResult {
passed: false,
exit_code: 1,
expected_exit_code: 0,
stdout: String::new(),
stderr: String::new(),
};
assert_eq!(failed_result.summary(), "FAILED (exit code 1, expected 0)");
}
}