agent_sim/envd/
lifecycle.rs1use crate::daemon::lifecycle::session_root;
2use crate::envd::error::EnvDaemonError;
3use crate::envd::spec::{EnvSpec, write_env_spec};
4use std::io::Read;
5use std::path::{Path, PathBuf};
6use std::process::Stdio;
7use std::time::Duration;
8use tokio::net::UnixStream;
9use tokio::time::sleep;
10
11pub fn env_root() -> PathBuf {
12 session_root().join("envs")
13}
14
15pub fn bootstrap_dir() -> PathBuf {
16 env_root().join("bootstrap")
17}
18
19pub fn socket_path(env: &str) -> PathBuf {
20 env_root().join(format!("{env}.sock"))
21}
22
23pub fn pid_path(env: &str) -> PathBuf {
24 env_root().join(format!("{env}.pid"))
25}
26
27pub async fn list_envs() -> Result<Vec<(String, PathBuf, bool)>, EnvDaemonError> {
28 EnvRegistry.list_envs().await
29}
30
31pub async fn ensure_env_running(env: &str) -> Result<(), EnvDaemonError> {
32 EnvRegistry.ensure_running(env).await
33}
34
35pub async fn bootstrap_env_daemon(env_spec: &EnvSpec) -> Result<(), EnvDaemonError> {
36 EnvRegistry.bootstrap(env_spec).await
37}
38
/// Stateless handle for discovering, probing and bootstrapping environment
/// daemons.
///
/// The type carries no data; it is `Copy`, so its methods take `self` by
/// value.
#[derive(Clone, Copy, Debug, Default)]
pub struct EnvRegistry;
41
42impl EnvRegistry {
43 pub async fn list_envs(self) -> Result<Vec<(String, PathBuf, bool)>, EnvDaemonError> {
44 let root = env_root();
45 std::fs::create_dir_all(&root)?;
46 let mut out = Vec::new();
47 for entry in std::fs::read_dir(root)? {
48 let entry = entry?;
49 let path = entry.path();
50 if path.extension().and_then(|value| value.to_str()) != Some("sock") {
51 continue;
52 }
53 let Some(stem) = path.file_stem().and_then(|value| value.to_str()) else {
54 continue;
55 };
56 let running = can_connect(&path).await;
57 out.push((stem.to_string(), path, running));
58 }
59 out.sort_by(|a, b| a.0.cmp(&b.0));
60 Ok(out)
61 }
62
63 pub async fn ensure_running(self, env: &str) -> Result<(), EnvDaemonError> {
64 std::fs::create_dir_all(env_root())?;
65 let socket = socket_path(env);
66 if can_connect(&socket).await {
67 return Ok(());
68 }
69 Err(EnvDaemonError::NotRunning(env.to_string()))
70 }
71
72 pub async fn bootstrap(self, env_spec: &EnvSpec) -> Result<(), EnvDaemonError> {
73 std::fs::create_dir_all(env_root())?;
74 std::fs::create_dir_all(bootstrap_dir())?;
75 let socket = socket_path(&env_spec.name);
76 if can_connect(&socket).await {
77 return Err(EnvDaemonError::AlreadyRunning(env_spec.name.clone()));
78 }
79
80 let bootstrap_path =
81 bootstrap_dir().join(format!("{}-{}.json", env_spec.name, uuid::Uuid::new_v4()));
82 write_env_spec(&bootstrap_path, env_spec).map_err(EnvDaemonError::Request)?;
83 let mut child = self.spawn_env_daemon(&env_spec.name, &bootstrap_path)?;
84
85 let timeout = Duration::from_secs(5);
86 let mut waited = Duration::ZERO;
87 while waited < timeout {
88 if can_connect(&socket).await {
89 let _ = std::fs::remove_file(&bootstrap_path);
90 return Ok(());
91 }
92 if let Some(status) = child.try_wait()? {
93 let stderr = if let Some(mut pipe) = child.stderr.take() {
94 tokio::task::spawn_blocking(move || {
95 let mut stderr = String::new();
96 let _ = pipe.read_to_string(&mut stderr);
97 stderr
98 })
99 .await
100 .unwrap_or_else(|_| String::new())
101 } else {
102 String::new()
103 };
104 let _ = std::fs::remove_file(&bootstrap_path);
105 let details = stderr.trim();
106 return Err(EnvDaemonError::StartupFailed(if details.is_empty() {
107 format!("env daemon exited with status {status}")
108 } else {
109 details.to_string()
110 }));
111 }
112 sleep(Duration::from_millis(100)).await;
113 waited += Duration::from_millis(100);
114 }
115 cleanup_bootstrap_timeout(&mut child, &bootstrap_path);
116 Err(EnvDaemonError::StartupTimeout)
117 }
118
119 fn spawn_env_daemon(
120 self,
121 _env: &str,
122 bootstrap_path: &Path,
123 ) -> Result<std::process::Child, EnvDaemonError> {
124 let exe = std::env::current_exe()?;
125 let mut command = std::process::Command::new(exe);
126 command
127 .arg("__internal")
128 .arg("env-daemon")
129 .arg("--env-spec-path")
130 .arg(bootstrap_path)
131 .stdout(Stdio::null())
132 .stderr(Stdio::piped());
133 let child = command.spawn()?;
134 Ok(child)
135 }
136}
137
/// Tears down a daemon start-up attempt: deletes the bootstrap spec file,
/// kills `child`, then waits on it so the process is reaped.
///
/// Every step is best-effort; failures are deliberately ignored because the
/// caller is already on an error path.
fn cleanup_bootstrap_timeout(child: &mut std::process::Child, bootstrap_path: &Path) {
    std::fs::remove_file(bootstrap_path).ok();
    child.kill().ok();
    child.wait().ok();
}
143
144async fn can_connect(socket: &Path) -> bool {
145 UnixStream::connect(socket).await.is_ok()
146}
147
#[cfg(test)]
mod tests {
    use super::cleanup_bootstrap_timeout;
    use std::process::{Command, Stdio};

    /// The timeout cleanup must delete the bootstrap file and leave the child
    /// process killed and reaped.
    #[cfg(unix)]
    #[test]
    fn timeout_cleanup_kills_child_and_removes_bootstrap_file() {
        // Keep the handle alive so the file exists until the cleanup under
        // test removes it.
        let temp_file = tempfile::NamedTempFile::new().expect("temp bootstrap file");
        let spec_path = temp_file.path().to_path_buf();

        // A long-lived child that would outlast the test unless it is killed.
        let mut sleeper_cmd = Command::new("sh");
        sleeper_cmd
            .args(["-c", "sleep 30"])
            .stdout(Stdio::null())
            .stderr(Stdio::null());
        let mut sleeper = sleeper_cmd.spawn().expect("sleep child should spawn");

        cleanup_bootstrap_timeout(&mut sleeper, &spec_path);

        let file_still_present = spec_path.exists();
        assert!(
            !file_still_present,
            "bootstrap file should be removed during timeout cleanup"
        );

        let exit_status = sleeper
            .try_wait()
            .expect("child status should be queryable");
        assert!(exit_status.is_some(), "timed-out child should be reaped");
    }
}
180}