agent_sim/daemon/
lifecycle.rs1use crate::daemon::error::DaemonError;
2use crate::load::{LoadSpec, write_load_spec};
3use std::io::Read;
4use std::path::{Path, PathBuf};
5use std::process::Stdio;
6use std::time::Duration;
7use tokio::net::UnixStream;
8use tokio::time::sleep;
9
/// Snapshot of one daemon instance as produced by [`InstanceRegistry::list_instances`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InstanceRuntimeInfo {
    /// Instance name, taken from the socket file's stem.
    pub name: String,
    /// Path of the instance's Unix socket file.
    pub socket_path: PathBuf,
    /// Whether a connection to `socket_path` succeeded when the snapshot was taken.
    pub running: bool,
    /// Environment tag read from the instance's `.meta` file, if one is present and non-empty.
    pub env: Option<String>,
}
17
/// Stateless handle grouping the daemon lifecycle operations.
///
/// The type carries no data; every method derives its paths from [`session_root`].
#[derive(Clone, Copy, Debug, Default)]
pub struct InstanceRegistry;
20
21pub fn session_root() -> PathBuf {
22 if let Some(path) = std::env::var_os("AGENT_SIM_HOME") {
23 return PathBuf::from(path);
24 }
25
26 dirs::home_dir()
27 .unwrap_or_else(|| PathBuf::from("."))
28 .join(".agent-sim")
29}
30
31pub fn socket_path(session: &str) -> PathBuf {
32 session_root().join(format!("{session}.sock"))
33}
34
35pub fn pid_path(session: &str) -> PathBuf {
36 session_root().join(format!("{session}.pid"))
37}
38
39pub fn meta_path(session: &str) -> PathBuf {
40 session_root().join(format!("{session}.meta"))
41}
42
43pub fn bootstrap_dir() -> PathBuf {
44 session_root().join("bootstrap")
45}
46
47pub async fn ensure_daemon_running(session: &str) -> Result<(), DaemonError> {
48 InstanceRegistry.ensure_running(session).await
49}
50
51pub async fn bootstrap_daemon(session: &str, load_spec: &LoadSpec) -> Result<(), DaemonError> {
52 InstanceRegistry.bootstrap(session, load_spec).await
53}
54
55impl InstanceRegistry {
56 pub async fn ensure_running(self, session: &str) -> Result<(), DaemonError> {
57 std::fs::create_dir_all(session_root())?;
58 let socket = socket_path(session);
59 if can_connect(&socket).await {
60 return Ok(());
61 }
62 Err(DaemonError::NotRunning(session.to_string()))
63 }
64
65 pub async fn bootstrap(self, session: &str, load_spec: &LoadSpec) -> Result<(), DaemonError> {
66 std::fs::create_dir_all(session_root())?;
67 std::fs::create_dir_all(bootstrap_dir())?;
68 let socket = socket_path(session);
69 if can_connect(&socket).await {
70 return Err(DaemonError::AlreadyRunning(session.to_string()));
71 }
72 let bootstrap_path =
73 bootstrap_dir().join(format!("{session}-{}.json", uuid::Uuid::new_v4()));
74 write_load_spec(&bootstrap_path, load_spec)
75 .map_err(|err| DaemonError::Request(err.to_string()))?;
76 let mut child = self.spawn_daemon(session, &bootstrap_path)?;
77
78 let timeout = Duration::from_secs(5);
79 let mut waited = Duration::from_millis(0);
80 while waited < timeout {
81 if can_connect(&socket).await {
82 let _ = std::fs::remove_file(&bootstrap_path);
83 return Ok(());
84 }
85 if let Some(status) = child.try_wait()? {
86 let stderr = if let Some(mut pipe) = child.stderr.take() {
87 tokio::task::spawn_blocking(move || {
88 let mut stderr = String::new();
89 let _ = pipe.read_to_string(&mut stderr);
90 stderr
91 })
92 .await
93 .unwrap_or_else(|_| String::new())
94 } else {
95 String::new()
96 };
97 let _ = std::fs::remove_file(&bootstrap_path);
98 let details = stderr.trim();
99 let message = if details.is_empty() {
100 format!("daemon exited with status {status}")
101 } else {
102 details.to_string()
103 };
104 return Err(DaemonError::StartupFailed(message));
105 }
106 sleep(Duration::from_millis(100)).await;
107 waited += Duration::from_millis(100);
108 }
109 cleanup_bootstrap_timeout(&mut child, &bootstrap_path);
110 Err(DaemonError::StartupTimeout)
111 }
112
113 fn spawn_daemon(
114 self,
115 session: &str,
116 bootstrap_path: &Path,
117 ) -> Result<std::process::Child, DaemonError> {
118 let exe = std::env::current_exe()?;
119 let mut command = std::process::Command::new(exe);
120 command
121 .arg("__internal")
122 .arg("instance-daemon")
123 .arg("--instance")
124 .arg(session)
125 .arg("--load-spec-path")
126 .arg(bootstrap_path)
127 .stdout(Stdio::null())
128 .stderr(Stdio::piped());
129 let child = command.spawn()?;
130 Ok(child)
131 }
132
133 pub async fn list_instances(self) -> Result<Vec<InstanceRuntimeInfo>, DaemonError> {
134 let root = session_root();
135 std::fs::create_dir_all(&root)?;
136 let mut out = Vec::new();
137 for entry in std::fs::read_dir(root)? {
138 let entry = entry?;
139 let path = entry.path();
140 if path.extension().and_then(|v| v.to_str()) != Some("sock") {
141 continue;
142 }
143 let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
144 continue;
145 };
146 let running = can_connect(&path).await;
147 let env = read_env_tag(stem);
148 out.push(InstanceRuntimeInfo {
149 name: stem.to_string(),
150 socket_path: path,
151 running,
152 env,
153 });
154 }
155 out.sort_by(|a, b| a.name.cmp(&b.name));
156 Ok(out)
157 }
158}
159
160pub async fn list_instances() -> Result<Vec<InstanceRuntimeInfo>, DaemonError> {
161 InstanceRegistry.list_instances().await
162}
163
/// Best-effort teardown after a failed daemon startup.
///
/// Deletes the bootstrap file, kills `child`, and then waits on it so the process is
/// reaped. Every step ignores its own error so the remaining steps always run.
fn cleanup_bootstrap_timeout(child: &mut std::process::Child, bootstrap_path: &Path) {
    std::fs::remove_file(bootstrap_path).ok();
    child.kill().ok();
    // Wait after the kill so the child does not linger as a zombie.
    child.wait().ok();
}
169
170async fn can_connect(socket: &Path) -> bool {
171 UnixStream::connect(socket).await.is_ok()
172}
173
174pub fn write_env_tag(session: &str, env: Option<&str>) -> Result<(), DaemonError> {
175 let path = meta_path(session);
176 if let Some(env) = env {
177 std::fs::write(path, env)?;
178 } else if path.exists() {
179 let _ = std::fs::remove_file(path);
180 }
181 Ok(())
182}
183
184pub fn read_env_tag(session: &str) -> Option<String> {
185 let path = meta_path(session);
186 std::fs::read_to_string(path)
187 .ok()
188 .map(|value| value.trim().to_string())
189 .filter(|value| !value.is_empty())
190}
191
192pub fn remove_env_tag(session: &str) {
193 let path = meta_path(session);
194 if path.exists() {
195 let _ = std::fs::remove_file(path);
196 }
197}
198
199pub fn read_pid(session: &str) -> Option<u32> {
200 std::fs::read_to_string(pid_path(session))
201 .ok()
202 .and_then(|value| value.trim().parse::<u32>().ok())
203}
204
205pub fn kill_pid(pid: u32) -> Result<(), DaemonError> {
206 #[cfg(unix)]
207 {
208 let result = unsafe { libc::kill(pid as i32, libc::SIGKILL) };
209 if result != 0 {
210 return Err(DaemonError::Request(format!(
211 "failed to kill pid {pid}: {}",
212 std::io::Error::last_os_error()
213 )));
214 }
215 Ok(())
216 }
217 #[cfg(not(unix))]
218 {
219 let _ = pid;
220 Err(DaemonError::Request(
221 "pid kill fallback is not supported on this platform".to_string(),
222 ))
223 }
224}
225
#[cfg(test)]
mod tests {
    /// The timeout cleanup must delete the bootstrap file and leave the child reaped.
    #[cfg(unix)]
    #[test]
    fn timeout_cleanup_kills_child_and_removes_bootstrap_file() {
        use super::cleanup_bootstrap_timeout;
        use std::process::{Command, Stdio};

        let bootstrap = tempfile::NamedTempFile::new().expect("temp bootstrap file");
        let bootstrap_path = bootstrap.path().to_path_buf();
        // A long-running child stands in for a daemon that never opens its socket.
        let mut child = Command::new("sh")
            .args(["-c", "sleep 30"])
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .spawn()
            .expect("sleep child should spawn");

        cleanup_bootstrap_timeout(&mut child, &bootstrap_path);

        let file_still_present = bootstrap_path.exists();
        assert!(
            !file_still_present,
            "bootstrap file should be removed during timeout cleanup"
        );

        let reaped = child
            .try_wait()
            .expect("child status should be queryable")
            .is_some();
        assert!(reaped, "timed-out child should be reaped");
    }
}
258}