Skip to main content

agent_sim/envd/
lifecycle.rs

1use crate::daemon::lifecycle::session_root;
2use crate::envd::error::EnvDaemonError;
3use crate::envd::spec::{EnvSpec, write_env_spec};
4use std::io::Read;
5use std::path::{Path, PathBuf};
6use std::process::Stdio;
7use std::time::Duration;
8use tokio::net::UnixStream;
9use tokio::time::sleep;
10
11pub fn env_root() -> PathBuf {
12    session_root().join("envs")
13}
14
15pub fn bootstrap_dir() -> PathBuf {
16    env_root().join("bootstrap")
17}
18
19pub fn socket_path(env: &str) -> PathBuf {
20    env_root().join(format!("{env}.sock"))
21}
22
23pub fn pid_path(env: &str) -> PathBuf {
24    env_root().join(format!("{env}.pid"))
25}
26
27pub async fn list_envs() -> Result<Vec<(String, PathBuf, bool)>, EnvDaemonError> {
28    EnvRegistry.list_envs().await
29}
30
31pub async fn ensure_env_running(env: &str) -> Result<(), EnvDaemonError> {
32    EnvRegistry.ensure_running(env).await
33}
34
35pub async fn bootstrap_env_daemon(env_spec: &EnvSpec) -> Result<(), EnvDaemonError> {
36    EnvRegistry.bootstrap(env_spec).await
37}
38
39#[derive(Debug, Clone, Copy, Default)]
40pub struct EnvRegistry;
41
42impl EnvRegistry {
43    pub async fn list_envs(self) -> Result<Vec<(String, PathBuf, bool)>, EnvDaemonError> {
44        let root = env_root();
45        std::fs::create_dir_all(&root)?;
46        let mut out = Vec::new();
47        for entry in std::fs::read_dir(root)? {
48            let entry = entry?;
49            let path = entry.path();
50            if path.extension().and_then(|value| value.to_str()) != Some("sock") {
51                continue;
52            }
53            let Some(stem) = path.file_stem().and_then(|value| value.to_str()) else {
54                continue;
55            };
56            let running = can_connect(&path).await;
57            out.push((stem.to_string(), path, running));
58        }
59        out.sort_by(|a, b| a.0.cmp(&b.0));
60        Ok(out)
61    }
62
63    pub async fn ensure_running(self, env: &str) -> Result<(), EnvDaemonError> {
64        std::fs::create_dir_all(env_root())?;
65        let socket = socket_path(env);
66        if can_connect(&socket).await {
67            return Ok(());
68        }
69        Err(EnvDaemonError::NotRunning(env.to_string()))
70    }
71
72    pub async fn bootstrap(self, env_spec: &EnvSpec) -> Result<(), EnvDaemonError> {
73        std::fs::create_dir_all(env_root())?;
74        std::fs::create_dir_all(bootstrap_dir())?;
75        let socket = socket_path(&env_spec.name);
76        if can_connect(&socket).await {
77            return Err(EnvDaemonError::AlreadyRunning(env_spec.name.clone()));
78        }
79
80        let bootstrap_path =
81            bootstrap_dir().join(format!("{}-{}.json", env_spec.name, uuid::Uuid::new_v4()));
82        write_env_spec(&bootstrap_path, env_spec).map_err(EnvDaemonError::Request)?;
83        let mut child = self.spawn_env_daemon(&env_spec.name, &bootstrap_path)?;
84
85        let timeout = Duration::from_secs(5);
86        let mut waited = Duration::ZERO;
87        while waited < timeout {
88            if can_connect(&socket).await {
89                let _ = std::fs::remove_file(&bootstrap_path);
90                return Ok(());
91            }
92            if let Some(status) = child.try_wait()? {
93                let stderr = if let Some(mut pipe) = child.stderr.take() {
94                    tokio::task::spawn_blocking(move || {
95                        let mut stderr = String::new();
96                        let _ = pipe.read_to_string(&mut stderr);
97                        stderr
98                    })
99                    .await
100                    .unwrap_or_else(|_| String::new())
101                } else {
102                    String::new()
103                };
104                let _ = std::fs::remove_file(&bootstrap_path);
105                let details = stderr.trim();
106                return Err(EnvDaemonError::StartupFailed(if details.is_empty() {
107                    format!("env daemon exited with status {status}")
108                } else {
109                    details.to_string()
110                }));
111            }
112            sleep(Duration::from_millis(100)).await;
113            waited += Duration::from_millis(100);
114        }
115        cleanup_bootstrap_timeout(&mut child, &bootstrap_path);
116        Err(EnvDaemonError::StartupTimeout)
117    }
118
119    fn spawn_env_daemon(
120        self,
121        _env: &str,
122        bootstrap_path: &Path,
123    ) -> Result<std::process::Child, EnvDaemonError> {
124        let exe = std::env::current_exe()?;
125        let mut command = std::process::Command::new(exe);
126        command
127            .arg("__internal")
128            .arg("env-daemon")
129            .arg("--env-spec-path")
130            .arg(bootstrap_path)
131            .stdout(Stdio::null())
132            .stderr(Stdio::piped());
133        let child = command.spawn()?;
134        Ok(child)
135    }
136}
137
/// Best-effort teardown for a daemon that did not come up.
///
/// Deletes the bootstrap spec file, kills `child`, then waits on it so the process is
/// reaped. Every step runs regardless of whether the previous one failed, and all errors
/// are ignored.
fn cleanup_bootstrap_timeout(child: &mut std::process::Child, bootstrap_path: &Path) {
    std::fs::remove_file(bootstrap_path).ok();
    child.kill().ok();
    child.wait().ok();
}
143
144async fn can_connect(socket: &Path) -> bool {
145    UnixStream::connect(socket).await.is_ok()
146}
147
#[cfg(test)]
mod tests {
    use super::cleanup_bootstrap_timeout;
    use std::process::{Command, Stdio};

    /// The timeout cleanup must delete the bootstrap file and leave the child reaped.
    #[cfg(unix)]
    #[test]
    fn timeout_cleanup_kills_child_and_removes_bootstrap_file() {
        let spec_file = tempfile::NamedTempFile::new().expect("temp bootstrap file");
        let spec_path = spec_file.path().to_path_buf();

        // A long-running child stands in for a daemon that never opens its socket.
        let mut launcher = Command::new("sh");
        launcher
            .args(["-c", "sleep 30"])
            .stdout(Stdio::null())
            .stderr(Stdio::null());
        let mut sleeper = launcher.spawn().expect("sleep child should spawn");

        cleanup_bootstrap_timeout(&mut sleeper, &spec_path);

        let file_still_present = spec_path.exists();
        assert!(
            !file_still_present,
            "bootstrap file should be removed during timeout cleanup"
        );

        let exit_status = sleeper
            .try_wait()
            .expect("child status should be queryable");
        assert!(exit_status.is_some(), "timed-out child should be reaped");
    }
}