lean_ctx/core/
startup_guard.rs1use std::io::Write as _;
2use std::path::PathBuf;
3use std::time::Duration;
4
/// Width, in seconds, of the look-back window used when counting recent starts.
pub const CRASH_LOOP_WINDOW_SECS: u64 = 60;
/// Number of starts inside the window that are tolerated before any backoff sleep happens.
pub const CRASH_LOOP_THRESHOLD: usize = 8;
/// Upper bound, in seconds, for a single crash-loop backoff sleep.
pub const CRASH_LOOP_MAX_BACKOFF_SECS: u64 = 30;
8
/// Name identifying the MCP server process.
///
/// NOTE(review): presumably passed as the `process_name` argument of the crash-loop
/// helpers in this module — confirm against the call sites.
pub const MCP_PROCESS_NAME: &str = "mcp-server";
10
11pub fn crash_loop_log_path(process_name: &str) -> Option<PathBuf> {
12 crate::core::data_dir::lean_ctx_data_dir()
13 .ok()
14 .map(|dir| dir.join(format!(".{}-starts.log", sanitize_lock_name(process_name))))
15}
16
/// Handle for a startup lock file that this process created.
///
/// The guard only remembers where the lock file lives; the file is deleted again when the
/// guard is dropped, so the value has to be kept alive for as long as the lock is needed.
#[derive(Debug)]
#[must_use = "the lock file is removed as soon as the guard is dropped"]
pub struct StartupLockGuard {
    /// Location of the lock file owned by this guard.
    path: PathBuf,
}
20
21impl StartupLockGuard {
22 pub fn touch(&self) {
23 if let Ok(mut f) = std::fs::OpenOptions::new()
27 .write(true)
28 .truncate(true)
29 .open(&self.path)
30 {
31 let _ = writeln!(f, "{}", std::process::id());
32 }
33 }
34}
35
36fn lock_is_reclaimable(path: &std::path::Path, stale_after: Duration) -> bool {
44 if let Ok(content) = std::fs::read_to_string(path) {
45 if let Some(pid) = content
46 .lines()
47 .next()
48 .and_then(|l| l.trim().parse::<u32>().ok())
49 {
50 if !crate::ipc::process::is_alive(pid) {
51 return true;
52 }
53 }
54 }
55 if let Ok(meta) = std::fs::metadata(path) {
56 if let Ok(modified) = meta.modified() {
57 return modified.elapsed().unwrap_or_default() > stale_after;
58 }
59 }
60 false
61}
62
63impl Drop for StartupLockGuard {
64 fn drop(&mut self) {
65 let _ = std::fs::remove_file(&self.path);
66 }
67}
68
/// Turns an arbitrary name into something safe to embed in a file name.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept as they are; every other character
/// (including each non-ASCII character) is replaced by a single `_`. The number of
/// characters is therefore preserved.
fn sanitize_lock_name(name: &str) -> String {
    let mut cleaned = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        cleaned.push(if keep { ch } else { '_' });
    }
    cleaned
}
80
81pub fn try_acquire_lock(
86 name: &str,
87 timeout: Duration,
88 stale_after: Duration,
89) -> Option<StartupLockGuard> {
90 let dir = crate::core::data_dir::lean_ctx_data_dir().ok()?;
91 let _ = std::fs::create_dir_all(&dir);
92
93 let name = sanitize_lock_name(name);
94 let path = dir.join(format!(".{name}.lock"));
95
96 let deadline = std::time::Instant::now().checked_add(timeout)?;
97 let mut sleep_ms: u64 = 10;
98
99 loop {
100 match std::fs::OpenOptions::new()
101 .write(true)
102 .create_new(true)
103 .open(&path)
104 {
105 Ok(mut f) => {
106 let _ = writeln!(f, "{}", std::process::id());
109 return Some(StartupLockGuard { path });
110 }
111 Err(_) => {
112 if lock_is_reclaimable(&path, stale_after) {
113 let _ = std::fs::remove_file(&path);
114 }
115 }
116 }
117
118 if std::time::Instant::now() >= deadline {
119 return None;
120 }
121
122 std::thread::sleep(Duration::from_millis(sleep_ms));
123 sleep_ms = (sleep_ms.saturating_mul(2)).min(120);
124 }
125}
126
127pub fn crash_loop_backoff(process_name: &str) {
131 let Some(dir) = crate::core::data_dir::lean_ctx_data_dir().ok() else {
132 return;
133 };
134 let _ = std::fs::create_dir_all(&dir);
135 let ts_path = dir.join(format!(".{}-starts.log", sanitize_lock_name(process_name)));
136
137 let now = std::time::SystemTime::now()
138 .duration_since(std::time::UNIX_EPOCH)
139 .unwrap_or_default()
140 .as_secs();
141
142 let cutoff = now.saturating_sub(CRASH_LOOP_WINDOW_SECS);
143
144 let mut recent: Vec<u64> = std::fs::read_to_string(&ts_path)
145 .unwrap_or_default()
146 .lines()
147 .filter_map(|l| l.trim().parse::<u64>().ok())
148 .filter(|&ts| ts >= cutoff)
149 .collect();
150 recent.push(now);
151
152 if let Ok(mut f) = std::fs::File::create(&ts_path) {
153 for ts in &recent {
154 let _ = writeln!(f, "{ts}");
155 }
156 }
157
158 if recent.len() > CRASH_LOOP_THRESHOLD {
159 let restarts_over = recent.len() - CRASH_LOOP_THRESHOLD;
160 let backoff_secs =
161 (2u64.saturating_pow(restarts_over as u32)).min(CRASH_LOOP_MAX_BACKOFF_SECS);
162 let msg = format!(
163 "lean-ctx: crash-loop protection — {process_name} started {} times in {CRASH_LOOP_WINDOW_SECS}s, \
164 waiting {backoff_secs}s before accepting connections. \
165 If your IDE is slow to initialize, this is normal.",
166 recent.len()
167 );
168 tracing::warn!("{msg}");
169 eprintln!("{msg}");
170 std::thread::sleep(Duration::from_secs(backoff_secs));
171 }
172}
173
174pub fn reset_crash_loop(process_name: &str) {
176 let Some(dir) = crate::core::data_dir::lean_ctx_data_dir().ok() else {
177 return;
178 };
179 let ts_path = dir.join(format!(".{}-starts.log", sanitize_lock_name(process_name)));
180 let _ = std::fs::remove_file(&ts_path);
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// Overrides an environment variable for the lifetime of the guard and puts the previous
    /// state back on drop.
    ///
    /// The previous value is kept as an `OsString` so that a value that is not valid UTF-8
    /// is restored rather than lost.
    struct EnvVarGuard {
        key: &'static str,
        prev: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        /// Remembers the current value of `key` (if any) and then sets it to `value`.
        fn set(key: &'static str, value: &std::path::Path) -> Self {
            let prev = std::env::var_os(key);
            std::env::set_var(key, value);
            Self { key, prev }
        }
    }

    impl Drop for EnvVarGuard {
        /// Restores the remembered value, or removes the variable if it was unset before.
        fn drop(&mut self) {
            match self.prev.take() {
                Some(v) => std::env::set_var(self.key, v),
                None => std::env::remove_var(self.key),
            }
        }
    }

    // Every test that touches the data directory first takes `test_env_lock()` —
    // presumably a process-wide guard serialising tests that mutate the environment —
    // and then points `LEAN_CTX_DATA_DIR` at a fresh temporary directory.

    /// Acquiring creates the lock file; dropping the guard removes it.
    #[test]
    fn lock_acquire_and_release() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        let g = try_acquire_lock(
            "unit-test",
            Duration::from_millis(200),
            Duration::from_secs(30),
        );
        assert!(g.is_some());

        let lock_path = dir.path().join(".unit-test.lock");
        assert!(lock_path.exists());

        drop(g);
        assert!(!lock_path.exists());
    }

    /// A second contender times out while the lock is held and succeeds once it is released.
    #[test]
    fn lock_times_out_while_held() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        let g1 = try_acquire_lock(
            "unit-test-2",
            Duration::from_millis(200),
            Duration::from_secs(30),
        )
        .expect("first lock should acquire");
        let g2 = try_acquire_lock(
            "unit-test-2",
            Duration::from_millis(60),
            Duration::from_secs(30),
        );
        assert!(g2.is_none());

        drop(g1);
        let g3 = try_acquire_lock(
            "unit-test-2",
            Duration::from_millis(200),
            Duration::from_secs(30),
        );
        assert!(g3.is_some());
    }

    /// A lock file naming a PID that is not running can be taken over well before
    /// `stale_after` has elapsed.
    #[test]
    fn dead_owner_lock_is_reclaimed_immediately() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        let lock_path = dir.path().join(".dead-owner.lock");
        // A PID close to `u32::MAX`; assumed not to belong to any live process.
        std::fs::write(&lock_path, "4294967294\n").unwrap();

        let g = try_acquire_lock(
            "dead-owner",
            Duration::from_millis(300),
            Duration::from_secs(30),
        );
        assert!(
            g.is_some(),
            "lock with a dead owner PID must be reclaimable"
        );
    }

    /// Pins lower/upper bounds on the tuning constants so they are not tightened by accident.
    #[test]
    fn crash_loop_thresholds_are_resilient() {
        let threshold = CRASH_LOOP_THRESHOLD;
        let window = CRASH_LOOP_WINDOW_SECS;
        let backoff = CRASH_LOOP_MAX_BACKOFF_SECS;
        assert!(
            threshold >= 8,
            "threshold must tolerate IDE restart patterns (was {threshold})"
        );
        assert!(
            window >= 60,
            "window must cover slow IDE startup (was {window}s)"
        );
        assert!(
            backoff <= 30,
            "max backoff must not be too aggressive (was {backoff}s)"
        );
    }

    /// Exactly `CRASH_LOOP_THRESHOLD` starts must not trigger any sleep.
    #[test]
    fn crash_loop_backoff_under_threshold_no_sleep() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        let start = std::time::Instant::now();
        for _ in 0..CRASH_LOOP_THRESHOLD {
            crash_loop_backoff("test-no-sleep");
        }
        assert!(
            start.elapsed() < Duration::from_secs(1),
            "under threshold should not sleep"
        );
    }

    /// `reset_crash_loop` deletes the log written by `crash_loop_backoff`.
    #[test]
    fn reset_crash_loop_clears_history() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        for _ in 0..5 {
            crash_loop_backoff("test-reset");
        }
        let log_path = dir.path().join(".test-reset-starts.log");
        assert!(log_path.exists(), "crash loop log should exist after calls");

        reset_crash_loop("test-reset");
        assert!(
            !log_path.exists(),
            "crash loop log should be removed after reset"
        );
    }

    /// Resetting a process that never logged a start must not panic.
    #[test]
    fn reset_crash_loop_nonexistent_is_noop() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        reset_crash_loop("never-existed");
    }

    /// Entries older than the window are dropped when the log is rewritten.
    #[test]
    fn crash_loop_log_only_keeps_recent_entries() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        let log_path = dir.path().join(".test-prune-starts.log");
        let old_ts = 1000u64;
        std::fs::write(&log_path, format!("{old_ts}\n")).unwrap();

        crash_loop_backoff("test-prune");

        let content = std::fs::read_to_string(&log_path).unwrap();
        let lines: Vec<&str> = content.lines().collect();
        assert_eq!(
            lines.len(),
            1,
            "old entry should be pruned, only current remains"
        );
        let ts: u64 = lines[0].parse().unwrap();
        assert!(ts > old_ts, "remaining entry should be recent");
    }

    /// Allowed characters pass through; everything else becomes `_`.
    #[test]
    fn sanitize_lock_name_strips_special_chars() {
        assert_eq!(sanitize_lock_name("mcp-stdio"), "mcp-stdio");
        assert_eq!(sanitize_lock_name("mcp_http"), "mcp_http");
        assert_eq!(sanitize_lock_name("a/b\\c:d"), "a_b_c_d");
        assert_eq!(sanitize_lock_name("name with spaces"), "name_with_spaces");
    }

    /// Spells out the expected backoff per number of starts over the threshold.
    ///
    /// The expression mirrors the one used by `crash_loop_backoff`; it does not call it.
    #[test]
    fn crash_loop_backoff_formula_correctness() {
        assert_eq!(
            2u64.saturating_pow(1).min(CRASH_LOOP_MAX_BACKOFF_SECS),
            2,
            "1 over threshold = 2s backoff"
        );
        assert_eq!(
            2u64.saturating_pow(2).min(CRASH_LOOP_MAX_BACKOFF_SECS),
            4,
            "2 over threshold = 4s backoff"
        );
        assert_eq!(
            2u64.saturating_pow(3).min(CRASH_LOOP_MAX_BACKOFF_SECS),
            8,
            "3 over threshold = 8s backoff"
        );
        assert_eq!(
            2u64.saturating_pow(4).min(CRASH_LOOP_MAX_BACKOFF_SECS),
            16,
            "4 over threshold = 16s backoff"
        );
        assert_eq!(
            2u64.saturating_pow(5).min(CRASH_LOOP_MAX_BACKOFF_SECS),
            30,
            "5 over threshold = capped at 30s"
        );
        assert_eq!(
            2u64.saturating_pow(10).min(CRASH_LOOP_MAX_BACKOFF_SECS),
            30,
            "10 over threshold = still capped at 30s"
        );
    }
}