lean_ctx/core/
startup_guard.rs1use std::io::Write as _;
2use std::path::PathBuf;
3use std::time::Duration;
4
/// Length, in seconds, of the sliding window over which `crash_loop_backoff` counts starts.
pub const CRASH_LOOP_WINDOW_SECS: u64 = 60;
/// Number of starts inside the window that is tolerated; `crash_loop_backoff` only sleeps once
/// the count exceeds this value.
pub const CRASH_LOOP_THRESHOLD: usize = 8;
/// Upper bound, in seconds, on the sleep performed by `crash_loop_backoff`.
pub const CRASH_LOOP_MAX_BACKOFF_SECS: u64 = 30;

/// Process name of the MCP server. Presumably passed as `process_name` to the crash-loop
/// helpers in this file — TODO confirm against callers.
pub const MCP_PROCESS_NAME: &str = "mcp-server";
10
11pub fn crash_loop_log_path(process_name: &str) -> Option<PathBuf> {
12 crate::core::data_dir::lean_ctx_data_dir()
13 .ok()
14 .map(|dir| dir.join(format!(".{}-starts.log", sanitize_lock_name(process_name))))
15}
16
/// Handle for a lock file on disk; dropping the handle deletes the file.
pub struct StartupLockGuard {
    /// Location of the lock file owned by this guard.
    path: PathBuf,
}

impl StartupLockGuard {
    /// Rewrites the lock file with the current Unix time in milliseconds.
    ///
    /// The file is opened for writing with truncation but without `create`, so a lock file
    /// that no longer exists stays absent. Every I/O error is ignored.
    pub fn touch(&self) {
        let stamp_ms = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            // A clock set before the epoch is recorded as 0.
            .map_or(0, |since_epoch| since_epoch.as_millis() as u64);

        let opened = std::fs::OpenOptions::new()
            .write(true)
            .truncate(true)
            .open(&self.path);
        if let Ok(mut file) = opened {
            let _ = writeln!(file, "{stamp_ms}");
        }
    }
}

impl Drop for StartupLockGuard {
    /// Releases the lock by deleting its file; a failed removal is ignored.
    fn drop(&mut self) {
        let _ = std::fs::remove_file(&self.path);
    }
}
43
/// Makes `name` safe to embed in a file name.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept as they are; every other character
/// (including each non-ASCII character) is replaced by a single `_`.
fn sanitize_lock_name(name: &str) -> String {
    let mut cleaned = String::with_capacity(name.len());
    for ch in name.chars() {
        let allowed = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        cleaned.push(if allowed { ch } else { '_' });
    }
    cleaned
}
55
56pub fn try_acquire_lock(
61 name: &str,
62 timeout: Duration,
63 stale_after: Duration,
64) -> Option<StartupLockGuard> {
65 let dir = crate::core::data_dir::lean_ctx_data_dir().ok()?;
66 let _ = std::fs::create_dir_all(&dir);
67
68 let name = sanitize_lock_name(name);
69 let path = dir.join(format!(".{name}.lock"));
70
71 let deadline = std::time::Instant::now().checked_add(timeout)?;
72 let mut sleep_ms: u64 = 10;
73
74 loop {
75 if std::fs::OpenOptions::new()
76 .write(true)
77 .create_new(true)
78 .open(&path)
79 .is_ok()
80 {
81 return Some(StartupLockGuard { path });
82 }
83
84 if let Ok(meta) = std::fs::metadata(&path) {
85 if let Ok(modified) = meta.modified() {
86 if modified
87 .elapsed()
88 .unwrap_or_default()
89 .saturating_sub(stale_after)
90 > Duration::from_secs(0)
91 {
92 let _ = std::fs::remove_file(&path);
93 }
94 }
95 }
96
97 if std::time::Instant::now() >= deadline {
98 return None;
99 }
100
101 std::thread::sleep(Duration::from_millis(sleep_ms));
102 sleep_ms = (sleep_ms.saturating_mul(2)).min(120);
103 }
104}
105
106pub fn crash_loop_backoff(process_name: &str) {
110 let Some(dir) = crate::core::data_dir::lean_ctx_data_dir().ok() else {
111 return;
112 };
113 let _ = std::fs::create_dir_all(&dir);
114 let ts_path = dir.join(format!(".{}-starts.log", sanitize_lock_name(process_name)));
115
116 let now = std::time::SystemTime::now()
117 .duration_since(std::time::UNIX_EPOCH)
118 .unwrap_or_default()
119 .as_secs();
120
121 let cutoff = now.saturating_sub(CRASH_LOOP_WINDOW_SECS);
122
123 let mut recent: Vec<u64> = std::fs::read_to_string(&ts_path)
124 .unwrap_or_default()
125 .lines()
126 .filter_map(|l| l.trim().parse::<u64>().ok())
127 .filter(|&ts| ts >= cutoff)
128 .collect();
129 recent.push(now);
130
131 if let Ok(mut f) = std::fs::File::create(&ts_path) {
132 for ts in &recent {
133 let _ = writeln!(f, "{ts}");
134 }
135 }
136
137 if recent.len() > CRASH_LOOP_THRESHOLD {
138 let restarts_over = recent.len() - CRASH_LOOP_THRESHOLD;
139 let backoff_secs =
140 (2u64.saturating_pow(restarts_over as u32)).min(CRASH_LOOP_MAX_BACKOFF_SECS);
141 let msg = format!(
142 "lean-ctx: crash-loop protection — {process_name} started {} times in {CRASH_LOOP_WINDOW_SECS}s, \
143 waiting {backoff_secs}s before accepting connections. \
144 If your IDE is slow to initialize, this is normal.",
145 recent.len()
146 );
147 tracing::warn!("{msg}");
148 eprintln!("{msg}");
149 std::thread::sleep(Duration::from_secs(backoff_secs));
150 }
151}
152
153pub fn reset_crash_loop(process_name: &str) {
155 let Some(dir) = crate::core::data_dir::lean_ctx_data_dir().ok() else {
156 return;
157 };
158 let ts_path = dir.join(format!(".{}-starts.log", sanitize_lock_name(process_name)));
159 let _ = std::fs::remove_file(&ts_path);
160}
161
162#[cfg(test)]
163mod tests {
164 use super::*;
165
166 struct EnvVarGuard {
167 key: &'static str,
168 prev: Option<String>,
169 }
170
171 impl EnvVarGuard {
172 fn set(key: &'static str, value: &std::path::Path) -> Self {
173 let prev = std::env::var(key).ok();
174 std::env::set_var(key, value);
175 Self { key, prev }
176 }
177 }
178
179 impl Drop for EnvVarGuard {
180 fn drop(&mut self) {
181 match self.prev.as_deref() {
182 Some(v) => std::env::set_var(self.key, v),
183 None => std::env::remove_var(self.key),
184 }
185 }
186 }
187
188 #[test]
189 fn lock_acquire_and_release() {
190 let _env = crate::core::data_dir::test_env_lock();
191 let dir = tempfile::tempdir().unwrap();
192 let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());
193
194 let g = try_acquire_lock(
195 "unit-test",
196 Duration::from_millis(200),
197 Duration::from_secs(30),
198 );
199 assert!(g.is_some());
200
201 let lock_path = dir.path().join(".unit-test.lock");
202 assert!(lock_path.exists());
203
204 drop(g);
205 assert!(!lock_path.exists());
206 }
207
208 #[test]
209 fn lock_times_out_while_held() {
210 let _env = crate::core::data_dir::test_env_lock();
211 let dir = tempfile::tempdir().unwrap();
212 let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());
213
214 let g1 = try_acquire_lock(
215 "unit-test-2",
216 Duration::from_millis(200),
217 Duration::from_secs(30),
218 )
219 .expect("first lock should acquire");
220 let g2 = try_acquire_lock(
221 "unit-test-2",
222 Duration::from_millis(60),
223 Duration::from_secs(30),
224 );
225 assert!(g2.is_none());
226
227 drop(g1);
228 let g3 = try_acquire_lock(
229 "unit-test-2",
230 Duration::from_millis(200),
231 Duration::from_secs(30),
232 );
233 assert!(g3.is_some());
234 }
235
236 #[test]
237 fn crash_loop_thresholds_are_resilient() {
238 let threshold = CRASH_LOOP_THRESHOLD;
239 let window = CRASH_LOOP_WINDOW_SECS;
240 let backoff = CRASH_LOOP_MAX_BACKOFF_SECS;
241 assert!(
242 threshold >= 8,
243 "threshold must tolerate IDE restart patterns (was {threshold})"
244 );
245 assert!(
246 window >= 60,
247 "window must cover slow IDE startup (was {window}s)"
248 );
249 assert!(
250 backoff <= 30,
251 "max backoff must not be too aggressive (was {backoff}s)"
252 );
253 }
254
255 #[test]
256 fn crash_loop_backoff_under_threshold_no_sleep() {
257 let _env = crate::core::data_dir::test_env_lock();
258 let dir = tempfile::tempdir().unwrap();
259 let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());
260
261 let start = std::time::Instant::now();
262 for _ in 0..CRASH_LOOP_THRESHOLD {
263 crash_loop_backoff("test-no-sleep");
264 }
265 assert!(
266 start.elapsed() < Duration::from_secs(1),
267 "under threshold should not sleep"
268 );
269 }
270
271 #[test]
272 fn reset_crash_loop_clears_history() {
273 let _env = crate::core::data_dir::test_env_lock();
274 let dir = tempfile::tempdir().unwrap();
275 let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());
276
277 for _ in 0..5 {
278 crash_loop_backoff("test-reset");
279 }
280 let log_path = dir.path().join(".test-reset-starts.log");
281 assert!(log_path.exists(), "crash loop log should exist after calls");
282
283 reset_crash_loop("test-reset");
284 assert!(
285 !log_path.exists(),
286 "crash loop log should be removed after reset"
287 );
288 }
289
290 #[test]
291 fn reset_crash_loop_nonexistent_is_noop() {
292 let _env = crate::core::data_dir::test_env_lock();
293 let dir = tempfile::tempdir().unwrap();
294 let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());
295
296 reset_crash_loop("never-existed");
297 }
298
299 #[test]
300 fn crash_loop_log_only_keeps_recent_entries() {
301 let _env = crate::core::data_dir::test_env_lock();
302 let dir = tempfile::tempdir().unwrap();
303 let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());
304
305 let log_path = dir.path().join(".test-prune-starts.log");
306 let old_ts = 1000u64;
307 std::fs::write(&log_path, format!("{old_ts}\n")).unwrap();
308
309 crash_loop_backoff("test-prune");
310
311 let content = std::fs::read_to_string(&log_path).unwrap();
312 let lines: Vec<&str> = content.lines().collect();
313 assert_eq!(
314 lines.len(),
315 1,
316 "old entry should be pruned, only current remains"
317 );
318 let ts: u64 = lines[0].parse().unwrap();
319 assert!(ts > old_ts, "remaining entry should be recent");
320 }
321
322 #[test]
323 fn sanitize_lock_name_strips_special_chars() {
324 assert_eq!(sanitize_lock_name("mcp-stdio"), "mcp-stdio");
325 assert_eq!(sanitize_lock_name("mcp_http"), "mcp_http");
326 assert_eq!(sanitize_lock_name("a/b\\c:d"), "a_b_c_d");
327 assert_eq!(sanitize_lock_name("name with spaces"), "name_with_spaces");
328 }
329
330 #[test]
331 fn crash_loop_backoff_formula_correctness() {
332 assert_eq!(
333 2u64.saturating_pow(1).min(CRASH_LOOP_MAX_BACKOFF_SECS),
334 2,
335 "1 over threshold = 2s backoff"
336 );
337 assert_eq!(
338 2u64.saturating_pow(2).min(CRASH_LOOP_MAX_BACKOFF_SECS),
339 4,
340 "2 over threshold = 4s backoff"
341 );
342 assert_eq!(
343 2u64.saturating_pow(3).min(CRASH_LOOP_MAX_BACKOFF_SECS),
344 8,
345 "3 over threshold = 8s backoff"
346 );
347 assert_eq!(
348 2u64.saturating_pow(4).min(CRASH_LOOP_MAX_BACKOFF_SECS),
349 16,
350 "4 over threshold = 16s backoff"
351 );
352 assert_eq!(
353 2u64.saturating_pow(5).min(CRASH_LOOP_MAX_BACKOFF_SECS),
354 30,
355 "5 over threshold = capped at 30s"
356 );
357 assert_eq!(
358 2u64.saturating_pow(10).min(CRASH_LOOP_MAX_BACKOFF_SECS),
359 30,
360 "10 over threshold = still capped at 30s"
361 );
362 }
363}