Skip to main content

lean_ctx/core/
startup_guard.rs

1use std::io::Write as _;
2use std::path::PathBuf;
3use std::time::Duration;
4
/// Length, in seconds, of the look-back window: recorded start timestamps
/// older than `now - CRASH_LOOP_WINDOW_SECS` are discarded by `crash_loop_backoff`.
const CRASH_LOOP_WINDOW_SECS: u64 = 30;
/// Number of starts inside the window that are tolerated; back-off only kicks
/// in once the count is strictly greater than this.
const CRASH_LOOP_THRESHOLD: usize = 5;
/// Upper bound, in seconds, on a single back-off sleep.
const CRASH_LOOP_MAX_BACKOFF_SECS: u64 = 60;
8
/// Handle for a lock file on disk.
///
/// The file at `path` is deleted when the guard goes out of scope.
pub struct StartupLockGuard {
    /// Location of the lock file owned by this guard.
    path: PathBuf,
}

impl StartupLockGuard {
    /// Rewrites the lock file with the current Unix time in milliseconds.
    ///
    /// Rewriting the file refreshes its mtime so stale eviction doesn't kill
    /// active long-running processes. The call is best-effort: a missing file
    /// is not recreated and any I/O error is ignored.
    pub fn touch(&self) {
        let stamp_ms = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
            Ok(since_epoch) => since_epoch.as_millis() as u64,
            // System clock is set before the Unix epoch: record zero.
            Err(_) => 0,
        };

        let opened = std::fs::OpenOptions::new()
            .write(true)
            .truncate(true)
            .open(&self.path);
        let Ok(mut file) = opened else {
            return;
        };
        let _ = writeln!(file, "{stamp_ms}");
    }
}

impl Drop for StartupLockGuard {
    /// Releases the lock by deleting the lock file; errors are ignored.
    fn drop(&mut self) {
        let lock_file = &self.path;
        let _ = std::fs::remove_file(lock_file);
    }
}
35
/// Produces a file-name-safe version of `name`.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept; every other character
/// (including non-ASCII ones) is replaced by a single `_`.
fn sanitize_lock_name(name: &str) -> String {
    let mut safe = String::with_capacity(name.len());
    for ch in name.chars() {
        let allowed = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        safe.push(if allowed { ch } else { '_' });
    }
    safe
}
47
48/// Best-effort cross-process lock (create_new + stale eviction).
49///
50/// Returns `None` if the data dir can't be resolved or if the lock can't be acquired
51/// within `timeout`.
52pub fn try_acquire_lock(
53    name: &str,
54    timeout: Duration,
55    stale_after: Duration,
56) -> Option<StartupLockGuard> {
57    let dir = crate::core::data_dir::lean_ctx_data_dir().ok()?;
58    let _ = std::fs::create_dir_all(&dir);
59
60    let name = sanitize_lock_name(name);
61    let path = dir.join(format!(".{name}.lock"));
62
63    let deadline = std::time::Instant::now().checked_add(timeout)?;
64    let mut sleep_ms: u64 = 10;
65
66    loop {
67        if std::fs::OpenOptions::new()
68            .write(true)
69            .create_new(true)
70            .open(&path)
71            .is_ok()
72        {
73            return Some(StartupLockGuard { path });
74        }
75
76        if let Ok(meta) = std::fs::metadata(&path) {
77            if let Ok(modified) = meta.modified() {
78                if modified
79                    .elapsed()
80                    .unwrap_or_default()
81                    .saturating_sub(stale_after)
82                    > Duration::from_secs(0)
83                {
84                    let _ = std::fs::remove_file(&path);
85                }
86            }
87        }
88
89        if std::time::Instant::now() >= deadline {
90            return None;
91        }
92
93        std::thread::sleep(Duration::from_millis(sleep_ms));
94        sleep_ms = (sleep_ms.saturating_mul(2)).min(120);
95    }
96}
97
98/// Detects rapid restart loops (e.g., IDE keeps respawning a crashing MCP server).
99/// Records each startup timestamp; if too many happen within the window, sleeps
100/// with exponential backoff to break the loop and avoid host degradation.
101pub fn crash_loop_backoff(process_name: &str) {
102    let Some(dir) = crate::core::data_dir::lean_ctx_data_dir().ok() else {
103        return;
104    };
105    let _ = std::fs::create_dir_all(&dir);
106    let ts_path = dir.join(format!(".{}-starts.log", sanitize_lock_name(process_name)));
107
108    let now = std::time::SystemTime::now()
109        .duration_since(std::time::UNIX_EPOCH)
110        .unwrap_or_default()
111        .as_secs();
112
113    let cutoff = now.saturating_sub(CRASH_LOOP_WINDOW_SECS);
114
115    let mut recent: Vec<u64> = std::fs::read_to_string(&ts_path)
116        .unwrap_or_default()
117        .lines()
118        .filter_map(|l| l.trim().parse::<u64>().ok())
119        .filter(|&ts| ts >= cutoff)
120        .collect();
121    recent.push(now);
122
123    if let Ok(mut f) = std::fs::File::create(&ts_path) {
124        for ts in &recent {
125            let _ = writeln!(f, "{ts}");
126        }
127    }
128
129    if recent.len() > CRASH_LOOP_THRESHOLD {
130        let restarts_over = recent.len() - CRASH_LOOP_THRESHOLD;
131        let backoff_secs =
132            (2u64.saturating_pow(restarts_over as u32)).min(CRASH_LOOP_MAX_BACKOFF_SECS);
133        eprintln!(
134            "lean-ctx: crash-loop detected ({} starts in {CRASH_LOOP_WINDOW_SECS}s), \
135             backing off {backoff_secs}s",
136            recent.len()
137        );
138        std::thread::sleep(Duration::from_secs(backoff_secs));
139    }
140}
141
#[cfg(test)]
mod tests {
    use super::*;

    /// Sets an environment variable for the lifetime of the guard and puts the
    /// previous state (value or absence) back on drop.
    struct EnvVarGuard {
        key: &'static str,
        // Stored as `OsString` so a non-UTF-8 previous value is restored
        // verbatim instead of being mistaken for "unset".
        prev: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        fn set(key: &'static str, value: &std::path::Path) -> Self {
            let prev = std::env::var_os(key);
            std::env::set_var(key, value);
            Self { key, prev }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            match self.prev.take() {
                Some(v) => std::env::set_var(self.key, v),
                None => std::env::remove_var(self.key),
            }
        }
    }

    #[test]
    fn sanitize_replaces_unsafe_characters() {
        assert_eq!(sanitize_lock_name("unit-test_1"), "unit-test_1");
        assert_eq!(sanitize_lock_name("a/b c.d"), "a_b_c_d");
        assert_eq!(sanitize_lock_name("né"), "n_");
        assert_eq!(sanitize_lock_name(""), "");
    }

    #[test]
    fn lock_acquire_and_release() {
        // NOTE(review): `test_env_lock` presumably serializes tests that mutate
        // process-wide environment variables — confirm in `data_dir`.
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        let g = try_acquire_lock(
            "unit-test",
            Duration::from_millis(200),
            Duration::from_secs(30),
        );
        assert!(g.is_some());

        let lock_path = dir.path().join(".unit-test.lock");
        assert!(lock_path.exists());

        // Dropping the guard must delete the lock file.
        drop(g);
        assert!(!lock_path.exists());
    }

    #[test]
    fn lock_times_out_while_held() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        let g1 = try_acquire_lock(
            "unit-test-2",
            Duration::from_millis(200),
            Duration::from_secs(30),
        )
        .expect("first lock should acquire");
        // The lock is held and far from stale, so a second attempt must time out.
        let g2 = try_acquire_lock(
            "unit-test-2",
            Duration::from_millis(60),
            Duration::from_secs(30),
        );
        assert!(g2.is_none());

        drop(g1);
        let g3 = try_acquire_lock(
            "unit-test-2",
            Duration::from_millis(200),
            Duration::from_secs(30),
        );
        assert!(g3.is_some());
    }

    #[test]
    fn stale_lock_is_evicted() {
        let _env = crate::core::data_dir::test_env_lock();
        let dir = tempfile::tempdir().unwrap();
        let _guard = EnvVarGuard::set("LEAN_CTX_DATA_DIR", dir.path());

        // Simulate a lock left behind by a process that never released it.
        let lock_path = dir.path().join(".unit-test-3.lock");
        std::fs::write(&lock_path, b"").unwrap();
        // Let the file age past the (deliberately tiny) staleness threshold.
        std::thread::sleep(Duration::from_millis(30));

        let g = try_acquire_lock(
            "unit-test-3",
            Duration::from_millis(500),
            Duration::from_millis(10),
        );
        assert!(g.is_some());
        assert!(lock_path.exists());

        drop(g);
        assert!(!lock_path.exists());
    }
}
215}