Skip to main content

codelens_engine/lsp/
readiness.rs

1//! Per-session readiness tracking.
2//!
3//! LSP servers complete their `initialize` handshake in tens of
4//! milliseconds, but real workspace indexing (rust-analyzer's project
5//! model, pyright's module graph, tsserver's file-system walk) can
6//! take 15–60 seconds. Pre-P0-4 harnesses papered over this with a
7//! fixed `sleep 45` after `prepare_harness_session` — honest but
8//! wasteful: every bench run paid the worst-case wait regardless of
9//! how quickly indexing actually finished, and production agent
10//! sessions had no signal at all.
11//!
12//! This module exposes a cheap, lock-free readiness snapshot per LSP
13//! session. The pool records:
14//!
//! - `started_at` — the monotonic `Instant` captured when the session was spawned.
16//! - `ms_to_first_response` — elapsed milliseconds when any LSP call
17//!   first returned `Ok`. Usually the bootstrap `workspace/symbol`
18//!   from the auto-attach prewarm. Proves the server's handshake
19//!   completed.
20//! - `ms_to_first_nonempty` — elapsed milliseconds when a call first
21//!   returned a **non-empty** result. This is the stronger signal
22//!   that indexing has progressed far enough to serve real caller
23//!   queries: rust-analyzer and pyright both reply with `[]` while
24//!   the project is still being walked, then start returning real
25//!   hits once the module graph is populated.
//! - `response_count` / `nonempty_count` / `failure_count` — cumulative
//!   per-session counters so callers can distinguish "indexing still
//!   warming" from "server is failing every request".
29//!
30//! Reads are via `Arc<ReadinessState>` + atomics, so snapshot calls
31//! never contend with the per-session I/O mutex. That keeps the
32//! downstream MCP `get_lsp_readiness` handler cheap enough for a
33//! 500 ms polling loop to be the canonical wait-for-ready mechanism.
34
35use std::sync::atomic::{AtomicU64, Ordering};
36use std::time::Instant;
37
/// Readiness bookkeeping for one LSP session.
///
/// One instance is built when the session is spawned and lives until
/// the session is dropped. The session side records outcomes through a
/// shared reference while snapshot readers load the same atomics, so
/// neither side needs a lock.
///
/// For the three `ms_to_*` fields the value `0` means "not recorded
/// yet"; recorded values are always at least `1`.
#[derive(Debug)]
pub struct ReadinessState {
    /// Command string recorded for this session (presumably the LSP
    /// server executable — confirm against the spawning code).
    pub command: String,
    /// Arguments recorded alongside `command`.
    pub args: Vec<String>,
    /// Reference point for every elapsed-millisecond value below.
    started_at: Instant,
    /// Elapsed ms at the first successful response, or `0` if none yet.
    ms_to_first_response: AtomicU64,
    /// Elapsed ms at the first non-empty successful response, or `0`.
    ms_to_first_nonempty: AtomicU64,
    /// Elapsed ms at the most recent successful response, or `0`.
    ms_to_last_response: AtomicU64,
    /// Number of successful responses, empty or not.
    response_count: AtomicU64,
    /// Number of successful responses that carried results.
    nonempty_count: AtomicU64,
    /// Number of failed calls.
    failure_count: AtomicU64,
}
53
54impl ReadinessState {
55    pub(super) fn new(command: String, args: Vec<String>) -> Self {
56        Self {
57            command,
58            args,
59            started_at: Instant::now(),
60            ms_to_first_response: AtomicU64::new(0),
61            ms_to_first_nonempty: AtomicU64::new(0),
62            ms_to_last_response: AtomicU64::new(0),
63            response_count: AtomicU64::new(0),
64            nonempty_count: AtomicU64::new(0),
65            failure_count: AtomicU64::new(0),
66        }
67    }
68
69    /// Record a successful LSP response. `was_nonempty` is the caller's
70    /// domain judgement (e.g. `references.len() > 0`,
71    /// `workspace_symbols.len() > 0`). A response with zero results is
72    /// still meaningful — it proves the server handshake is alive —
73    /// but indexing-readiness requires at least one hit.
74    pub(super) fn record_ok(&self, was_nonempty: bool) {
75        // `max(1)` so a response at exactly t=0 (test mock) is still
76        // distinguishable from "no response yet".
77        let elapsed = self.started_at.elapsed().as_millis() as u64;
78        let ms = elapsed.max(1);
79
80        // compare_exchange with expected=0 gives us a one-shot latch
81        // for the "first" milestones. Subsequent calls silently no-op.
82        let _ =
83            self.ms_to_first_response
84                .compare_exchange(0, ms, Ordering::Relaxed, Ordering::Relaxed);
85        if was_nonempty {
86            let _ = self.ms_to_first_nonempty.compare_exchange(
87                0,
88                ms,
89                Ordering::Relaxed,
90                Ordering::Relaxed,
91            );
92            self.nonempty_count.fetch_add(1, Ordering::Relaxed);
93        }
94        self.ms_to_last_response.store(ms, Ordering::Relaxed);
95        self.response_count.fetch_add(1, Ordering::Relaxed);
96    }
97
98    /// Record a failed LSP call. Failures bump a counter so callers
99    /// can treat a session with `failure_count > 0 && response_count == 0`
100    /// as unhealthy rather than warming.
101    pub(super) fn record_failure(&self) {
102        self.failure_count.fetch_add(1, Ordering::Relaxed);
103    }
104
105    pub fn snapshot(&self) -> ReadinessSnapshot {
106        let read = |a: &AtomicU64| a.load(Ordering::Relaxed);
107        let opt = |v: u64| if v == 0 { None } else { Some(v) };
108        ReadinessSnapshot {
109            command: self.command.clone(),
110            args: self.args.clone(),
111            elapsed_ms: self.started_at.elapsed().as_millis() as u64,
112            ms_to_first_response: opt(read(&self.ms_to_first_response)),
113            ms_to_first_nonempty: opt(read(&self.ms_to_first_nonempty)),
114            ms_to_last_response: opt(read(&self.ms_to_last_response)),
115            response_count: read(&self.response_count),
116            nonempty_count: read(&self.nonempty_count),
117            failure_count: read(&self.failure_count),
118        }
119    }
120}
121
122/// Plain-old-data readiness view for callers (MCP handlers, bench
123/// scripts). All milliseconds are relative to `session.started_at`.
124#[derive(Debug, Clone, serde::Serialize)]
125pub struct ReadinessSnapshot {
126    pub command: String,
127    pub args: Vec<String>,
128    pub elapsed_ms: u64,
129    pub ms_to_first_response: Option<u64>,
130    pub ms_to_first_nonempty: Option<u64>,
131    pub ms_to_last_response: Option<u64>,
132    pub response_count: u64,
133    pub nonempty_count: u64,
134    pub failure_count: u64,
135}
136
137impl ReadinessSnapshot {
138    /// A session is **ready** when it has returned at least one
139    /// non-empty response. Zero-result responses are not enough —
140    /// pyright and rust-analyzer both emit `[]` while the project is
141    /// being walked, and an agent that unblocks on the first empty
142    /// reply ends up issuing the real query before indexing is done
143    /// (which is the failure mode P0-4 was created to stop).
144    pub fn is_ready(&self) -> bool {
145        self.ms_to_first_nonempty.is_some()
146    }
147
148    /// A session is **alive** when its handshake round-tripped at
149    /// least once. Alive-but-not-ready means the LSP is up but has
150    /// not produced usable data yet.
151    pub fn is_alive(&self) -> bool {
152        self.ms_to_first_response.is_some()
153    }
154}