Skip to main content

aft/lsp/
manager.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use crossbeam_channel::{unbounded, Receiver, RecvTimeoutError, Sender};
5use lsp_types::notification::{
6    DidChangeTextDocument, DidChangeWatchedFiles, DidCloseTextDocument, DidOpenTextDocument,
7};
8use lsp_types::{
9    DidChangeTextDocumentParams, DidChangeWatchedFilesParams, DidCloseTextDocumentParams,
10    DidOpenTextDocumentParams, FileChangeType, FileEvent, TextDocumentContentChangeEvent,
11    TextDocumentIdentifier, TextDocumentItem, VersionedTextDocumentIdentifier,
12};
13
14use crate::config::Config;
15use crate::lsp::child_registry::LspChildRegistry;
16use crate::lsp::client::{LspClient, LspEvent, ServerState};
17use crate::lsp::diagnostics::{
18    from_lsp_diagnostics, DiagnosticEntry, DiagnosticsStore, StoredDiagnostic,
19};
20use crate::lsp::document::DocumentStore;
21use crate::lsp::position::{uri_for_path, uri_to_path};
22use crate::lsp::pull_params::{
23    AftDocumentDiagnosticParams, AftDocumentDiagnosticRequest, AftWorkspaceDiagnosticParams,
24    AftWorkspaceDiagnosticRequest,
25};
26use crate::lsp::registry::{resolve_lsp_binary, servers_for_file, ServerDef, ServerKind};
27use crate::lsp::roots::{find_workspace_root, ServerKey};
28use crate::lsp::LspError;
29use crate::slog_error;
30
/// Byte budget of 2 KiB.
/// NOTE(review): the name suggests this caps how much server stderr is quoted
/// in a failure reason; the use site is not visible in this part of the file —
/// confirm before relying on that.
const STDERR_REASON_BYTES: usize = 2 * 1024;
32
/// Outcome of attempting to ensure a server is running for a single matching
/// `ServerDef`. Returned per matching server so the caller can report exactly
/// what happened to the user instead of collapsing all failures into "no
/// server".
///
/// Values of this type are also what `LspManager` caches per `ServerKey`
/// after a failed spawn, so a cached classification can be replayed (cloned)
/// on later requests without retrying the spawn.
#[derive(Debug, Clone)]
pub enum ServerAttemptResult {
    /// Server is running and ready to serve requests for this file.
    /// `server_key` identifies the (kind, workspace root) instance.
    Ok { server_key: ServerKey },
    /// No workspace root was found by walking up from the file looking for
    /// any of the server's configured root markers. `looked_for` lists the
    /// markers that were searched for.
    NoRootMarker { looked_for: Vec<String> },
    /// The server's binary could not be found on PATH (or override was
    /// missing/invalid).
    BinaryNotInstalled { binary: String },
    /// Binary was found but spawning or initializing the server failed.
    /// `reason` carries the human-readable failure description.
    SpawnFailed { binary: String, reason: String },
}
50
/// One server's attempt to handle a file: which server was tried and what
/// happened when we tried to make it available.
#[derive(Debug, Clone)]
pub struct ServerAttempt {
    /// Stable server identifier (kind ID, e.g. "pyright", "rust-analyzer").
    pub server_id: String,
    /// Server display name from the registry.
    pub server_name: String,
    /// How the attempt ended (ready, no root marker, missing binary, or
    /// spawn failure).
    pub result: ServerAttemptResult,
}
60
/// Aggregate outcome of `ensure_server_for_file_detailed`. Distinguishes:
/// - "No server registered for this file's extension" (`attempts.is_empty()`)
/// - "Servers registered but none could start" (`successful.is_empty()` but
///   `!attempts.is_empty()`)
/// - "At least one server is ready" (`!successful.is_empty()`)
///
/// The `Default` value (both vectors empty) is the starting point that
/// `ensure_server_for_file_detailed` fills in, one attempt per matching
/// server definition.
#[derive(Debug, Clone, Default)]
pub struct EnsureServerOutcomes {
    /// Server keys that are now running and ready to serve requests.
    pub successful: Vec<ServerKey>,
    /// Per-server attempt records. Empty if no server is registered for the
    /// file's extension.
    pub attempts: Vec<ServerAttempt>,
}
74
75impl EnsureServerOutcomes {
76    /// True if no server in the registry matched this file's extension.
77    pub fn no_server_registered(&self) -> bool {
78        self.attempts.is_empty()
79    }
80
81    /// True when servers matched the file's extension but none actually apply
82    /// to this project — i.e. nothing started and every attempt failed the root
83    /// marker check (e.g. oxlint registered for `.ts` with no `.oxlintrc.json`).
84    /// Distinct from `no_server_registered` (extension unsupported) and from a
85    /// real outage (binary missing / spawn failed): a missing root marker is a
86    /// filesystem fact that never changes mid-scan, so such a file will never
87    /// produce diagnostics and must not be reported as "pending".
88    pub fn only_inapplicable_root_markers(&self) -> bool {
89        self.successful.is_empty()
90            && !self.attempts.is_empty()
91            && self
92                .attempts
93                .iter()
94                .all(|attempt| matches!(attempt.result, ServerAttemptResult::NoRootMarker { .. }))
95    }
96}
97
/// Outcome of a post-edit diagnostics wait. Reports the per-server status
/// alongside the fresh diagnostics, so the response layer can build an
/// honest tri-state payload (`success: true` + `complete: bool` + named
/// gap fields per `crates/aft/src/protocol.rs`).
///
/// `diagnostics` only contains entries from servers that proved freshness.
/// Pre-edit cached entries are NEVER included — that's the whole point of
/// this type.
///
/// NOTE(review): earlier wording here described an "epoch-fallback" for
/// unversioned servers, but `post_edit_entry_is_fresh` in this file rejects
/// entries that carry no version. If the waiter uses that predicate, servers
/// that publish unversioned diagnostics end up in `pending_servers` instead —
/// confirm against the waiter implementation.
#[derive(Debug, Clone, Default)]
pub struct PostEditWaitOutcome {
    /// Diagnostics from servers whose response we verified is FOR the
    /// post-edit document version.
    pub diagnostics: Vec<StoredDiagnostic>,
    /// Servers we expected to publish but didn't before the deadline.
    /// Reported to the agent via `pending_lsp_servers` so they understand
    /// the result is partial.
    pub pending_servers: Vec<ServerKey>,
    /// Servers whose process exited between notification and deadline.
    /// Reported separately so the agent knows the gap is unrecoverable
    /// without a server restart, not "wait longer."
    pub exited_servers: Vec<ServerKey>,
}
122
/// Pre-edit freshness snapshot for one server/file pair.
///
/// `post_edit_entry_is_fresh` compares a diagnostic entry against this
/// snapshot to decide whether the entry arrived after the edit.
#[derive(Debug, Clone, Copy, Default)]
pub struct PreEditSnapshot {
    /// Diagnostic epoch observed before the edit; an entry must have a
    /// strictly greater epoch to count as newer than the snapshot.
    pub epoch: u64,
    /// Document version known when the snapshot was taken, if any.
    pub document_version_at_capture: Option<i32>,
}
129
130pub fn post_edit_entry_is_fresh(
131    entry: &DiagnosticEntry,
132    target_version: i32,
133    pre: PreEditSnapshot,
134) -> bool {
135    if entry.epoch <= pre.epoch {
136        return false;
137    }
138
139    match entry.version {
140        Some(version) => version >= target_version,
141        // Unversioned publishDiagnostics payloads cannot prove which document
142        // state they describe. Epoch advancement only proves arrival order; an
143        // old analysis result can still arrive after our pre-snapshot. Treat as
144        // pending/partial rather than fresh.
145        None => false,
146    }
147}
148
149impl PostEditWaitOutcome {
150    /// True if every expected server reported a fresh result. False means
151    /// the agent should treat the diagnostics as a partial picture.
152    pub fn complete(&self) -> bool {
153        self.pending_servers.is_empty() && self.exited_servers.is_empty()
154    }
155}
156
/// Per-server outcome of a `textDocument/diagnostic` (per-file pull) request.
///
/// Exactly one variant is reported for each server the pull was attempted
/// against (see `PullFileResult`).
#[derive(Debug, Clone)]
pub enum PullFileOutcome {
    /// Server returned a full report; diagnostics stored.
    /// `diagnostic_count` is the number of diagnostics in that report.
    Full { diagnostic_count: usize },
    /// Server returned `kind: "unchanged"` — cached diagnostics still valid.
    Unchanged,
    /// Server returned a partial-result token; we don't subscribe to streamed
    /// progress so the response is treated as a soft empty until the next pull.
    PartialNotSupported,
    /// Server doesn't advertise pull capability — caller should fall back to
    /// push diagnostics for this server.
    PullNotSupported,
    /// The pull request failed (timeout, server error, etc.); `reason` holds
    /// the failure description.
    RequestFailed { reason: String },
}
173
/// Result of `pull_file_diagnostics` for one matching server.
#[derive(Debug, Clone)]
pub struct PullFileResult {
    /// The server instance (kind + workspace root) this result belongs to.
    pub server_key: ServerKey,
    /// What that server did with the pull request.
    pub outcome: PullFileOutcome,
}
180
/// Result of `pull_workspace_diagnostics` for a single server.
#[derive(Debug, Clone)]
pub struct PullWorkspaceResult {
    /// The server instance (kind + workspace root) this result belongs to.
    pub server_key: ServerKey,
    /// Files for which a Full report was received and cached. Files that came
    /// back as `Unchanged` are NOT listed here because their cached entry was
    /// already authoritative.
    pub files_reported: Vec<PathBuf>,
    /// True if the server returned a full response within the timeout.
    pub complete: bool,
    /// True if we cancelled (request timed out before the server responded).
    pub cancelled: bool,
    /// True if the server advertised workspace pull support. When false, the
    /// other fields are empty and the caller should fall back to file-mode
    /// pull or to push semantics.
    pub supports_workspace: bool,
}
198
/// Owns every running LSP client together with the bookkeeping that goes with
/// it: which documents are open per server, the diagnostics received so far,
/// the shared event channel, and the cache of servers that failed to spawn.
///
/// Server instances are identified by `ServerKey` (server kind + workspace
/// root), which is the key type of the per-server maps below.
pub struct LspManager {
    /// Active server instances, keyed by (ServerKind, workspace_root).
    clients: HashMap<ServerKey, LspClient>,
    /// Binary names for active server instances. Kept separate from
    /// `LspClient` so crash handling can report the installable binary name
    /// after a post-initialize process exit.
    server_binaries: HashMap<ServerKey, String>,
    /// Tracks opened documents and versions per active server.
    documents: HashMap<ServerKey, DocumentStore>,
    /// Stored publishDiagnostics payloads across all servers.
    diagnostics: DiagnosticsStore,
    /// Unified event channel — all server reader threads send here.
    event_tx: Sender<LspEvent>,
    /// Receiving end of the unified event channel; drained by
    /// `drain_events`.
    event_rx: Receiver<LspEvent>,
    /// Optional binary path overrides used by integration tests.
    binary_overrides: HashMap<ServerKind, PathBuf>,
    /// Extra env vars merged into every spawned LSP child. Used in tests to
    /// drive the fake server's behavioral variants (`AFT_FAKE_LSP_PULL=1`,
    /// `AFT_FAKE_LSP_WORKSPACE=1`, etc.). Production code does not set this.
    extra_env: HashMap<String, String>,
    /// Per-(kind,root) cache of spawn failures. Once a server fails to spawn
    /// for a workspace root, we remember why and skip subsequent attempts for
    /// the lifetime of this AFT process. Without this, every file open or
    /// didChange retries `spawn_server` and logs a fresh ERROR — visible as
    /// repeated `failed to spawn TypeScript Language Server: Could not find a
    /// valid TypeScript installation` lines per edit.
    ///
    /// Entries are NEVER evicted automatically. The expected recovery path is
    /// for the user to fix their environment (install the missing binary or
    /// add a `tsconfig.json` / `package.json` with the right dependency) and
    /// restart OpenCode/Pi, which spawns a fresh `aft` process with an empty
    /// cache. We deliberately don't auto-retry on file events: the failure
    /// modes we track here (binary not installed, init handshake failure)
    /// don't fix themselves at runtime.
    failed_spawns: HashMap<ServerKey, ServerAttemptResult>,
    /// Server/root pairs for which we already logged that watched-file
    /// notifications are skipped because the capability is absent.
    watched_file_skip_logged: HashSet<ServerKey>,
    /// Tracks PIDs of spawned LSP child processes so the signal handler can
    /// kill them on SIGTERM/SIGINT before aft exits, preventing orphans.
    /// Defaults to empty; production wires this from `AppContext`.
    child_registry: LspChildRegistry,
}
242
243impl LspManager {
244    pub fn new() -> Self {
245        let (event_tx, event_rx) = unbounded();
246        Self {
247            clients: HashMap::new(),
248            server_binaries: HashMap::new(),
249            documents: HashMap::new(),
250            diagnostics: DiagnosticsStore::new(),
251            event_tx,
252            event_rx,
253            binary_overrides: HashMap::new(),
254            extra_env: HashMap::new(),
255            failed_spawns: HashMap::new(),
256            watched_file_skip_logged: HashSet::new(),
257            child_registry: LspChildRegistry::new(),
258        }
259    }
260
261    /// Set the child-PID registry. Must be called before any servers spawn.
262    pub fn set_child_registry(&mut self, registry: LspChildRegistry) {
263        self.child_registry = registry;
264    }
265
266    /// For testing: set an extra environment variable that gets passed to
267    /// every spawned LSP child process. Useful for driving fake-server
268    /// behavioral variants in integration tests.
269    pub fn set_extra_env(&mut self, key: &str, value: &str) {
270        self.extra_env.insert(key.to_string(), value.to_string());
271    }
272
273    /// Count active LSP server instances.
274    pub fn server_count(&self) -> usize {
275        self.clients.len()
276    }
277
278    /// For testing: override the binary for a server kind.
279    pub fn override_binary(&mut self, kind: ServerKind, binary_path: PathBuf) {
280        self.binary_overrides.insert(kind, binary_path);
281    }
282
283    /// Ensure a server is running for the given file. Spawns if needed.
284    /// Returns the active server keys for the file, or an empty vec if none match.
285    ///
286    /// This is the lightweight wrapper around [`ensure_server_for_file_detailed`]
287    /// that drops failure context. Prefer the detailed variant in command
288    /// handlers that need to surface honest error messages to the agent.
289    pub fn ensure_server_for_file(&mut self, file_path: &Path, config: &Config) -> Vec<ServerKey> {
290        self.ensure_server_for_file_detailed(file_path, config)
291            .successful
292    }
293
294    /// Detailed version of [`ensure_server_for_file`] that records every
295    /// matching server's outcome (`Ok` / `NoRootMarker` / `BinaryNotInstalled`
296    /// / `SpawnFailed`).
297    ///
298    /// Use this when the caller wants to honestly report _why_ a file has no
299    /// active server (e.g., to surface "bash-language-server not on PATH" to
300    /// the agent instead of silently returning `total: 0`).
301    pub fn ensure_server_for_file_detailed(
302        &mut self,
303        file_path: &Path,
304        config: &Config,
305    ) -> EnsureServerOutcomes {
306        let defs = servers_for_file(file_path, config);
307        let mut outcomes = EnsureServerOutcomes::default();
308
309        for def in defs {
310            let server_id = def.kind.id_str().to_string();
311            let server_name = def.name.to_string();
312
313            let Some(root) = find_workspace_root(file_path, &def.root_markers) else {
314                outcomes.attempts.push(ServerAttempt {
315                    server_id,
316                    server_name,
317                    result: ServerAttemptResult::NoRootMarker {
318                        looked_for: def.root_markers.iter().map(|s| s.to_string()).collect(),
319                    },
320                });
321                continue;
322            };
323
324            let key = ServerKey {
325                kind: def.kind.clone(),
326                root,
327            };
328
329            if !self.clients.contains_key(&key) {
330                // If we already tried and failed to spawn this server for this
331                // root, return the cached classification without retrying or
332                // re-logging. This prevents per-edit ERROR spam when the user's
333                // environment is missing a dependency the LSP needs (the
334                // typescript-language-server "Could not find a valid TypeScript
335                // installation" case is the canonical example).
336                if let Some(cached) = self.failed_spawns.get(&key) {
337                    outcomes.attempts.push(ServerAttempt {
338                        server_id,
339                        server_name,
340                        result: cached.clone(),
341                    });
342                    continue;
343                }
344
345                match self.spawn_server(&def, &key.root, config) {
346                    Ok(client) => {
347                        self.clients.insert(key.clone(), client);
348                        self.server_binaries.insert(key.clone(), def.binary.clone());
349                        self.documents.entry(key.clone()).or_default();
350                    }
351                    Err(err) => {
352                        slog_error!("failed to spawn {}: {}", def.name, err);
353                        let result = classify_spawn_error(&def.binary, &err);
354                        // Remember the failure so subsequent file events skip
355                        // this (kind, root) pair instead of producing a fresh
356                        // spawn attempt + ERROR log per request.
357                        self.failed_spawns.insert(key.clone(), result.clone());
358                        outcomes.attempts.push(ServerAttempt {
359                            server_id,
360                            server_name,
361                            result,
362                        });
363                        continue;
364                    }
365                }
366            }
367
368            outcomes.attempts.push(ServerAttempt {
369                server_id,
370                server_name,
371                result: ServerAttemptResult::Ok {
372                    server_key: key.clone(),
373                },
374            });
375            outcomes.successful.push(key);
376        }
377
378        outcomes
379    }
380
381    /// Ensure a server is running using the default LSP registry.
382    /// Kept for integration tests that exercise built-in server helpers directly.
383    pub fn ensure_server_for_file_default(&mut self, file_path: &Path) -> Vec<ServerKey> {
384        self.ensure_server_for_file(file_path, &Config::default())
385    }
386    /// Ensure that servers are running for the file and that the document is open
387    /// in each server's DocumentStore. Reads file content from disk if not already open.
388    /// Returns the server keys for the file.
389    pub fn ensure_file_open(
390        &mut self,
391        file_path: &Path,
392        config: &Config,
393    ) -> Result<Vec<ServerKey>, LspError> {
394        let canonical_path = canonicalize_for_lsp(file_path)?;
395        let server_keys = self.ensure_server_for_file(&canonical_path, config);
396        if server_keys.is_empty() {
397            return Ok(server_keys);
398        }
399
400        let uri = uri_for_path(&canonical_path)?;
401        let language_id = language_id_for_extension(
402            canonical_path
403                .extension()
404                .and_then(|ext| ext.to_str())
405                .unwrap_or_default(),
406        )
407        .to_string();
408
409        for key in &server_keys {
410            let already_open = self
411                .documents
412                .get(key)
413                .is_some_and(|store| store.is_open(&canonical_path));
414
415            if !already_open {
416                let content = std::fs::read_to_string(&canonical_path).map_err(LspError::Io)?;
417                if let Some(client) = self.clients.get_mut(key) {
418                    client.send_notification::<DidOpenTextDocument>(DidOpenTextDocumentParams {
419                        text_document: TextDocumentItem::new(
420                            uri.clone(),
421                            language_id.clone(),
422                            0,
423                            content,
424                        ),
425                    })?;
426                }
427                self.documents
428                    .entry(key.clone())
429                    .or_default()
430                    .open(canonical_path.clone());
431                continue;
432            }
433
434            // Document is already open. Check disk drift — if the file has
435            // been modified outside the AFT pipeline (other tool, manual
436            // edit, sibling session) we MUST send a didChange before any
437            // pull-diagnostic / hover query, otherwise the LSP server
438            // returns results computed from stale in-memory content.
439            //
440            // This is the regression fix Oracle flagged in finding #6:
441            // "ensure_file_open skips already-open files without checking
442            // if disk content changed."
443            let drifted = self
444                .documents
445                .get(key)
446                .is_some_and(|store| store.is_stale_on_disk(&canonical_path));
447            if drifted {
448                let content = std::fs::read_to_string(&canonical_path).map_err(LspError::Io)?;
449                let next_version = self
450                    .documents
451                    .get(key)
452                    .and_then(|store| store.version(&canonical_path))
453                    .map(|v| v + 1)
454                    .unwrap_or(1);
455                if let Some(client) = self.clients.get_mut(key) {
456                    client.send_notification::<DidChangeTextDocument>(
457                        DidChangeTextDocumentParams {
458                            text_document: VersionedTextDocumentIdentifier::new(
459                                uri.clone(),
460                                next_version,
461                            ),
462                            content_changes: vec![TextDocumentContentChangeEvent {
463                                range: None,
464                                range_length: None,
465                                text: content,
466                            }],
467                        },
468                    )?;
469                }
470                if let Some(store) = self.documents.get_mut(key) {
471                    store.bump_version(&canonical_path);
472                }
473            }
474        }
475
476        Ok(server_keys)
477    }
478
479    pub fn ensure_file_open_default(
480        &mut self,
481        file_path: &Path,
482    ) -> Result<Vec<ServerKey>, LspError> {
483        self.ensure_file_open(file_path, &Config::default())
484    }
485
486    /// Notify relevant LSP servers that a file has been written/changed.
487    /// This is the main hook called after every file write in AFT.
488    ///
489    /// If the file's server isn't running yet, starts it (lazy spawn).
490    /// If the file isn't open in LSP yet, sends didOpen. Otherwise sends didChange.
491    pub fn notify_file_changed(
492        &mut self,
493        file_path: &Path,
494        content: &str,
495        config: &Config,
496    ) -> Result<(), LspError> {
497        self.notify_file_changed_versioned(file_path, content, config)
498            .map(|_| ())
499    }
500
501    /// Like `notify_file_changed`, but returns the target document version
502    /// per server so the post-edit waiter can match `publishDiagnostics`
503    /// against the exact version that this notification carried.
504    ///
505    /// Returns: `Vec<(ServerKey, target_version)>`. `target_version` is the
506    /// `version` field on the `VersionedTextDocumentIdentifier` we just sent
507    /// (post-bump). For freshly-opened documents (`didOpen`) the version is
508    /// `0`. Servers that don't honor versioned text document sync will not
509    /// echo this back on `publishDiagnostics`; the caller is expected to
510    /// fall back to the epoch-delta path for those.
511    pub fn notify_file_changed_versioned(
512        &mut self,
513        file_path: &Path,
514        content: &str,
515        config: &Config,
516    ) -> Result<Vec<(ServerKey, i32)>, LspError> {
517        let canonical_path = canonicalize_for_lsp(file_path)?;
518        let server_keys = self.ensure_server_for_file(&canonical_path, config);
519        if server_keys.is_empty() {
520            return Ok(Vec::new());
521        }
522
523        let uri = uri_for_path(&canonical_path)?;
524        let language_id = language_id_for_extension(
525            canonical_path
526                .extension()
527                .and_then(|ext| ext.to_str())
528                .unwrap_or_default(),
529        )
530        .to_string();
531
532        let mut versions: Vec<(ServerKey, i32)> = Vec::with_capacity(server_keys.len());
533
534        for key in server_keys {
535            let current_version = self
536                .documents
537                .get(&key)
538                .and_then(|store| store.version(&canonical_path));
539
540            if let Some(version) = current_version {
541                let next_version = version + 1;
542                if let Some(client) = self.clients.get_mut(&key) {
543                    client.send_notification::<DidChangeTextDocument>(
544                        DidChangeTextDocumentParams {
545                            text_document: VersionedTextDocumentIdentifier::new(
546                                uri.clone(),
547                                next_version,
548                            ),
549                            content_changes: vec![TextDocumentContentChangeEvent {
550                                range: None,
551                                range_length: None,
552                                text: content.to_string(),
553                            }],
554                        },
555                    )?;
556                }
557                if let Some(store) = self.documents.get_mut(&key) {
558                    store.bump_version(&canonical_path);
559                }
560                versions.push((key, next_version));
561                continue;
562            }
563
564            if let Some(client) = self.clients.get_mut(&key) {
565                client.send_notification::<DidOpenTextDocument>(DidOpenTextDocumentParams {
566                    text_document: TextDocumentItem::new(
567                        uri.clone(),
568                        language_id.clone(),
569                        0,
570                        content.to_string(),
571                    ),
572                })?;
573            }
574            self.documents
575                .entry(key.clone())
576                .or_default()
577                .open(canonical_path.clone());
578            // didOpen carries version 0 — that's the version the server
579            // will echo on its first publishDiagnostics for this document.
580            versions.push((key, 0));
581        }
582
583        Ok(versions)
584    }
585
586    pub fn notify_file_changed_default(
587        &mut self,
588        file_path: &Path,
589        content: &str,
590    ) -> Result<(), LspError> {
591        self.notify_file_changed(file_path, content, &Config::default())
592    }
593
594    /// Notify every active server whose workspace contains at least one changed
595    /// path that watched files changed. This is intentionally workspace-scoped
596    /// rather than extension-scoped: configuration edits such as `package.json`
597    /// or `tsconfig.json` affect a server's project graph even though those
598    /// files may not be documents handled by the server itself.
599    pub fn notify_files_watched_changed(
600        &mut self,
601        paths: &[(PathBuf, FileChangeType)],
602        _config: &Config,
603    ) -> Result<(), LspError> {
604        if paths.is_empty() {
605            return Ok(());
606        }
607
608        let mut canonical_events = Vec::with_capacity(paths.len());
609        for (path, typ) in paths {
610            let canonical_path = resolve_for_lsp_uri(path);
611            canonical_events.push((canonical_path, *typ));
612        }
613
614        let keys: Vec<ServerKey> = self.clients.keys().cloned().collect();
615        for key in keys {
616            let mut changes = Vec::new();
617            for (path, typ) in &canonical_events {
618                if !path.starts_with(&key.root) {
619                    continue;
620                }
621                changes.push(FileEvent::new(uri_for_path(path)?, *typ));
622            }
623
624            if changes.is_empty() {
625                continue;
626            }
627
628            if let Some(client) = self.clients.get_mut(&key) {
629                // Send when the server either advertised initialize-time
630                // watched-file support or dynamically registered a watcher.
631                // The dynamic client capability we send during initialize only
632                // permits runtime registration; it is tracked separately via
633                // `has_watched_file_registration()`.
634                let supports_static_watched_files = client.supports_watched_files();
635                let has_dynamic_registration = client.has_watched_file_registration();
636                if !(supports_static_watched_files || has_dynamic_registration) {
637                    if self.watched_file_skip_logged.insert(key.clone()) {
638                        log::debug!(
639                            "skipping didChangeWatchedFiles for {:?} (not supported or registered)",
640                            key
641                        );
642                    }
643                    continue;
644                }
645                client.send_notification::<DidChangeWatchedFiles>(DidChangeWatchedFilesParams {
646                    changes,
647                })?;
648            }
649        }
650
651        Ok(())
652    }
653
654    /// Close a document in all servers that have it open.
655    pub fn notify_file_closed(&mut self, file_path: &Path) -> Result<(), LspError> {
656        let canonical_path = canonicalize_for_lsp(file_path)?;
657        let uri = uri_for_path(&canonical_path)?;
658        let keys: Vec<ServerKey> = self.documents.keys().cloned().collect();
659
660        for key in keys {
661            let was_open = self
662                .documents
663                .get(&key)
664                .map(|store| store.is_open(&canonical_path))
665                .unwrap_or(false);
666            if !was_open {
667                continue;
668            }
669
670            if let Some(client) = self.clients.get_mut(&key) {
671                client.send_notification::<DidCloseTextDocument>(DidCloseTextDocumentParams {
672                    text_document: TextDocumentIdentifier::new(uri.clone()),
673                })?;
674            }
675
676            if let Some(store) = self.documents.get_mut(&key) {
677                store.close(&canonical_path);
678            }
679            self.diagnostics
680                .clear_for_server_file(&key, &canonical_path);
681        }
682
683        Ok(())
684    }
685
686    /// Get an active client for a file path, if one exists.
687    pub fn client_for_file(&self, file_path: &Path, config: &Config) -> Option<&LspClient> {
688        let key = self.server_key_for_file(file_path, config)?;
689        self.clients.get(&key)
690    }
691
692    pub fn client_for_file_default(&self, file_path: &Path) -> Option<&LspClient> {
693        self.client_for_file(file_path, &Config::default())
694    }
695
696    /// Get a mutable active client for a file path, if one exists.
697    pub fn client_for_file_mut(
698        &mut self,
699        file_path: &Path,
700        config: &Config,
701    ) -> Option<&mut LspClient> {
702        let key = self.server_key_for_file(file_path, config)?;
703        self.clients.get_mut(&key)
704    }
705
706    pub fn client_for_file_mut_default(&mut self, file_path: &Path) -> Option<&mut LspClient> {
707        self.client_for_file_mut(file_path, &Config::default())
708    }
709
710    /// Number of tracked server clients.
711    pub fn active_client_count(&self) -> usize {
712        self.clients.len()
713    }
714
715    /// Drain all pending LSP events. Call from the main loop.
716    pub fn drain_events(&mut self) -> Vec<LspEvent> {
717        let mut events = Vec::new();
718        while let Ok(event) = self.event_rx.try_recv() {
719            self.handle_event(&event);
720            events.push(event);
721        }
722        events
723    }
724
725    /// Wait for diagnostics to arrive for a specific file until a timeout expires.
726    pub fn wait_for_diagnostics(
727        &mut self,
728        file_path: &Path,
729        config: &Config,
730        timeout: std::time::Duration,
731    ) -> Vec<StoredDiagnostic> {
732        let deadline = std::time::Instant::now() + timeout;
733        self.wait_for_file_diagnostics(file_path, config, deadline)
734    }
735
736    pub fn wait_for_diagnostics_default(
737        &mut self,
738        file_path: &Path,
739        timeout: std::time::Duration,
740    ) -> Vec<StoredDiagnostic> {
741        self.wait_for_diagnostics(file_path, &Config::default(), timeout)
742    }
743
744    /// Test-only accessor for the diagnostics store. Used by integration
745    /// tests that need to inspect per-server entries (e.g., to verify that
746    /// `ServerKey::root` is populated correctly, not the empty path that
747    /// the legacy `publish_with_kind` path produced).
748    #[doc(hidden)]
749    pub fn diagnostics_store_for_test(&self) -> &DiagnosticsStore {
750        &self.diagnostics
751    }
752
753    /// Snapshot the current per-server epoch for every entry that exists
754    /// for `file_path`. Servers without an entry yet (never published)
755    /// are absent from the map; for those, `pre = 0` (any first publish
756    /// will be considered fresh under the epoch-fallback rule).
757    pub fn snapshot_diagnostic_epochs(&self, file_path: &Path) -> HashMap<ServerKey, u64> {
758        let lookup_path = normalize_lookup_path(file_path);
759        self.diagnostics
760            .entries_for_file(&lookup_path)
761            .into_iter()
762            .map(|(key, entry)| (key.clone(), entry.epoch))
763            .collect()
764    }
765
766    /// Snapshot the current diagnostic epoch and document version for every
767    /// active server relevant to `file_path` before a post-edit notification.
768    pub fn snapshot_pre_edit_state(&self, file_path: &Path) -> HashMap<ServerKey, PreEditSnapshot> {
769        let lookup_path = normalize_lookup_path(file_path);
770        let mut snapshots: HashMap<ServerKey, PreEditSnapshot> = self
771            .diagnostics
772            .entries_for_file(&lookup_path)
773            .into_iter()
774            .map(|(key, entry)| {
775                (
776                    key.clone(),
777                    PreEditSnapshot {
778                        epoch: entry.epoch,
779                        document_version_at_capture: None,
780                    },
781                )
782            })
783            .collect();
784
785        for (key, store) in &self.documents {
786            if let Some(version) = store.version(&lookup_path) {
787                snapshots
788                    .entry(key.clone())
789                    .or_default()
790                    .document_version_at_capture = Some(version);
791            }
792        }
793
794        snapshots
795    }
796
797    /// True when the current diagnostic entry for `server_key` can be tied to
798    /// that server's current in-memory document version for `file_path`.
799    ///
800    /// File-mode `lsp_diagnostics` uses this for push-only fallback after it
801    /// has synced/opened the document. Versioned publishes are accepted when
802    /// they match the current document version; unversioned publishes are not
803    /// accepted as fresh because epoch/wall-clock ordering alone is racy.
804    pub fn diagnostic_entry_is_fresh_for_document(
805        &self,
806        file_path: &Path,
807        server_key: &ServerKey,
808        pre: PreEditSnapshot,
809    ) -> bool {
810        let lookup_path = normalize_lookup_path(file_path);
811        let Some(entry) = self
812            .diagnostics
813            .entries_for_file(&lookup_path)
814            .into_iter()
815            .find_map(|(key, entry)| if key == server_key { Some(entry) } else { None })
816        else {
817            return false;
818        };
819
820        let target_version = self
821            .documents
822            .get(server_key)
823            .and_then(|store| store.version(&lookup_path))
824            .or(pre.document_version_at_capture)
825            .unwrap_or(0);
826
827        matches!(entry.version, Some(version) if version >= target_version)
828    }
829
830    /// Wait for FRESH per-server diagnostics that match the just-sent
831    /// document version. This is the v0.17.3 post-edit path that fixes the
832    /// stale-diagnostics bug: instead of returning whatever is in the cache
833    /// when the deadline hits, we only return entries whose `version`
834    /// matches the post-edit target version (or, for servers that don't
835    /// participate in versioned sync, whose `epoch` was bumped after the
836    /// pre-edit snapshot).
837    ///
838    /// `expected_versions` should come from `notify_file_changed_versioned`
839    /// — one `(ServerKey, target_version)` per server we sent didChange/
840    /// didOpen to.
841    ///
842    /// `pre_snapshot` is the per-server epoch BEFORE the notification was
843    /// sent; it gates the epoch-fallback path so an old-version publish
844    /// arriving after `drain_events` and before `didChange` cannot be
845    /// mistaken for a fresh response.
846    ///
847    /// Returns a per-server tri-state: `Fresh` (publish matched target
848    /// version OR epoch advanced past snapshot for an unversioned server),
849    /// `Pending` (deadline hit before this server published anything we
850    /// could verify), or `Exited` (server died between notification and
851    /// deadline).
852    pub fn wait_for_post_edit_diagnostics(
853        &mut self,
854        file_path: &Path,
855        // `config` is intentionally accepted (matches sibling wait APIs and
856        // future-proofs us if freshness rules need it). Currently unused
857        // because expected_versions/pre_snapshot fully determine behavior.
858        _config: &Config,
859        expected_versions: &[(ServerKey, i32)],
860        pre_snapshot: &HashMap<ServerKey, PreEditSnapshot>,
861        timeout: std::time::Duration,
862    ) -> PostEditWaitOutcome {
863        let lookup_path = normalize_lookup_path(file_path);
864        let deadline = std::time::Instant::now() + timeout;
865
866        // Drain any events that arrived while we were sending didChange.
867        // The publishDiagnostics handler stores the version, so even
868        // pre-snapshot publishes that landed late won't be mistaken for
869        // fresh — the version-match check will reject them.
870        let _ = self.drain_events_for_file(&lookup_path);
871
872        let mut fresh: HashMap<ServerKey, Vec<StoredDiagnostic>> = HashMap::new();
873        let mut exited: Vec<ServerKey> = Vec::new();
874
875        loop {
876            // Check freshness for every expected server. A server is fresh
877            // if its current entry for this file satisfies either:
878            //   1. version-match: entry.version == Some(target_version), OR
879            //   2. push-only freshness: entry.version is None AND entry.epoch
880            //      advanced strictly after the pre-edit snapshot. Versioned
881            //      publishes must be >= the post-edit target version.
882            // Servers whose process has exited are reported separately.
883            for (key, target_version) in expected_versions {
884                if fresh.contains_key(key) || exited.contains(key) {
885                    continue;
886                }
887                if !self.clients.contains_key(key) {
888                    exited.push(key.clone());
889                    continue;
890                }
891                if let Some(entry) = self
892                    .diagnostics
893                    .entries_for_file(&lookup_path)
894                    .into_iter()
895                    .find_map(|(k, e)| if k == key { Some(e) } else { None })
896                {
897                    let pre = pre_snapshot.get(key).copied().unwrap_or_default();
898                    let is_fresh = post_edit_entry_is_fresh(entry, *target_version, pre);
899                    if is_fresh {
900                        fresh.insert(key.clone(), entry.diagnostics.clone());
901                    }
902                }
903            }
904
905            // All accounted for? Done.
906            if fresh.len() + exited.len() == expected_versions.len() {
907                break;
908            }
909
910            let now = std::time::Instant::now();
911            if now >= deadline {
912                break;
913            }
914
915            let timeout = deadline.saturating_duration_since(now);
916            match self.event_rx.recv_timeout(timeout) {
917                Ok(event) => {
918                    self.handle_event(&event);
919                }
920                Err(RecvTimeoutError::Timeout) | Err(RecvTimeoutError::Disconnected) => break,
921            }
922        }
923
924        // Pending = expected but neither fresh nor exited.
925        let pending: Vec<ServerKey> = expected_versions
926            .iter()
927            .filter(|(k, _)| !fresh.contains_key(k) && !exited.contains(k))
928            .map(|(k, _)| k.clone())
929            .collect();
930
931        // Build deduplicated, sorted diagnostics from the fresh servers only.
932        // Stale or pending servers contribute zero diagnostics.
933        let mut diagnostics: Vec<StoredDiagnostic> = fresh
934            .into_iter()
935            .flat_map(|(_, diags)| diags.into_iter())
936            .collect();
937        diagnostics.sort_by(|a, b| {
938            a.file
939                .cmp(&b.file)
940                .then(a.line.cmp(&b.line))
941                .then(a.column.cmp(&b.column))
942                .then(a.message.cmp(&b.message))
943        });
944
945        PostEditWaitOutcome {
946            diagnostics,
947            pending_servers: pending,
948            exited_servers: exited,
949        }
950    }
951
952    /// Wait for diagnostics to arrive for a specific file until a deadline.
953    ///
954    /// Drains already-queued events first, then blocks on the shared event
955    /// channel only until either `publishDiagnostics` arrives for this file or
956    /// the deadline is reached.
957    pub fn wait_for_file_diagnostics(
958        &mut self,
959        file_path: &Path,
960        config: &Config,
961        deadline: std::time::Instant,
962    ) -> Vec<StoredDiagnostic> {
963        let lookup_path = normalize_lookup_path(file_path);
964
965        if self.server_key_for_file(&lookup_path, config).is_none() {
966            return Vec::new();
967        }
968
969        loop {
970            if self.drain_events_for_file(&lookup_path) {
971                break;
972            }
973
974            let now = std::time::Instant::now();
975            if now >= deadline {
976                break;
977            }
978
979            let timeout = deadline.saturating_duration_since(now);
980            match self.event_rx.recv_timeout(timeout) {
981                Ok(event) => {
982                    if matches!(
983                        self.handle_event(&event),
984                        Some(ref published_file) if published_file.as_path() == lookup_path.as_path()
985                    ) {
986                        break;
987                    }
988                }
989                Err(RecvTimeoutError::Timeout) | Err(RecvTimeoutError::Disconnected) => break,
990            }
991        }
992
993        self.get_diagnostics_for_file(&lookup_path)
994            .into_iter()
995            .cloned()
996            .collect()
997    }
998
    /// Default timeout (10 seconds) for `textDocument/diagnostic` (per-file
    /// pull). Servers usually respond in under 1s for files they've already
    /// analyzed; we allow up to 10s before falling back to push semantics.
    /// Currently surfaced via [`Self::pull_file_timeout`] for callers that
    /// want to override the wait via the `wait_ms` knob.
    ///
    /// NOTE(review): `send_pull_request` calls `send_request` without passing
    /// this value — confirm where the 10s limit is actually enforced.
    pub const PULL_FILE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
1005
1006    /// Public accessor so command handlers can reuse the documented default.
1007    pub fn pull_file_timeout() -> std::time::Duration {
1008        Self::PULL_FILE_TIMEOUT
1009    }
1010
    /// Default timeout (10 seconds) for `workspace/diagnostic`, used by
    /// `pull_workspace_diagnostics` when the caller passes no timeout. The
    /// LSP spec allows the server to hold this open indefinitely; we cap at
    /// 10s and report `complete: false` to the agent rather than hanging the
    /// bridge.
    const PULL_WORKSPACE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
1015
1016    /// Issue a `textDocument/diagnostic` (LSP 3.17 per-file pull) request to
1017    /// every server that supports pull diagnostics for the given file.
1018    ///
1019    /// Returns the per-server outcome. If a server reports `kind: "unchanged"`,
1020    /// the cached entry's diagnostics are surfaced (deterministic re-use of
1021    /// the previous response). If a server doesn't advertise pull capability,
1022    /// it's skipped here — the caller should fall back to push for those.
1023    ///
1024    /// Side effects: results are stored in `DiagnosticsStore` so directory-mode
1025    /// queries can aggregate them later.
1026    pub fn pull_file_diagnostics(
1027        &mut self,
1028        file_path: &Path,
1029        config: &Config,
1030    ) -> Result<Vec<PullFileResult>, LspError> {
1031        let canonical_path = canonicalize_for_lsp(file_path)?;
1032        // Make sure servers are running and the document is open with fresh
1033        // content (handles disk-drift via DocumentStore::is_stale_on_disk).
1034        self.ensure_file_open(&canonical_path, config)?;
1035
1036        let server_keys = self.ensure_server_for_file(&canonical_path, config);
1037        if server_keys.is_empty() {
1038            return Ok(Vec::new());
1039        }
1040
1041        let uri = uri_for_path(&canonical_path)?;
1042        let mut results = Vec::with_capacity(server_keys.len());
1043
1044        for key in server_keys {
1045            let supports_pull = self
1046                .clients
1047                .get(&key)
1048                .and_then(|c| c.diagnostic_capabilities())
1049                .is_some_and(|caps| caps.pull_diagnostics);
1050
1051            if !supports_pull {
1052                results.push(PullFileResult {
1053                    server_key: key.clone(),
1054                    outcome: PullFileOutcome::PullNotSupported,
1055                });
1056                continue;
1057            }
1058
1059            // Look up previous resultId for incremental requests.
1060            let previous_result_id = self
1061                .diagnostics
1062                .entries_for_file(&canonical_path)
1063                .into_iter()
1064                .find(|(k, _)| **k == key)
1065                .and_then(|(_, entry)| entry.result_id.clone());
1066
1067            let identifier = self
1068                .clients
1069                .get(&key)
1070                .and_then(|c| c.diagnostic_capabilities())
1071                .and_then(|caps| caps.identifier.clone());
1072
1073            let params = AftDocumentDiagnosticParams {
1074                text_document: lsp_types::TextDocumentIdentifier { uri: uri.clone() },
1075                identifier,
1076                previous_result_id,
1077                work_done_progress_params: Default::default(),
1078                partial_result_params: Default::default(),
1079            };
1080
1081            let outcome = match self.send_pull_request(&key, params) {
1082                Ok(report) => self.ingest_document_report(&key, &canonical_path, report),
1083                Err(err) => {
1084                    if let Some(result) = self.cache_post_initialize_exit(&key, &err) {
1085                        PullFileOutcome::RequestFailed {
1086                            reason: server_attempt_result_reason(&result),
1087                        }
1088                    } else if recoverable_pull_rejection(&err)
1089                        && self.clients.get(&key).is_some_and(|client| {
1090                            matches!(
1091                                client.state(),
1092                                ServerState::Ready | ServerState::Initializing
1093                            )
1094                        })
1095                    {
1096                        PullFileOutcome::RequestFailed {
1097                            reason: format!("pull_rejected_push_fallback: {err}"),
1098                        }
1099                    } else {
1100                        PullFileOutcome::RequestFailed {
1101                            reason: err.to_string(),
1102                        }
1103                    }
1104                }
1105            };
1106
1107            results.push(PullFileResult {
1108                server_key: key,
1109                outcome,
1110            });
1111        }
1112
1113        Ok(results)
1114    }
1115
1116    /// Issue a `workspace/diagnostic` request to a specific server. Cancels
1117    /// internally if `timeout` elapses before the server responds. Cached
1118    /// entries from the response are stored so directory-mode queries pick
1119    /// them up.
1120    pub fn pull_workspace_diagnostics(
1121        &mut self,
1122        server_key: &ServerKey,
1123        timeout: Option<std::time::Duration>,
1124    ) -> Result<PullWorkspaceResult, LspError> {
1125        let timeout = timeout.unwrap_or(Self::PULL_WORKSPACE_TIMEOUT);
1126
1127        let supports_workspace = self
1128            .clients
1129            .get(server_key)
1130            .and_then(|c| c.diagnostic_capabilities())
1131            .is_some_and(|caps| caps.workspace_diagnostics);
1132
1133        if !supports_workspace {
1134            return Ok(PullWorkspaceResult {
1135                server_key: server_key.clone(),
1136                files_reported: Vec::new(),
1137                complete: false,
1138                cancelled: false,
1139                supports_workspace: false,
1140            });
1141        }
1142
1143        let identifier = self
1144            .clients
1145            .get(server_key)
1146            .and_then(|c| c.diagnostic_capabilities())
1147            .and_then(|caps| caps.identifier.clone());
1148
1149        let params = AftWorkspaceDiagnosticParams {
1150            identifier,
1151            previous_result_ids: Vec::new(),
1152            work_done_progress_params: Default::default(),
1153            partial_result_params: Default::default(),
1154        };
1155
1156        let result = match self
1157            .clients
1158            .get_mut(server_key)
1159            .ok_or_else(|| LspError::ServerNotReady("server not found".into()))?
1160            .send_request_with_timeout::<AftWorkspaceDiagnosticRequest>(params, timeout)
1161        {
1162            Ok(result) => result,
1163            Err(LspError::Timeout(_)) => {
1164                return Ok(PullWorkspaceResult {
1165                    server_key: server_key.clone(),
1166                    files_reported: Vec::new(),
1167                    complete: false,
1168                    cancelled: true,
1169                    supports_workspace: true,
1170                });
1171            }
1172            Err(err) => {
1173                if let Some(result) = self.cache_post_initialize_exit(server_key, &err) {
1174                    return Err(LspError::ServerNotReady(server_attempt_result_reason(
1175                        &result,
1176                    )));
1177                }
1178                return Err(err);
1179            }
1180        };
1181
1182        // Extract the items list. Partial responses are not a complete
1183        // workspace view, but the partial payload can still contain useful
1184        // document reports; ingest those while surfacing complete=false.
1185        let (items, complete) = match result {
1186            lsp_types::WorkspaceDiagnosticReportResult::Report(report) => (report.items, true),
1187            lsp_types::WorkspaceDiagnosticReportResult::Partial(partial) => (partial.items, false),
1188        };
1189
1190        // Ingest each file report into the diagnostics store.
1191        let mut files_reported = Vec::with_capacity(items.len());
1192        for item in items {
1193            match item {
1194                lsp_types::WorkspaceDocumentDiagnosticReport::Full(full) => {
1195                    if let Some(file) = uri_to_path(&full.uri) {
1196                        let stored = from_lsp_diagnostics(
1197                            file.clone(),
1198                            full.full_document_diagnostic_report.items.clone(),
1199                        );
1200                        self.diagnostics.publish_with_result_id(
1201                            server_key.clone(),
1202                            file.clone(),
1203                            stored,
1204                            full.full_document_diagnostic_report.result_id.clone(),
1205                        );
1206                        files_reported.push(file);
1207                    }
1208                }
1209                lsp_types::WorkspaceDocumentDiagnosticReport::Unchanged(_unchanged) => {
1210                    // "Unchanged" means the previously cached report is still
1211                    // valid. We left it in place; nothing to do.
1212                }
1213            }
1214        }
1215
1216        Ok(PullWorkspaceResult {
1217            server_key: server_key.clone(),
1218            files_reported,
1219            complete,
1220            cancelled: false,
1221            supports_workspace: true,
1222        })
1223    }
1224
1225    fn cache_post_initialize_exit(
1226        &mut self,
1227        key: &ServerKey,
1228        err: &LspError,
1229    ) -> Option<ServerAttemptResult> {
1230        let binary = self
1231            .server_binaries
1232            .get(key)
1233            .cloned()
1234            .unwrap_or_else(|| key.kind.id_str().to_string());
1235        let (status, stderr_tail) = {
1236            let client = self.clients.get_mut(key)?;
1237            let mut status = client.child_exit_status();
1238            for _ in 0..10 {
1239                if status.is_some() {
1240                    break;
1241                }
1242                std::thread::sleep(std::time::Duration::from_millis(10));
1243                status = client.child_exit_status();
1244            }
1245            let status = status?;
1246            wait_for_stderr_tail(client);
1247            (status, client.stderr_tail())
1248        };
1249        let reason = format_post_initialize_exit_reason(&binary, status, &stderr_tail, err);
1250        let result = ServerAttemptResult::SpawnFailed { binary, reason };
1251        self.clients.remove(key);
1252        self.server_binaries.remove(key);
1253        self.documents.remove(key);
1254        self.diagnostics.clear_for_server(key);
1255        self.failed_spawns.insert(key.clone(), result.clone());
1256        Some(result)
1257    }
1258
1259    /// Issue the per-file diagnostic request and return the report.
1260    fn send_pull_request(
1261        &mut self,
1262        key: &ServerKey,
1263        params: AftDocumentDiagnosticParams,
1264    ) -> Result<lsp_types::DocumentDiagnosticReportResult, LspError> {
1265        let client = self
1266            .clients
1267            .get_mut(key)
1268            .ok_or_else(|| LspError::ServerNotReady("server not found".into()))?;
1269        client.send_request::<AftDocumentDiagnosticRequest>(params)
1270    }
1271
1272    /// Store the result of a per-file pull request and return a structured
1273    /// outcome the caller can inspect.
1274    fn ingest_document_report(
1275        &mut self,
1276        key: &ServerKey,
1277        canonical_path: &Path,
1278        result: lsp_types::DocumentDiagnosticReportResult,
1279    ) -> PullFileOutcome {
1280        let report = match result {
1281            lsp_types::DocumentDiagnosticReportResult::Report(report) => report,
1282            lsp_types::DocumentDiagnosticReportResult::Partial(_) => {
1283                // Partial results stream in via $/progress notifications which
1284                // we don't currently subscribe to. Treat as a soft-empty
1285                // success — the next pull will get the full version.
1286                return PullFileOutcome::PartialNotSupported;
1287            }
1288        };
1289
1290        match report {
1291            lsp_types::DocumentDiagnosticReport::Full(full) => {
1292                let result_id = full.full_document_diagnostic_report.result_id.clone();
1293                let stored = from_lsp_diagnostics(
1294                    canonical_path.to_path_buf(),
1295                    full.full_document_diagnostic_report.items.clone(),
1296                );
1297                let count = stored.len();
1298                self.diagnostics.publish_with_result_id(
1299                    key.clone(),
1300                    canonical_path.to_path_buf(),
1301                    stored,
1302                    result_id,
1303                );
1304                PullFileOutcome::Full {
1305                    diagnostic_count: count,
1306                }
1307            }
1308            lsp_types::DocumentDiagnosticReport::Unchanged(_unchanged) => {
1309                // The server says cache is still valid. That is only usable if
1310                // we already have a report for this exact server/file; an
1311                // initial `unchanged` response cannot prove freshness.
1312                if self
1313                    .diagnostics
1314                    .has_report_for_server_file(key, canonical_path)
1315                {
1316                    PullFileOutcome::Unchanged
1317                } else {
1318                    PullFileOutcome::RequestFailed {
1319                        reason: "no_cache_for_unchanged".to_string(),
1320                    }
1321                }
1322            }
1323        }
1324    }
1325
1326    /// Shutdown all servers gracefully.
1327    pub fn shutdown_all(&mut self) {
1328        for (key, mut client) in self.clients.drain() {
1329            if let Err(err) = client.shutdown() {
1330                slog_error!("error shutting down {:?}: {}", key, err);
1331            }
1332        }
1333        self.server_binaries.clear();
1334        self.documents.clear();
1335        self.diagnostics = DiagnosticsStore::new();
1336    }
1337
1338    /// Check if any server is active.
1339    pub fn has_active_servers(&self) -> bool {
1340        self.clients
1341            .values()
1342            .any(|client| client.state() == ServerState::Ready)
1343    }
1344
1345    /// Active server keys (running clients). Used by `lsp_diagnostics`
1346    /// directory mode to know which servers to ask for workspace pull.
1347    pub fn active_server_keys(&self) -> Vec<ServerKey> {
1348        self.clients.keys().cloned().collect()
1349    }
1350
1351    pub fn get_diagnostics_for_file(&self, file: &Path) -> Vec<&StoredDiagnostic> {
1352        let normalized = normalize_lookup_path(file);
1353        self.diagnostics.for_file(&normalized)
1354    }
1355
1356    pub fn get_diagnostics_for_directory(&self, dir: &Path) -> Vec<&StoredDiagnostic> {
1357        let normalized = normalize_lookup_path(dir);
1358        self.diagnostics.for_directory(&normalized)
1359    }
1360
1361    pub fn get_all_diagnostics(&self) -> Vec<&StoredDiagnostic> {
1362        self.diagnostics.all()
1363    }
1364
1365    /// True if any LSP server has reported diagnostics at least once, including
1366    /// an empty report that proves a checked-clean file. This lets callers avoid
1367    /// treating an empty flattened diagnostic list as trustworthy when no server
1368    /// has actually run.
1369    pub fn has_any_diagnostic_reports(&self) -> bool {
1370        !self.diagnostics.is_empty()
1371    }
1372
1373    /// True if any server has reported for this file, including an empty
1374    /// checked-clean report.
1375    pub fn has_diagnostic_report_for_file(&self, file: &Path) -> bool {
1376        let normalized = normalize_lookup_path(file);
1377        self.diagnostics.has_any_report_for_file(&normalized)
1378    }
1379
1380    /// True if this exact server/file pair has a diagnostic report, including
1381    /// an empty checked-clean report.
1382    pub fn has_diagnostic_report_for_server_file(&self, server: &ServerKey, file: &Path) -> bool {
1383        let normalized = normalize_lookup_path(file);
1384        self.diagnostics
1385            .has_report_for_server_file(server, &normalized)
1386    }
1387
1388    fn drain_events_for_file(&mut self, file_path: &Path) -> bool {
1389        let mut saw_file_diagnostics = false;
1390        while let Ok(event) = self.event_rx.try_recv() {
1391            if matches!(
1392                self.handle_event(&event),
1393                Some(ref published_file) if published_file.as_path() == file_path
1394            ) {
1395                saw_file_diagnostics = true;
1396            }
1397        }
1398        saw_file_diagnostics
1399    }
1400
1401    fn handle_event(&mut self, event: &LspEvent) -> Option<PathBuf> {
1402        match event {
1403            LspEvent::Notification {
1404                server_kind,
1405                root,
1406                method,
1407                params: Some(params),
1408            } if method == "textDocument/publishDiagnostics" => {
1409                self.handle_publish_diagnostics(server_kind.clone(), root.clone(), params)
1410            }
1411            LspEvent::ServerExited { server_kind, root } => {
1412                let key = ServerKey {
1413                    kind: server_kind.clone(),
1414                    root: root.clone(),
1415                };
1416                self.clients.remove(&key);
1417                self.server_binaries.remove(&key);
1418                self.documents.remove(&key);
1419                self.diagnostics.clear_for_server(&key);
1420                None
1421            }
1422            _ => None,
1423        }
1424    }
1425
1426    fn handle_publish_diagnostics(
1427        &mut self,
1428        server: ServerKind,
1429        root: PathBuf,
1430        params: &serde_json::Value,
1431    ) -> Option<PathBuf> {
1432        if let Ok(publish_params) =
1433            serde_json::from_value::<lsp_types::PublishDiagnosticsParams>(params.clone())
1434        {
1435            let file = uri_to_path(&publish_params.uri)?;
1436            let stored = from_lsp_diagnostics(file.clone(), publish_params.diagnostics);
1437            // v0.17.3: store with real ServerKey { kind, root } and capture
1438            // the document `version` (when the server provided one) so the
1439            // post-edit waiter can reject stale publishes deterministically
1440            // via version-match (preferred) or epoch-delta (fallback). The
1441            // earlier `publish_with_kind` path silently dropped both.
1442            let key = ServerKey { kind: server, root };
1443            self.diagnostics
1444                .publish_full(key, file.clone(), stored, None, publish_params.version);
1445            return Some(file);
1446        }
1447        None
1448    }
1449
1450    fn spawn_server(
1451        &self,
1452        def: &ServerDef,
1453        root: &Path,
1454        config: &Config,
1455    ) -> Result<LspClient, LspError> {
1456        let binary = self.resolve_binary(def, config)?;
1457
1458        // Merge the server-defined env with our test-injected env.
1459        // `extra_env` is empty in production; tests use it to drive fake
1460        // server variants (AFT_FAKE_LSP_PULL=1, etc.).
1461        let mut merged_env = def.env.clone();
1462        for (key, value) in &self.extra_env {
1463            merged_env.insert(key.clone(), value.clone());
1464        }
1465
1466        let mut client = LspClient::spawn(
1467            def.kind.clone(),
1468            root.to_path_buf(),
1469            &binary,
1470            &def.args,
1471            &merged_env,
1472            self.event_tx.clone(),
1473            self.child_registry.clone(),
1474        )?;
1475        if let Err(err) = client.initialize(root, def.initialization_options.clone()) {
1476            wait_for_stderr_tail(&mut client);
1477            let stderr_tail = client.stderr_tail();
1478            let reason = if client.child_exited() || !stderr_tail.is_empty() {
1479                format_initialize_failure_reason(&def.binary, &stderr_tail, &err)
1480            } else {
1481                format!("server failed during initialize: {err}")
1482            };
1483            return Err(LspError::ServerNotReady(reason));
1484        }
1485        Ok(client)
1486    }
1487
1488    fn resolve_binary(&self, def: &ServerDef, config: &Config) -> Result<PathBuf, LspError> {
1489        if let Some(path) = self.binary_overrides.get(&def.kind) {
1490            if path.exists() {
1491                return Ok(path.clone());
1492            }
1493            return Err(LspError::NotFound(format!(
1494                "override binary for {:?} not found: {}",
1495                def.kind,
1496                path.display()
1497            )));
1498        }
1499
1500        if let Some(path) = env_binary_override(&def.kind) {
1501            if path.exists() {
1502                return Ok(path);
1503            }
1504            return Err(LspError::NotFound(format!(
1505                "environment override binary for {:?} not found: {}",
1506                def.kind,
1507                path.display()
1508            )));
1509        }
1510
1511        // Layered resolution:
1512        //   1. <project_root>/node_modules/.bin/<binary>
1513        //   2. config.lsp_paths_extra (plugin auto-install cache, etc.)
1514        //   3. PATH via `which`
1515        resolve_lsp_binary(
1516            &def.binary,
1517            config.project_root.as_deref(),
1518            &config.lsp_paths_extra,
1519        )
1520        .ok_or_else(|| {
1521            LspError::NotFound(format!(
1522                "language server binary '{}' not found in node_modules/.bin, lsp_paths_extra, or PATH",
1523                def.binary
1524            ))
1525        })
1526    }
1527
1528    fn server_key_for_file(&self, file_path: &Path, config: &Config) -> Option<ServerKey> {
1529        for def in servers_for_file(file_path, config) {
1530            let root = find_workspace_root(file_path, &def.root_markers)?;
1531            let key = ServerKey {
1532                kind: def.kind.clone(),
1533                root,
1534            };
1535            if self.clients.contains_key(&key) {
1536                return Some(key);
1537            }
1538        }
1539        None
1540    }
1541}
1542
1543impl Default for LspManager {
1544    fn default() -> Self {
1545        Self::new()
1546    }
1547}
1548
1549fn wait_for_stderr_tail(client: &mut LspClient) {
1550    for _ in 0..10 {
1551        if !client.stderr_tail().is_empty() {
1552            break;
1553        }
1554        std::thread::sleep(std::time::Duration::from_millis(10));
1555    }
1556}
1557
1558fn recoverable_pull_rejection(err: &LspError) -> bool {
1559    matches!(
1560        err,
1561        LspError::ServerError {
1562            code: -32601 | -32602,
1563            ..
1564        }
1565    )
1566}
1567
1568fn server_attempt_result_reason(result: &ServerAttemptResult) -> String {
1569    match result {
1570        ServerAttemptResult::SpawnFailed { binary, reason } => {
1571            format!("spawn_failed: {binary} ({reason})")
1572        }
1573        ServerAttemptResult::BinaryNotInstalled { binary } => {
1574            format!("binary_not_installed: {binary}")
1575        }
1576        ServerAttemptResult::NoRootMarker { looked_for } => {
1577            format!("no_root_marker (looked for: {})", looked_for.join(", "))
1578        }
1579        ServerAttemptResult::Ok { .. } => "ok".to_string(),
1580    }
1581}
1582
1583fn format_stderr_tail_for_reason(stderr_tail: &str) -> String {
1584    truncate_stderr_tail_for_reason(stderr_tail)
1585        .lines()
1586        .map(|line| format!("  {line}"))
1587        .collect::<Vec<_>>()
1588        .join("\n")
1589}
1590
1591fn truncate_stderr_tail_for_reason(stderr_tail: &str) -> String {
1592    if stderr_tail.len() <= STDERR_REASON_BYTES {
1593        return stderr_tail.to_string();
1594    }
1595
1596    let ellipsis = "...";
1597    let target_len = STDERR_REASON_BYTES.saturating_sub(ellipsis.len());
1598    let mut start = stderr_tail.len() - target_len;
1599    while start < stderr_tail.len() && !stderr_tail.is_char_boundary(start) {
1600        start += 1;
1601    }
1602    format!("{ellipsis}{}", &stderr_tail[start..])
1603}
1604
1605fn format_initialize_failure_reason(binary: &str, stderr_tail: &str, err: &LspError) -> String {
1606    let mut reason = format!("server crashed during initialize: {err}");
1607    if !stderr_tail.is_empty() {
1608        reason.push_str("; stderr (last 64 lines):\n");
1609        reason.push_str(&format_stderr_tail_for_reason(stderr_tail));
1610        reason.push_str("\n\n");
1611        reason.push_str(&failure_hint(binary, stderr_tail));
1612    }
1613    reason
1614}
1615
1616fn format_post_initialize_exit_reason(
1617    binary: &str,
1618    status: std::process::ExitStatus,
1619    stderr_tail: &str,
1620    err: &LspError,
1621) -> String {
1622    let code = status
1623        .code()
1624        .map(|c| c.to_string())
1625        .unwrap_or_else(|| "signal/unknown".to_string());
1626    let mut reason = format!("server exited after initialize (code {code}): {err}");
1627    if !stderr_tail.is_empty() {
1628        reason.push_str("; stderr (last 64 lines):\n");
1629        reason.push_str(&format_stderr_tail_for_reason(stderr_tail));
1630        reason.push_str("\n\n");
1631        reason.push_str(&failure_hint(binary, stderr_tail));
1632    }
1633    reason
1634}
1635
1636fn failure_hint(binary: &str, stderr_tail: &str) -> String {
1637    if stderr_tail.contains("MODULE_NOT_FOUND") || stderr_tail.contains("Cannot find module") {
1638        let package_manager = infer_package_manager(stderr_tail);
1639        format!(
1640            "Your package-manager shim resolves to a missing file. Try reinstalling: {package_manager} install -g {binary} --force. Common cause: hard-link breakage from fs migration or store prune."
1641        )
1642    } else {
1643        format!("Hint: see stderr above for '{binary}' failure details.")
1644    }
1645}
1646
/// Guesses which Node package manager installed a failing binary from
/// markers in its stderr output.
///
/// Matching is ASCII-case-insensitive. pnpm markers are checked before yarn
/// markers, and `"npm"` is the fallback when nothing matches.
fn infer_package_manager(stderr_tail: &str) -> &'static str {
    const PNPM_MARKERS: [&str; 3] = [".pnpm/", ".pnpm\\", "/pnpm/"];
    // The bare "yarn" entry matches any occurrence of the word, so it also
    // covers the three path-shaped markers listed before it.
    const YARN_MARKERS: [&str; 4] = [".yarn/", ".yarn\\", "/yarn/", "yarn"];

    let lower = stderr_tail.to_ascii_lowercase();
    let mentions_any = |markers: &[&str]| markers.iter().any(|marker| lower.contains(*marker));

    if mentions_any(&PNPM_MARKERS) {
        "pnpm"
    } else if mentions_any(&YARN_MARKERS) {
        "yarn"
    } else {
        "npm"
    }
}
1661
1662fn canonicalize_for_lsp(file_path: &Path) -> Result<PathBuf, LspError> {
1663    std::fs::canonicalize(file_path).map_err(LspError::from)
1664}
1665
/// Resolves `file_path` to the most canonical path available, even when the
/// file or some of its parent directories do not exist yet.
///
/// If the whole path canonicalizes, that result is returned. Otherwise the
/// function walks up to the nearest existing ancestor, canonicalizes it, and
/// re-appends the missing trailing components unchanged.
///
/// A relative path whose components are all missing is anchored at the
/// current directory, so the result is absolute whenever the current
/// directory can be canonicalized. Previously such a path came back
/// relative, which cannot be turned into a `file://` URI.
fn resolve_for_lsp_uri(file_path: &Path) -> PathBuf {
    if let Ok(path) = std::fs::canonicalize(file_path) {
        return path;
    }

    // Peel trailing components until an existing ancestor is reached.
    // `missing` holds the peeled names, innermost first.
    let mut existing = file_path.to_path_buf();
    let mut missing = Vec::new();
    while !existing.exists() {
        let Some(name) = existing.file_name() else {
            break;
        };
        missing.push(name.to_owned());
        let Some(parent) = existing.parent() else {
            break;
        };
        existing = parent.to_path_buf();
    }

    // The parent of a single-component relative path is the empty path,
    // which never exists and never canonicalizes. It stands for the current
    // directory, so canonicalize "." in its place.
    let anchor: &Path = if existing.as_os_str().is_empty() {
        Path::new(".")
    } else {
        &existing
    };
    let mut resolved = std::fs::canonicalize(anchor).unwrap_or(existing);
    for segment in missing.into_iter().rev() {
        resolved.push(segment);
    }
    resolved
}
1690
/// Maps a file extension (without the leading dot) to an LSP language
/// identifier string.
///
/// Matching is case-sensitive. Unrecognised extensions map to `"plaintext"`.
/// The TypeScript module extensions `mts` and `cts` are treated as
/// TypeScript, mirroring how `mjs` and `cjs` are treated as JavaScript.
fn language_id_for_extension(ext: &str) -> &'static str {
    match ext {
        "ts" | "mts" | "cts" => "typescript",
        "tsx" => "typescriptreact",
        "js" | "mjs" | "cjs" => "javascript",
        "jsx" => "javascriptreact",
        "py" | "pyi" => "python",
        "rs" => "rust",
        "go" => "go",
        "html" | "htm" => "html",
        _ => "plaintext",
    }
}
1704
/// Returns the canonical form of `path`, or `path` itself unchanged when it
/// cannot be canonicalized (for example because it does not exist).
fn normalize_lookup_path(path: &Path) -> PathBuf {
    match std::fs::canonicalize(path) {
        Ok(canonical) => canonical,
        Err(_) => path.to_path_buf(),
    }
}
1708
1709/// Classify an error returned by `spawn_server` into a structured
1710/// `ServerAttemptResult`. The two interesting cases for callers are:
1711/// - `BinaryNotInstalled` — the server's binary couldn't be resolved on PATH
1712///   or via override. The agent can be told "install bash-language-server".
1713/// - `SpawnFailed` — binary was found but spawning/initializing failed
1714///   (permissions, missing runtime, server crashed during initialize, etc.).
1715fn classify_spawn_error(binary: &str, err: &LspError) -> ServerAttemptResult {
1716    match err {
1717        // resolve_binary returns NotFound for both missing override paths and
1718        // missing PATH binaries. The "override missing" case is rare in
1719        // practice (only set in tests / env vars); we report all NotFound as
1720        // BinaryNotInstalled so the user sees an actionable install hint.
1721        LspError::NotFound(_) => ServerAttemptResult::BinaryNotInstalled {
1722            binary: binary.to_string(),
1723        },
1724        other => ServerAttemptResult::SpawnFailed {
1725            binary: binary.to_string(),
1726            reason: other.to_string(),
1727        },
1728    }
1729}
1730
1731fn env_binary_override(kind: &ServerKind) -> Option<PathBuf> {
1732    let id = kind.id_str();
1733    let suffix: String = id
1734        .chars()
1735        .map(|ch| {
1736            if ch.is_ascii_alphanumeric() {
1737                ch.to_ascii_uppercase()
1738            } else {
1739                '_'
1740            }
1741        })
1742        .collect();
1743    let key = format!("AFT_LSP_{suffix}_BINARY");
1744    std::env::var_os(key).map(PathBuf::from)
1745}