Skip to main content

cartog_lsp/
lib.rs

1//! LSP-based edge resolution for the cartog code graph.
2//!
3//! Resolves edges left unresolved by the heuristic resolver in [`cartog_db`],
4//! by querying real language servers (pyright, rust-analyzer, etc.) for
5//! `textDocument/definition` responses. Optional — gated behind the `lsp` feature.
6
7pub mod client;
8pub mod manager;
9pub mod servers;
10
11use std::collections::HashMap;
12use std::path::Path;
13
14use anyhow::Result;
15
16use cartog_core::detect_language;
17use cartog_db::{Database, UnresolvedEdge};
18
19use manager::LspManager;
20
21/// Resolve edges that heuristic resolution left unresolved, using LSP servers.
22///
23/// If `shared_manager` is provided, reuses existing LSP servers (warm start).
24/// Otherwise creates a temporary manager that is dropped after resolution.
25///
26/// Returns the number of edges resolved by LSP.
27pub fn lsp_resolve_edges(
28    db: &Database,
29    root: &Path,
30    shared_manager: Option<&mut LspManager>,
31) -> Result<u32> {
32    let unresolved = db.unresolved_edges()?;
33
34    if unresolved.is_empty() {
35        return Ok(0);
36    }
37
38    // Group by language (derived from file extension)
39    let mut by_language: HashMap<String, Vec<UnresolvedEdge>> = HashMap::new();
40    for edge in unresolved {
41        let path = Path::new(&edge.file_path);
42        if let Some(lang) = detect_language(path) {
43            by_language.entry(lang.to_string()).or_default().push(edge);
44        }
45    }
46
47    if by_language.is_empty() {
48        return Ok(0);
49    }
50
51    // Use shared manager if provided, otherwise create a temporary one
52    let mut owned_manager;
53    let manager: &mut LspManager = match shared_manager {
54        Some(m) => {
55            m.ensure_root(root);
56            m
57        }
58        None => {
59            owned_manager = LspManager::new(root);
60            &mut owned_manager
61        }
62    };
63
64    let mut resolved = 0u32;
65    let mut any_server_started = false;
66
67    for (language, edges) in &by_language {
68        match manager.start(language) {
69            Ok(()) => {
70                any_server_started = true;
71            }
72            Err(e) => {
73                tracing::info!("LSP: {language} — {e:#} ({} unresolved edges)", edges.len());
74                continue;
75            }
76        }
77
78        // Group edges by file for batched didOpen
79        let mut by_file: HashMap<&str, Vec<&UnresolvedEdge>> = HashMap::new();
80        for edge in edges {
81            by_file.entry(&edge.file_path).or_default().push(edge);
82        }
83
84        tracing::info!(
85            "LSP: resolving {} unresolved {language} edges across {} files...",
86            edges.len(),
87            by_file.len()
88        );
89
90        for (file_path, file_edges) in by_file {
91            let abs_path = root.join(file_path);
92            let content = match std::fs::read_to_string(&abs_path) {
93                Ok(c) => c,
94                Err(e) => {
95                    tracing::debug!("cannot read {file_path}: {e}");
96                    continue;
97                }
98            };
99
100            if let Err(e) = manager.open_file(language, file_path, &content) {
101                tracing::debug!("didOpen failed for {file_path}: {e:#}");
102                if !manager.is_alive(language) {
103                    tracing::warn!("{language} server died during didOpen");
104                    break;
105                }
106                continue;
107            }
108
109            let lines: Vec<&str> = content.lines().collect();
110
111            for edge in file_edges {
112                let col = match find_column_in_line(&lines, edge.line, &edge.target_name) {
113                    Some(c) => c,
114                    None => continue,
115                };
116
117                let lsp_line = edge.line.saturating_sub(1); // cartog 1-based → LSP 0-based
118
119                match manager.definition(language, file_path, lsp_line, col) {
120                    Ok(Some(loc)) => {
121                        match db.find_symbol_at_location(&loc.file_path, loc.line) {
122                            Ok(Some(symbol_id)) => {
123                                match db.update_edge_target(edge.edge_id, &symbol_id) {
124                                    Ok(()) => resolved += 1,
125                                    Err(e) => tracing::debug!(
126                                        "failed to update edge {}: {e:#}",
127                                        edge.edge_id
128                                    ),
129                                }
130                            }
131                            Ok(None) => {
132                                tracing::debug!(
133                                    "no cartog symbol at {}:{}",
134                                    loc.file_path,
135                                    loc.line
136                                );
137                            }
138                            Err(e) => return Err(e), // DB errors propagate
139                        }
140                    }
141                    Ok(None) => {} // LSP couldn't resolve either
142                    Err(e) => {
143                        tracing::debug!(
144                            "definition failed for {} at {file_path}:{}: {e:#}",
145                            edge.target_name,
146                            edge.line
147                        );
148                        if !manager.is_alive(language) {
149                            tracing::warn!("{language} server died, skipping remaining edges");
150                            break;
151                        }
152                    }
153                }
154            }
155
156            // Close the file to free server memory
157            let _ = manager.close_file(language, file_path);
158        }
159    }
160
161    if !any_server_started {
162        tracing::debug!("LSP: no servers found on PATH, skipping");
163    } else if resolved > 0 {
164        tracing::info!("LSP: resolved {resolved} additional edges");
165    } else {
166        tracing::info!("LSP: no additional edges resolved");
167    }
168
169    // manager.shutdown_all() called via Drop
170    Ok(resolved)
171}
172
/// Find the column (0-based UTF-16 offset) of `target_name` in the given source line.
///
/// `lines` is the file split into lines and `line_1based` is the 1-based line
/// number to search. Occurrences are tried left to right and the first one
/// that sits on a word boundary wins: the characters immediately before and
/// after it must not be identifier characters (alphanumeric or `_`, including
/// non-ASCII letters and digits), so matches inside longer identifiers are
/// skipped.
///
/// The returned column counts UTF-16 code units, which is what LSP positions
/// use by default.
///
/// Returns `None` when `line_1based` is 0 or past the end of `lines`, when
/// `target_name` is empty, or when no word-boundary occurrence exists.
fn find_column_in_line(lines: &[&str], line_1based: u32, target_name: &str) -> Option<u32> {
    // An empty name has no meaningful position. Bailing out here also gives us
    // the byte length of the name's first character, used to advance the scan.
    let first_char_len = target_name.chars().next()?.len_utf8();

    let idx = line_1based.checked_sub(1)? as usize;
    let line = *lines.get(idx)?;

    let is_ident_char = |c: char| c.is_alphanumeric() || c == '_';

    let mut start = 0;
    while let Some(offset) = line[start..].find(target_name) {
        let abs_offset = start + offset;
        let end_offset = abs_offset + target_name.len();

        // Both offsets are char boundaries (they delimit a match of a valid
        // `&str`), so the neighbouring characters can be inspected as whole
        // `char`s rather than raw bytes.
        let before_ok = line[..abs_offset]
            .chars()
            .next_back()
            .map_or(true, |c| !is_ident_char(c));
        let after_ok = line[end_offset..]
            .chars()
            .next()
            .map_or(true, |c| !is_ident_char(c));

        if before_ok && after_ok {
            let utf16_col = line[..abs_offset].encode_utf16().count();
            return u32::try_from(utf16_col).ok();
        }

        // Step past the first character of the rejected match. Advancing by a
        // single byte could land inside a multi-byte character and make the
        // slice above panic.
        start = abs_offset + first_char_len;
    }
    None
}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `find_column_in_line` against a document consisting of the single
    /// line `text`, looking for `name` on 1-based line `line_no`.
    fn column_of(text: &str, line_no: u32, name: &str) -> Option<u32> {
        find_column_in_line(&[text], line_no, name)
    }

    #[test]
    fn test_find_column_basic() {
        let found = column_of("    result = validate_token(tok)", 1, "validate_token");
        assert_eq!(found, Some(13));
    }

    #[test]
    fn test_find_column_multiple_occurrences_takes_first() {
        let found = column_of("foo(foo)", 1, "foo");
        assert_eq!(found, Some(0));
    }

    #[test]
    fn test_find_column_qualified_name() {
        let found = column_of("self.validate_token()", 1, "validate_token");
        assert_eq!(found, Some(5));
    }

    #[test]
    fn test_find_column_not_found() {
        let found = column_of("something_else()", 1, "validate_token");
        assert_eq!(found, None);
    }

    #[test]
    fn test_find_column_line_out_of_range() {
        // Line 5 of a one-line document does not exist.
        let found = column_of("one line", 5, "one");
        assert_eq!(found, None);
    }

    #[test]
    fn test_find_column_zero_line() {
        // Line numbers are 1-based, so 0 is never valid.
        let found = column_of("one line", 0, "one");
        assert_eq!(found, None);
    }

    #[test]
    fn test_find_column_word_boundary_skips_substring() {
        // The "id" embedded in "validate_id" is not a whole word; only the
        // standalone argument "id" qualifies.
        let found = column_of("validate_id(id)", 1, "id");
        assert_eq!(found, Some(12));
    }

    #[test]
    fn test_find_column_word_boundary_at_start() {
        let found = column_of("id = 5", 1, "id");
        assert_eq!(found, Some(0));
    }

    #[test]
    fn test_find_column_word_boundary_no_standalone() {
        // The only "id" is inside "valid", so nothing sits on a word boundary.
        let found = column_of("valid()", 1, "id");
        assert_eq!(found, None);
    }
}
261}