Skip to main content

lang_check/
lsp.rs

1//! LSP JSON-RPC backend for language-check-server.
2//!
3//! Reuses the existing orchestrator, prose extraction, config, dictionary, and
4//! ignore-store logic.  Activated with `language-check-server --lsp`.
5
6#![allow(clippy::cast_possible_truncation)]
7
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
12use dashmap::DashMap;
13use tokio::sync::Mutex;
14use tower_lsp::jsonrpc::Result;
15use tower_lsp::lsp_types::{
16    CodeAction, CodeActionKind, CodeActionOrCommand, CodeActionParams,
17    CodeActionProviderCapability, CodeActionResponse, Command, Diagnostic, DiagnosticSeverity,
18    DidChangeConfigurationParams, DidChangeTextDocumentParams, DidCloseTextDocumentParams,
19    DidOpenTextDocumentParams, DidSaveTextDocumentParams, ExecuteCommandOptions,
20    ExecuteCommandParams, InitializeParams, InitializeResult, InitializedParams, NumberOrString,
21    Position, Range, ServerCapabilities, ServerInfo, TextDocumentSyncCapability,
22    TextDocumentSyncKind, TextEdit, Url, WorkspaceEdit,
23};
24use tower_lsp::{Client, LanguageServer, LspService, Server};
25use tracing::{debug, info, warn};
26
27use crate::checker;
28use crate::config::Config;
29use crate::dictionary::Dictionary;
30use crate::hashing::{DiagnosticFingerprint, IgnoreStore};
31use crate::orchestrator::Orchestrator;
32use crate::prose;
33use crate::sls::SchemaRegistry;
34
35// ── LSP settings ────────────────────────────────────────────────────────────
36
/// Settings received via `workspace/didChangeConfiguration`.
///
/// This is the top-level shape deserialized from the notification payload.
/// Because of `#[serde(default)]`, a missing section falls back to
/// `Default::default()` instead of failing deserialization.
#[derive(Debug, Default, serde::Deserialize)]
#[serde(default)]
struct LspSettings {
    /// The language-check section; accepted under the key `lang_check` or
    /// its alias `langCheck`.
    #[serde(alias = "langCheck")]
    lang_check: LangCheckSettings,
}
44
/// The language-check section of the LSP settings.
///
/// Both groups are optional; a `None` group is skipped entirely when the
/// settings are applied on top of the workspace config.
#[derive(Debug, Default, serde::Deserialize)]
#[serde(default)]
struct LangCheckSettings {
    /// Engine toggles and engine-specific options.
    engines: Option<EngineSettings>,
    /// Performance-related overrides.
    performance: Option<PerformanceSettings>,
}
51
/// Per-engine overrides sent by the client.
///
/// Every field is optional: `None` leaves the corresponding value of the
/// current config untouched, `Some(v)` overwrites it.
#[derive(Debug, Default, serde::Deserialize)]
#[serde(default)]
struct EngineSettings {
    /// Overrides `config.engines.harper.enabled`.
    harper: Option<bool>,
    /// Overrides `config.engines.languagetool.enabled`.
    languagetool: Option<bool>,
    /// Overrides `config.engines.languagetool.url`.
    languagetool_url: Option<String>,
    /// Overrides `config.engines.vale.enabled`.
    vale: Option<bool>,
    /// Overrides `config.engines.proselint.enabled`.
    proselint: Option<bool>,
    /// Overrides `config.engines.spell_language`.
    spell_language: Option<String>,
}
62
/// Performance overrides sent by the client.
///
/// As with the engine settings, `None` means "keep the current value".
#[derive(Debug, Default, serde::Deserialize)]
#[serde(default)]
struct PerformanceSettings {
    /// Overrides `config.performance.high_performance_mode`.
    high_performance_mode: Option<bool>,
    /// Overrides `config.performance.debounce_ms`.
    debounce_ms: Option<u64>,
    /// Overrides `config.performance.max_file_size`.
    max_file_size: Option<usize>,
}
70
71// ── Document store ──────────────────────────────────────────────────────────
72
/// In-memory text of open documents (keyed by URI string).
/// Value is `(text, language_id)`.
///
/// Entries are inserted on `didOpen` / `didChange` and removed on `didClose`.
type DocumentStore = DashMap<String, (String, String)>;
76
77// ── Backend ─────────────────────────────────────────────────────────────────
78
/// State shared by all LSP request and notification handlers.
///
/// Each mutable component sits behind its own async `Mutex`, so handlers lock
/// only what they need.
pub struct Backend {
    /// Handle used to publish diagnostics back to the editor.
    client: Client,
    /// Runs the checks on extracted prose; reconfigured whenever the config changes.
    orchestrator: Arc<Mutex<Orchestrator>>,
    /// Effective configuration: workspace config with LSP settings applied on top.
    config: Arc<Mutex<Config>>,
    /// Word list consulted to suppress `spelling.*` diagnostics.
    dictionary: Arc<Mutex<Dictionary>>,
    /// Fingerprints of diagnostics that should not be reported.
    ignore_store: Arc<Mutex<IgnoreStore>>,
    /// Schema registry handed to prose extraction.
    schema_registry: Arc<Mutex<SchemaRegistry>>,
    /// Text and language id of every open document, keyed by URI string.
    documents: DocumentStore,
    /// Workspace root recorded by `init_workspace`; `None` until then.
    workspace_root: Mutex<Option<PathBuf>>,
}
89
impl Backend {
    /// Build a backend around `client` with default configuration, freshly
    /// constructed dictionary / ignore store / schema registry, no open
    /// documents and no workspace root.
    fn new(client: Client) -> Self {
        Self {
            client,
            orchestrator: Arc::new(Mutex::new(Orchestrator::new(Config::default()))),
            config: Arc::new(Mutex::new(Config::default())),
            dictionary: Arc::new(Mutex::new(Dictionary::new())),
            ignore_store: Arc::new(Mutex::new(IgnoreStore::new())),
            schema_registry: Arc::new(Mutex::new(SchemaRegistry::new())),
            documents: DashMap::new(),
            workspace_root: Mutex::new(None),
        }
    }

    /// Initialise all state from the workspace root (config, dictionary, …).
    ///
    /// Every step is best-effort: a config that fails to load is replaced by
    /// the default, and a dictionary / ignore store / schema registry that
    /// fails to load leaves the previously held value in place.
    async fn init_workspace(&self, root: &Path) {
        // A load failure silently falls back to `Config::default()`.
        let config = Config::load(root).unwrap_or_default();
        info!(
            harper = config.engines.harper.enabled,
            languagetool = config.engines.languagetool.enabled,
            vale = config.engines.vale.enabled,
            proselint = config.engines.proselint.enabled,
            "LSP: engines configured"
        );

        // Each guard is a temporary dropped at the end of its statement, so
        // the two locks are never held at the same time.
        self.orchestrator.lock().await.update_config(config.clone());
        *self.config.lock().await = config.clone();

        match Dictionary::load(root) {
            Ok(mut dict) => {
                if config.dictionaries.bundled {
                    dict.load_bundled();
                }
                // A wordlist that fails to load is logged and skipped; the
                // remaining paths are still processed.
                for p in &config.dictionaries.paths {
                    if let Err(e) = dict.load_wordlist_file(Path::new(p), root) {
                        warn!(path = p, "Could not load wordlist: {e}");
                    }
                }
                *self.dictionary.lock().await = dict;
            }
            Err(e) => warn!("Could not load dictionary: {e}"),
        }

        // Load failures for these two are ignored without logging.
        if let Ok(store) = IgnoreStore::load(root) {
            *self.ignore_store.lock().await = store;
        }
        if let Ok(reg) = SchemaRegistry::from_workspace(root) {
            *self.schema_registry.lock().await = reg;
        }

        *self.workspace_root.lock().await = Some(root.to_path_buf());
    }

    /// Apply LSP settings on top of the workspace config.
    ///
    /// Only fields that are `Some` overwrite the stored config. The updated
    /// config is then pushed to the orchestrator.
    async fn apply_settings(&self, settings: &LangCheckSettings) {
        let mut config = self.config.lock().await;
        if let Some(ref eng) = settings.engines {
            if let Some(v) = eng.harper {
                config.engines.harper.enabled = v;
            }
            if let Some(v) = eng.languagetool {
                config.engines.languagetool.enabled = v;
            }
            if let Some(ref v) = eng.languagetool_url {
                config.engines.languagetool.url.clone_from(v);
            }
            if let Some(v) = eng.vale {
                config.engines.vale.enabled = v;
            }
            if let Some(v) = eng.proselint {
                config.engines.proselint.enabled = v;
            }
            if let Some(ref v) = eng.spell_language {
                config.engines.spell_language.clone_from(v);
            }
        }
        if let Some(ref perf) = settings.performance {
            if let Some(v) = perf.high_performance_mode {
                config.performance.high_performance_mode = v;
            }
            if let Some(v) = perf.debounce_ms {
                config.performance.debounce_ms = v;
            }
            if let Some(v) = perf.max_file_size {
                config.performance.max_file_size = v;
            }
        }
        // Snapshot the config and release its lock before taking the
        // orchestrator lock, so the two are never held together.
        let updated = config.clone();
        drop(config);
        self.orchestrator.lock().await.update_config(updated);
        info!("LSP: config updated via didChangeConfiguration");
    }

    /// Re-diagnose all currently open documents.
    async fn rediagnose_all(&self) {
        // Copy everything out first so no `DashMap` guard is held across the
        // `.await` points inside `diagnose`.
        let entries: Vec<(String, String, String)> = self
            .documents
            .iter()
            .map(|r| {
                let (text, lang_id) = r.value();
                (r.key().clone(), text.clone(), lang_id.clone())
            })
            .collect();
        for (uri_str, text, lang_id) in entries {
            // Keys that no longer parse as a URL are skipped.
            if let Ok(uri) = Url::parse(&uri_str) {
                self.diagnose(&uri, &text, &lang_id).await;
            }
        }
    }

    /// Run diagnostics on a document and publish them.
    ///
    /// Steps: extract prose ranges from `text`, check each range through the
    /// orchestrator, drop diagnostics that overlap an exclusion, are ignored
    /// or are spelling hits on dictionary words, then publish the remainder
    /// for `uri`. If extraction fails, the error is logged and nothing is
    /// published, so previously published diagnostics stay as they were.
    async fn diagnose(&self, uri: &Url, text: &str, lang_id: &str) {
        let canonical = crate::languages::resolve_language_id(lang_id);

        // Scoped so both guards are released before any engine runs.
        let extraction = {
            let schema_reg = self.schema_registry.lock().await;
            let cfg = self.config.lock().await;
            let latex_extras = prose::latex::LatexExtras {
                skip_envs: &cfg.languages.latex.skip_environments,
                skip_commands: &cfg.languages.latex.skip_commands,
            };
            let result = prose::extract_with_fallback(
                text,
                canonical,
                None,
                Some(&schema_reg),
                &latex_extras,
            );
            drop(cfg);
            drop(schema_reg);
            result
        };

        let ranges = match extraction {
            Ok(r) => r,
            Err(e) => {
                warn!(uri = %uri, "Extraction error: {e}");
                return;
            }
        };

        let mut all_diagnostics: Vec<Diagnostic> = Vec::new();

        for range in &ranges {
            let prose_text = range.extract_text(text);

            // The orchestrator lock is held only for the duration of this check.
            // NOTE(review): the raw `lang_id` is passed here while extraction
            // used `canonical` — confirm this is intended.
            let check_result = {
                let mut orch = self.orchestrator.lock().await;
                orch.check(&prose_text, lang_id).await
            };

            // NOTE(review): an `Err` from the check is dropped silently; the
            // range simply contributes no diagnostics.
            if let Ok(mut diags) = check_result {
                // The exclusion test runs before the offsets are shifted below.
                diags.retain(|d| !range.overlaps_exclusion(d.start_byte, d.end_byte));

                // Shift offsets by the start of the prose range; presumably
                // they are relative to `prose_text` up to this point.
                for d in &mut diags {
                    d.start_byte += range.start_byte as u32;
                    d.end_byte += range.start_byte as u32;
                }

                // Both guards live until the end of this `if let` body; there
                // is no `.await` after they are taken.
                let ignore = self.ignore_store.lock().await;
                let dict = self.dictionary.lock().await;
                diags.retain(|d| {
                    // The fingerprint is built from the full document text
                    // and the shifted offsets.
                    let fp = DiagnosticFingerprint::new(
                        &d.message,
                        text,
                        d.start_byte as usize,
                        d.end_byte as usize,
                    );
                    if ignore.is_ignored(&fp) {
                        return false;
                    }
                    // Spelling diagnostics are suppressed when the flagged
                    // word is in the dictionary.
                    if d.unified_id.starts_with("spelling.") {
                        let word = safe_slice(text, d.start_byte as usize, d.end_byte as usize);
                        if dict.contains(word) {
                            return false;
                        }
                    }
                    true
                });

                all_diagnostics.extend(diags.iter().map(|d| to_lsp_diagnostic(text, d)));
            }
        }

        // Always publish, even when the list is empty, which clears earlier results.
        self.client
            .publish_diagnostics(uri.clone(), all_diagnostics, None)
            .await;
    }
}
279
280// ── LanguageServer impl ─────────────────────────────────────────────────────
281
282#[tower_lsp::async_trait]
283impl LanguageServer for Backend {
284    async fn initialize(&self, params: InitializeParams) -> Result<InitializeResult> {
285        if let Some(root_uri) = params.root_uri
286            && let Ok(path) = root_uri.to_file_path()
287        {
288            self.init_workspace(&path).await;
289        }
290
291        Ok(InitializeResult {
292            capabilities: ServerCapabilities {
293                text_document_sync: Some(TextDocumentSyncCapability::Kind(
294                    TextDocumentSyncKind::FULL,
295                )),
296                code_action_provider: Some(CodeActionProviderCapability::Simple(true)),
297                execute_command_provider: Some(ExecuteCommandOptions {
298                    commands: vec![
299                        "langCheck.addDictionaryWord".into(),
300                        "langCheck.ignoreDiagnostic".into(),
301                    ],
302                    ..Default::default()
303                }),
304                ..Default::default()
305            },
306            server_info: Some(ServerInfo {
307                name: "language-check-server".into(),
308                version: Some(env!("CARGO_PKG_VERSION").into()),
309            }),
310        })
311    }
312
313    async fn initialized(&self, _: InitializedParams) {
314        info!("LSP client initialized");
315    }
316
317    async fn shutdown(&self) -> Result<()> {
318        Ok(())
319    }
320
321    async fn did_open(&self, params: DidOpenTextDocumentParams) {
322        let uri = params.text_document.uri;
323        let text = params.text_document.text;
324        let lang_id = params.text_document.language_id.clone();
325        self.documents
326            .insert(uri.to_string(), (text.clone(), lang_id.clone()));
327        self.diagnose(&uri, &text, &lang_id).await;
328    }
329
330    async fn did_change(&self, params: DidChangeTextDocumentParams) {
331        let uri = params.text_document.uri;
332        if let Some(change) = params.content_changes.into_iter().last() {
333            let lang_id = guess_lang_id(&uri);
334            self.documents
335                .insert(uri.to_string(), (change.text.clone(), lang_id.clone()));
336            self.diagnose(&uri, &change.text, &lang_id).await;
337        }
338    }
339
340    async fn did_save(&self, params: DidSaveTextDocumentParams) {
341        let uri = params.text_document.uri;
342        let key = uri.to_string();
343        let entry = self.documents.get(&key).map(|r| r.value().clone());
344        if let Some((text, lang_id)) = entry {
345            self.diagnose(&uri, &text, &lang_id).await;
346        }
347    }
348
349    async fn did_close(&self, params: DidCloseTextDocumentParams) {
350        self.documents.remove(&params.text_document.uri.to_string());
351    }
352
353    async fn did_change_configuration(&self, params: DidChangeConfigurationParams) {
354        let settings: LspSettings = serde_json::from_value(params.settings).unwrap_or_default();
355        self.apply_settings(&settings.lang_check).await;
356        self.rediagnose_all().await;
357    }
358
359    async fn code_action(&self, params: CodeActionParams) -> Result<Option<CodeActionResponse>> {
360        let uri = &params.text_document.uri;
361        let mut actions: Vec<CodeActionOrCommand> = Vec::new();
362
363        for diag in &params.context.diagnostics {
364            if diag.source.as_deref() != Some("language-check") {
365                continue;
366            }
367
368            let Some(data) = &diag.data else { continue };
369            let Some(obj) = data.as_object() else {
370                continue;
371            };
372
373            // Apply suggestion actions
374            if let Some(suggestions) = obj.get("suggestions").and_then(|v| v.as_array()) {
375                for s in suggestions {
376                    if let Some(text) = s.as_str() {
377                        let edit = TextEdit {
378                            range: diag.range,
379                            new_text: text.to_string(),
380                        };
381                        let mut changes = HashMap::new();
382                        changes.insert(uri.clone(), vec![edit]);
383                        actions.push(CodeActionOrCommand::CodeAction(CodeAction {
384                            title: format!("Replace with \"{text}\""),
385                            kind: Some(CodeActionKind::QUICKFIX),
386                            diagnostics: Some(vec![diag.clone()]),
387                            edit: Some(WorkspaceEdit {
388                                changes: Some(changes),
389                                ..Default::default()
390                            }),
391                            ..Default::default()
392                        }));
393                    }
394                }
395            }
396
397            // Add to dictionary (spelling rules)
398            if let Some(rule_id) = obj.get("rule_id").and_then(|v| v.as_str())
399                && (rule_id.contains("TYPO")
400                    || rule_id.contains("MORFOLOGIK")
401                    || rule_id.contains("spelling"))
402                && let Some(doc) = self.documents.get(&uri.to_string())
403            {
404                let word = extract_word_at_range(&doc.value().0, diag.range).unwrap_or_default();
405                if !word.is_empty() {
406                    actions.push(CodeActionOrCommand::CodeAction(CodeAction {
407                        title: format!("Add \"{word}\" to dictionary"),
408                        kind: Some(CodeActionKind::QUICKFIX),
409                        diagnostics: Some(vec![diag.clone()]),
410                        command: Some(Command {
411                            title: "Add to dictionary".into(),
412                            command: "langCheck.addDictionaryWord".into(),
413                            arguments: Some(vec![serde_json::json!(word)]),
414                        }),
415                        ..Default::default()
416                    }));
417                }
418            }
419        }
420
421        if actions.is_empty() {
422            Ok(None)
423        } else {
424            Ok(Some(actions))
425        }
426    }
427
428    async fn execute_command(
429        &self,
430        params: ExecuteCommandParams,
431    ) -> Result<Option<serde_json::Value>> {
432        match params.command.as_str() {
433            "langCheck.addDictionaryWord" => {
434                if let Some(word_val) = params.arguments.first()
435                    && let Some(word) = word_val.as_str()
436                {
437                    debug!(word, "Adding to dictionary");
438                    let mut dict = self.dictionary.lock().await;
439                    if let Err(e) = dict.add_word(word) {
440                        warn!(word, "Failed to add word: {e}");
441                    }
442                }
443            }
444            "langCheck.ignoreDiagnostic" => {
445                if let Some(args) = params.arguments.first()
446                    && let Some(obj) = args.as_object()
447                {
448                    let message = obj
449                        .get("message")
450                        .and_then(|v| v.as_str())
451                        .unwrap_or_default();
452                    let context = obj
453                        .get("context")
454                        .and_then(|v| v.as_str())
455                        .unwrap_or_default();
456                    let start = obj
457                        .get("start_byte")
458                        .and_then(serde_json::Value::as_u64)
459                        .map_or(0, |v| v as usize);
460                    let end = obj
461                        .get("end_byte")
462                        .and_then(serde_json::Value::as_u64)
463                        .map_or(0, |v| v as usize);
464                    let fp = DiagnosticFingerprint::new(message, context, start, end);
465                    self.ignore_store.lock().await.ignore(&fp);
466                }
467            }
468            _ => {}
469        }
470        Ok(None)
471    }
472}
473
474// ── Helpers ─────────────────────────────────────────────────────────────────
475
476/// Convert an internal Diagnostic to an LSP Diagnostic.
477fn to_lsp_diagnostic(text: &str, d: &checker::Diagnostic) -> Diagnostic {
478    let range = byte_range_to_lsp(text, d.start_byte as usize, d.end_byte as usize);
479    let severity = match d.severity {
480        3 => Some(DiagnosticSeverity::ERROR),
481        2 => Some(DiagnosticSeverity::WARNING),
482        4 => Some(DiagnosticSeverity::HINT),
483        // SEVERITY_UNSPECIFIED (0) and SEVERITY_INFORMATION (1)
484        _ => Some(DiagnosticSeverity::INFORMATION),
485    };
486
487    let data = serde_json::json!({
488        "suggestions": d.suggestions,
489        "rule_id": d.rule_id,
490        "unified_id": d.unified_id,
491    });
492
493    Diagnostic {
494        range,
495        severity,
496        source: Some("language-check".into()),
497        code: Some(NumberOrString::String(d.unified_id.clone())),
498        message: d.message.clone(),
499        data: Some(data),
500        ..Default::default()
501    }
502}
503
504/// Convert byte offsets to an LSP Range (line/character).
505fn byte_range_to_lsp(text: &str, start: usize, end: usize) -> Range {
506    Range {
507        start: byte_to_position(text, start),
508        end: byte_to_position(text, end),
509    }
510}
511
512fn byte_to_position(text: &str, byte_offset: usize) -> Position {
513    let offset = byte_offset.min(text.len());
514    let prefix = &text[..offset];
515    let line = prefix.matches('\n').count() as u32;
516    let last_newline = prefix.rfind('\n').map_or(0, |i| i + 1);
517    let character = prefix[last_newline..].chars().count() as u32;
518    Position { line, character }
519}
520
521/// Guess a language ID from a file URI extension.
522fn guess_lang_id(uri: &Url) -> String {
523    let path = uri.path();
524    let ext = path.rsplit('.').next().unwrap_or("");
525    match ext {
526        "html" | "htm" | "xhtml" => "html",
527        "tex" | "latex" | "ltx" => "latex",
528        "typ" => "typst",
529        "rst" => "rst",
530        "org" => "org",
531        "bib" => "bibtex",
532        "Rnw" | "rnw" | "Snw" | "snw" => "sweave",
533        "tree" => "forester",
534        // md, mdx, markdown, and everything else defaults to markdown
535        _ => "markdown",
536    }
537    .to_string()
538}
539
/// Slice `s` by byte offsets without ever panicking.
///
/// Both offsets are clamped to `s.len()`. A `start` inside a multi-byte
/// character is moved back to the start of that character, and an `end`
/// inside one is moved forward past it, so the result always covers whole
/// characters. If the adjusted range is inverted (`start > end`), an empty
/// string is returned.
fn safe_slice(s: &str, start: usize, end: usize) -> &str {
    let mut lo = start.min(s.len());
    // Index 0 is always a boundary, so this loop terminates.
    while !s.is_char_boundary(lo) {
        lo -= 1;
    }
    let mut hi = end.min(s.len());
    // `s.len()` is always a boundary, so this loop terminates.
    while !s.is_char_boundary(hi) {
        hi += 1;
    }
    // `get` yields `None` for an inverted range instead of panicking.
    s.get(lo..hi).unwrap_or("")
}
551
552/// Extract the word at a given LSP range from a document.
553fn extract_word_at_range(text: &str, range: Range) -> Option<String> {
554    let start = position_to_byte(text, range.start)?;
555    let end = position_to_byte(text, range.end)?;
556    Some(safe_slice(text, start, end).to_string())
557}
558
559fn position_to_byte(text: &str, pos: Position) -> Option<usize> {
560    let mut line = 0u32;
561    let mut byte = 0usize;
562    for (i, ch) in text.char_indices() {
563        if line == pos.line {
564            let col_offset = text[byte..].char_indices().nth(pos.character as usize);
565            return Some(col_offset.map_or(text.len(), |(off, _)| byte + off));
566        }
567        if ch == '\n' {
568            line += 1;
569            byte = i + 1;
570        }
571    }
572    if line == pos.line {
573        let col_offset = text[byte..].char_indices().nth(pos.character as usize);
574        return Some(col_offset.map_or(text.len(), |(off, _)| byte + off));
575    }
576    None
577}
578
579// ── Entry point ─────────────────────────────────────────────────────────────
580
581/// Run the LSP server on stdin/stdout.
582pub async fn run_lsp() {
583    let stdin = tokio::io::stdin();
584    let stdout = tokio::io::stdout();
585
586    let (service, socket) = LspService::new(Backend::new);
587    Server::new(stdin, stdout, socket).serve(service).await;
588}