1#![allow(clippy::cast_possible_truncation)]
7
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
12use dashmap::DashMap;
13use tokio::sync::Mutex;
14use tower_lsp::jsonrpc::Result;
15use tower_lsp::lsp_types::{
16 CodeAction, CodeActionKind, CodeActionOrCommand, CodeActionParams,
17 CodeActionProviderCapability, CodeActionResponse, Command, Diagnostic, DiagnosticSeverity,
18 DidChangeConfigurationParams, DidChangeTextDocumentParams, DidCloseTextDocumentParams,
19 DidOpenTextDocumentParams, DidSaveTextDocumentParams, ExecuteCommandOptions,
20 ExecuteCommandParams, InitializeParams, InitializeResult, InitializedParams, NumberOrString,
21 Position, Range, ServerCapabilities, ServerInfo, TextDocumentSyncCapability,
22 TextDocumentSyncKind, TextEdit, Url, WorkspaceEdit,
23};
24use tower_lsp::{Client, LanguageServer, LspService, Server};
25use tracing::{debug, info, warn};
26
27use crate::checker;
28use crate::config::Config;
29use crate::dictionary::Dictionary;
30use crate::hashing::{DiagnosticFingerprint, IgnoreStore};
31use crate::orchestrator::Orchestrator;
32use crate::prose;
33use crate::sls::SchemaRegistry;
34
35#[derive(Debug, Default, serde::Deserialize)]
39#[serde(default)]
40struct LspSettings {
41 #[serde(alias = "langCheck")]
42 lang_check: LangCheckSettings,
43}
44
45#[derive(Debug, Default, serde::Deserialize)]
46#[serde(default)]
47struct LangCheckSettings {
48 engines: Option<EngineSettings>,
49 performance: Option<PerformanceSettings>,
50}
51
52#[derive(Debug, Default, serde::Deserialize)]
53#[serde(default)]
54struct EngineSettings {
55 harper: Option<bool>,
56 languagetool: Option<bool>,
57 languagetool_url: Option<String>,
58 english_engine: Option<String>,
59}
60
61#[derive(Debug, Default, serde::Deserialize)]
62#[serde(default)]
63struct PerformanceSettings {
64 high_performance_mode: Option<bool>,
65 debounce_ms: Option<u64>,
66 max_file_size: Option<usize>,
67}
68
69type DocumentStore = DashMap<String, (String, String)>;
74
75pub struct Backend {
78 client: Client,
79 orchestrator: Arc<Mutex<Orchestrator>>,
80 config: Arc<Mutex<Config>>,
81 dictionary: Arc<Mutex<Dictionary>>,
82 ignore_store: Arc<Mutex<IgnoreStore>>,
83 schema_registry: Arc<Mutex<SchemaRegistry>>,
84 documents: DocumentStore,
85 workspace_root: Mutex<Option<PathBuf>>,
86}
87
88impl Backend {
89 fn new(client: Client) -> Self {
90 Self {
91 client,
92 orchestrator: Arc::new(Mutex::new(Orchestrator::new(Config::default()))),
93 config: Arc::new(Mutex::new(Config::default())),
94 dictionary: Arc::new(Mutex::new(Dictionary::new())),
95 ignore_store: Arc::new(Mutex::new(IgnoreStore::new())),
96 schema_registry: Arc::new(Mutex::new(SchemaRegistry::new())),
97 documents: DashMap::new(),
98 workspace_root: Mutex::new(None),
99 }
100 }
101
102 async fn init_workspace(&self, root: &Path) {
104 let config = Config::load(root).unwrap_or_default();
105 info!(
106 harper = config.engines.harper,
107 languagetool = config.engines.languagetool,
108 "LSP: engines configured"
109 );
110
111 self.orchestrator.lock().await.update_config(config.clone());
112 *self.config.lock().await = config.clone();
113
114 match Dictionary::load(root) {
115 Ok(mut dict) => {
116 if config.dictionaries.bundled {
117 dict.load_bundled();
118 }
119 for p in &config.dictionaries.paths {
120 if let Err(e) = dict.load_wordlist_file(Path::new(p), root) {
121 warn!(path = p, "Could not load wordlist: {e}");
122 }
123 }
124 *self.dictionary.lock().await = dict;
125 }
126 Err(e) => warn!("Could not load dictionary: {e}"),
127 }
128
129 if let Ok(store) = IgnoreStore::load(root) {
130 *self.ignore_store.lock().await = store;
131 }
132 if let Ok(reg) = SchemaRegistry::from_workspace(root) {
133 *self.schema_registry.lock().await = reg;
134 }
135
136 *self.workspace_root.lock().await = Some(root.to_path_buf());
137 }
138
139 async fn apply_settings(&self, settings: &LangCheckSettings) {
141 let mut config = self.config.lock().await;
142 if let Some(ref eng) = settings.engines {
143 if let Some(v) = eng.harper {
144 config.engines.harper = v;
145 }
146 if let Some(v) = eng.languagetool {
147 config.engines.languagetool = v;
148 }
149 if let Some(ref v) = eng.languagetool_url {
150 config.engines.languagetool_url.clone_from(v);
151 }
152 if let Some(ref v) = eng.english_engine {
153 config.engines.english_engine.clone_from(v);
154 }
155 }
156 if let Some(ref perf) = settings.performance {
157 if let Some(v) = perf.high_performance_mode {
158 config.performance.high_performance_mode = v;
159 }
160 if let Some(v) = perf.debounce_ms {
161 config.performance.debounce_ms = v;
162 }
163 if let Some(v) = perf.max_file_size {
164 config.performance.max_file_size = v;
165 }
166 }
167 let updated = config.clone();
168 drop(config);
169 self.orchestrator.lock().await.update_config(updated);
170 info!("LSP: config updated via didChangeConfiguration");
171 }
172
173 async fn rediagnose_all(&self) {
175 let entries: Vec<(String, String, String)> = self
176 .documents
177 .iter()
178 .map(|r| {
179 let (text, lang_id) = r.value();
180 (r.key().clone(), text.clone(), lang_id.clone())
181 })
182 .collect();
183 for (uri_str, text, lang_id) in entries {
184 if let Ok(uri) = Url::parse(&uri_str) {
185 self.diagnose(&uri, &text, &lang_id).await;
186 }
187 }
188 }
189
190 async fn diagnose(&self, uri: &Url, text: &str, lang_id: &str) {
192 let canonical = crate::languages::resolve_language_id(lang_id);
193
194 let extraction = {
195 let schema_reg = self.schema_registry.lock().await;
196 let cfg = self.config.lock().await;
197 let latex_extras = prose::latex::LatexExtras {
198 skip_envs: &cfg.languages.latex.skip_environments,
199 skip_commands: &cfg.languages.latex.skip_commands,
200 };
201 let result = prose::extract_with_fallback(
202 text,
203 canonical,
204 None,
205 Some(&schema_reg),
206 &latex_extras,
207 );
208 drop(cfg);
209 drop(schema_reg);
210 result
211 };
212
213 let ranges = match extraction {
214 Ok(r) => r,
215 Err(e) => {
216 warn!(uri = %uri, "Extraction error: {e}");
217 return;
218 }
219 };
220
221 let mut all_diagnostics: Vec<Diagnostic> = Vec::new();
222
223 for range in &ranges {
224 let prose_text = range.extract_text(text);
225
226 let check_result = {
227 let mut orch = self.orchestrator.lock().await;
228 orch.check(&prose_text, lang_id).await
229 };
230
231 if let Ok(mut diags) = check_result {
232 diags.retain(|d| !range.overlaps_exclusion(d.start_byte, d.end_byte));
233
234 for d in &mut diags {
235 d.start_byte += range.start_byte as u32;
236 d.end_byte += range.start_byte as u32;
237 }
238
239 let ignore = self.ignore_store.lock().await;
240 let dict = self.dictionary.lock().await;
241 diags.retain(|d| {
242 let fp = DiagnosticFingerprint::new(
243 &d.message,
244 text,
245 d.start_byte as usize,
246 d.end_byte as usize,
247 );
248 if ignore.is_ignored(&fp) {
249 return false;
250 }
251 if d.unified_id.starts_with("spelling.") {
252 let word = safe_slice(text, d.start_byte as usize, d.end_byte as usize);
253 if dict.contains(word) {
254 return false;
255 }
256 }
257 true
258 });
259
260 all_diagnostics.extend(diags.iter().map(|d| to_lsp_diagnostic(text, d)));
261 }
262 }
263
264 self.client
265 .publish_diagnostics(uri.clone(), all_diagnostics, None)
266 .await;
267 }
268}
269
270#[tower_lsp::async_trait]
273impl LanguageServer for Backend {
274 async fn initialize(&self, params: InitializeParams) -> Result<InitializeResult> {
275 if let Some(root_uri) = params.root_uri
276 && let Ok(path) = root_uri.to_file_path()
277 {
278 self.init_workspace(&path).await;
279 }
280
281 Ok(InitializeResult {
282 capabilities: ServerCapabilities {
283 text_document_sync: Some(TextDocumentSyncCapability::Kind(
284 TextDocumentSyncKind::FULL,
285 )),
286 code_action_provider: Some(CodeActionProviderCapability::Simple(true)),
287 execute_command_provider: Some(ExecuteCommandOptions {
288 commands: vec![
289 "langCheck.addDictionaryWord".into(),
290 "langCheck.ignoreDiagnostic".into(),
291 ],
292 ..Default::default()
293 }),
294 ..Default::default()
295 },
296 server_info: Some(ServerInfo {
297 name: "language-check-server".into(),
298 version: Some(env!("CARGO_PKG_VERSION").into()),
299 }),
300 })
301 }
302
303 async fn initialized(&self, _: InitializedParams) {
304 info!("LSP client initialized");
305 }
306
307 async fn shutdown(&self) -> Result<()> {
308 Ok(())
309 }
310
311 async fn did_open(&self, params: DidOpenTextDocumentParams) {
312 let uri = params.text_document.uri;
313 let text = params.text_document.text;
314 let lang_id = params.text_document.language_id.clone();
315 self.documents
316 .insert(uri.to_string(), (text.clone(), lang_id.clone()));
317 self.diagnose(&uri, &text, &lang_id).await;
318 }
319
320 async fn did_change(&self, params: DidChangeTextDocumentParams) {
321 let uri = params.text_document.uri;
322 if let Some(change) = params.content_changes.into_iter().last() {
323 let lang_id = guess_lang_id(&uri);
324 self.documents
325 .insert(uri.to_string(), (change.text.clone(), lang_id.clone()));
326 self.diagnose(&uri, &change.text, &lang_id).await;
327 }
328 }
329
330 async fn did_save(&self, params: DidSaveTextDocumentParams) {
331 let uri = params.text_document.uri;
332 let key = uri.to_string();
333 let entry = self.documents.get(&key).map(|r| r.value().clone());
334 if let Some((text, lang_id)) = entry {
335 self.diagnose(&uri, &text, &lang_id).await;
336 }
337 }
338
339 async fn did_close(&self, params: DidCloseTextDocumentParams) {
340 self.documents.remove(¶ms.text_document.uri.to_string());
341 }
342
343 async fn did_change_configuration(&self, params: DidChangeConfigurationParams) {
344 let settings: LspSettings = serde_json::from_value(params.settings).unwrap_or_default();
345 self.apply_settings(&settings.lang_check).await;
346 self.rediagnose_all().await;
347 }
348
349 async fn code_action(&self, params: CodeActionParams) -> Result<Option<CodeActionResponse>> {
350 let uri = ¶ms.text_document.uri;
351 let mut actions: Vec<CodeActionOrCommand> = Vec::new();
352
353 for diag in ¶ms.context.diagnostics {
354 if diag.source.as_deref() != Some("language-check") {
355 continue;
356 }
357
358 let Some(data) = &diag.data else { continue };
359 let Some(obj) = data.as_object() else {
360 continue;
361 };
362
363 if let Some(suggestions) = obj.get("suggestions").and_then(|v| v.as_array()) {
365 for s in suggestions {
366 if let Some(text) = s.as_str() {
367 let edit = TextEdit {
368 range: diag.range,
369 new_text: text.to_string(),
370 };
371 let mut changes = HashMap::new();
372 changes.insert(uri.clone(), vec![edit]);
373 actions.push(CodeActionOrCommand::CodeAction(CodeAction {
374 title: format!("Replace with \"{text}\""),
375 kind: Some(CodeActionKind::QUICKFIX),
376 diagnostics: Some(vec![diag.clone()]),
377 edit: Some(WorkspaceEdit {
378 changes: Some(changes),
379 ..Default::default()
380 }),
381 ..Default::default()
382 }));
383 }
384 }
385 }
386
387 if let Some(rule_id) = obj.get("rule_id").and_then(|v| v.as_str())
389 && (rule_id.contains("TYPO")
390 || rule_id.contains("MORFOLOGIK")
391 || rule_id.contains("spelling"))
392 && let Some(doc) = self.documents.get(&uri.to_string())
393 {
394 let word = extract_word_at_range(&doc.value().0, diag.range).unwrap_or_default();
395 if !word.is_empty() {
396 actions.push(CodeActionOrCommand::CodeAction(CodeAction {
397 title: format!("Add \"{word}\" to dictionary"),
398 kind: Some(CodeActionKind::QUICKFIX),
399 diagnostics: Some(vec![diag.clone()]),
400 command: Some(Command {
401 title: "Add to dictionary".into(),
402 command: "langCheck.addDictionaryWord".into(),
403 arguments: Some(vec![serde_json::json!(word)]),
404 }),
405 ..Default::default()
406 }));
407 }
408 }
409 }
410
411 if actions.is_empty() {
412 Ok(None)
413 } else {
414 Ok(Some(actions))
415 }
416 }
417
418 async fn execute_command(
419 &self,
420 params: ExecuteCommandParams,
421 ) -> Result<Option<serde_json::Value>> {
422 match params.command.as_str() {
423 "langCheck.addDictionaryWord" => {
424 if let Some(word_val) = params.arguments.first()
425 && let Some(word) = word_val.as_str()
426 {
427 debug!(word, "Adding to dictionary");
428 let mut dict = self.dictionary.lock().await;
429 if let Err(e) = dict.add_word(word) {
430 warn!(word, "Failed to add word: {e}");
431 }
432 }
433 }
434 "langCheck.ignoreDiagnostic" => {
435 if let Some(args) = params.arguments.first()
436 && let Some(obj) = args.as_object()
437 {
438 let message = obj
439 .get("message")
440 .and_then(|v| v.as_str())
441 .unwrap_or_default();
442 let context = obj
443 .get("context")
444 .and_then(|v| v.as_str())
445 .unwrap_or_default();
446 let start = obj
447 .get("start_byte")
448 .and_then(serde_json::Value::as_u64)
449 .map_or(0, |v| v as usize);
450 let end = obj
451 .get("end_byte")
452 .and_then(serde_json::Value::as_u64)
453 .map_or(0, |v| v as usize);
454 let fp = DiagnosticFingerprint::new(message, context, start, end);
455 self.ignore_store.lock().await.ignore(&fp);
456 }
457 }
458 _ => {}
459 }
460 Ok(None)
461 }
462}
463
464fn to_lsp_diagnostic(text: &str, d: &checker::Diagnostic) -> Diagnostic {
468 let range = byte_range_to_lsp(text, d.start_byte as usize, d.end_byte as usize);
469 let severity = match d.severity {
470 3 => Some(DiagnosticSeverity::ERROR),
471 2 => Some(DiagnosticSeverity::WARNING),
472 4 => Some(DiagnosticSeverity::HINT),
473 _ => Some(DiagnosticSeverity::INFORMATION),
475 };
476
477 let data = serde_json::json!({
478 "suggestions": d.suggestions,
479 "rule_id": d.rule_id,
480 "unified_id": d.unified_id,
481 });
482
483 Diagnostic {
484 range,
485 severity,
486 source: Some("language-check".into()),
487 code: Some(NumberOrString::String(d.unified_id.clone())),
488 message: d.message.clone(),
489 data: Some(data),
490 ..Default::default()
491 }
492}
493
494fn byte_range_to_lsp(text: &str, start: usize, end: usize) -> Range {
496 Range {
497 start: byte_to_position(text, start),
498 end: byte_to_position(text, end),
499 }
500}
501
502fn byte_to_position(text: &str, byte_offset: usize) -> Position {
503 let offset = byte_offset.min(text.len());
504 let prefix = &text[..offset];
505 let line = prefix.matches('\n').count() as u32;
506 let last_newline = prefix.rfind('\n').map_or(0, |i| i + 1);
507 let character = prefix[last_newline..].chars().count() as u32;
508 Position { line, character }
509}
510
511fn guess_lang_id(uri: &Url) -> String {
513 let path = uri.path();
514 let ext = path.rsplit('.').next().unwrap_or("");
515 match ext {
516 "html" | "htm" | "xhtml" => "html",
517 "tex" | "latex" | "ltx" => "latex",
518 "typ" => "typst",
519 "rst" => "rst",
520 "org" => "org",
521 "bib" => "bibtex",
522 "Rnw" | "rnw" | "Snw" | "snw" => "sweave",
523 "tree" => "forester",
524 _ => "markdown",
526 }
527 .to_string()
528}
529
/// Returns the part of `s` covered by the byte span `start..end` without
/// ever panicking.
///
/// Both bounds are clamped to `s.len()`. A `start` inside a multi-byte
/// character is moved back to the start of that character, and an `end`
/// inside one is moved forward past it, so the result is always made of
/// whole characters. If the adjusted start lies after the adjusted end, the
/// empty string is returned.
fn safe_slice(s: &str, start: usize, end: usize) -> &str {
    let mut lo = start.min(s.len());
    // Offset 0 is always a boundary, so this terminates.
    while !s.is_char_boundary(lo) {
        lo -= 1;
    }

    let mut hi = end.min(s.len());
    // `s.len()` is always a boundary, so this terminates.
    while !s.is_char_boundary(hi) {
        hi += 1;
    }

    // `get` yields `None` for an inverted span instead of panicking.
    s.get(lo..hi).unwrap_or("")
}
541
542fn extract_word_at_range(text: &str, range: Range) -> Option<String> {
544 let start = position_to_byte(text, range.start)?;
545 let end = position_to_byte(text, range.end)?;
546 Some(safe_slice(text, start, end).to_string())
547}
548
549fn position_to_byte(text: &str, pos: Position) -> Option<usize> {
550 let mut line = 0u32;
551 let mut byte = 0usize;
552 for (i, ch) in text.char_indices() {
553 if line == pos.line {
554 let col_offset = text[byte..].char_indices().nth(pos.character as usize);
555 return Some(col_offset.map_or(text.len(), |(off, _)| byte + off));
556 }
557 if ch == '\n' {
558 line += 1;
559 byte = i + 1;
560 }
561 }
562 if line == pos.line {
563 let col_offset = text[byte..].char_indices().nth(pos.character as usize);
564 return Some(col_offset.map_or(text.len(), |(off, _)| byte + off));
565 }
566 None
567}
568
569pub async fn run_lsp() {
573 let stdin = tokio::io::stdin();
574 let stdout = tokio::io::stdout();
575
576 let (service, socket) = LspService::new(Backend::new);
577 Server::new(stdin, stdout, socket).serve(service).await;
578}