Skip to main content

lex_analysis/
spellcheck.rs

//! Spellcheck analysis for Lex documents.
//!
//! This module provides the core spellchecking logic, decoupled from
//! dictionary loading. Consumers provide a `DictionaryProvider` implementation
//! to handle the dictionary source (filesystem, embedded, etc.).
//!
//! # Architecture
//!
//! The spellcheck system is split into two parts:
//! - **Core logic** (this module): Traverses documents, extracts words, checks spelling
//! - **Dictionary provider** (consumer-provided): Loads and caches dictionaries
//!
//! This design allows the same checking logic to work in:
//! - Native LSP (filesystem-based dictionaries)
//! - WASM (embedded dictionaries)
//! - Tests (mock dictionaries)
17
18use lex_core::lex::ast::elements::{ContentItem, Document, Session, TextLine};
19use lex_core::lex::ast::{AstNode, Container};
20use lsp_types::{Diagnostic, DiagnosticSeverity, NumberOrString, Position, Range};
21
/// Spelling oracle used by the document checker.
///
/// Implementations wrap a concrete dictionary backend (spellbook, hunspell,
/// an embedded word list, a test mock, ...). The checking code in this module
/// only ever talks to the backend through this trait.
///
/// The `Send + Sync` supertraits let a checker be shared across threads.
pub trait WordChecker: Send + Sync {
    /// Returns `true` when `word` is considered correctly spelled.
    fn check(&self, word: &str) -> bool;

    /// Proposes replacements for a misspelled `word`.
    ///
    /// Implementations are expected to return at most `limit` suggestions.
    fn suggest(&self, word: &str, limit: usize) -> Vec<String>;
}
34
35/// Result of checking a document for spelling errors.
36#[derive(Debug, Default)]
37pub struct SpellcheckResult {
38    /// Diagnostics for misspelled words.
39    pub diagnostics: Vec<Diagnostic>,
40    /// Number of misspelled words found.
41    pub misspelled_count: usize,
42}
43
44/// Check a document for spelling errors using the provided word checker.
45pub fn check_document(document: &Document, checker: &dyn WordChecker) -> SpellcheckResult {
46    let mut diagnostics = Vec::new();
47    traverse_session(&document.root, checker, &mut diagnostics);
48
49    let misspelled_count = diagnostics.len();
50    SpellcheckResult {
51        diagnostics,
52        misspelled_count,
53    }
54}
55
56/// Get spelling suggestions for a word.
57pub fn suggest_corrections(word: &str, checker: &dyn WordChecker, limit: usize) -> Vec<String> {
58    checker.suggest(word, limit)
59}
60
61fn traverse_session(
62    session: &Session,
63    checker: &dyn WordChecker,
64    diagnostics: &mut Vec<Diagnostic>,
65) {
66    for child in session.children() {
67        traverse_content_item(child, checker, diagnostics);
68    }
69}
70
71fn traverse_content_item(
72    item: &ContentItem,
73    checker: &dyn WordChecker,
74    diagnostics: &mut Vec<Diagnostic>,
75) {
76    match item {
77        ContentItem::Paragraph(para) => {
78            for line_item in &para.lines {
79                if let ContentItem::TextLine(tl) = line_item {
80                    check_text_line(tl, checker, diagnostics);
81                }
82            }
83        }
84        ContentItem::Session(session) => traverse_session(session, checker, diagnostics),
85        ContentItem::TextLine(tl) => check_text_line(tl, checker, diagnostics),
86        _ => {
87            // Generic traversal for other containers
88            if let Some(children) = item.children() {
89                for child in children {
90                    traverse_content_item(child, checker, diagnostics);
91                }
92            }
93        }
94    }
95}
96
97fn check_text_line(line: &TextLine, checker: &dyn WordChecker, diagnostics: &mut Vec<Diagnostic>) {
98    let text = line.text();
99    let range = line.range();
100
101    let mut current_offset = 0;
102    for word in text.split_whitespace() {
103        if let Some(index) = text[current_offset..].find(word) {
104            let start_offset = current_offset + index;
105            // Strip punctuation
106            let clean_word = word.trim_matches(|c: char| !c.is_alphabetic());
107            if !clean_word.is_empty() && !checker.check(clean_word) {
108                // Calculate LSP range
109                // TextLine is always single line.
110                let start_char = range.start.column + start_offset;
111                let end_char = start_char + word.len();
112
113                diagnostics.push(Diagnostic {
114                    range: Range {
115                        start: Position {
116                            line: range.start.line as u32,
117                            character: start_char as u32,
118                        },
119                        end: Position {
120                            line: range.end.line as u32,
121                            character: end_char as u32,
122                        },
123                    },
124                    severity: Some(DiagnosticSeverity::INFORMATION),
125                    code: Some(NumberOrString::String("spelling".to_string())),
126                    code_description: None,
127                    source: Some("lex-spell".to_string()),
128                    message: format!("Unknown word: {clean_word}"),
129                    related_information: None,
130                    tags: None,
131                    data: None,
132                });
133            }
134            current_offset = start_offset + word.len();
135        }
136    }
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::ast::elements::Paragraph;
    use lex_core::lex::ast::{Position as AstPosition, Range as AstRange};

    /// Test double that accepts a fixed word list, ignoring ASCII case.
    struct MockChecker {
        known_words: Vec<&'static str>,
    }

    impl MockChecker {
        fn new(words: &[&'static str]) -> Self {
            let known_words = words.to_vec();
            Self { known_words }
        }
    }

    impl WordChecker for MockChecker {
        fn check(&self, word: &str) -> bool {
            self.known_words
                .iter()
                .any(|known| known.eq_ignore_ascii_case(word))
        }

        fn suggest(&self, _word: &str, _limit: usize) -> Vec<String> {
            Vec::new()
        }
    }

    /// Builds a document whose root session holds one single-line paragraph.
    ///
    /// `end` is used both as the end of the span and as the end column of the
    /// paragraph's range on line 0.
    fn single_paragraph_document(text: &str, end: usize) -> Document {
        let range = AstRange::new(0..end, AstPosition::new(0, 0), AstPosition::new(0, end));
        let para = Paragraph::from_line(text.to_string()).at(range);

        let mut session = Session::with_title("Title".to_string());
        session.children_mut().push(ContentItem::Paragraph(para));

        Document {
            root: session,
            ..Default::default()
        }
    }

    #[test]
    fn test_check_document_finds_misspellings() {
        let checker = MockChecker::new(&["hello", "world"]);
        let doc = single_paragraph_document("hello wrold test", 17);

        let result = check_document(&doc, &checker);

        // "wrold" and "test" should be flagged
        assert_eq!(result.misspelled_count, 2);
        assert_eq!(result.diagnostics.len(), 2);
        assert!(result.diagnostics[0].message.contains("wrold"));
        assert!(result.diagnostics[1].message.contains("test"));
    }

    #[test]
    fn test_check_document_no_errors() {
        let checker = MockChecker::new(&["hello", "world"]);
        let doc = single_paragraph_document("hello world", 11);

        let result = check_document(&doc, &checker);

        assert_eq!(result.misspelled_count, 0);
        assert!(result.diagnostics.is_empty());
    }

    #[test]
    fn test_punctuation_stripped() {
        let checker = MockChecker::new(&["hello"]);
        let doc = single_paragraph_document("hello!!!", 8);

        let result = check_document(&doc, &checker);

        // "hello" with punctuation should still match
        assert_eq!(result.misspelled_count, 0);
    }
}
235}