// docolint_dictionary/lib.rs

1use docolint_types::GrammarError;
2use std::collections::HashSet;
3use std::fs::{self, OpenOptions};
4use std::io::Write;
5use std::path::Path;
6
/// A set of ignored words used to suppress grammar errors.
///
/// Entries come from the workspace-root `.docolint-ignore` file (see
/// [`Dictionary::load`]) or are added at runtime with [`Dictionary::add_word`].
/// Every entry is stored lowercased, so lookups are case-insensitive.
///
/// `Debug` and `Clone` are derived so the dictionary can be logged and copied
/// like any other plain value type.
#[derive(Debug, Clone)]
pub struct Dictionary {
    /// Ignored words, always stored in lowercase.
    ignored_words: HashSet<String>,
}
15
16impl Default for Dictionary {
17    fn default() -> Self {
18        Self::new()
19    }
20}
21
22impl Dictionary {
23    fn char_offset_to_byte_offset(text: &str, char_offset: usize) -> Option<usize> {
24        if char_offset == text.chars().count() {
25            return Some(text.len());
26        }
27
28        text.char_indices().nth(char_offset).map(|(idx, _)| idx)
29    }
30
31    /// Creates an empty dictionary with no ignored words.
32    pub fn new() -> Self {
33        Self {
34            ignored_words: HashSet::new(),
35        }
36    }
37
38    /// Loads the workspace-root `.docolint-ignore` file.
39    ///
40    /// Lines starting with `#` are treated as comments and skipped. Empty lines are ignored.
41    /// Words are lowercased before storage.
42    ///
43    /// # Arguments
44    /// * `workspace_root` - The workspace root directory containing `.docolint-ignore`.
45    /// * `document_path` - Path to the source file being checked. Retained for API
46    ///   compatibility but ignored because the dictionary is always loaded from the
47    ///   workspace root.
48    ///
49    /// # Panics
50    /// Does not panic. File read errors are silently ignored (missing files = no words).
51    pub fn load(workspace_root: &Path, document_path: &Path) -> Self {
52        let _ = document_path;
53        let mut ignored_words = HashSet::new();
54
55        let ignore_file = workspace_root.join(".docolint-ignore");
56        if let Ok(content) = fs::read_to_string(ignore_file) {
57            for line in content.lines() {
58                let word = line.trim();
59                if !word.is_empty() && !word.starts_with('#') {
60                    ignored_words.insert(word.to_lowercase());
61                }
62            }
63        }
64
65        Self { ignored_words }
66    }
67
68    /// Checks if a word is in the ignored set (case-insensitive).
69    ///
70    /// # Arguments
71    /// * `word` - The word to check. Compared in lowercase against stored words.
72    pub fn is_ignored(&self, word: &str) -> bool {
73        self.ignored_words.contains(&word.to_lowercase())
74    }
75
76    /// Appends a word to a `.docolint-ignore` file and adds it to the in-memory set.
77    ///
78    /// Creates the file if it does not exist. The word is lowercased before writing.
79    /// No duplicate check is performed on the file; duplicates are harmless since
80    /// the in-memory set deduplicates automatically.
81    ///
82    /// # Arguments
83    /// * `word` - The word to ignore. Empty strings are silently ignored.
84    /// * `target_file` - Path to the `.docolint-ignore` file to append to.
85    ///
86    /// # Errors
87    /// Returns `std::io::Error` if the file cannot be opened or written.
88    pub fn add_word(&mut self, word: &str, target_file: &Path) -> std::io::Result<()> {
89        let word = word.trim().to_lowercase();
90        if word.is_empty() {
91            return Ok(());
92        }
93
94        let mut file = OpenOptions::new()
95            .create(true)
96            .append(true)
97            .open(target_file)?;
98
99        writeln!(file, "{}", word)?;
100        self.ignored_words.insert(word);
101        Ok(())
102    }
103
104    /// Filters out grammar errors whose matched word is in the ignored set.
105    ///
106    /// Extracts the word from `text` using each error's `offset` and `length`,
107    /// then checks it against the ignored set. Errors with out-of-bounds offsets
108    /// are kept (not filtered).
109    ///
110    /// # Arguments
111    /// * `text` - The plain text string that LanguageTool checked. Offsets in errors
112    ///   are relative to this string.
113    /// * `errors` - Grammar errors to filter. Consumed by this function.
114    ///
115    /// # Returns
116    /// A new `Vec` containing only errors whose matched word is not ignored.
117    pub fn filter_errors(&self, text: &str, errors: Vec<GrammarError>) -> Vec<GrammarError> {
118        errors
119            .into_iter()
120            .filter(|error| {
121                let Some(start) = Self::char_offset_to_byte_offset(text, error.offset) else {
122                    return true;
123                };
124                let Some(end) = Self::char_offset_to_byte_offset(text, error.offset + error.length)
125                else {
126                    return true;
127                };
128                let Some(word) = text.get(start..end) else {
129                    return true;
130                };
131                !self.is_ignored(word)
132            })
133            .collect()
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    /// Builds an in-memory dictionary from words that are already lowercase.
    fn dictionary_with(words: &[&str]) -> Dictionary {
        let mut dict = Dictionary::new();
        for word in words {
            dict.ignored_words.insert((*word).to_string());
        }
        dict
    }

    /// Builds a grammar error covering `length` characters starting at `offset`.
    fn error_at(offset: usize, length: usize, rule_id: &str) -> GrammarError {
        GrammarError {
            message: format!("Error for {rule_id}"),
            offset,
            length,
            replacements: vec![],
            rule_id: rule_id.to_string(),
        }
    }

    #[test]
    fn test_loads_only_workspace_root_ignore() {
        let root = tempdir().unwrap();
        let root_path = root.path();
        let sub = root_path.join("sub");
        fs::create_dir(&sub).unwrap();

        let mut root_ignore = File::create(root_path.join(".docolint-ignore")).unwrap();
        writeln!(root_ignore, "rootword").unwrap();

        let mut sub_ignore = File::create(sub.join(".docolint-ignore")).unwrap();
        writeln!(sub_ignore, "subword").unwrap();

        let dict = Dictionary::load(root_path, &sub.join("file.rs"));

        assert!(dict.is_ignored("rootword"));
        assert!(!dict.is_ignored("subword"));
        assert!(!dict.is_ignored("unknown"));
    }

    #[test]
    fn test_load_skips_comments_and_blank_lines() {
        let root = tempdir().unwrap();
        let root_path = root.path();

        let mut ignore = File::create(root_path.join(".docolint-ignore")).unwrap();
        writeln!(ignore, "# a comment").unwrap();
        writeln!(ignore).unwrap();
        writeln!(ignore, "   ").unwrap();
        writeln!(ignore, "  MixedCase  ").unwrap();
        writeln!(ignore, "plain").unwrap();

        let dict = Dictionary::load(root_path, &root_path.join("file.rs"));

        assert!(dict.is_ignored("mixedcase"));
        assert!(dict.is_ignored("plain"));
        assert!(!dict.is_ignored("# a comment"));
        assert!(!dict.is_ignored(""));
        assert_eq!(dict.ignored_words.len(), 2);
    }

    #[test]
    fn test_load_without_ignore_file_is_empty() {
        let root = tempdir().unwrap();

        let dict = Dictionary::load(root.path(), &root.path().join("file.rs"));

        assert!(dict.ignored_words.is_empty());
    }

    #[test]
    fn test_is_ignored_case_insensitive() {
        let dict = dictionary_with(&["word"]);

        assert!(dict.is_ignored("word"));
        assert!(dict.is_ignored("WORD"));
    }

    #[test]
    fn test_add_word_creates_file() {
        let root = tempdir().unwrap();
        let root_path = root.path();
        let ignore_file = root_path.join(".docolint-ignore");

        let mut dict = Dictionary::new();
        dict.add_word("newword", &ignore_file).unwrap();

        assert!(ignore_file.exists());
        let content = fs::read_to_string(ignore_file).unwrap();
        assert!(content.contains("newword"));
        assert!(dict.is_ignored("newword"));
    }

    #[test]
    fn test_add_word_trims_and_lowercases() {
        let root = tempdir().unwrap();
        let ignore_file = root.path().join(".docolint-ignore");

        let mut dict = Dictionary::new();
        dict.add_word("  MixedCase ", &ignore_file).unwrap();

        assert_eq!(fs::read_to_string(&ignore_file).unwrap(), "mixedcase\n");
        assert!(dict.is_ignored("MIXEDCASE"));
    }

    #[test]
    fn test_add_word_ignores_blank_input() {
        let root = tempdir().unwrap();
        let ignore_file = root.path().join(".docolint-ignore");

        let mut dict = Dictionary::new();
        dict.add_word("   ", &ignore_file).unwrap();

        // Blank input must neither create the file nor register an entry.
        assert!(!ignore_file.exists());
        assert!(dict.ignored_words.is_empty());
    }

    #[test]
    fn test_filter_errors() {
        let dict = dictionary_with(&["ignored"]);

        let text = "This has an ignored word and a valid word.";
        let errors = vec![
            error_at(12, 7, "RULE1"), // "ignored"
            error_at(31, 5, "RULE2"), // "valid"
        ];

        let filtered = dict.filter_errors(text, errors);

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].rule_id, "RULE2");
    }

    #[test]
    fn test_filter_errors_handles_unicode_offsets() {
        let dict = dictionary_with(&["❌"]);

        let text = "alpha ❌ beta";
        let errors = vec![error_at(6, 1, "RULE1")];

        let filtered = dict.filter_errors(text, errors);
        assert!(filtered.is_empty());
    }

    #[test]
    fn test_filter_errors_keeps_out_of_bounds_spans() {
        let dict = dictionary_with(&["short"]);

        let text = "short";
        let errors = vec![
            error_at(100, 3, "START_PAST_END"),
            error_at(2, 10, "END_PAST_END"),
        ];

        let filtered = dict.filter_errors(text, errors);

        assert_eq!(filtered.len(), 2);
        assert_eq!(filtered[0].rule_id, "START_PAST_END");
        assert_eq!(filtered[1].rule_id, "END_PAST_END");
    }
}