// docolint_dictionary/lib.rs

use std::collections::HashSet;
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::path::Path;

use docolint_types::GrammarError;

/// A set of ignored words used to suppress grammar errors.
///
/// The set is populated from `.docolint-ignore` files found between a document's
/// directory and the workspace root, and can be extended at runtime. Every word is
/// stored lowercased, which makes lookups case-insensitive.
///
/// The type is `Debug` so it can appear in logs and assertion output, and `Clone`
/// so callers can snapshot the set before mutating it.
#[derive(Debug, Clone)]
pub struct Dictionary {
    /// Ignored words, always stored in lowercase.
    ignored_words: HashSet<String>,
}

16impl Default for Dictionary {
17    fn default() -> Self {
18        Self::new()
19    }
20}
21
22impl Dictionary {
23    fn char_offset_to_byte_offset(text: &str, char_offset: usize) -> Option<usize> {
24        if char_offset == text.chars().count() {
25            return Some(text.len());
26        }
27
28        text.char_indices().nth(char_offset).map(|(idx, _)| idx)
29    }
30
31    /// Creates an empty dictionary with no ignored words.
32    pub fn new() -> Self {
33        Self {
34            ignored_words: HashSet::new(),
35        }
36    }
37
38    /// Loads and merges `.docolint-ignore` files from `document_path` up to `workspace_root`.
39    ///
40    /// Walks the directory tree upward, reading each `.docolint-ignore` file found.
41    /// Lines starting with `#` are treated as comments and skipped. Empty lines are ignored.
42    /// Words are lowercased before storage.
43    ///
44    /// # Arguments
45    /// * `workspace_root` - The root directory to stop walking at. Must be an ancestor
46    ///   of (or equal to) `document_path`'s parent.
47    /// * `document_path` - Path to the source file being checked. If this is a file,
48    ///   its parent directory is used as the starting point.
49    ///
50    /// # Panics
51    /// Does not panic. File read errors are silently ignored (missing files = no words).
52    pub fn load(workspace_root: &Path, document_path: &Path) -> Self {
53        let mut ignored_words = HashSet::new();
54        
55        let mut current = if document_path.is_file() {
56            document_path.parent()
57        } else {
58            Some(document_path)
59        };
60
61        while let Some(path) = current {
62            let ignore_file = path.join(".docolint-ignore");
63            if let Ok(content) = fs::read_to_string(ignore_file) {
64                for line in content.lines() {
65                    let word = line.trim();
66                    if !word.is_empty() && !word.starts_with('#') {
67                        ignored_words.insert(word.to_lowercase());
68                    }
69                }
70            }
71
72            if path == workspace_root {
73                break;
74            }
75            current = path.parent();
76        }
77
78        Self { ignored_words }
79    }
80
81    /// Checks if a word is in the ignored set (case-insensitive).
82    ///
83    /// # Arguments
84    /// * `word` - The word to check. Compared in lowercase against stored words.
85    pub fn is_ignored(&self, word: &str) -> bool {
86        self.ignored_words.contains(&word.to_lowercase())
87    }
88
89    /// Appends a word to a `.docolint-ignore` file and adds it to the in-memory set.
90    ///
91    /// Creates the file if it does not exist. The word is lowercased before writing.
92    /// No duplicate check is performed on the file; duplicates are harmless since
93    /// the in-memory set deduplicates automatically.
94    ///
95    /// # Arguments
96    /// * `word` - The word to ignore. Empty strings are silently ignored.
97    /// * `target_file` - Path to the `.docolint-ignore` file to append to.
98    ///
99    /// # Errors
100    /// Returns `std::io::Error` if the file cannot be opened or written.
101    pub fn add_word(&mut self, word: &str, target_file: &Path) -> std::io::Result<()> {
102        let word = word.trim().to_lowercase();
103        if word.is_empty() {
104            return Ok(());
105        }
106
107        let mut file = OpenOptions::new()
108            .create(true)
109            .append(true)
110            .open(target_file)?;
111
112        writeln!(file, "{}", word)?;
113        self.ignored_words.insert(word);
114        Ok(())
115    }
116
117    /// Filters out grammar errors whose matched word is in the ignored set.
118    ///
119    /// Extracts the word from `text` using each error's `offset` and `length`,
120    /// then checks it against the ignored set. Errors with out-of-bounds offsets
121    /// are kept (not filtered).
122    ///
123    /// # Arguments
124    /// * `text` - The plain text string that LanguageTool checked. Offsets in errors
125    ///   are relative to this string.
126    /// * `errors` - Grammar errors to filter. Consumed by this function.
127    ///
128    /// # Returns
129    /// A new `Vec` containing only errors whose matched word is not ignored.
130    pub fn filter_errors(&self, text: &str, errors: Vec<GrammarError>) -> Vec<GrammarError> {
131        errors.into_iter().filter(|error| {
132            let Some(start) = Self::char_offset_to_byte_offset(text, error.offset) else {
133                return true;
134            };
135            let Some(end) = Self::char_offset_to_byte_offset(text, error.offset + error.length) else {
136                return true;
137            };
138            let Some(word) = text.get(start..end) else {
139                return true;
140            };
141            !self.is_ignored(word)
142        }).collect()
143    }
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    /// Builds a dictionary whose in-memory set holds exactly `words`, bypassing
    /// any file I/O.
    fn dictionary_with(words: &[&str]) -> Dictionary {
        let mut dict = Dictionary::new();
        for word in words {
            dict.ignored_words.insert(word.to_string());
        }
        dict
    }

    /// Builds a grammar error with no replacements for the given character range.
    fn grammar_error(message: &str, rule_id: &str, offset: usize, length: usize) -> GrammarError {
        GrammarError {
            message: message.to_string(),
            offset,
            length,
            replacements: vec![],
            rule_id: rule_id.to_string(),
        }
    }

    /// Ignore files in the workspace root and in a subdirectory are both honoured.
    #[test]
    fn test_load_and_merge_ignores() {
        let workspace = tempdir().unwrap();
        let nested = workspace.path().join("sub");
        fs::create_dir(&nested).unwrap();

        for (dir, word) in [(workspace.path(), "rootword"), (nested.as_path(), "subword")] {
            let mut ignore_file = File::create(dir.join(".docolint-ignore")).unwrap();
            writeln!(ignore_file, "{}", word).unwrap();
        }

        let dict = Dictionary::load(workspace.path(), &nested.join("file.rs"));

        assert!(dict.is_ignored("rootword"));
        assert!(dict.is_ignored("subword"));
        assert!(!dict.is_ignored("unknown"));
    }

    /// Lookups succeed regardless of the casing of the queried word.
    #[test]
    fn test_is_ignored_case_insensitive() {
        let dict = dictionary_with(&["word"]);

        assert!(dict.is_ignored("word"));
        assert!(dict.is_ignored("WORD"));
    }

    /// Adding a word creates the target file, persists the word, and updates the
    /// in-memory set.
    #[test]
    fn test_add_word_creates_file() {
        let workspace = tempdir().unwrap();
        let target = workspace.path().join(".docolint-ignore");

        let mut dict = Dictionary::new();
        dict.add_word("newword", &target).unwrap();

        assert!(target.exists());
        let written = fs::read_to_string(target).unwrap();
        assert!(written.contains("newword"));
        assert!(dict.is_ignored("newword"));
    }

    /// Only the error whose matched text is an ignored word is dropped.
    #[test]
    fn test_filter_errors() {
        let dict = dictionary_with(&["ignored"]);

        let text = "This has an ignored word and a valid word.";
        let errors = vec![
            grammar_error("Error 1", "RULE1", 12, 7), // "ignored"
            grammar_error("Error 2", "RULE2", 31, 5), // "valid"
        ];

        let remaining = dict.filter_errors(text, errors);

        assert_eq!(remaining.len(), 1);
        assert_eq!(remaining[0].rule_id, "RULE2");
    }

    /// Offsets are interpreted as character positions, so a multi-byte character
    /// is matched with a length of one.
    #[test]
    fn test_filter_errors_handles_unicode_offsets() {
        let dict = dictionary_with(&["❌"]);

        let text = "alpha ❌ beta";
        let errors = vec![grammar_error("Error", "RULE1", 6, 1)];

        let remaining = dict.filter_errors(text, errors);
        assert!(remaining.is_empty());
    }
}