Skip to main content

docolint_dictionary/
lib.rs

1use std::collections::HashSet;
2use std::fs::{self, OpenOptions};
3use std::io::Write;
4use std::path::Path;
5use docolint_types::GrammarError;
6
/// A case-insensitive set of words that must not be reported as grammar errors.
///
/// The set is populated from `.docolint-ignore` files found while walking from the
/// document's directory up towards the workspace root, and can be extended at runtime
/// by appending words to an ignore file. Every word is stored lowercased so lookups
/// are case-insensitive.
///
/// `Debug` and `Clone` are derived so the dictionary can be logged and duplicated
/// like any other plain data type.
#[derive(Debug, Clone)]
pub struct Dictionary {
    /// Lowercased ignored words; membership checks lowercase the query first.
    ignored_words: HashSet<String>,
}
15
16impl Default for Dictionary {
17    fn default() -> Self {
18        Self::new()
19    }
20}
21
22impl Dictionary {
23    /// Creates an empty dictionary with no ignored words.
24    pub fn new() -> Self {
25        Self {
26            ignored_words: HashSet::new(),
27        }
28    }
29
30    /// Loads and merges `.docolint-ignore` files from `document_path` up to `workspace_root`.
31    ///
32    /// Walks the directory tree upward, reading each `.docolint-ignore` file found.
33    /// Lines starting with `#` are treated as comments and skipped. Empty lines are ignored.
34    /// Words are lowercased before storage.
35    ///
36    /// # Arguments
37    /// * `workspace_root` - The root directory to stop walking at. Must be an ancestor
38    ///   of (or equal to) `document_path`'s parent.
39    /// * `document_path` - Path to the source file being checked. If this is a file,
40    ///   its parent directory is used as the starting point.
41    ///
42    /// # Panics
43    /// Does not panic. File read errors are silently ignored (missing files = no words).
44    pub fn load(workspace_root: &Path, document_path: &Path) -> Self {
45        let mut ignored_words = HashSet::new();
46        
47        let mut current = if document_path.is_file() {
48            document_path.parent()
49        } else {
50            Some(document_path)
51        };
52
53        while let Some(path) = current {
54            let ignore_file = path.join(".docolint-ignore");
55            if let Ok(content) = fs::read_to_string(ignore_file) {
56                for line in content.lines() {
57                    let word = line.trim();
58                    if !word.is_empty() && !word.starts_with('#') {
59                        ignored_words.insert(word.to_lowercase());
60                    }
61                }
62            }
63
64            if path == workspace_root {
65                break;
66            }
67            current = path.parent();
68        }
69
70        Self { ignored_words }
71    }
72
73    /// Checks if a word is in the ignored set (case-insensitive).
74    ///
75    /// # Arguments
76    /// * `word` - The word to check. Compared in lowercase against stored words.
77    pub fn is_ignored(&self, word: &str) -> bool {
78        self.ignored_words.contains(&word.to_lowercase())
79    }
80
81    /// Appends a word to a `.docolint-ignore` file and adds it to the in-memory set.
82    ///
83    /// Creates the file if it does not exist. The word is lowercased before writing.
84    /// No duplicate check is performed on the file; duplicates are harmless since
85    /// the in-memory set deduplicates automatically.
86    ///
87    /// # Arguments
88    /// * `word` - The word to ignore. Empty strings are silently ignored.
89    /// * `target_file` - Path to the `.docolint-ignore` file to append to.
90    ///
91    /// # Errors
92    /// Returns `std::io::Error` if the file cannot be opened or written.
93    pub fn add_word(&mut self, word: &str, target_file: &Path) -> std::io::Result<()> {
94        let word = word.trim().to_lowercase();
95        if word.is_empty() {
96            return Ok(());
97        }
98
99        let mut file = OpenOptions::new()
100            .create(true)
101            .append(true)
102            .open(target_file)?;
103
104        writeln!(file, "{}", word)?;
105        self.ignored_words.insert(word);
106        Ok(())
107    }
108
109    /// Filters out grammar errors whose matched word is in the ignored set.
110    ///
111    /// Extracts the word from `text` using each error's `offset` and `length`,
112    /// then checks it against the ignored set. Errors with out-of-bounds offsets
113    /// are kept (not filtered).
114    ///
115    /// # Arguments
116    /// * `text` - The plain text string that LanguageTool checked. Offsets in errors
117    ///   are relative to this string.
118    /// * `errors` - Grammar errors to filter. Consumed by this function.
119    ///
120    /// # Returns
121    /// A new `Vec` containing only errors whose matched word is not ignored.
122    pub fn filter_errors(&self, text: &str, errors: Vec<GrammarError>) -> Vec<GrammarError> {
123        errors.into_iter().filter(|error| {
124            if error.offset + error.length > text.len() {
125                return true;
126            }
127            let word = &text[error.offset..(error.offset + error.length)];
128            !self.is_ignored(word)
129        }).collect()
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a `GrammarError` covering `length` bytes starting at `offset`,
    /// with no replacement suggestions.
    fn make_error(message: &str, offset: usize, length: usize, rule_id: &str) -> GrammarError {
        GrammarError {
            message: message.to_string(),
            offset,
            length,
            replacements: vec![],
            rule_id: rule_id.to_string(),
        }
    }

    /// Ignore files in the document directory and in the workspace root are merged.
    #[test]
    fn test_load_and_merge_ignores() {
        let workspace = tempdir().unwrap();
        let nested = workspace.path().join("sub");
        fs::create_dir(&nested).unwrap();

        fs::write(workspace.path().join(".docolint-ignore"), "rootword\n").unwrap();
        fs::write(nested.join(".docolint-ignore"), "subword\n").unwrap();

        let document = nested.join("file.rs");
        let dict = Dictionary::load(workspace.path(), &document);

        for known in ["rootword", "subword"] {
            assert!(dict.is_ignored(known));
        }
        assert!(!dict.is_ignored("unknown"));
    }

    /// Lookups succeed regardless of the casing of the queried word.
    #[test]
    fn test_is_ignored_case_insensitive() {
        let dict = Dictionary {
            ignored_words: HashSet::from(["word".to_string()]),
        };

        assert!(dict.is_ignored("word"));
        assert!(dict.is_ignored("WORD"));
    }

    /// Adding a word creates the ignore file and updates the in-memory set.
    #[test]
    fn test_add_word_creates_file() {
        let workspace = tempdir().unwrap();
        let ignore_path = workspace.path().join(".docolint-ignore");

        let mut dict = Dictionary::new();
        dict.add_word("newword", &ignore_path).unwrap();

        assert!(ignore_path.exists());
        let written = fs::read_to_string(ignore_path).unwrap();
        assert!(written.contains("newword"));
        assert!(dict.is_ignored("newword"));
    }

    /// Only errors whose matched span is an ignored word are dropped.
    #[test]
    fn test_filter_errors() {
        let mut dict = Dictionary::new();
        dict.ignored_words.insert("ignored".to_string());

        let text = "This has an ignored word and a valid word.";
        let errors = vec![
            make_error("Error 1", 12, 7, "RULE1"), // "ignored"
            make_error("Error 2", 31, 5, "RULE2"), // "valid"
        ];

        let remaining = dict.filter_errors(text, errors);

        assert_eq!(remaining.len(), 1);
        assert_eq!(remaining[0].rule_id, "RULE2");
    }
}