// docolint_dictionary/lib.rs
use std::collections::HashSet;
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::path::Path;
use docolint_types::GrammarError;
6
/// A set of words that should never be reported as errors.
///
/// Words are stored lowercased, so lookups through [`Dictionary::is_ignored`]
/// are case-insensitive.
#[derive(Debug, Clone)]
pub struct Dictionary {
    /// Lowercased words to ignore.
    ignored_words: HashSet<String>,
}
15
16impl Default for Dictionary {
17 fn default() -> Self {
18 Self::new()
19 }
20}
21
22impl Dictionary {
23 fn char_offset_to_byte_offset(text: &str, char_offset: usize) -> Option<usize> {
24 if char_offset == text.chars().count() {
25 return Some(text.len());
26 }
27
28 text.char_indices().nth(char_offset).map(|(idx, _)| idx)
29 }
30
31 pub fn new() -> Self {
33 Self {
34 ignored_words: HashSet::new(),
35 }
36 }
37
38 pub fn load(workspace_root: &Path, document_path: &Path) -> Self {
53 let mut ignored_words = HashSet::new();
54
55 let mut current = if document_path.is_file() {
56 document_path.parent()
57 } else {
58 Some(document_path)
59 };
60
61 while let Some(path) = current {
62 let ignore_file = path.join(".docolint-ignore");
63 if let Ok(content) = fs::read_to_string(ignore_file) {
64 for line in content.lines() {
65 let word = line.trim();
66 if !word.is_empty() && !word.starts_with('#') {
67 ignored_words.insert(word.to_lowercase());
68 }
69 }
70 }
71
72 if path == workspace_root {
73 break;
74 }
75 current = path.parent();
76 }
77
78 Self { ignored_words }
79 }
80
81 pub fn is_ignored(&self, word: &str) -> bool {
86 self.ignored_words.contains(&word.to_lowercase())
87 }
88
89 pub fn add_word(&mut self, word: &str, target_file: &Path) -> std::io::Result<()> {
102 let word = word.trim().to_lowercase();
103 if word.is_empty() {
104 return Ok(());
105 }
106
107 let mut file = OpenOptions::new()
108 .create(true)
109 .append(true)
110 .open(target_file)?;
111
112 writeln!(file, "{}", word)?;
113 self.ignored_words.insert(word);
114 Ok(())
115 }
116
117 pub fn filter_errors(&self, text: &str, errors: Vec<GrammarError>) -> Vec<GrammarError> {
131 errors.into_iter().filter(|error| {
132 let Some(start) = Self::char_offset_to_byte_offset(text, error.offset) else {
133 return true;
134 };
135 let Some(end) = Self::char_offset_to_byte_offset(text, error.offset + error.length) else {
136 return true;
137 };
138 let Some(word) = text.get(start..end) else {
139 return true;
140 };
141 !self.is_ignored(word)
142 }).collect()
143 }
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;
    use tempfile::tempdir;

    /// Creates a `.docolint-ignore` file in `dir` containing `words`, one per line.
    fn write_ignore_file(dir: &Path, words: &[&str]) {
        let mut file = File::create(dir.join(".docolint-ignore")).unwrap();
        for word in words {
            writeln!(file, "{}", word).unwrap();
        }
    }

    /// Builds a dictionary whose in-memory set holds exactly `words`.
    fn dictionary_with(words: &[&str]) -> Dictionary {
        let mut dict = Dictionary::new();
        dict.ignored_words
            .extend(words.iter().map(|word| word.to_string()));
        dict
    }

    /// Builds a `GrammarError` with no replacements for the given span.
    fn grammar_error(message: &str, offset: usize, length: usize, rule_id: &str) -> GrammarError {
        GrammarError {
            message: message.to_string(),
            offset,
            length,
            replacements: vec![],
            rule_id: rule_id.to_string(),
        }
    }

    /// Ignore files in the document's directory and the workspace root are merged.
    #[test]
    fn test_load_and_merge_ignores() {
        let workspace = tempdir().unwrap();
        let nested = workspace.path().join("sub");
        fs::create_dir(&nested).unwrap();

        write_ignore_file(workspace.path(), &["rootword"]);
        write_ignore_file(&nested, &["subword"]);

        let dict = Dictionary::load(workspace.path(), &nested.join("file.rs"));

        assert!(dict.is_ignored("rootword"));
        assert!(dict.is_ignored("subword"));
        assert!(!dict.is_ignored("unknown"));
    }

    /// Lookups match regardless of the query's letter case.
    #[test]
    fn test_is_ignored_case_insensitive() {
        let dict = dictionary_with(&["word"]);

        assert!(dict.is_ignored("word"));
        assert!(dict.is_ignored("WORD"));
    }

    /// Adding a word creates the target file and updates the in-memory set.
    #[test]
    fn test_add_word_creates_file() {
        let workspace = tempdir().unwrap();
        let target = workspace.path().join(".docolint-ignore");

        let mut dict = Dictionary::new();
        dict.add_word("newword", &target).unwrap();

        assert!(target.exists());
        let written = fs::read_to_string(target).unwrap();
        assert!(written.contains("newword"));
        assert!(dict.is_ignored("newword"));
    }

    /// Errors covering an ignored word are dropped; the others survive.
    #[test]
    fn test_filter_errors() {
        let dict = dictionary_with(&["ignored"]);

        let text = "This has an ignored word and a valid word.";
        let errors = vec![
            grammar_error("Error 1", 12, 7, "RULE1"),
            grammar_error("Error 2", 31, 5, "RULE2"),
        ];

        let filtered = dict.filter_errors(text, errors);

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].rule_id, "RULE2");
    }

    /// Offsets are counted in characters, so multi-byte text resolves correctly.
    #[test]
    fn test_filter_errors_handles_unicode_offsets() {
        let dict = dictionary_with(&["❌"]);

        let text = "alpha ❌ beta";
        let errors = vec![grammar_error("Error", 6, 1, "RULE1")];

        let filtered = dict.filter_errors(text, errors);
        assert!(filtered.is_empty());
    }
}