docolint_dictionary/
lib.rs1use docolint_types::GrammarError;
2use std::collections::HashSet;
3use std::fs::{self, OpenOptions};
4use std::io::Write;
5use std::path::Path;
6
/// A set of words that should not be reported as errors.
///
/// Lookups through [`Dictionary::is_ignored`] lowercase the queried word before
/// checking the set, so matching is case-insensitive as long as the stored
/// entries are lowercase.
#[derive(Debug, Clone)]
pub struct Dictionary {
    /// Words to ignore. Entries are expected to be lowercase, because lookups
    /// lowercase the query before testing membership.
    ignored_words: HashSet<String>,
}
15
16impl Default for Dictionary {
17 fn default() -> Self {
18 Self::new()
19 }
20}
21
22impl Dictionary {
23 fn char_offset_to_byte_offset(text: &str, char_offset: usize) -> Option<usize> {
24 if char_offset == text.chars().count() {
25 return Some(text.len());
26 }
27
28 text.char_indices().nth(char_offset).map(|(idx, _)| idx)
29 }
30
31 pub fn new() -> Self {
33 Self {
34 ignored_words: HashSet::new(),
35 }
36 }
37
38 pub fn load(workspace_root: &Path, document_path: &Path) -> Self {
52 let _ = document_path;
53 let mut ignored_words = HashSet::new();
54
55 let ignore_file = workspace_root.join(".docolint-ignore");
56 if let Ok(content) = fs::read_to_string(ignore_file) {
57 for line in content.lines() {
58 let word = line.trim();
59 if !word.is_empty() && !word.starts_with('#') {
60 ignored_words.insert(word.to_lowercase());
61 }
62 }
63 }
64
65 Self { ignored_words }
66 }
67
68 pub fn is_ignored(&self, word: &str) -> bool {
73 self.ignored_words.contains(&word.to_lowercase())
74 }
75
76 pub fn add_word(&mut self, word: &str, target_file: &Path) -> std::io::Result<()> {
89 let word = word.trim().to_lowercase();
90 if word.is_empty() {
91 return Ok(());
92 }
93
94 let mut file = OpenOptions::new()
95 .create(true)
96 .append(true)
97 .open(target_file)?;
98
99 writeln!(file, "{}", word)?;
100 self.ignored_words.insert(word);
101 Ok(())
102 }
103
104 pub fn filter_errors(&self, text: &str, errors: Vec<GrammarError>) -> Vec<GrammarError> {
118 errors
119 .into_iter()
120 .filter(|error| {
121 let Some(start) = Self::char_offset_to_byte_offset(text, error.offset) else {
122 return true;
123 };
124 let Some(end) = Self::char_offset_to_byte_offset(text, error.offset + error.length)
125 else {
126 return true;
127 };
128 let Some(word) = text.get(start..end) else {
129 return true;
130 };
131 !self.is_ignored(word)
132 })
133 .collect()
134 }
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a dictionary whose ignore set holds exactly `words`.
    fn dictionary_with(words: &[&str]) -> Dictionary {
        let mut dict = Dictionary::new();
        for word in words {
            dict.ignored_words.insert(word.to_string());
        }
        dict
    }

    /// Builds a `GrammarError` covering `length` characters starting at `offset`.
    fn grammar_error(message: &str, offset: usize, length: usize, rule_id: &str) -> GrammarError {
        GrammarError {
            message: message.to_string(),
            offset,
            length,
            replacements: vec![],
            rule_id: rule_id.to_string(),
        }
    }

    /// Only the ignore file at the workspace root is read, not one that sits
    /// next to the document.
    #[test]
    fn test_loads_only_workspace_root_ignore() {
        let workspace = tempdir().unwrap();
        let workspace_path = workspace.path();
        let nested = workspace_path.join("sub");
        fs::create_dir(&nested).unwrap();

        fs::write(workspace_path.join(".docolint-ignore"), "rootword\n").unwrap();
        fs::write(nested.join(".docolint-ignore"), "subword\n").unwrap();

        let dict = Dictionary::load(workspace_path, &nested.join("file.rs"));

        assert!(dict.is_ignored("rootword"));
        assert!(!dict.is_ignored("subword"));
        assert!(!dict.is_ignored("unknown"));
    }

    /// Lookups ignore the case of the queried word.
    #[test]
    fn test_is_ignored_case_insensitive() {
        let dict = dictionary_with(&["word"]);

        assert!(dict.is_ignored("word"));
        assert!(dict.is_ignored("WORD"));
    }

    /// Adding a word creates the target file, persists the word, and updates
    /// the in-memory set.
    #[test]
    fn test_add_word_creates_file() {
        let workspace = tempdir().unwrap();
        let ignore_path = workspace.path().join(".docolint-ignore");

        let mut dict = Dictionary::new();
        dict.add_word("newword", &ignore_path).unwrap();

        assert!(ignore_path.exists());
        let written = fs::read_to_string(ignore_path).unwrap();
        assert!(written.contains("newword"));
        assert!(dict.is_ignored("newword"));
    }

    /// Errors that flag an ignored word are dropped; others are kept.
    #[test]
    fn test_filter_errors() {
        let dict = dictionary_with(&["ignored"]);

        let text = "This has an ignored word and a valid word.";
        let errors = vec![
            // Characters 12..19 spell "ignored".
            grammar_error("Error 1", 12, 7, "RULE1"),
            // Characters 31..36 spell "valid".
            grammar_error("Error 2", 31, 5, "RULE2"),
        ];

        let remaining = dict.filter_errors(text, errors);

        assert_eq!(remaining.len(), 1);
        assert_eq!(remaining[0].rule_id, "RULE2");
    }

    /// Offsets are counted in characters, so a multi-byte character occupies a
    /// single position.
    #[test]
    fn test_filter_errors_handles_unicode_offsets() {
        let dict = dictionary_with(&["❌"]);

        let text = "alpha ❌ beta";
        let errors = vec![grammar_error("Error", 6, 1, "RULE1")];

        let remaining = dict.filter_errors(text, errors);
        assert!(remaining.is_empty());
    }
}