1use anyhow::Result;
2use std::collections::HashSet;
3use std::path::{Path, PathBuf};
4
/// A case-insensitive word dictionary made of two independent word sets.
///
/// Words are stored lowercased. User words are the only ones written back to
/// disk; bundled words (compiled-in lists and extra wordlist files) live in
/// memory only.
#[derive(Debug, Clone)]
pub struct Dictionary {
    /// Lowercased words added by the user; persisted to `workspace_path`.
    user_words: HashSet<String>,
    /// Lowercased words from bundled lists and external wordlist files; never persisted.
    bundled_words: HashSet<String>,
    /// File the user words are written to, or `None` for a purely in-memory dictionary.
    workspace_path: Option<PathBuf>,
}
15
16impl Default for Dictionary {
17 fn default() -> Self {
18 Self::new()
19 }
20}
21
22impl Dictionary {
23 #[must_use]
24 pub fn new() -> Self {
25 Self {
26 user_words: HashSet::new(),
27 bundled_words: HashSet::new(),
28 workspace_path: None,
29 }
30 }
31
32 pub fn load(workspace_root: &Path) -> Result<Self> {
35 let mut dict = Self::new();
36 let dict_path = workspace_root.join(".languagecheck").join("dictionary.txt");
37 dict.workspace_path = Some(dict_path.clone());
38
39 if dict_path.exists() {
40 let content = std::fs::read_to_string(&dict_path)?;
41 for line in content.lines() {
42 let word = line.trim();
43 if !word.is_empty() && !word.starts_with('#') {
44 dict.user_words.insert(word.to_lowercase());
45 }
46 }
47 }
48
49 Ok(dict)
50 }
51
52 pub fn load_bundled(&mut self) {
56 for words_str in bundled::ALL {
57 parse_wordlist_into(words_str, &mut self.bundled_words);
58 }
59 }
60
61 pub fn load_wordlist_file(&mut self, path: &Path, base: &Path) -> Result<()> {
66 let resolved = if path.is_absolute() {
67 path.to_path_buf()
68 } else {
69 base.join(path)
70 };
71
72 let resolved = resolved.canonicalize().map_err(|e| {
73 anyhow::anyhow!("Cannot resolve wordlist path {}: {e}", resolved.display())
74 })?;
75
76 let canonical_base = base.canonicalize().unwrap_or_else(|_| base.to_path_buf());
79 if !resolved.starts_with(&canonical_base)
80 && !resolved.starts_with(dirs::config_dir().unwrap_or_default())
81 && !resolved.starts_with(dirs::home_dir().unwrap_or_default().join(".config"))
82 {
83 anyhow::bail!(
84 "Wordlist path {} is outside the workspace and known config directories",
85 resolved.display()
86 );
87 }
88
89 let content = std::fs::read_to_string(&resolved)
90 .map_err(|e| anyhow::anyhow!("Cannot read wordlist {}: {e}", resolved.display()))?;
91 parse_wordlist_into(&content, &mut self.bundled_words);
92 Ok(())
93 }
94
95 pub fn add_word(&mut self, word: &str) -> Result<()> {
97 let lower = word.to_lowercase();
98 if self.user_words.insert(lower) {
99 self.persist()?;
100 }
101 Ok(())
102 }
103
104 #[must_use]
107 pub fn contains(&self, word: &str) -> bool {
108 let lower = word.to_lowercase();
109 self.user_words.contains(&lower) || self.bundled_words.contains(&lower)
110 }
111
112 pub fn words(&self) -> impl Iterator<Item = &String> {
114 self.user_words.iter().chain(self.bundled_words.iter())
115 }
116
117 #[must_use]
119 pub fn len(&self) -> usize {
120 self.user_words.len() + self.bundled_words.len()
121 }
122
123 #[must_use]
125 pub fn is_empty(&self) -> bool {
126 self.user_words.is_empty() && self.bundled_words.is_empty()
127 }
128
129 fn persist(&self) -> Result<()> {
131 let Some(path) = &self.workspace_path else {
132 return Ok(());
133 };
134
135 if let Some(parent) = path.parent() {
136 std::fs::create_dir_all(parent)?;
137 }
138
139 let mut words: Vec<&str> = self.user_words.iter().map(String::as_str).collect();
140 words.sort_unstable();
141 let content = words.join("\n");
142 std::fs::write(path, content + "\n")?;
143 Ok(())
144 }
145}
146
/// Parses wordlist text and inserts every entry, lowercased, into `set`.
///
/// Each line is trimmed. Blank lines and lines starting with `#` are skipped.
/// A UTF-8 byte-order mark at the very start of `content` is ignored, so a
/// file saved with a BOM does not corrupt its first entry.
fn parse_wordlist_into(content: &str, set: &mut HashSet<String>) {
    // U+FEFF is not whitespace, so `trim` would leave it glued to the first line.
    let content = content.strip_prefix('\u{feff}').unwrap_or(content);

    set.extend(
        content
            .lines()
            .map(str::trim)
            .filter(|entry| !entry.is_empty() && !entry.starts_with('#'))
            .map(str::to_lowercase),
    );
}
156
/// Wordlists embedded into the binary at compile time via `include_str!`.
///
/// Each constant holds the raw text of one file under `dictionaries/bundled/`.
pub mod bundled {
    /// Raw text of `dictionaries/bundled/software-terms.txt`.
    pub const SOFTWARE_TERMS: &str = include_str!("../dictionaries/bundled/software-terms.txt");

    /// Raw text of `dictionaries/bundled/typescript.txt`.
    pub const TYPESCRIPT: &str = include_str!("../dictionaries/bundled/typescript.txt");

    /// Raw text of `dictionaries/bundled/companies.txt`.
    pub const COMPANIES: &str = include_str!("../dictionaries/bundled/companies.txt");

    /// Raw text of `dictionaries/bundled/jargon.txt`.
    pub const JARGON: &str = include_str!("../dictionaries/bundled/jargon.txt");

    /// Every bundled wordlist, in the order they are loaded.
    pub const ALL: &[&str] = &[SOFTWARE_TERMS, TYPESCRIPT, COMPANIES, JARGON];
}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty scratch directory for one test.
    ///
    /// The process id is part of the name so concurrent test processes do not
    /// delete or reuse each other's files.
    fn scratch_dir(name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("{name}_{}", std::process::id()));
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    #[test]
    fn new_dictionary_is_empty() {
        let dict = Dictionary::new();
        assert!(dict.is_empty());
        assert_eq!(dict.len(), 0);
        assert_eq!(dict.words().count(), 0);
        assert!(!dict.contains("anything"));
    }

    #[test]
    fn add_and_contains() {
        // No backing file, so `add_word` only touches memory.
        let mut dict = Dictionary::new();
        dict.add_word("Hello").unwrap();
        assert!(dict.contains("hello"));
        assert!(dict.contains("Hello"));
        assert!(dict.contains("HELLO"));
    }

    #[test]
    fn persistence_roundtrip() {
        let dir = scratch_dir("lang_check_test_dict");

        {
            let mut dict = Dictionary::load(&dir).unwrap();
            dict.add_word("kubernetes").unwrap();
            dict.add_word("terraform").unwrap();
        }

        {
            // A fresh instance must see what the previous one wrote.
            let dict = Dictionary::load(&dir).unwrap();
            assert!(dict.contains("kubernetes"));
            assert!(dict.contains("Kubernetes"));
            assert!(dict.contains("terraform"));
            assert!(!dict.contains("nonexistent"));
        }

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn skips_comments_and_blank_lines() {
        let dir = scratch_dir("lang_check_test_dict_comments");
        let dict_dir = dir.join(".languagecheck");
        std::fs::create_dir_all(&dict_dir).unwrap();
        std::fs::write(
            dict_dir.join("dictionary.txt"),
            "# This is a comment\n\nkubernetes\n  \n# Another comment\nterraform\n",
        )
        .unwrap();

        let dict = Dictionary::load(&dir).unwrap();
        assert!(dict.contains("kubernetes"));
        assert!(dict.contains("terraform"));
        assert_eq!(dict.words().count(), 2);

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn add_duplicate_word_is_idempotent() {
        let mut dict = Dictionary::new();
        dict.add_word("test").unwrap();
        let initial_count = dict.words().count();

        // Same word in a different case must not create a second entry.
        dict.add_word("Test").unwrap();
        assert_eq!(dict.words().count(), initial_count);
        assert_eq!(dict.len(), initial_count);
    }

    #[test]
    fn words_iterator() {
        let mut dict = Dictionary::new();
        dict.user_words.insert("alpha".to_string());
        dict.user_words.insert("beta".to_string());

        let mut listed: Vec<&str> = dict.words().map(String::as_str).collect();
        listed.sort_unstable();
        assert_eq!(listed, ["alpha", "beta"]);
    }

    #[test]
    fn bundled_dictionaries_load() {
        let mut dict = Dictionary::new();
        dict.load_bundled();

        assert!(
            dict.len() > 5000,
            "Expected > 5000 bundled words, got {}",
            dict.len()
        );

        assert!(
            dict.contains("kubernetes"),
            "software-terms should include kubernetes"
        );
        assert!(
            dict.contains("webpack"),
            "software-terms should include webpack"
        );
        assert!(
            dict.contains("instanceof"),
            "typescript should include instanceof"
        );
        assert!(dict.contains("stdout"), "jargon should include stdout");
    }

    #[test]
    fn bundled_plus_user_words() {
        let mut dict = Dictionary::new();
        dict.load_bundled();
        let bundled_count = dict.len();

        dict.user_words.insert("myprojectword".to_string());
        assert_eq!(dict.len(), bundled_count + 1);
        assert!(dict.contains("myprojectword"));
        assert!(dict.contains("kubernetes"));
    }

    #[test]
    fn load_wordlist_file_works() {
        let dir = scratch_dir("lang_check_test_wordlist");

        let wordlist = dir.join("custom.txt");
        std::fs::write(&wordlist, "# My custom words\nfoobar\nbazqux\n").unwrap();

        let mut dict = Dictionary::new();
        dict.load_wordlist_file(&wordlist, &dir).unwrap();

        assert!(dict.contains("foobar"));
        assert!(dict.contains("bazqux"));
        assert_eq!(dict.len(), 2);

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn persistence_excludes_bundled_words() {
        let dir = scratch_dir("lang_check_test_dict_bundled_persist");

        {
            let mut dict = Dictionary::load(&dir).unwrap();
            dict.load_bundled();
            dict.add_word("myuserword").unwrap();
        }

        // Only the user word may appear in the file on disk.
        let dict_path = dir.join(".languagecheck").join("dictionary.txt");
        let content = std::fs::read_to_string(&dict_path).unwrap();
        assert!(
            content.contains("myuserword"),
            "User word should be persisted"
        );
        assert!(
            !content.contains("kubernetes"),
            "Bundled words should NOT be persisted"
        );

        {
            let mut dict = Dictionary::load(&dir).unwrap();
            dict.load_bundled();
            assert!(dict.contains("myuserword"));
            assert!(dict.contains("kubernetes"));
        }

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn load_wordlist_file_relative_path() {
        let dir = scratch_dir("lang_check_test_wordlist_rel");

        std::fs::write(dir.join("terms.txt"), "myterm\n").unwrap();

        let mut dict = Dictionary::new();
        dict.load_wordlist_file(Path::new("terms.txt"), &dir)
            .unwrap();

        assert!(dict.contains("myterm"));

        let _ = std::fs::remove_dir_all(&dir);
    }
}