1use std::collections::HashSet;
2use std::collections::hash_map::DefaultHasher;
3use std::hash::{Hash, Hasher};
4use std::path::{Path, PathBuf};
5
6use anyhow::Result;
7use serde::{Deserialize, Serialize};
8
/// Returns the largest char boundary of `s` that is less than or equal to
/// `byte`.
///
/// `byte` is first clamped to `s.len()`, so out-of-range offsets resolve to
/// the end of the string rather than panicking.
fn floor_char_boundary(s: &str, byte: usize) -> usize {
    let upper = byte.min(s.len());
    // Offset 0 is always a char boundary, so the search cannot come up empty;
    // the fallback only exists to avoid an `unwrap`.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
17
/// Returns the smallest char boundary of `s` that is greater than or equal to
/// `byte`.
///
/// `byte` is first clamped to `s.len()`, so out-of-range offsets resolve to
/// the end of the string rather than panicking.
fn ceil_char_boundary(s: &str, byte: usize) -> usize {
    let lower = byte.min(s.len());
    // `s.len()` is always a char boundary, so the search cannot come up empty;
    // the fallback only exists to avoid an `unwrap`.
    (lower..=s.len())
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(s.len())
}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct DiagnosticFingerprint {
29 pub message_hash: u64,
30 pub context_hash: u64,
31 pub anchor_hash: u64,
32}
33
34impl DiagnosticFingerprint {
35 #[must_use]
36 pub fn new(message: &str, text: &str, start_byte: usize, end_byte: usize) -> Self {
37 let mut message_hasher = DefaultHasher::new();
38 message.hash(&mut message_hasher);
39
40 let start = floor_char_boundary(text, start_byte.saturating_sub(20));
42 let end = ceil_char_boundary(text, (end_byte + 20).min(text.len()));
43 let context = &text[start..end];
44
45 let mut context_hasher = DefaultHasher::new();
46 context.hash(&mut context_hasher);
47
48 let mut anchor_hasher = DefaultHasher::new();
50 Self::extract_word_anchor(text, start_byte, end_byte).hash(&mut anchor_hasher);
51
52 Self {
53 message_hash: message_hasher.finish(),
54 context_hash: context_hasher.finish(),
55 anchor_hash: anchor_hasher.finish(),
56 }
57 }
58
59 fn extract_word_anchor(text: &str, start_byte: usize, end_byte: usize) -> String {
60 let sb = floor_char_boundary(text, start_byte.min(text.len()));
61 let before: String = text[..sb]
62 .split_whitespace()
63 .rev()
64 .take(3)
65 .collect::<Vec<_>>()
66 .into_iter()
67 .rev()
68 .collect::<Vec<_>>()
69 .join(" ");
70 let eb = ceil_char_boundary(text, end_byte.min(text.len()));
71 let after: String = text[eb..]
72 .split_whitespace()
73 .take(3)
74 .collect::<Vec<_>>()
75 .join(" ");
76 format!("{before}|{after}")
77 }
78
79 fn combined_hash(&self) -> u64 {
80 let mut hasher = DefaultHasher::new();
81 self.message_hash.hash(&mut hasher);
82 self.context_hash.hash(&mut hasher);
83 self.anchor_hash.hash(&mut hasher);
84 hasher.finish()
85 }
86}
87
88#[derive(Serialize, Deserialize)]
89struct IgnoreStoreData {
90 fingerprints: Vec<u64>,
91}
92
/// Set of diagnostics the user has chosen to ignore, optionally backed by a
/// file on disk.
pub struct IgnoreStore {
    /// Combined fingerprint hashes of the ignored diagnostics.
    ignored_fingerprints: HashSet<u64>,
    /// File the set is written to after each change; `None` keeps the store
    /// purely in memory.
    persist_path: Option<PathBuf>,
}
97
98impl Default for IgnoreStore {
99 fn default() -> Self {
100 Self::new()
101 }
102}
103
104impl IgnoreStore {
105 #[must_use]
106 pub fn new() -> Self {
107 Self {
108 ignored_fingerprints: HashSet::new(),
109 persist_path: None,
110 }
111 }
112
113 pub fn load(workspace_root: &Path) -> Result<Self> {
115 let persist_path = workspace_root.join(".languagecheck").join("ignores.json");
116 let mut store = Self {
117 ignored_fingerprints: HashSet::new(),
118 persist_path: Some(persist_path.clone()),
119 };
120
121 if persist_path.exists() {
122 let data = std::fs::read_to_string(&persist_path)?;
123 let stored: IgnoreStoreData = serde_json::from_str(&data)?;
124 store.ignored_fingerprints = stored.fingerprints.into_iter().collect();
125 }
126
127 Ok(store)
128 }
129
130 pub fn ignore(&mut self, fingerprint: &DiagnosticFingerprint) {
131 self.ignored_fingerprints
132 .insert(fingerprint.combined_hash());
133 if let Err(e) = self.persist() {
134 eprintln!("Warning: failed to persist ignore store: {e}");
135 }
136 }
137
138 #[must_use]
139 pub fn is_ignored(&self, fingerprint: &DiagnosticFingerprint) -> bool {
140 self.ignored_fingerprints
141 .contains(&fingerprint.combined_hash())
142 }
143
144 fn persist(&self) -> Result<()> {
145 let Some(path) = &self.persist_path else {
146 return Ok(());
147 };
148
149 if let Some(parent) = path.parent() {
150 std::fs::create_dir_all(parent)?;
151 }
152
153 let data = IgnoreStoreData {
154 fingerprints: self.ignored_fingerprints.iter().copied().collect(),
155 };
156 std::fs::write(path, serde_json::to_string_pretty(&data)?)?;
157 Ok(())
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty scratch directory for a persistence test.
    ///
    /// The process id is part of the name so that concurrent test runs on the
    /// same machine do not delete each other's files.
    fn scratch_dir(name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("{name}_{}", std::process::id()));
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    #[test]
    fn fingerprint_same_input_same_hash() {
        let fp1 = DiagnosticFingerprint::new("bad grammar", "This has bad grammar here.", 9, 12);
        let fp2 = DiagnosticFingerprint::new("bad grammar", "This has bad grammar here.", 9, 12);
        assert_eq!(fp1.combined_hash(), fp2.combined_hash());
    }

    #[test]
    fn fingerprint_different_message_different_hash() {
        let fp1 = DiagnosticFingerprint::new("bad grammar", "This has bad grammar here.", 9, 12);
        let fp2 =
            DiagnosticFingerprint::new("spelling error", "This has bad grammar here.", 9, 12);
        assert_ne!(fp1.combined_hash(), fp2.combined_hash());
    }

    #[test]
    fn fingerprint_different_context_different_hash() {
        let fp1 = DiagnosticFingerprint::new("error", "AAA error BBB", 4, 9);
        let fp2 = DiagnosticFingerprint::new("error", "CCC error DDD", 4, 9);
        assert_ne!(fp1.combined_hash(), fp2.combined_hash());
    }

    #[test]
    fn fingerprint_word_anchor_extraction() {
        let text = "one two three ERROR four five six";
        let anchor = DiagnosticFingerprint::extract_word_anchor(text, 14, 19);
        assert_eq!(anchor, "one two three|four five six");
    }

    #[test]
    fn fingerprint_word_anchor_at_start() {
        let text = "ERROR some words after";
        let anchor = DiagnosticFingerprint::extract_word_anchor(text, 0, 5);
        assert_eq!(anchor, "|some words after");
    }

    #[test]
    fn fingerprint_word_anchor_at_end() {
        let text = "words before ERROR";
        let anchor = DiagnosticFingerprint::extract_word_anchor(text, 13, 18);
        assert_eq!(anchor, "words before|");
    }

    #[test]
    fn ignore_store_basic_operations() {
        let mut store = IgnoreStore::new();
        let fp = DiagnosticFingerprint::new("test msg", "some test msg context", 5, 13);

        assert!(!store.is_ignored(&fp));
        store.ignore(&fp);
        assert!(store.is_ignored(&fp));
    }

    #[test]
    fn ignore_store_does_not_ignore_different_fingerprint() {
        let mut store = IgnoreStore::new();
        let fp1 = DiagnosticFingerprint::new("msg A", "context A msg A here", 10, 15);
        let fp2 = DiagnosticFingerprint::new("msg B", "context B msg B here", 10, 15);

        store.ignore(&fp1);
        assert!(store.is_ignored(&fp1));
        assert!(!store.is_ignored(&fp2));
    }

    #[test]
    fn ignore_store_persistence_roundtrip() {
        let dir = scratch_dir("lang_check_test_ignore_persist");
        let fp = DiagnosticFingerprint::new("persist test", "the persist test text", 4, 16);

        // Write through one store instance...
        {
            let mut store = IgnoreStore::load(&dir).unwrap();
            store.ignore(&fp);
        }

        // ...and read back through a fresh one.
        {
            let store = IgnoreStore::load(&dir).unwrap();
            assert!(store.is_ignored(&fp));
        }

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn fingerprint_handles_multibyte_utf8() {
        // Repeated so that the context window around the range, and not only
        // the range offsets themselves, can land inside a multi-byte char.
        let text = "Ärger mit Ölförderung ".repeat(3);

        // Every byte offset is tried, including those in the middle of a
        // character; construction must not panic and must be deterministic.
        for start in 0..=text.len() {
            for end in start..=text.len() {
                let first = DiagnosticFingerprint::new("test", &text, start, end);
                let second = DiagnosticFingerprint::new("test", &text, start, end);
                assert_eq!(first.combined_hash(), second.combined_hash());
            }
        }
    }

    #[test]
    fn ignore_store_empty_persistence() {
        let dir = scratch_dir("lang_check_test_ignore_empty");

        let store = IgnoreStore::load(&dir).unwrap();
        let fp = DiagnosticFingerprint::new("not ignored", "some context", 0, 5);
        assert!(!store.is_ignored(&fp));

        let _ = std::fs::remove_dir_all(&dir);
    }
}