Skip to main content

lang_check/
workspace.rs

1use crate::checker::Diagnostic;
2use crate::insights::ProseInsights;
3use anyhow::Result;
4use redb::{Database, ReadableDatabase, TableDefinition};
5use std::collections::hash_map::DefaultHasher;
6use std::hash::{Hash, Hasher};
7use std::path::{Path, PathBuf};
8
9const DIAGNOSTICS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("diagnostics");
10const INSIGHTS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("insights");
11const FILE_HASHES_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("file_hashes");
12
13pub struct WorkspaceIndex {
14    db: Database,
15    root_path: PathBuf,
16}
17
18impl WorkspaceIndex {
19    /// Create or open a workspace index.
20    ///
21    /// If `db_path` is provided, the database is created at that exact path.
22    /// Otherwise, the database is stored in the user data directory
23    /// (`~/.local/share/language-check/dbs/` on Linux,
24    ///  `~/Library/Application Support/language-check/dbs/` on macOS,
25    ///  `%APPDATA%/language-check/dbs/` on Windows),
26    /// named by a hash of the workspace root to avoid collisions.
27    pub fn new(workspace_root: &Path, db_path: Option<&Path>) -> Result<Self> {
28        let resolved_path = match db_path {
29            Some(p) => p.to_path_buf(),
30            None => default_db_path(workspace_root)?,
31        };
32
33        if let Some(parent) = resolved_path.parent() {
34            std::fs::create_dir_all(parent)?;
35        }
36
37        let db = Database::create(&resolved_path)?;
38
39        let write_txn = db.begin_write()?;
40        {
41            let _table = write_txn.open_table(DIAGNOSTICS_TABLE)?;
42            let _table = write_txn.open_table(INSIGHTS_TABLE)?;
43            let _table = write_txn.open_table(FILE_HASHES_TABLE)?;
44        }
45        write_txn.commit()?;
46
47        Ok(Self {
48            db,
49            root_path: workspace_root.to_path_buf(),
50        })
51    }
52
53    #[must_use]
54    pub fn get_root_path(&self) -> Option<&Path> {
55        Some(&self.root_path)
56    }
57
58    /// Check if a file's content has changed since last indexing.
59    /// Returns true if unchanged (cache hit), false if changed or new.
60    #[must_use]
61    pub fn is_file_unchanged(&self, file_path: &str, content: &str) -> bool {
62        let new_hash = Self::hash_content(content);
63        let Ok(read_txn) = self.db.begin_read() else {
64            return false;
65        };
66        let Ok(table) = read_txn.open_table(FILE_HASHES_TABLE) else {
67            return false;
68        };
69        let Ok(Some(stored)) = table.get(file_path) else {
70            return false;
71        };
72
73        stored.value() == new_hash.to_le_bytes()
74    }
75
76    /// Store the content hash for a file after indexing.
77    pub fn update_file_hash(&self, file_path: &str, content: &str) -> Result<()> {
78        let hash = Self::hash_content(content);
79        let write_txn = self.db.begin_write()?;
80        {
81            let mut table = write_txn.open_table(FILE_HASHES_TABLE)?;
82            table.insert(file_path, hash.to_le_bytes().as_slice())?;
83        }
84        write_txn.commit()?;
85        Ok(())
86    }
87
88    fn hash_content(content: &str) -> u64 {
89        let mut hasher = DefaultHasher::new();
90        content.hash(&mut hasher);
91        hasher.finish()
92    }
93
94    pub fn update_diagnostics(&self, file_path: &str, diagnostics: &[Diagnostic]) -> Result<()> {
95        let mut data = Vec::new();
96        ciborium::into_writer(&diagnostics, &mut data)?;
97        let write_txn = self.db.begin_write()?;
98        {
99            let mut table = write_txn.open_table(DIAGNOSTICS_TABLE)?;
100            table.insert(file_path, data.as_slice())?;
101        }
102        write_txn.commit()?;
103        Ok(())
104    }
105
106    pub fn update_insights(&self, file_path: &str, insights: &ProseInsights) -> Result<()> {
107        let mut data = Vec::new();
108        ciborium::into_writer(&insights, &mut data)?;
109        let write_txn = self.db.begin_write()?;
110        {
111            let mut table = write_txn.open_table(INSIGHTS_TABLE)?;
112            table.insert(file_path, data.as_slice())?;
113        }
114        write_txn.commit()?;
115        Ok(())
116    }
117
118    pub fn get_diagnostics(&self, file_path: &str) -> Result<Option<Vec<Diagnostic>>> {
119        let read_txn = self.db.begin_read()?;
120        let table = read_txn.open_table(DIAGNOSTICS_TABLE)?;
121        let result = table.get(file_path)?;
122
123        if let Some(data) = result {
124            let diagnostics = ciborium::from_reader(data.value())?;
125            Ok(Some(diagnostics))
126        } else {
127            Ok(None)
128        }
129    }
130
131    pub fn get_insights(&self, file_path: &str) -> Result<Option<ProseInsights>> {
132        let read_txn = self.db.begin_read()?;
133        let table = read_txn.open_table(INSIGHTS_TABLE)?;
134        let result = table.get(file_path)?;
135
136        if let Some(data) = result {
137            let insights = ciborium::from_reader(data.value())?;
138            Ok(Some(insights))
139        } else {
140            Ok(None)
141        }
142    }
143}
144
145/// Compute the default database path for a workspace.
146///
147/// Uses `dirs::data_dir()` (`~/.local/share` on Linux, `~/Library/Application Support`
148/// on macOS, `%APPDATA%` on Windows) as the base, then appends
149/// `language-check/dbs/<hex-hash>.db` where the hash is derived from the
150/// canonical workspace root path.
151fn default_db_path(workspace_root: &Path) -> Result<PathBuf> {
152    let data_dir = dirs::data_dir()
153        .ok_or_else(|| anyhow::anyhow!("Could not determine user data directory"))?;
154
155    let canonical = workspace_root
156        .canonicalize()
157        .unwrap_or_else(|_| workspace_root.to_path_buf());
158
159    let mut hasher = DefaultHasher::new();
160    canonical.to_string_lossy().hash(&mut hasher);
161    let hash = hasher.finish();
162
163    let db_dir = data_dir.join("language-check").join("dbs");
164    Ok(db_dir.join(format!("{hash:016x}.db")))
165}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// Scratch directory that is removed when the guard is dropped, so a
    /// failing assertion does not leave the directory behind.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best effort: a leftover temp directory must not fail the test.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// A workspace index together with the scratch directory holding its
    /// database.
    ///
    /// Field order matters: struct fields are dropped in declaration order,
    /// so the index is dropped before its directory is removed.
    struct TestWorkspace {
        idx: WorkspaceIndex,
        dir: ScratchDir,
    }

    /// Create a fresh index in a scratch directory unique to `name` and to
    /// the current process, so concurrent runs of the test binary do not
    /// delete each other's directories.
    fn temp_workspace(name: &str) -> TestWorkspace {
        let pid = std::process::id();
        let path = std::env::temp_dir().join(format!("lang_check_ws_{name}_{pid}"));
        // Clear anything left over from an earlier run that reused this pid.
        let _ = std::fs::remove_dir_all(&path);
        std::fs::create_dir_all(&path).unwrap();
        let dir = ScratchDir(path);
        // Tests use explicit db_path in temp dir to avoid polluting user data dir
        let db_path = dir.path().join(".languagecheck.db");
        let idx = WorkspaceIndex::new(dir.path(), Some(&db_path)).unwrap();
        TestWorkspace { idx, dir }
    }

    #[test]
    fn create_workspace_index() {
        let ws = temp_workspace("create");
        assert_eq!(ws.idx.get_root_path().unwrap(), ws.dir.path());
    }

    #[test]
    fn diagnostics_roundtrip() {
        let ws = temp_workspace("diag_rt");

        let diags = vec![Diagnostic {
            start_byte: 0,
            end_byte: 5,
            message: "test error".to_string(),
            suggestions: vec!["fix".to_string()],
            rule_id: "test.rule".to_string(),
            severity: 2,
            unified_id: "test.unified".to_string(),
            confidence: 0.9,
        }];

        ws.idx.update_diagnostics("test.md", &diags).unwrap();
        let retrieved = ws.idx.get_diagnostics("test.md").unwrap().unwrap();
        assert_eq!(retrieved.len(), 1);
        assert_eq!(retrieved[0].message, "test error");
        assert_eq!(retrieved[0].start_byte, 0);
        assert_eq!(retrieved[0].suggestions, vec!["fix"]);
    }

    #[test]
    fn diagnostics_missing_file_returns_none() {
        let ws = temp_workspace("diag_none");
        let result = ws.idx.get_diagnostics("nonexistent.md").unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn insights_roundtrip() {
        let ws = temp_workspace("insights_rt");

        let insights = ProseInsights {
            word_count: 100,
            sentence_count: 5,
            character_count: 450,
            reading_level: 8.5,
        };

        ws.idx.update_insights("doc.md", &insights).unwrap();
        let retrieved = ws.idx.get_insights("doc.md").unwrap().unwrap();
        assert_eq!(retrieved.word_count, 100);
        assert_eq!(retrieved.sentence_count, 5);
        assert_eq!(retrieved.character_count, 450);
        assert!((retrieved.reading_level - 8.5).abs() < 0.01);
    }

    #[test]
    fn file_hash_unchanged_detection() {
        let ws = temp_workspace("hash_unchanged");

        let content = "Hello, world!";
        ws.idx.update_file_hash("test.md", content).unwrap();
        assert!(ws.idx.is_file_unchanged("test.md", content));
    }

    #[test]
    fn file_hash_changed_detection() {
        let ws = temp_workspace("hash_changed");

        ws.idx
            .update_file_hash("test.md", "original content")
            .unwrap();
        assert!(!ws.idx.is_file_unchanged("test.md", "modified content"));
    }

    #[test]
    fn file_hash_new_file() {
        let ws = temp_workspace("hash_new");
        assert!(!ws.idx.is_file_unchanged("new.md", "any content"));
    }

    #[test]
    fn overwrite_diagnostics() {
        let ws = temp_workspace("diag_overwrite");

        let diags1 = vec![Diagnostic {
            start_byte: 0,
            end_byte: 3,
            message: "first".to_string(),
            ..Default::default()
        }];
        ws.idx.update_diagnostics("f.md", &diags1).unwrap();

        let diags2 = vec![
            Diagnostic {
                start_byte: 0,
                end_byte: 3,
                message: "second".to_string(),
                ..Default::default()
            },
            Diagnostic {
                start_byte: 10,
                end_byte: 15,
                message: "third".to_string(),
                ..Default::default()
            },
        ];
        ws.idx.update_diagnostics("f.md", &diags2).unwrap();

        // The second write must replace the first list, not append to it.
        let retrieved = ws.idx.get_diagnostics("f.md").unwrap().unwrap();
        assert_eq!(retrieved.len(), 2);
        assert_eq!(retrieved[0].message, "second");
    }
}