1use std::collections::HashMap;
4use std::fs::{self, File};
5use std::io::{BufReader, BufWriter, Write};
6use std::path::{Path, PathBuf};
7
8use parking_lot::RwLock;
9
10use crate::error::{Error, Result};
11use crate::storage::{Storage, StorageOptions, WorkspaceMetadata};
12use crate::types::{Chunk, Document, LanguageStats, WorkspaceId, WorkspaceStats};
13
14pub struct DiskStorage {
16 root_dir: PathBuf,
18 options: StorageOptions,
20 metadata_cache: RwLock<HashMap<WorkspaceId, WorkspaceMetadata>>,
22}
23
24impl DiskStorage {
25 pub fn new(root_dir: PathBuf) -> Result<Self> {
27 Self::with_options(root_dir, StorageOptions::default())
28 }
29
30 pub fn with_options(root_dir: PathBuf, options: StorageOptions) -> Result<Self> {
32 fs::create_dir_all(&root_dir)?;
34
35 Ok(Self {
36 root_dir,
37 options,
38 metadata_cache: RwLock::new(HashMap::new()),
39 })
40 }
41
42 fn workspace_dir(&self, workspace_id: &WorkspaceId) -> PathBuf {
44 self.root_dir.join(workspace_id.to_string())
45 }
46
47 fn metadata_path(&self, workspace_id: &WorkspaceId) -> PathBuf {
49 self.workspace_dir(workspace_id).join("metadata.json")
50 }
51
52 fn documents_path(&self, workspace_id: &WorkspaceId) -> PathBuf {
54 self.workspace_dir(workspace_id).join("documents.bin")
55 }
56
57 fn chunks_path(&self, workspace_id: &WorkspaceId) -> PathBuf {
59 self.workspace_dir(workspace_id).join("chunks.bin")
60 }
61
62 fn embeddings_path(&self, workspace_id: &WorkspaceId) -> PathBuf {
64 self.workspace_dir(workspace_id).join("embeddings.bin")
65 }
66
67 fn write_bincode<T: serde::Serialize>(&self, path: &Path, data: &T) -> Result<()> {
69 let file = File::create(path)?;
70 let mut writer = BufWriter::with_capacity(self.options.buffer_size, file);
71 bincode::serialize_into(&mut writer, data)?;
72 writer.flush()?;
73 Ok(())
74 }
75
76 fn read_bincode<T: serde::de::DeserializeOwned>(&self, path: &Path) -> Result<T> {
78 let file = File::open(path)?;
79 let reader = BufReader::with_capacity(self.options.buffer_size, file);
80 let data = bincode::deserialize_from(reader)?;
81 Ok(data)
82 }
83
84 fn write_json<T: serde::Serialize>(&self, path: &Path, data: &T) -> Result<()> {
86 let file = File::create(path)?;
87 let writer = BufWriter::with_capacity(self.options.buffer_size, file);
88 serde_json::to_writer_pretty(writer, data)?;
89 Ok(())
90 }
91
92 fn read_json<T: serde::de::DeserializeOwned>(&self, path: &Path) -> Result<T> {
94 let file = File::open(path)?;
95 let reader = BufReader::with_capacity(self.options.buffer_size, file);
96 let data = serde_json::from_reader(reader)?;
97 Ok(data)
98 }
99
100 fn dir_size(&self, path: &Path) -> u64 {
102 if !path.exists() {
103 return 0;
104 }
105
106 walkdir::WalkDir::new(path)
107 .into_iter()
108 .filter_map(|e| e.ok())
109 .filter(|e| e.file_type().is_file())
110 .filter_map(|e| e.metadata().ok())
111 .map(|m| m.len())
112 .sum()
113 }
114}
115
116impl Storage for DiskStorage {
117 fn init_workspace(&self, workspace_id: &WorkspaceId, root_path: &PathBuf) -> Result<()> {
118 let ws_dir = self.workspace_dir(workspace_id);
119 fs::create_dir_all(&ws_dir)?;
120
121 fs::create_dir_all(self.tantivy_index_path(workspace_id))?;
123
124 let metadata = WorkspaceMetadata::new(
126 workspace_id.clone(),
127 root_path.clone(),
128 384, );
130
131 self.save_workspace_metadata(workspace_id, &metadata)?;
132
133 Ok(())
134 }
135
136 fn workspace_exists(&self, workspace_id: &WorkspaceId) -> bool {
137 self.metadata_path(workspace_id).exists()
138 }
139
140 fn load_workspace_metadata(&self, workspace_id: &WorkspaceId) -> Result<WorkspaceMetadata> {
141 {
143 let cache = self.metadata_cache.read();
144 if let Some(meta) = cache.get(workspace_id) {
145 return Ok(meta.clone());
146 }
147 }
148
149 let path = self.metadata_path(workspace_id);
150 if !path.exists() {
151 return Err(Error::WorkspaceNotFound(workspace_id.clone()));
152 }
153
154 let metadata: WorkspaceMetadata = self.read_json(&path)?;
155
156 {
158 let mut cache = self.metadata_cache.write();
159 cache.insert(workspace_id.clone(), metadata.clone());
160 }
161
162 Ok(metadata)
163 }
164
165 fn save_workspace_metadata(
166 &self,
167 workspace_id: &WorkspaceId,
168 metadata: &WorkspaceMetadata,
169 ) -> Result<()> {
170 let path = self.metadata_path(workspace_id);
171 self.write_json(&path, metadata)?;
172
173 {
175 let mut cache = self.metadata_cache.write();
176 cache.insert(workspace_id.clone(), metadata.clone());
177 }
178
179 Ok(())
180 }
181
182 fn save_documents(&self, workspace_id: &WorkspaceId, documents: &[Document]) -> Result<()> {
183 let path = self.documents_path(workspace_id);
184 self.write_bincode(&path, &documents.to_vec())
185 }
186
187 fn load_documents(&self, workspace_id: &WorkspaceId) -> Result<Vec<Document>> {
188 let path = self.documents_path(workspace_id);
189 if !path.exists() {
190 return Ok(Vec::new());
191 }
192 self.read_bincode(&path)
193 }
194
195 fn save_chunks(&self, workspace_id: &WorkspaceId, chunks: &[Chunk]) -> Result<()> {
196 let path = self.chunks_path(workspace_id);
197 self.write_bincode(&path, &chunks.to_vec())
198 }
199
200 fn load_chunks(&self, workspace_id: &WorkspaceId) -> Result<Vec<Chunk>> {
201 let path = self.chunks_path(workspace_id);
202 if !path.exists() {
203 return Ok(Vec::new());
204 }
205 self.read_bincode(&path)
206 }
207
208 fn save_embeddings(
209 &self,
210 workspace_id: &WorkspaceId,
211 embeddings: &[(String, Vec<f32>)],
212 ) -> Result<()> {
213 let path = self.embeddings_path(workspace_id);
214 self.write_bincode(&path, &embeddings.to_vec())
215 }
216
217 fn load_embeddings(&self, workspace_id: &WorkspaceId) -> Result<Vec<(String, Vec<f32>)>> {
218 let path = self.embeddings_path(workspace_id);
219 if !path.exists() {
220 return Ok(Vec::new());
221 }
222 self.read_bincode(&path)
223 }
224
225 fn tantivy_index_path(&self, workspace_id: &WorkspaceId) -> PathBuf {
226 self.workspace_dir(workspace_id).join("tantivy")
227 }
228
229 fn vector_index_path(&self, workspace_id: &WorkspaceId) -> PathBuf {
230 self.workspace_dir(workspace_id).join("vectors.usearch")
231 }
232
233 fn delete_workspace(&self, workspace_id: &WorkspaceId) -> Result<()> {
234 let ws_dir = self.workspace_dir(workspace_id);
235 if ws_dir.exists() {
236 fs::remove_dir_all(&ws_dir)?;
237 }
238
239 {
241 let mut cache = self.metadata_cache.write();
242 cache.remove(workspace_id);
243 }
244
245 Ok(())
246 }
247
248 fn list_workspaces(&self) -> Result<Vec<WorkspaceId>> {
249 let mut workspaces = Vec::new();
250
251 if !self.root_dir.exists() {
252 return Ok(workspaces);
253 }
254
255 for entry in fs::read_dir(&self.root_dir)? {
256 let entry = entry?;
257 if entry.file_type()?.is_dir() {
258 if let Some(name) = entry.file_name().to_str() {
259 if let Ok(uuid) = uuid::Uuid::parse_str(name) {
260 workspaces.push(WorkspaceId::from_uuid(uuid));
261 }
262 }
263 }
264 }
265
266 Ok(workspaces)
267 }
268
269 fn get_workspace_stats(&self, workspace_id: &WorkspaceId) -> Result<WorkspaceStats> {
270 let metadata = self.load_workspace_metadata(workspace_id)?;
271
272 let index_size_bytes = self.dir_size(&self.workspace_dir(workspace_id));
273
274 let language_stats: Vec<LanguageStats> = metadata
275 .language_stats
276 .iter()
277 .map(|(lang, info)| LanguageStats {
278 language: *lang,
279 file_count: info.file_count,
280 chunk_count: info.chunk_count,
281 total_bytes: info.total_bytes,
282 })
283 .collect();
284
285 Ok(WorkspaceStats {
286 workspace_id: workspace_id.clone(),
287 root_path: metadata.root_path,
288 document_count: metadata.document_count,
289 chunk_count: metadata.chunk_count,
290 total_bytes: metadata.total_bytes,
291 index_size_bytes,
292 created_at: metadata.created_at,
293 updated_at: metadata.updated_at,
294 language_stats,
295 })
296 }
297
298 fn flush(&self) -> Result<()> {
299 let cache = self.metadata_cache.read();
301 for (workspace_id, metadata) in cache.iter() {
302 let path = self.metadata_path(workspace_id);
303 self.write_json(&path, metadata)?;
304 }
305 Ok(())
306 }
307}
308
#[cfg(test)]
mod tests {
    use super::*;
    // `Language` is not among the parent module's imports, so `super::*` does
    // not bring it into scope.
    // NOTE(review): assumed to live in `crate::types` next to `Document`; confirm.
    use crate::types::Language;
    use tempfile::TempDir;

    /// Walks a workspace through init, lookup, listing and deletion.
    #[test]
    fn test_disk_storage_lifecycle() {
        let temp_dir = TempDir::new().unwrap();
        let storage = DiskStorage::new(temp_dir.path().to_path_buf()).unwrap();

        let workspace_id = WorkspaceId::new();
        let root_path = PathBuf::from("/test/project");

        storage.init_workspace(&workspace_id, &root_path).unwrap();
        assert!(storage.workspace_exists(&workspace_id));

        let metadata = storage.load_workspace_metadata(&workspace_id).unwrap();
        assert_eq!(metadata.root_path, root_path);

        let workspaces = storage.list_workspaces().unwrap();
        assert!(workspaces.contains(&workspace_id));

        storage.delete_workspace(&workspace_id).unwrap();
        assert!(!storage.workspace_exists(&workspace_id));
    }

    /// Saved documents can be loaded back with their fields intact.
    #[test]
    fn test_document_persistence() {
        let temp_dir = TempDir::new().unwrap();
        let storage = DiskStorage::new(temp_dir.path().to_path_buf()).unwrap();

        let workspace_id = WorkspaceId::new();
        storage
            .init_workspace(&workspace_id, &PathBuf::from("/test"))
            .unwrap();

        let docs = vec![Document {
            id: crate::types::DocumentId::new(),
            relative_path: PathBuf::from("test.rs"),
            absolute_path: PathBuf::from("/test/test.rs"),
            language: Language::Rust,
            content_hash: "abc123".to_string(),
            size_bytes: 1000,
            modified_at: chrono::Utc::now(),
        }];

        storage.save_documents(&workspace_id, &docs).unwrap();

        let loaded = storage.load_documents(&workspace_id).unwrap();
        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded[0].relative_path, PathBuf::from("test.rs"));
    }
}