1use anyhow::{Context, Result};
2use rusqlite::{Connection, OptionalExtension};
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::fs;
6use std::path::{Path, PathBuf};
7use chrono;
8
9use crate::config::{Config, IndexMode, CompressionAlgorithm, DeltaAlgorithm};
10use crate::dedup::DedupInfo;
11use crate::delta::DeltaInfo;
12
/// Index record for a single stored file.
///
/// Dedup- and delta-related fields are `None` until populated via
/// `set_dedup_info` / `set_delta_info`, and are omitted from JSON output
/// when unset (`skip_serializing_if`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Unique storage identifier for this entry.
    pub id: String,
    /// Path of the file as originally supplied; used as the index key.
    pub original_path: PathBuf,
    /// Path of the stored (compressed) payload on disk.
    pub stored_path: PathBuf,
    /// Size of the original file, in bytes.
    pub file_size: u64,
    /// Size of the stored payload in bytes; replaced by the delta size
    /// when `set_delta_info` is applied.
    pub compressed_size: u64,
    /// Creation timestamp as an RFC 3339 string (set by `FileEntry::new`).
    pub created_at: String,
    /// Compression algorithm used for the stored payload.
    pub compression_algorithm: CompressionAlgorithm,
    /// Content hash used for deduplication.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hash: Option<String>,
    /// `Some(true)` when this entry merely references another entry's payload.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_reference: Option<bool>,
    /// Storage id of the entry whose payload this reference points at.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub original_storage_id: Option<String>,
    /// Number of entries sharing this payload.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ref_count: Option<u32>,
    /// `Some(true)` when the payload is stored as a delta against a base entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_delta: Option<bool>,
    /// Storage id of the delta base entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub base_storage_id: Option<String>,
    /// Similarity score against the base.
    /// NOTE(review): value range not visible here — presumably 0.0..=1.0;
    /// confirm against the delta module.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub similarity_score: Option<f32>,
    /// Delta algorithm used to encode the payload.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub delta_algorithm: Option<DeltaAlgorithm>,
}
41
42impl FileEntry {
43 pub fn new(
45 id: String,
46 original_path: PathBuf,
47 stored_path: PathBuf,
48 file_size: u64,
49 compressed_size: u64,
50 compression_algorithm: CompressionAlgorithm,
51 ) -> Self {
52 Self {
53 id,
54 original_path,
55 stored_path,
56 file_size,
57 compressed_size,
58 created_at: chrono::Utc::now().to_rfc3339(),
59 compression_algorithm,
60 hash: None,
61 is_reference: None,
62 original_storage_id: None,
63 ref_count: None,
64 is_delta: None,
65 base_storage_id: None,
66 similarity_score: None,
67 delta_algorithm: None,
68 }
69 }
70
71 pub fn set_dedup_info(&mut self, dedup_info: DedupInfo) {
73 self.hash = Some(dedup_info.hash);
74 self.is_reference = Some(dedup_info.is_reference);
75 self.original_storage_id = dedup_info.original_storage_id;
76 self.ref_count = Some(dedup_info.ref_count);
77 }
78
79 pub fn set_delta_info(&mut self, delta_info: DeltaInfo) {
81 self.is_delta = Some(delta_info.is_delta);
82 self.base_storage_id = delta_info.base_storage_id;
83 self.similarity_score = delta_info.similarity_score;
84 self.delta_algorithm = Some(delta_info.delta_algorithm);
85 self.compressed_size = delta_info.delta_size;
86 }
87
88 pub fn is_reference_file(&self) -> bool {
90 self.is_reference.unwrap_or(false)
91 }
92
93 pub fn is_delta_file(&self) -> bool {
95 self.is_delta.unwrap_or(false)
96 }
97
98 pub fn get_actual_storage_size(&self) -> u64 {
100 if self.is_reference_file() {
101 0 } else {
103 self.compressed_size
104 }
105 }
106}
107
/// Common interface of the persistent file-index backends
/// (`JsonIndex`, `SqliteIndex`). Entries are keyed by `original_path`.
pub trait IndexStore {
    /// Inserts or replaces the entry keyed by `entry.original_path`.
    fn add_file(&mut self, entry: FileEntry) -> Result<()>;
    /// Looks up the entry for `original_path`, if any.
    fn get_file(&self, original_path: &Path) -> Result<Option<FileEntry>>;
    /// Removes and returns the entry for `original_path`, if any.
    fn remove_file(&mut self, original_path: &Path) -> Result<Option<FileEntry>>;
    /// Returns every entry in the index (order unspecified).
    fn list_files(&self) -> Result<Vec<FileEntry>>;
    /// Re-keys the entry at `old_path` under `new_path`; no-op when absent.
    fn rename_file(&mut self, old_path: &Path, new_path: &Path) -> Result<()>;
    /// Same contract as `rename_file` in both current backends.
    fn move_file(&mut self, original_path: &Path, new_path: &Path) -> Result<()>;
    /// Number of entries in the index.
    fn count(&self) -> Result<usize>;
}
117
/// JSON-file-backed index: the whole map is held in memory and the file at
/// `index_path` is rewritten after every mutation.
pub struct JsonIndex {
    /// Full path to the backing `index.json` file.
    index_path: PathBuf,
    /// In-memory map from original file path to its entry.
    entries: HashMap<PathBuf, FileEntry>,
}
122
123impl JsonIndex {
124 pub fn new(storage_path: &Path) -> Result<Self> {
125 let index_path = storage_path.join("index.json");
126 let entries = if index_path.exists() {
127 let content = fs::read_to_string(&index_path)
128 .context("Failed to read index file")?;
129 serde_json::from_str(&content)
130 .unwrap_or_else(|_| HashMap::new())
131 } else {
132 HashMap::new()
133 };
134
135 Ok(Self {
136 index_path,
137 entries,
138 })
139 }
140
141 fn save(&self) -> Result<()> {
142 let content = serde_json::to_string_pretty(&self.entries)
143 .context("Failed to serialize index")?;
144 fs::write(&self.index_path, content)
145 .context("Failed to write index file")?;
146 Ok(())
147 }
148}
149
150impl IndexStore for JsonIndex {
151 fn add_file(&mut self, entry: FileEntry) -> Result<()> {
152 self.entries.insert(entry.original_path.clone(), entry);
153 self.save()
154 }
155
156 fn get_file(&self, original_path: &Path) -> Result<Option<FileEntry>> {
157 Ok(self.entries.get(original_path).cloned())
158 }
159
160 fn remove_file(&mut self, original_path: &Path) -> Result<Option<FileEntry>> {
161 let entry = self.entries.remove(original_path);
162 self.save()?;
163 Ok(entry)
164 }
165
166 fn list_files(&self) -> Result<Vec<FileEntry>> {
167 Ok(self.entries.values().cloned().collect())
168 }
169
170 fn rename_file(&mut self, old_path: &Path, new_path: &Path) -> Result<()> {
171 if let Some(mut entry) = self.entries.remove(old_path) {
172 entry.original_path = new_path.to_path_buf();
173 self.entries.insert(new_path.to_path_buf(), entry);
174 self.save()?;
175 }
176 Ok(())
177 }
178
179 fn move_file(&mut self, original_path: &Path, new_path: &Path) -> Result<()> {
180 if let Some(mut entry) = self.entries.remove(original_path) {
181 entry.original_path = new_path.to_path_buf();
182 self.entries.insert(new_path.to_path_buf(), entry);
183 self.save()?;
184 }
185 Ok(())
186 }
187
188 fn count(&self) -> Result<usize> {
189 Ok(self.entries.len())
190 }
191}
192
/// SQLite-backed index; every operation goes straight to the database, so
/// nothing is cached in memory.
pub struct SqliteIndex {
    /// Open connection to `<storage_path>/index.db`.
    conn: Connection,
}
196
impl SqliteIndex {
    /// Opens (or creates) the database at `<storage_path>/index.db` and
    /// ensures the `files` table exists.
    ///
    /// `original_path` is the primary key, matching the trait's path-keyed
    /// semantics. Column defaults mirror the optional `FileEntry` fields;
    /// booleans are stored as 0/1 integers and algorithm names as text.
    pub fn new(storage_path: &Path) -> Result<Self> {
        let db_path = storage_path.join("index.db");
        let conn = Connection::open(db_path)
            .context("Failed to open SQLite database")?;

        conn.execute(
            "CREATE TABLE IF NOT EXISTS files (
                original_path TEXT PRIMARY KEY,
                id TEXT NOT NULL,
                stored_path TEXT NOT NULL,
                file_size INTEGER NOT NULL,
                compressed_size INTEGER NOT NULL,
                created_at TEXT NOT NULL,
                compression_algorithm TEXT NOT NULL DEFAULT 'gzip',
                hash TEXT,
                is_reference INTEGER DEFAULT 0,
                original_storage_id TEXT,
                ref_count INTEGER DEFAULT 1,
                is_delta INTEGER DEFAULT 0,
                base_storage_id TEXT,
                similarity_score REAL,
                delta_algorithm TEXT
            )",
            [],
        )?;

        Ok(Self { conn })
    }
}
227
228impl IndexStore for SqliteIndex {
229 fn add_file(&mut self, entry: FileEntry) -> Result<()> {
230 self.conn.execute(
231 "INSERT OR REPLACE INTO files (
232 original_path, id, stored_path, file_size, compressed_size, created_at,
233 compression_algorithm, hash, is_reference, original_storage_id, ref_count,
234 is_delta, base_storage_id, similarity_score, delta_algorithm
235 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)",
236 rusqlite::params![
237 entry.original_path.to_string_lossy(),
238 entry.id,
239 entry.stored_path.to_string_lossy(),
240 entry.file_size,
241 entry.compressed_size,
242 entry.created_at,
243 entry.compression_algorithm.to_string(),
244 entry.hash,
245 entry.is_reference.map(|b| if b { 1 } else { 0 }),
246 entry.original_storage_id,
247 entry.ref_count,
248 entry.is_delta.map(|b| if b { 1 } else { 0 }),
249 entry.base_storage_id,
250 entry.similarity_score,
251 entry.delta_algorithm.as_ref().map(|a| a.to_string())
252 ],
253 )?;
254 Ok(())
255 }
256
257 fn get_file(&self, original_path: &Path) -> Result<Option<FileEntry>> {
258 let mut stmt = self.conn.prepare(
259 "SELECT id, stored_path, file_size, compressed_size, created_at,
260 compression_algorithm, hash, is_reference, original_storage_id, ref_count,
261 is_delta, base_storage_id, similarity_score, delta_algorithm
262 FROM files WHERE original_path = ?1"
263 )?;
264
265 let entry = stmt.query_row([original_path.to_string_lossy()], |row| {
266 Ok(FileEntry {
267 id: row.get(0)?,
268 original_path: original_path.to_path_buf(),
269 stored_path: PathBuf::from(row.get::<_, String>(1)?),
270 file_size: row.get(2)?,
271 compressed_size: row.get(3)?,
272 created_at: row.get(4)?,
273 compression_algorithm: row.get::<_, String>(5)?.parse()
274 .map_err(|_| rusqlite::Error::InvalidColumnType(5, "compression_algorithm".to_string(), rusqlite::types::Type::Text))?,
275 hash: row.get(6)?,
276 is_reference: row.get::<_, Option<i32>>(7)?.map(|i| i != 0),
277 original_storage_id: row.get(8)?,
278 ref_count: row.get(9)?,
279 is_delta: row.get::<_, Option<i32>>(10)?.map(|i| i != 0),
280 base_storage_id: row.get(11)?,
281 similarity_score: row.get(12)?,
282 delta_algorithm: row.get::<_, Option<String>>(13)?
283 .map(|s| s.parse())
284 .transpose()
285 .map_err(|_| rusqlite::Error::InvalidColumnType(13, "delta_algorithm".to_string(), rusqlite::types::Type::Text))?,
286 })
287 }).optional()?;
288
289 Ok(entry)
290 }
291
292 fn remove_file(&mut self, original_path: &Path) -> Result<Option<FileEntry>> {
293 let entry = self.get_file(original_path)?;
294 if entry.is_some() {
295 self.conn.execute(
296 "DELETE FROM files WHERE original_path = ?1",
297 [original_path.to_string_lossy()],
298 )?;
299 }
300 Ok(entry)
301 }
302
303 fn list_files(&self) -> Result<Vec<FileEntry>> {
304 let mut stmt = self.conn.prepare(
305 "SELECT original_path, id, stored_path, file_size, compressed_size, created_at,
306 compression_algorithm, hash, is_reference, original_storage_id, ref_count,
307 is_delta, base_storage_id, similarity_score, delta_algorithm
308 FROM files"
309 )?;
310
311 let entries = stmt.query_map([], |row| {
312 Ok(FileEntry {
313 original_path: PathBuf::from(row.get::<_, String>(0)?),
314 id: row.get(1)?,
315 stored_path: PathBuf::from(row.get::<_, String>(2)?),
316 file_size: row.get(3)?,
317 compressed_size: row.get(4)?,
318 created_at: row.get(5)?,
319 compression_algorithm: row.get::<_, String>(6)?.parse()
320 .map_err(|_| rusqlite::Error::InvalidColumnType(6, "compression_algorithm".to_string(), rusqlite::types::Type::Text))?,
321 hash: row.get(7)?,
322 is_reference: row.get::<_, Option<i32>>(8)?.map(|i| i != 0),
323 original_storage_id: row.get(9)?,
324 ref_count: row.get(10)?,
325 is_delta: row.get::<_, Option<i32>>(11)?.map(|i| i != 0),
326 base_storage_id: row.get(12)?,
327 similarity_score: row.get(13)?,
328 delta_algorithm: row.get::<_, Option<String>>(14)?
329 .map(|s| s.parse())
330 .transpose()
331 .map_err(|_| rusqlite::Error::InvalidColumnType(14, "delta_algorithm".to_string(), rusqlite::types::Type::Text))?,
332 })
333 })?.collect::<Result<Vec<_>, _>>()?;
334
335 Ok(entries)
336 }
337
338 fn rename_file(&mut self, old_path: &Path, new_path: &Path) -> Result<()> {
339 self.conn.execute(
340 "UPDATE files SET original_path = ?1 WHERE original_path = ?2",
341 rusqlite::params![
342 new_path.to_string_lossy(),
343 old_path.to_string_lossy()
344 ],
345 )?;
346 Ok(())
347 }
348
349 fn move_file(&mut self, original_path: &Path, new_path: &Path) -> Result<()> {
350 self.conn.execute(
351 "UPDATE files SET original_path = ?1 WHERE original_path = ?2",
352 rusqlite::params![
353 new_path.to_string_lossy(),
354 original_path.to_string_lossy()
355 ],
356 )?;
357 Ok(())
358 }
359
360 fn count(&self) -> Result<usize> {
361 let mut stmt = self.conn.prepare("SELECT COUNT(*) FROM files")?;
362 let count: i64 = stmt.query_row([], |row| row.get(0))?;
363 Ok(count as usize)
364 }
365}
366
367pub fn create_index(config: &Config) -> Result<Box<dyn IndexStore>> {
368 fs::create_dir_all(&config.storage_path)?;
369
370 let mode = match &config.index_mode {
371 IndexMode::Auto => {
372 let json_index = JsonIndex::new(&config.storage_path)?;
374 let count = json_index.count()?;
375 if count >= 1000 {
376 IndexMode::Sqlite
377 } else {
378 IndexMode::Json
379 }
380 }
381 mode => mode.clone(),
382 };
383
384 match mode {
385 IndexMode::Json | IndexMode::Auto => {
386 Ok(Box::new(JsonIndex::new(&config.storage_path)?))
387 }
388 IndexMode::Sqlite => {
389 Ok(Box::new(SqliteIndex::new(&config.storage_path)?))
390 }
391 }
392}