stowr_core/
index.rs

1use anyhow::{Context, Result};
2use rusqlite::{Connection, OptionalExtension};
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::fs;
6use std::path::{Path, PathBuf};
7use chrono;
8
9use crate::config::{Config, IndexMode, CompressionAlgorithm, DeltaAlgorithm};
10use crate::dedup::DedupInfo;
11use crate::delta::DeltaInfo;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct FileEntry {
15    pub id: String,
16    pub original_path: PathBuf,
17    pub stored_path: PathBuf,
18    pub file_size: u64,
19    pub compressed_size: u64,
20    pub created_at: String,
21    pub compression_algorithm: CompressionAlgorithm,
22    // 去重相关字段
23    #[serde(skip_serializing_if = "Option::is_none")]
24    pub hash: Option<String>,
25    #[serde(skip_serializing_if = "Option::is_none")]
26    pub is_reference: Option<bool>,
27    #[serde(skip_serializing_if = "Option::is_none")]
28    pub original_storage_id: Option<String>,
29    #[serde(skip_serializing_if = "Option::is_none")]
30    pub ref_count: Option<u32>,
31    // 差分相关字段
32    #[serde(skip_serializing_if = "Option::is_none")]
33    pub is_delta: Option<bool>,
34    #[serde(skip_serializing_if = "Option::is_none")]
35    pub base_storage_id: Option<String>,
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub similarity_score: Option<f32>,
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub delta_algorithm: Option<DeltaAlgorithm>,
40}
41
42impl FileEntry {
43    /// 创建新的文件条目
44    pub fn new(
45        id: String,
46        original_path: PathBuf,
47        stored_path: PathBuf,
48        file_size: u64,
49        compressed_size: u64,
50        compression_algorithm: CompressionAlgorithm,
51    ) -> Self {
52        Self {
53            id,
54            original_path,
55            stored_path,
56            file_size,
57            compressed_size,
58            created_at: chrono::Utc::now().to_rfc3339(),
59            compression_algorithm,
60            hash: None,
61            is_reference: None,
62            original_storage_id: None,
63            ref_count: None,
64            is_delta: None,
65            base_storage_id: None,
66            similarity_score: None,
67            delta_algorithm: None,
68        }
69    }
70
71    /// 设置去重信息
72    pub fn set_dedup_info(&mut self, dedup_info: DedupInfo) {
73        self.hash = Some(dedup_info.hash);
74        self.is_reference = Some(dedup_info.is_reference);
75        self.original_storage_id = dedup_info.original_storage_id;
76        self.ref_count = Some(dedup_info.ref_count);
77    }
78
79    /// 设置差分信息
80    pub fn set_delta_info(&mut self, delta_info: DeltaInfo) {
81        self.is_delta = Some(delta_info.is_delta);
82        self.base_storage_id = delta_info.base_storage_id;
83        self.similarity_score = delta_info.similarity_score;
84        self.delta_algorithm = Some(delta_info.delta_algorithm);
85        self.compressed_size = delta_info.delta_size;
86    }
87
88    /// 检查是否为引用文件
89    pub fn is_reference_file(&self) -> bool {
90        self.is_reference.unwrap_or(false)
91    }
92
93    /// 检查是否为差分文件
94    pub fn is_delta_file(&self) -> bool {
95        self.is_delta.unwrap_or(false)
96    }
97
98    /// 获取实际存储大小(考虑引用文件)
99    pub fn get_actual_storage_size(&self) -> u64 {
100        if self.is_reference_file() {
101            0 // 引用文件不占用额外存储空间
102        } else {
103            self.compressed_size
104        }
105    }
106}
107
108pub trait IndexStore {
109    fn add_file(&mut self, entry: FileEntry) -> Result<()>;
110    fn get_file(&self, original_path: &Path) -> Result<Option<FileEntry>>;
111    fn remove_file(&mut self, original_path: &Path) -> Result<Option<FileEntry>>;
112    fn list_files(&self) -> Result<Vec<FileEntry>>;
113    fn rename_file(&mut self, old_path: &Path, new_path: &Path) -> Result<()>;
114    fn move_file(&mut self, original_path: &Path, new_path: &Path) -> Result<()>;
115    fn count(&self) -> Result<usize>;
116}
117
118pub struct JsonIndex {
119    index_path: PathBuf,
120    entries: HashMap<PathBuf, FileEntry>,
121}
122
123impl JsonIndex {
124    pub fn new(storage_path: &Path) -> Result<Self> {
125        let index_path = storage_path.join("index.json");
126        let entries = if index_path.exists() {
127            let content = fs::read_to_string(&index_path)
128                .context("Failed to read index file")?;
129            serde_json::from_str(&content)
130                .unwrap_or_else(|_| HashMap::new())
131        } else {
132            HashMap::new()
133        };
134
135        Ok(Self {
136            index_path,
137            entries,
138        })
139    }
140
141    fn save(&self) -> Result<()> {
142        let content = serde_json::to_string_pretty(&self.entries)
143            .context("Failed to serialize index")?;
144        fs::write(&self.index_path, content)
145            .context("Failed to write index file")?;
146        Ok(())
147    }
148}
149
150impl IndexStore for JsonIndex {
151    fn add_file(&mut self, entry: FileEntry) -> Result<()> {
152        self.entries.insert(entry.original_path.clone(), entry);
153        self.save()
154    }
155
156    fn get_file(&self, original_path: &Path) -> Result<Option<FileEntry>> {
157        Ok(self.entries.get(original_path).cloned())
158    }
159
160    fn remove_file(&mut self, original_path: &Path) -> Result<Option<FileEntry>> {
161        let entry = self.entries.remove(original_path);
162        self.save()?;
163        Ok(entry)
164    }
165
166    fn list_files(&self) -> Result<Vec<FileEntry>> {
167        Ok(self.entries.values().cloned().collect())
168    }
169
170    fn rename_file(&mut self, old_path: &Path, new_path: &Path) -> Result<()> {
171        if let Some(mut entry) = self.entries.remove(old_path) {
172            entry.original_path = new_path.to_path_buf();
173            self.entries.insert(new_path.to_path_buf(), entry);
174            self.save()?;
175        }
176        Ok(())
177    }
178
179    fn move_file(&mut self, original_path: &Path, new_path: &Path) -> Result<()> {
180        if let Some(mut entry) = self.entries.remove(original_path) {
181            entry.original_path = new_path.to_path_buf();
182            self.entries.insert(new_path.to_path_buf(), entry);
183            self.save()?;
184        }
185        Ok(())
186    }
187
188    fn count(&self) -> Result<usize> {
189        Ok(self.entries.len())
190    }
191}
192
193pub struct SqliteIndex {
194    conn: Connection,
195}
196
197impl SqliteIndex {
198    pub fn new(storage_path: &Path) -> Result<Self> {
199        let db_path = storage_path.join("index.db");
200        let conn = Connection::open(db_path)
201            .context("Failed to open SQLite database")?;
202
203        conn.execute(
204            "CREATE TABLE IF NOT EXISTS files (
205                original_path TEXT PRIMARY KEY,
206                id TEXT NOT NULL,
207                stored_path TEXT NOT NULL,
208                file_size INTEGER NOT NULL,
209                compressed_size INTEGER NOT NULL,
210                created_at TEXT NOT NULL,
211                compression_algorithm TEXT NOT NULL DEFAULT 'gzip',
212                hash TEXT,
213                is_reference INTEGER DEFAULT 0,
214                original_storage_id TEXT,
215                ref_count INTEGER DEFAULT 1,
216                is_delta INTEGER DEFAULT 0,
217                base_storage_id TEXT,
218                similarity_score REAL,
219                delta_algorithm TEXT
220            )",
221            [],
222        )?;
223
224        Ok(Self { conn })
225    }
226}
227
228impl IndexStore for SqliteIndex {
229    fn add_file(&mut self, entry: FileEntry) -> Result<()> {
230        self.conn.execute(
231            "INSERT OR REPLACE INTO files (
232                original_path, id, stored_path, file_size, compressed_size, created_at,
233                compression_algorithm, hash, is_reference, original_storage_id, ref_count,
234                is_delta, base_storage_id, similarity_score, delta_algorithm
235            ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)",
236            rusqlite::params![
237                entry.original_path.to_string_lossy(),
238                entry.id,
239                entry.stored_path.to_string_lossy(),
240                entry.file_size,
241                entry.compressed_size,
242                entry.created_at,
243                entry.compression_algorithm.to_string(),
244                entry.hash,
245                entry.is_reference.map(|b| if b { 1 } else { 0 }),
246                entry.original_storage_id,
247                entry.ref_count,
248                entry.is_delta.map(|b| if b { 1 } else { 0 }),
249                entry.base_storage_id,
250                entry.similarity_score,
251                entry.delta_algorithm.as_ref().map(|a| a.to_string())
252            ],
253        )?;
254        Ok(())
255    }
256
257    fn get_file(&self, original_path: &Path) -> Result<Option<FileEntry>> {
258        let mut stmt = self.conn.prepare(
259            "SELECT id, stored_path, file_size, compressed_size, created_at,
260                    compression_algorithm, hash, is_reference, original_storage_id, ref_count,
261                    is_delta, base_storage_id, similarity_score, delta_algorithm
262             FROM files WHERE original_path = ?1"
263        )?;
264
265        let entry = stmt.query_row([original_path.to_string_lossy()], |row| {
266            Ok(FileEntry {
267                id: row.get(0)?,
268                original_path: original_path.to_path_buf(),
269                stored_path: PathBuf::from(row.get::<_, String>(1)?),
270                file_size: row.get(2)?,
271                compressed_size: row.get(3)?,
272                created_at: row.get(4)?,
273                compression_algorithm: row.get::<_, String>(5)?.parse()
274                    .map_err(|_| rusqlite::Error::InvalidColumnType(5, "compression_algorithm".to_string(), rusqlite::types::Type::Text))?,
275                hash: row.get(6)?,
276                is_reference: row.get::<_, Option<i32>>(7)?.map(|i| i != 0),
277                original_storage_id: row.get(8)?,
278                ref_count: row.get(9)?,
279                is_delta: row.get::<_, Option<i32>>(10)?.map(|i| i != 0),
280                base_storage_id: row.get(11)?,
281                similarity_score: row.get(12)?,
282                delta_algorithm: row.get::<_, Option<String>>(13)?
283                    .map(|s| s.parse())
284                    .transpose()
285                    .map_err(|_| rusqlite::Error::InvalidColumnType(13, "delta_algorithm".to_string(), rusqlite::types::Type::Text))?,
286            })
287        }).optional()?;
288
289        Ok(entry)
290    }
291
292    fn remove_file(&mut self, original_path: &Path) -> Result<Option<FileEntry>> {
293        let entry = self.get_file(original_path)?;
294        if entry.is_some() {
295            self.conn.execute(
296                "DELETE FROM files WHERE original_path = ?1",
297                [original_path.to_string_lossy()],
298            )?;
299        }
300        Ok(entry)
301    }
302
303    fn list_files(&self) -> Result<Vec<FileEntry>> {
304        let mut stmt = self.conn.prepare(
305            "SELECT original_path, id, stored_path, file_size, compressed_size, created_at,
306                    compression_algorithm, hash, is_reference, original_storage_id, ref_count,
307                    is_delta, base_storage_id, similarity_score, delta_algorithm
308             FROM files"
309        )?;
310
311        let entries = stmt.query_map([], |row| {
312            Ok(FileEntry {
313                original_path: PathBuf::from(row.get::<_, String>(0)?),
314                id: row.get(1)?,
315                stored_path: PathBuf::from(row.get::<_, String>(2)?),
316                file_size: row.get(3)?,
317                compressed_size: row.get(4)?,
318                created_at: row.get(5)?,
319                compression_algorithm: row.get::<_, String>(6)?.parse()
320                    .map_err(|_| rusqlite::Error::InvalidColumnType(6, "compression_algorithm".to_string(), rusqlite::types::Type::Text))?,
321                hash: row.get(7)?,
322                is_reference: row.get::<_, Option<i32>>(8)?.map(|i| i != 0),
323                original_storage_id: row.get(9)?,
324                ref_count: row.get(10)?,
325                is_delta: row.get::<_, Option<i32>>(11)?.map(|i| i != 0),
326                base_storage_id: row.get(12)?,
327                similarity_score: row.get(13)?,
328                delta_algorithm: row.get::<_, Option<String>>(14)?
329                    .map(|s| s.parse())
330                    .transpose()
331                    .map_err(|_| rusqlite::Error::InvalidColumnType(14, "delta_algorithm".to_string(), rusqlite::types::Type::Text))?,
332            })
333        })?.collect::<Result<Vec<_>, _>>()?;
334
335        Ok(entries)
336    }
337
338    fn rename_file(&mut self, old_path: &Path, new_path: &Path) -> Result<()> {
339        self.conn.execute(
340            "UPDATE files SET original_path = ?1 WHERE original_path = ?2",
341            rusqlite::params![
342                new_path.to_string_lossy(),
343                old_path.to_string_lossy()
344            ],
345        )?;
346        Ok(())
347    }
348
349    fn move_file(&mut self, original_path: &Path, new_path: &Path) -> Result<()> {
350        self.conn.execute(
351            "UPDATE files SET original_path = ?1 WHERE original_path = ?2",
352            rusqlite::params![
353                new_path.to_string_lossy(),
354                original_path.to_string_lossy()
355            ],
356        )?;
357        Ok(())
358    }
359
360    fn count(&self) -> Result<usize> {
361        let mut stmt = self.conn.prepare("SELECT COUNT(*) FROM files")?;
362        let count: i64 = stmt.query_row([], |row| row.get(0))?;
363        Ok(count as usize)
364    }
365}
366
367pub fn create_index(config: &Config) -> Result<Box<dyn IndexStore>> {
368    fs::create_dir_all(&config.storage_path)?;
369
370    let mode = match &config.index_mode {
371        IndexMode::Auto => {
372            // 尝试读取现有的索引来决定使用哪种模式
373            let json_index = JsonIndex::new(&config.storage_path)?;
374            let count = json_index.count()?;
375            if count >= 1000 {
376                IndexMode::Sqlite
377            } else {
378                IndexMode::Json
379            }
380        }
381        mode => mode.clone(),
382    };
383
384    match mode {
385        IndexMode::Json | IndexMode::Auto => {
386            Ok(Box::new(JsonIndex::new(&config.storage_path)?))
387        }
388        IndexMode::Sqlite => {
389            Ok(Box::new(SqliteIndex::new(&config.storage_path)?))
390        }
391    }
392}