sochdb_storage/
backup.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Backup and restore functionality for SochDB database
16//!
17//! This module provides functionality to create full snapshots of the database
18//! and restore from those snapshots. Backups include all data files, indexes,
19//! and metadata.
20
21use serde::{Deserialize, Serialize};
22use std::fs::{self, File};
23use std::io::Read;
24use std::path::{Path, PathBuf};
25use std::time::{SystemTime, UNIX_EPOCH};
26use sochdb_core::{Result, SochDBError};
27
28/// Metadata about a backup
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct BackupMetadata {
31    /// Timestamp when backup was created (microseconds since Unix epoch)
32    pub timestamp_us: u64,
33
34    /// Human-readable timestamp
35    pub created_at: String,
36
37    /// Total size of backup in bytes
38    pub size_bytes: u64,
39
40    /// Number of files in backup
41    pub file_count: usize,
42
43    /// Database version
44    pub database_version: String,
45
46    /// SHA256 checksum of all files
47    pub checksum: String,
48
49    /// Source database path
50    pub source_path: String,
51}
52
53impl BackupMetadata {
54    /// Generate a backup name from timestamp
55    pub fn generate_name(&self) -> String {
56        format!("sochdb-backup-{}", self.timestamp_us)
57    }
58}
59
/// Manages backup and restore operations for one database directory.
///
/// The stored path is the directory that backups are read from and that
/// restores are written into.
#[derive(Debug, Clone)]
pub struct BackupManager {
    /// Database directory this manager operates on.
    source_path: PathBuf,
}
64
65impl BackupManager {
66    /// Create a new backup manager for the given database path
67    pub fn new<P: AsRef<Path>>(source_path: P) -> Self {
68        Self {
69            source_path: source_path.as_ref().to_path_buf(),
70        }
71    }
72
73    /// Create a backup of the database to the specified destination
74    ///
75    /// The backup includes:
76    /// - All SSTable files (*.sst)
77    /// - WAL file (wal.log)
78    /// - Causal index (causal.index)
79    /// - Vector index (vector.index)
80    /// - Agent registry (agent_registry.json)
81    /// - Metadata manifest (manifest.json)
82    ///
83    /// # Example
84    /// ```ignore
85    /// let manager = BackupManager::new("./test-db");
86    /// let metadata = manager.create_backup("./backups/backup-2025-11-06")?;
87    /// println!("Backup created with {} files", metadata.file_count);
88    /// ```
89    pub fn create_backup<P: AsRef<Path>>(&self, destination: P) -> Result<BackupMetadata> {
90        let dest_path = destination.as_ref();
91
92        // Create destination directory
93        fs::create_dir_all(dest_path).map_err(|e| {
94            SochDBError::Backup(format!("Failed to create backup directory: {}", e))
95        })?;
96
97        let timestamp_us = SystemTime::now()
98            .duration_since(UNIX_EPOCH)
99            .unwrap()
100            .as_micros() as u64;
101
102        let created_at = chrono::Local::now().to_rfc3339();
103
104        // Collect all files to backup
105        let files_to_backup = self.collect_files()?;
106
107        if files_to_backup.is_empty() {
108            return Err(SochDBError::Backup(
109                "No files found in source database".to_string(),
110            ));
111        }
112
113        let mut total_size = 0u64;
114        let mut checksums = Vec::new();
115
116        // Copy each file
117        for (rel_path, src_path) in &files_to_backup {
118            let dest_file_path = dest_path.join(rel_path);
119
120            // Create parent directory if needed
121            if let Some(parent) = dest_file_path.parent() {
122                fs::create_dir_all(parent).map_err(|e| {
123                    SochDBError::Backup(format!("Failed to create directory: {}", e))
124                })?;
125            }
126
127            // Copy file
128            fs::copy(src_path, &dest_file_path).map_err(|e| {
129                SochDBError::Backup(format!("Failed to copy file {}: {}", rel_path, e))
130            })?;
131
132            // Calculate checksum and size
133            let metadata = fs::metadata(&dest_file_path)
134                .map_err(|e| SochDBError::Backup(format!("Failed to read file metadata: {}", e)))?;
135
136            total_size += metadata.len();
137
138            let checksum = self.calculate_file_checksum(&dest_file_path)?;
139            checksums.push(format!("{}:{}", rel_path, checksum));
140        }
141
142        // Calculate overall checksum (hash of all individual checksums)
143        let overall_checksum = self.calculate_string_checksum(&checksums.join("\n"));
144
145        // Create metadata
146        let metadata = BackupMetadata {
147            timestamp_us,
148            created_at,
149            size_bytes: total_size,
150            file_count: files_to_backup.len(),
151            database_version: env!("CARGO_PKG_VERSION").to_string(),
152            checksum: overall_checksum,
153            source_path: self.source_path.display().to_string(),
154        };
155
156        // Write manifest
157        let manifest_path = dest_path.join("manifest.json");
158        let manifest_json = serde_json::to_string_pretty(&metadata)
159            .map_err(|e| SochDBError::Backup(format!("Failed to serialize manifest: {}", e)))?;
160
161        fs::write(&manifest_path, manifest_json)
162            .map_err(|e| SochDBError::Backup(format!("Failed to write manifest: {}", e)))?;
163
164        Ok(metadata)
165    }
166
167    /// Restore a backup to the specified destination
168    ///
169    /// # Warning
170    /// This will overwrite any existing data at the destination path.
171    ///
172    /// # Example
173    /// ```ignore
174    /// let manager = BackupManager::new("./restored-db");
175    /// manager.restore_backup("./backups/backup-2025-11-06")?;
176    /// ```
177    pub fn restore_backup<P: AsRef<Path>>(&self, backup_path: P) -> Result<BackupMetadata> {
178        let backup_path = backup_path.as_ref();
179
180        // Read and verify manifest
181        let manifest_path = backup_path.join("manifest.json");
182        let manifest_json = fs::read_to_string(&manifest_path)
183            .map_err(|e| SochDBError::Backup(format!("Failed to read manifest: {}", e)))?;
184
185        let metadata: BackupMetadata = serde_json::from_str(&manifest_json)
186            .map_err(|e| SochDBError::Backup(format!("Failed to parse manifest: {}", e)))?;
187
188        // Create destination directory
189        fs::create_dir_all(&self.source_path).map_err(|e| {
190            SochDBError::Backup(format!("Failed to create destination directory: {}", e))
191        })?;
192
193        // Get all files in backup (excluding manifest)
194        let files = self.collect_backup_files(backup_path)?;
195
196        // Copy all files to destination
197        for (rel_path, src_path) in files {
198            let dest_path = self.source_path.join(&rel_path);
199
200            // Create parent directory if needed
201            if let Some(parent) = dest_path.parent() {
202                fs::create_dir_all(parent).map_err(|e| {
203                    SochDBError::Backup(format!("Failed to create directory: {}", e))
204                })?;
205            }
206
207            fs::copy(&src_path, &dest_path).map_err(|e| {
208                SochDBError::Backup(format!("Failed to restore file {}: {}", rel_path, e))
209            })?;
210        }
211
212        Ok(metadata)
213    }
214
215    /// List all backups in the specified directory
216    ///
217    /// Returns a list of backup metadata sorted by timestamp (newest first).
218    pub fn list_backups<P: AsRef<Path>>(backup_dir: P) -> Result<Vec<BackupMetadata>> {
219        let backup_dir = backup_dir.as_ref();
220
221        if !backup_dir.exists() {
222            return Ok(Vec::new());
223        }
224
225        let mut backups = Vec::new();
226
227        let entries = fs::read_dir(backup_dir)
228            .map_err(|e| SochDBError::Backup(format!("Failed to read backup directory: {}", e)))?;
229
230        for entry in entries {
231            let entry = entry.map_err(|e| {
232                SochDBError::Backup(format!("Failed to read directory entry: {}", e))
233            })?;
234
235            let path = entry.path();
236            if path.is_dir() {
237                let manifest_path = path.join("manifest.json");
238                if manifest_path.exists() {
239                    match fs::read_to_string(&manifest_path) {
240                        Ok(json) => {
241                            if let Ok(metadata) = serde_json::from_str::<BackupMetadata>(&json) {
242                                backups.push(metadata);
243                            }
244                        }
245                        Err(_) => continue,
246                    }
247                }
248            }
249        }
250
251        // Sort by timestamp (newest first)
252        backups.sort_by(|a, b| b.timestamp_us.cmp(&a.timestamp_us));
253
254        Ok(backups)
255    }
256
257    /// Verify the integrity of a backup
258    ///
259    /// Checks that all files exist and checksums match the manifest.
260    pub fn verify_backup<P: AsRef<Path>>(backup_path: P) -> Result<bool> {
261        let backup_path = backup_path.as_ref();
262
263        // Read manifest
264        let manifest_path = backup_path.join("manifest.json");
265        let manifest_json = fs::read_to_string(&manifest_path)
266            .map_err(|e| SochDBError::Backup(format!("Failed to read manifest: {}", e)))?;
267
268        let _metadata: BackupMetadata = serde_json::from_str(&manifest_json)
269            .map_err(|e| SochDBError::Backup(format!("Failed to parse manifest: {}", e)))?;
270
271        // Verify all files exist
272        let manager = BackupManager::new(backup_path);
273        let files = manager.collect_backup_files(backup_path)?;
274
275        if files.is_empty() {
276            return Ok(false);
277        }
278
279        // All files exist if we got here
280        Ok(true)
281    }
282
283    // Helper methods
284
285    fn collect_files(&self) -> Result<Vec<(String, PathBuf)>> {
286        let mut files = Vec::new();
287
288        if !self.source_path.exists() {
289            return Err(SochDBError::Backup(
290                "Source database path does not exist".to_string(),
291            ));
292        }
293
294        Self::collect_files_recursive(&self.source_path, &self.source_path, &mut files)?;
295
296        Ok(files)
297    }
298
299    fn collect_files_recursive(
300        current_path: &Path,
301        base_path: &Path,
302        files: &mut Vec<(String, PathBuf)>,
303    ) -> Result<()> {
304        let entries = fs::read_dir(current_path)
305            .map_err(|e| SochDBError::Backup(format!("Failed to read directory: {}", e)))?;
306
307        for entry in entries {
308            let entry =
309                entry.map_err(|e| SochDBError::Backup(format!("Failed to read entry: {}", e)))?;
310
311            let path = entry.path();
312
313            if path.is_dir() {
314                // Recursively collect files from subdirectories
315                Self::collect_files_recursive(&path, base_path, files)?;
316            } else {
317                // Add file with relative path
318                let rel_path = path
319                    .strip_prefix(base_path)
320                    .unwrap()
321                    .to_string_lossy()
322                    .to_string();
323                files.push((rel_path, path));
324            }
325        }
326
327        Ok(())
328    }
329
330    fn collect_backup_files(&self, backup_path: &Path) -> Result<Vec<(String, PathBuf)>> {
331        let mut files = Vec::new();
332        Self::collect_backup_files_recursive(backup_path, backup_path, &mut files)?;
333
334        // Filter out manifest.json
335        files.retain(|(rel_path, _)| rel_path != "manifest.json");
336
337        Ok(files)
338    }
339
340    fn collect_backup_files_recursive(
341        current_path: &Path,
342        base_path: &Path,
343        files: &mut Vec<(String, PathBuf)>,
344    ) -> Result<()> {
345        let entries = fs::read_dir(current_path)
346            .map_err(|e| SochDBError::Backup(format!("Failed to read directory: {}", e)))?;
347
348        for entry in entries {
349            let entry =
350                entry.map_err(|e| SochDBError::Backup(format!("Failed to read entry: {}", e)))?;
351
352            let path = entry.path();
353
354            if path.is_dir() {
355                Self::collect_backup_files_recursive(&path, base_path, files)?;
356            } else {
357                let rel_path = path
358                    .strip_prefix(base_path)
359                    .unwrap()
360                    .to_string_lossy()
361                    .to_string();
362                files.push((rel_path, path));
363            }
364        }
365
366        Ok(())
367    }
368
369    fn calculate_file_checksum(&self, path: &Path) -> Result<String> {
370        use sha2::{Digest, Sha256};
371
372        let mut file = File::open(path)
373            .map_err(|e| SochDBError::Backup(format!("Failed to open file for checksum: {}", e)))?;
374
375        let mut hasher = Sha256::new();
376        let mut buffer = [0u8; 8192];
377
378        loop {
379            let n = file.read(&mut buffer).map_err(|e| {
380                SochDBError::Backup(format!("Failed to read file for checksum: {}", e))
381            })?;
382
383            if n == 0 {
384                break;
385            }
386
387            hasher.update(&buffer[..n]);
388        }
389
390        Ok(format!("{:x}", hasher.finalize()))
391    }
392
393    fn calculate_string_checksum(&self, data: &str) -> String {
394        use sha2::{Digest, Sha256};
395        let mut hasher = Sha256::new();
396        hasher.update(data.as_bytes());
397        format!("{:x}", hasher.finalize())
398    }
399}
400
401#[cfg(test)]
402mod tests {
403    use super::*;
404    use std::fs;
405    use tempfile::TempDir;
406
407    #[test]
408    fn test_create_and_restore_backup() {
409        // Create a temporary database directory
410        let db_dir = TempDir::new().unwrap();
411        let db_path = db_dir.path();
412
413        // Create some test files
414        fs::write(db_path.join("test.sst"), b"test data").unwrap();
415        fs::write(db_path.join("wal.log"), b"wal data").unwrap();
416        fs::create_dir_all(db_path.join("subdir")).unwrap();
417        fs::write(db_path.join("subdir").join("index.dat"), b"index data").unwrap();
418
419        // Create backup
420        let backup_dir = TempDir::new().unwrap();
421        let backup_path = backup_dir.path().join("backup-1");
422
423        let manager = BackupManager::new(db_path);
424        let metadata = manager.create_backup(&backup_path).unwrap();
425
426        assert_eq!(metadata.file_count, 3);
427        assert!(metadata.size_bytes > 0);
428        assert!(backup_path.join("manifest.json").exists());
429        assert!(backup_path.join("test.sst").exists());
430
431        // Restore to new location
432        let restore_dir = TempDir::new().unwrap();
433        let restore_path = restore_dir.path().join("restored");
434
435        let restore_manager = BackupManager::new(&restore_path);
436        let restored_metadata = restore_manager.restore_backup(&backup_path).unwrap();
437
438        assert_eq!(restored_metadata.file_count, metadata.file_count);
439        assert!(restore_path.join("test.sst").exists());
440        assert!(restore_path.join("wal.log").exists());
441        assert!(restore_path.join("subdir").join("index.dat").exists());
442
443        // Verify content
444        let content = fs::read_to_string(restore_path.join("test.sst")).unwrap();
445        assert_eq!(content, "test data");
446    }
447
448    #[test]
449    fn test_list_backups() {
450        let backup_dir = TempDir::new().unwrap();
451        let backup_path = backup_dir.path();
452
453        // Create source database
454        let db_dir = TempDir::new().unwrap();
455        fs::write(db_dir.path().join("test.sst"), b"data").unwrap();
456
457        let manager = BackupManager::new(db_dir.path());
458
459        // Create multiple backups
460        let backup1 = backup_path.join("backup-1");
461        let backup2 = backup_path.join("backup-2");
462
463        manager.create_backup(&backup1).unwrap();
464        std::thread::sleep(std::time::Duration::from_millis(10));
465        manager.create_backup(&backup2).unwrap();
466
467        // List backups
468        let backups = BackupManager::list_backups(backup_path).unwrap();
469
470        assert_eq!(backups.len(), 2);
471        // Should be sorted newest first
472        assert!(backups[0].timestamp_us > backups[1].timestamp_us);
473    }
474
475    #[test]
476    fn test_verify_backup() {
477        let db_dir = TempDir::new().unwrap();
478        fs::write(db_dir.path().join("test.sst"), b"data").unwrap();
479
480        let backup_dir = TempDir::new().unwrap();
481        let backup_path = backup_dir.path().join("backup");
482
483        let manager = BackupManager::new(db_dir.path());
484        manager.create_backup(&backup_path).unwrap();
485
486        let valid = BackupManager::verify_backup(&backup_path).unwrap();
487        assert!(valid);
488    }
489}