Skip to main content

sochdb_storage/
backup.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! Backup and restore functionality for SochDB database
19//!
20//! This module provides functionality to create full snapshots of the database
21//! and restore from those snapshots. Backups include all data files, indexes,
22//! and metadata.
23
24use serde::{Deserialize, Serialize};
25use sochdb_core::{Result, SochDBError};
26use std::fs::{self, File};
27use std::io::Read;
28use std::path::{Path, PathBuf};
29use std::time::{SystemTime, UNIX_EPOCH};
30
31/// Metadata about a backup
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct BackupMetadata {
34    /// Timestamp when backup was created (microseconds since Unix epoch)
35    pub timestamp_us: u64,
36
37    /// Human-readable timestamp
38    pub created_at: String,
39
40    /// Total size of backup in bytes
41    pub size_bytes: u64,
42
43    /// Number of files in backup
44    pub file_count: usize,
45
46    /// Database version
47    pub database_version: String,
48
49    /// SHA256 checksum of all files
50    pub checksum: String,
51
52    /// Source database path
53    pub source_path: String,
54}
55
56impl BackupMetadata {
57    /// Generate a backup name from timestamp
58    pub fn generate_name(&self) -> String {
59        format!("sochdb-backup-{}", self.timestamp_us)
60    }
61}
62
63/// Manages backup and restore operations
64pub struct BackupManager {
65    source_path: PathBuf,
66}
67
68impl BackupManager {
69    /// Create a new backup manager for the given database path
70    pub fn new<P: AsRef<Path>>(source_path: P) -> Self {
71        Self {
72            source_path: source_path.as_ref().to_path_buf(),
73        }
74    }
75
76    /// Create a backup of the database to the specified destination
77    ///
78    /// The backup includes:
79    /// - All SSTable files (*.sst)
80    /// - WAL file (wal.log)
81    /// - Causal index (causal.index)
82    /// - Vector index (vector.index)
83    /// - Agent registry (agent_registry.json)
84    /// - Metadata manifest (manifest.json)
85    ///
86    /// # Example
87    /// ```ignore
88    /// let manager = BackupManager::new("./test-db");
89    /// let metadata = manager.create_backup("./backups/backup-2025-11-06")?;
90    /// println!("Backup created with {} files", metadata.file_count);
91    /// ```
92    pub fn create_backup<P: AsRef<Path>>(&self, destination: P) -> Result<BackupMetadata> {
93        let dest_path = destination.as_ref();
94
95        // Create destination directory
96        fs::create_dir_all(dest_path).map_err(|e| {
97            SochDBError::Backup(format!("Failed to create backup directory: {}", e))
98        })?;
99
100        let timestamp_us = SystemTime::now()
101            .duration_since(UNIX_EPOCH)
102            .unwrap()
103            .as_micros() as u64;
104
105        let created_at = chrono::Local::now().to_rfc3339();
106
107        // Collect all files to backup
108        let files_to_backup = self.collect_files()?;
109
110        if files_to_backup.is_empty() {
111            return Err(SochDBError::Backup(
112                "No files found in source database".to_string(),
113            ));
114        }
115
116        let mut total_size = 0u64;
117        let mut checksums = Vec::new();
118
119        // Copy each file
120        for (rel_path, src_path) in &files_to_backup {
121            let dest_file_path = dest_path.join(rel_path);
122
123            // Create parent directory if needed
124            if let Some(parent) = dest_file_path.parent() {
125                fs::create_dir_all(parent).map_err(|e| {
126                    SochDBError::Backup(format!("Failed to create directory: {}", e))
127                })?;
128            }
129
130            // Copy file
131            fs::copy(src_path, &dest_file_path).map_err(|e| {
132                SochDBError::Backup(format!("Failed to copy file {}: {}", rel_path, e))
133            })?;
134
135            // Calculate checksum and size
136            let metadata = fs::metadata(&dest_file_path)
137                .map_err(|e| SochDBError::Backup(format!("Failed to read file metadata: {}", e)))?;
138
139            total_size += metadata.len();
140
141            let checksum = self.calculate_file_checksum(&dest_file_path)?;
142            checksums.push(format!("{}:{}", rel_path, checksum));
143        }
144
145        // Calculate overall checksum (hash of all individual checksums)
146        let overall_checksum = self.calculate_string_checksum(&checksums.join("\n"));
147
148        // Create metadata
149        let metadata = BackupMetadata {
150            timestamp_us,
151            created_at,
152            size_bytes: total_size,
153            file_count: files_to_backup.len(),
154            database_version: env!("CARGO_PKG_VERSION").to_string(),
155            checksum: overall_checksum,
156            source_path: self.source_path.display().to_string(),
157        };
158
159        // Write manifest
160        let manifest_path = dest_path.join("manifest.json");
161        let manifest_json = serde_json::to_string_pretty(&metadata)
162            .map_err(|e| SochDBError::Backup(format!("Failed to serialize manifest: {}", e)))?;
163
164        fs::write(&manifest_path, manifest_json)
165            .map_err(|e| SochDBError::Backup(format!("Failed to write manifest: {}", e)))?;
166
167        Ok(metadata)
168    }
169
170    /// Restore a backup to the specified destination
171    ///
172    /// # Warning
173    /// This will overwrite any existing data at the destination path.
174    ///
175    /// # Example
176    /// ```ignore
177    /// let manager = BackupManager::new("./restored-db");
178    /// manager.restore_backup("./backups/backup-2025-11-06")?;
179    /// ```
180    pub fn restore_backup<P: AsRef<Path>>(&self, backup_path: P) -> Result<BackupMetadata> {
181        let backup_path = backup_path.as_ref();
182
183        // Read and verify manifest
184        let manifest_path = backup_path.join("manifest.json");
185        let manifest_json = fs::read_to_string(&manifest_path)
186            .map_err(|e| SochDBError::Backup(format!("Failed to read manifest: {}", e)))?;
187
188        let metadata: BackupMetadata = serde_json::from_str(&manifest_json)
189            .map_err(|e| SochDBError::Backup(format!("Failed to parse manifest: {}", e)))?;
190
191        // Create destination directory
192        fs::create_dir_all(&self.source_path).map_err(|e| {
193            SochDBError::Backup(format!("Failed to create destination directory: {}", e))
194        })?;
195
196        // Get all files in backup (excluding manifest)
197        let files = self.collect_backup_files(backup_path)?;
198
199        // Copy all files to destination
200        for (rel_path, src_path) in files {
201            let dest_path = self.source_path.join(&rel_path);
202
203            // Create parent directory if needed
204            if let Some(parent) = dest_path.parent() {
205                fs::create_dir_all(parent).map_err(|e| {
206                    SochDBError::Backup(format!("Failed to create directory: {}", e))
207                })?;
208            }
209
210            fs::copy(&src_path, &dest_path).map_err(|e| {
211                SochDBError::Backup(format!("Failed to restore file {}: {}", rel_path, e))
212            })?;
213        }
214
215        Ok(metadata)
216    }
217
218    /// List all backups in the specified directory
219    ///
220    /// Returns a list of backup metadata sorted by timestamp (newest first).
221    pub fn list_backups<P: AsRef<Path>>(backup_dir: P) -> Result<Vec<BackupMetadata>> {
222        let backup_dir = backup_dir.as_ref();
223
224        if !backup_dir.exists() {
225            return Ok(Vec::new());
226        }
227
228        let mut backups = Vec::new();
229
230        let entries = fs::read_dir(backup_dir)
231            .map_err(|e| SochDBError::Backup(format!("Failed to read backup directory: {}", e)))?;
232
233        for entry in entries {
234            let entry = entry.map_err(|e| {
235                SochDBError::Backup(format!("Failed to read directory entry: {}", e))
236            })?;
237
238            let path = entry.path();
239            if path.is_dir() {
240                let manifest_path = path.join("manifest.json");
241                if manifest_path.exists() {
242                    match fs::read_to_string(&manifest_path) {
243                        Ok(json) => {
244                            if let Ok(metadata) = serde_json::from_str::<BackupMetadata>(&json) {
245                                backups.push(metadata);
246                            }
247                        }
248                        Err(_) => continue,
249                    }
250                }
251            }
252        }
253
254        // Sort by timestamp (newest first)
255        backups.sort_by(|a, b| b.timestamp_us.cmp(&a.timestamp_us));
256
257        Ok(backups)
258    }
259
260    /// Verify the integrity of a backup
261    ///
262    /// Checks that all files exist and checksums match the manifest.
263    pub fn verify_backup<P: AsRef<Path>>(backup_path: P) -> Result<bool> {
264        let backup_path = backup_path.as_ref();
265
266        // Read manifest
267        let manifest_path = backup_path.join("manifest.json");
268        let manifest_json = fs::read_to_string(&manifest_path)
269            .map_err(|e| SochDBError::Backup(format!("Failed to read manifest: {}", e)))?;
270
271        let _metadata: BackupMetadata = serde_json::from_str(&manifest_json)
272            .map_err(|e| SochDBError::Backup(format!("Failed to parse manifest: {}", e)))?;
273
274        // Verify all files exist
275        let manager = BackupManager::new(backup_path);
276        let files = manager.collect_backup_files(backup_path)?;
277
278        if files.is_empty() {
279            return Ok(false);
280        }
281
282        // All files exist if we got here
283        Ok(true)
284    }
285
286    // Helper methods
287
288    fn collect_files(&self) -> Result<Vec<(String, PathBuf)>> {
289        let mut files = Vec::new();
290
291        if !self.source_path.exists() {
292            return Err(SochDBError::Backup(
293                "Source database path does not exist".to_string(),
294            ));
295        }
296
297        Self::collect_files_recursive(&self.source_path, &self.source_path, &mut files)?;
298
299        Ok(files)
300    }
301
302    fn collect_files_recursive(
303        current_path: &Path,
304        base_path: &Path,
305        files: &mut Vec<(String, PathBuf)>,
306    ) -> Result<()> {
307        let entries = fs::read_dir(current_path)
308            .map_err(|e| SochDBError::Backup(format!("Failed to read directory: {}", e)))?;
309
310        for entry in entries {
311            let entry =
312                entry.map_err(|e| SochDBError::Backup(format!("Failed to read entry: {}", e)))?;
313
314            let path = entry.path();
315
316            if path.is_dir() {
317                // Recursively collect files from subdirectories
318                Self::collect_files_recursive(&path, base_path, files)?;
319            } else {
320                // Add file with relative path
321                let rel_path = path
322                    .strip_prefix(base_path)
323                    .unwrap()
324                    .to_string_lossy()
325                    .to_string();
326                files.push((rel_path, path));
327            }
328        }
329
330        Ok(())
331    }
332
333    fn collect_backup_files(&self, backup_path: &Path) -> Result<Vec<(String, PathBuf)>> {
334        let mut files = Vec::new();
335        Self::collect_backup_files_recursive(backup_path, backup_path, &mut files)?;
336
337        // Filter out manifest.json
338        files.retain(|(rel_path, _)| rel_path != "manifest.json");
339
340        Ok(files)
341    }
342
343    fn collect_backup_files_recursive(
344        current_path: &Path,
345        base_path: &Path,
346        files: &mut Vec<(String, PathBuf)>,
347    ) -> Result<()> {
348        let entries = fs::read_dir(current_path)
349            .map_err(|e| SochDBError::Backup(format!("Failed to read directory: {}", e)))?;
350
351        for entry in entries {
352            let entry =
353                entry.map_err(|e| SochDBError::Backup(format!("Failed to read entry: {}", e)))?;
354
355            let path = entry.path();
356
357            if path.is_dir() {
358                Self::collect_backup_files_recursive(&path, base_path, files)?;
359            } else {
360                let rel_path = path
361                    .strip_prefix(base_path)
362                    .unwrap()
363                    .to_string_lossy()
364                    .to_string();
365                files.push((rel_path, path));
366            }
367        }
368
369        Ok(())
370    }
371
372    fn calculate_file_checksum(&self, path: &Path) -> Result<String> {
373        use sha2::{Digest, Sha256};
374
375        let mut file = File::open(path)
376            .map_err(|e| SochDBError::Backup(format!("Failed to open file for checksum: {}", e)))?;
377
378        let mut hasher = Sha256::new();
379        let mut buffer = [0u8; 8192];
380
381        loop {
382            let n = file.read(&mut buffer).map_err(|e| {
383                SochDBError::Backup(format!("Failed to read file for checksum: {}", e))
384            })?;
385
386            if n == 0 {
387                break;
388            }
389
390            hasher.update(&buffer[..n]);
391        }
392
393        Ok(format!("{:x}", hasher.finalize()))
394    }
395
396    fn calculate_string_checksum(&self, data: &str) -> String {
397        use sha2::{Digest, Sha256};
398        let mut hasher = Sha256::new();
399        hasher.update(data.as_bytes());
400        format!("{:x}", hasher.finalize())
401    }
402}
403
404#[cfg(test)]
405mod tests {
406    use super::*;
407    use std::fs;
408    use tempfile::TempDir;
409
410    #[test]
411    fn test_create_and_restore_backup() {
412        // Create a temporary database directory
413        let db_dir = TempDir::new().unwrap();
414        let db_path = db_dir.path();
415
416        // Create some test files
417        fs::write(db_path.join("test.sst"), b"test data").unwrap();
418        fs::write(db_path.join("wal.log"), b"wal data").unwrap();
419        fs::create_dir_all(db_path.join("subdir")).unwrap();
420        fs::write(db_path.join("subdir").join("index.dat"), b"index data").unwrap();
421
422        // Create backup
423        let backup_dir = TempDir::new().unwrap();
424        let backup_path = backup_dir.path().join("backup-1");
425
426        let manager = BackupManager::new(db_path);
427        let metadata = manager.create_backup(&backup_path).unwrap();
428
429        assert_eq!(metadata.file_count, 3);
430        assert!(metadata.size_bytes > 0);
431        assert!(backup_path.join("manifest.json").exists());
432        assert!(backup_path.join("test.sst").exists());
433
434        // Restore to new location
435        let restore_dir = TempDir::new().unwrap();
436        let restore_path = restore_dir.path().join("restored");
437
438        let restore_manager = BackupManager::new(&restore_path);
439        let restored_metadata = restore_manager.restore_backup(&backup_path).unwrap();
440
441        assert_eq!(restored_metadata.file_count, metadata.file_count);
442        assert!(restore_path.join("test.sst").exists());
443        assert!(restore_path.join("wal.log").exists());
444        assert!(restore_path.join("subdir").join("index.dat").exists());
445
446        // Verify content
447        let content = fs::read_to_string(restore_path.join("test.sst")).unwrap();
448        assert_eq!(content, "test data");
449    }
450
451    #[test]
452    fn test_list_backups() {
453        let backup_dir = TempDir::new().unwrap();
454        let backup_path = backup_dir.path();
455
456        // Create source database
457        let db_dir = TempDir::new().unwrap();
458        fs::write(db_dir.path().join("test.sst"), b"data").unwrap();
459
460        let manager = BackupManager::new(db_dir.path());
461
462        // Create multiple backups
463        let backup1 = backup_path.join("backup-1");
464        let backup2 = backup_path.join("backup-2");
465
466        manager.create_backup(&backup1).unwrap();
467        std::thread::sleep(std::time::Duration::from_millis(10));
468        manager.create_backup(&backup2).unwrap();
469
470        // List backups
471        let backups = BackupManager::list_backups(backup_path).unwrap();
472
473        assert_eq!(backups.len(), 2);
474        // Should be sorted newest first
475        assert!(backups[0].timestamp_us > backups[1].timestamp_us);
476    }
477
478    #[test]
479    fn test_verify_backup() {
480        let db_dir = TempDir::new().unwrap();
481        fs::write(db_dir.path().join("test.sst"), b"data").unwrap();
482
483        let backup_dir = TempDir::new().unwrap();
484        let backup_path = backup_dir.path().join("backup");
485
486        let manager = BackupManager::new(db_dir.path());
487        manager.create_backup(&backup_path).unwrap();
488
489        let valid = BackupManager::verify_backup(&backup_path).unwrap();
490        assert!(valid);
491    }
492}