Skip to main content

sqlite_graphrag/commands/
backup.rs

1//! Handler for the `backup` CLI subcommand.
2//!
3//! Uses the SQLite Online Backup API (via rusqlite) to produce a consistent
4//! point-in-time copy of the database file even while the database is in use.
5
6use crate::errors::AppError;
7use crate::output;
8use crate::paths::AppPaths;
9use crate::storage::connection::open_ro;
10use serde::Serialize;
11use std::path::PathBuf;
12
// Command-line arguments accepted by the `backup` subcommand.
//
// NOTE: the comments added in this block are deliberately plain `//` comments.
// clap's derive macros turn `///` doc comments into help text, so using them
// here could change the rendered `--help` output.
//
// NOTE(review): the EXAMPLES text below shows `--json` as "Emit JSON on
// success", while the flag's own help string describes it as a no-op because
// JSON is always emitted — confirm which wording is intended.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Back up the default database to a specific path\n  \
    sqlite-graphrag backup --output /backup/graphrag-$(date +%F).sqlite\n\n  \
    # Back up a custom source database\n  \
    sqlite-graphrag backup --db /data/graphrag.sqlite --output /backup/snapshot.sqlite\n\n  \
    # Emit JSON on success\n  \
    sqlite-graphrag backup --output /tmp/snap.sqlite --json\n\n  \
NOTES:\n  \
    Uses the SQLite Online Backup API: safe to run while the database is in use.\n  \
    The destination file is created (or overwritten) at the path given by --output.\n  \
    On Unix the destination is chmod 0600 after the backup completes.")]
pub struct BackupArgs {
    /// Destination path for the backup file. Required.
    #[arg(long, value_name = "PATH")]
    pub output: PathBuf,
    // Hidden flag (`hide = true`); `run` never reads it, matching its help
    // string which describes it as a no-op.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Optional source database path, also settable through the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable. `run` forwards it to
    // `AppPaths::resolve`; what happens when it is absent is decided there
    // (not visible in this file).
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
34
/// JSON payload emitted by `run` after a successful backup.
///
/// Field names become the JSON keys, so renaming or reordering fields changes
/// the emitted document.
#[derive(Serialize)]
struct BackupResponse {
    /// Outcome label; `run` always sets this to `"backed_up"`.
    action: String,
    /// Display form of the source database path that was copied.
    source: String,
    /// Display form of the path the backup was written to.
    destination: String,
    /// Size of the destination file in bytes as reported by file metadata;
    /// `run` falls back to `0` when the metadata cannot be read.
    size_bytes: u64,
    /// Milliseconds elapsed between the start of `run` and building this response.
    elapsed_ms: u64,
}
43
44pub fn run(args: BackupArgs) -> Result<(), AppError> {
45    let start = std::time::Instant::now();
46    let paths = AppPaths::resolve(args.db.as_deref())?;
47
48    crate::storage::connection::ensure_db_ready(&paths)?;
49
50    // Validate: destination must differ from source.
51    if args.output == paths.db {
52        return Err(AppError::Validation(
53            "destination path must differ from the source database path".to_string(),
54        ));
55    }
56
57    // Create parent directories if necessary.
58    if let Some(parent) = args.output.parent() {
59        if !parent.as_os_str().is_empty() {
60            std::fs::create_dir_all(parent)?;
61        }
62    }
63
64    let src_conn = open_ro(&paths.db)?;
65    let mut dst_conn = rusqlite::Connection::open(&args.output)?;
66
67    {
68        let backup = rusqlite::backup::Backup::new(&src_conn, &mut dst_conn)?;
69        backup.run_to_completion(100, std::time::Duration::from_millis(50), None)?;
70    }
71
72    // Apply 0600 permissions on Unix to prevent leakage in shared directories.
73    #[cfg(unix)]
74    {
75        use std::os::unix::fs::PermissionsExt;
76        if let Ok(meta) = std::fs::metadata(&args.output) {
77            let mut perms = meta.permissions();
78            perms.set_mode(0o600);
79            let _ = std::fs::set_permissions(&args.output, perms);
80        }
81    }
82    #[cfg(windows)]
83    {
84        tracing::debug!(
85            path = %args.output.display(),
86            "skipping Unix mode 0o600 on Windows; NTFS DACL default is private-to-user"
87        );
88    }
89
90    let size_bytes = std::fs::metadata(&args.output)
91        .map(|m| m.len())
92        .unwrap_or(0);
93
94    output::emit_json(&BackupResponse {
95        action: "backed_up".to_string(),
96        source: paths.db.display().to_string(),
97        destination: args.output.display().to_string(),
98        size_bytes,
99        elapsed_ms: start.elapsed().as_millis() as u64,
100    })?;
101
102    Ok(())
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `BackupResponse` with the fixed `"backed_up"` action so each
    /// test only spells out the values it actually cares about.
    fn response(
        source: &str,
        destination: &str,
        size_bytes: u64,
        elapsed_ms: u64,
    ) -> BackupResponse {
        BackupResponse {
            action: "backed_up".to_string(),
            source: source.to_string(),
            destination: destination.to_string(),
            size_bytes,
            elapsed_ms,
        }
    }

    /// Every field of the response must appear in the JSON under its own name.
    #[test]
    fn backup_response_serializes_all_fields() {
        let resp = response("/data/graphrag.sqlite", "/backup/snapshot.sqlite", 32768, 42);
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["action"], "backed_up");
        assert_eq!(json["source"], "/data/graphrag.sqlite");
        assert_eq!(json["destination"], "/backup/snapshot.sqlite");
        assert_eq!(json["size_bytes"], 32768u64);
        assert_eq!(json["elapsed_ms"], 42u64);
    }

    /// The serialized `action` value is the literal `"backed_up"`.
    #[test]
    fn backup_response_action_is_backed_up() {
        let resp = response("/a.sqlite", "/b.sqlite", 0, 0);
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(
            json["action"], "backed_up",
            "action must always be 'backed_up'"
        );
    }

    /// Identical source and destination paths must yield a validation error.
    #[test]
    fn backup_rejects_destination_equal_to_source() {
        // Simulate the guard without a real DB.
        let src = PathBuf::from("/tmp/graphrag.sqlite");
        let dst = PathBuf::from("/tmp/graphrag.sqlite");
        let result: Result<(), AppError> = if dst == src {
            Err(AppError::Validation(
                "destination path must differ from the source database path".to_string(),
            ))
        } else {
            Ok(())
        };
        // Match exhaustively: an `if let` on the expected variant would pass
        // silently if a different error variant were produced.
        match result {
            Err(AppError::Validation(msg)) => {
                assert!(msg.contains("destination path must differ"));
            }
            Err(_) => panic!("expected AppError::Validation for identical paths"),
            Ok(()) => panic!("must reject identical source and destination"),
        }
    }

    /// A zero-byte size is serialized as the number `0`, not omitted.
    #[test]
    fn backup_response_size_bytes_zero_is_valid() {
        let resp = response("/a.sqlite", "/b.sqlite", 0, 1);
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["size_bytes"], 0u64);
    }
}