sqlite_graphrag/commands/
backup.rs1use crate::errors::AppError;
7use crate::output;
8use crate::paths::AppPaths;
9use crate::storage::connection::open_ro;
10use serde::Serialize;
11use std::path::PathBuf;
12use tempfile::NamedTempFile;
13
/// Default number of database pages copied per backup step
/// (the default value of `--backup-step-size`).
const DEFAULT_BACKUP_STEP_PAGES: usize = 1000;
/// Default pause between two backup steps, in milliseconds
/// (the default value of `--backup-step-sleep-ms`).
const DEFAULT_BACKUP_STEP_SLEEP_MS: u64 = 5;
22
23#[derive(clap::Args)]
24#[command(after_long_help = "EXAMPLES:\n \
25 # Back up the default database to a specific path\n \
26 sqlite-graphrag backup --output /backup/graphrag-$(date +%F).sqlite\n\n \
27 # Back up a custom source database\n \
28 sqlite-graphrag backup --db /data/graphrag.sqlite --output /backup/snapshot.sqlite\n\n \
29 # Tuned for a 4.3 GB database on local SSD\n \
30 sqlite-graphrag backup --output /backup/snap.sqlite --backup-step-size 2000 --backup-step-sleep-ms 2\n\n \
31 # Maximum throughput (no sleep between steps — risks I/O contention)\n \
32 sqlite-graphrag backup --output /backup/snap.sqlite --backup-no-sleep\n\n \
33NOTES:\n \
34 Uses the SQLite Online Backup API: safe to run while the database is in use.\n \
35 The destination is written atomically via tempfile-rename in the same directory.\n \
36 If the process is interrupted, the previous file (if any) remains intact.\n \
37 On Unix the destination is chmod 0600 after the backup completes.")]
38pub struct BackupArgs {
39 #[arg(long, value_name = "PATH")]
41 pub output: PathBuf,
42 #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
43 pub json: bool,
44 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
45 pub db: Option<String>,
46 #[arg(long, value_name = "PAGES", default_value_t = DEFAULT_BACKUP_STEP_PAGES)]
49 pub backup_step_size: usize,
50 #[arg(long, value_name = "MILLIS", default_value_t = DEFAULT_BACKUP_STEP_SLEEP_MS)]
53 pub backup_step_sleep_ms: u64,
54 #[arg(long, default_value_t = false)]
57 pub backup_no_sleep: bool,
58 #[arg(long, value_name = "PAGES", default_value_t = 100)]
61 pub backup_progress: i32,
62}
63
/// JSON document emitted on stdout by `run` after a successful backup.
#[derive(Serialize)]
struct BackupResponse {
    /// Outcome tag; `run` always sets it to `"backed_up"`.
    action: String,
    /// Display form of the resolved source database path.
    source: String,
    /// Display form of the destination path given via `--output`.
    destination: String,
    /// Size of the destination file in bytes; `run` reports 0 when its metadata cannot be read.
    size_bytes: u64,
    /// Wall-clock time spent in `run`, in milliseconds.
    elapsed_ms: u64,
    /// Pages copied, computed by `run` as total pages minus remaining pages after the last step.
    pages_copied: Option<i64>,
    /// Requested pages-per-step value after `run` raised it to at least 1.
    step_size: usize,
}
74
75pub fn run(args: BackupArgs) -> Result<(), AppError> {
76 let start = std::time::Instant::now();
77 let paths = AppPaths::resolve(args.db.as_deref())?;
78
79 crate::storage::connection::ensure_db_ready(&paths)?;
80
81 if args.output == paths.db {
83 return Err(AppError::Validation(
84 "destination path must differ from the source database path".to_string(),
85 ));
86 }
87
88 let parent = args.output.parent().unwrap_or(std::path::Path::new("."));
90 if !parent.as_os_str().is_empty() {
91 std::fs::create_dir_all(parent)?;
92 }
93
94 let temp = NamedTempFile::new_in(parent).map_err(AppError::Io)?;
96 let temp_path = temp.path().to_path_buf();
97
98 let src_conn = open_ro(&paths.db)?;
99 let mut dst_conn = rusqlite::Connection::open(&temp_path)?;
100
101 let step_size = args.backup_step_size.max(1);
102 let sleep = if args.backup_no_sleep {
103 std::time::Duration::ZERO
104 } else {
105 std::time::Duration::from_millis(args.backup_step_sleep_ms)
106 };
107
108 let pages_copied: Option<i64> = {
109 let backup = rusqlite::backup::Backup::new(&src_conn, &mut dst_conn)?;
110 let step_size_i32: i32 = step_size.try_into().unwrap_or(1000);
115 let progress_every = args.backup_progress.max(1);
116 let mut last_emit_pages: i32 = -1;
117 loop {
118 use rusqlite::backup::StepResult;
119 match backup.step(step_size_i32) {
120 Ok(StepResult::More) => {
121 if progress_every > 0 {
123 let p = backup.progress();
124 let copied = p.pagecount - p.remaining;
125 if copied > 0 && copied - last_emit_pages >= progress_every {
126 last_emit_pages = copied;
127 let percent = if p.pagecount > 0 {
128 (copied as f64 / p.pagecount as f64) * 100.0
129 } else {
130 100.0
131 };
132 eprintln!(
133 "{{\"progress\":{{\"pages_copied\":{copied},\"total_pages\":{pc},\"percent\":{pct:.2}}}}}",
134 pc = p.pagecount,
135 pct = percent
136 );
137 }
138 }
139 if !sleep.is_zero() {
140 std::thread::sleep(sleep);
141 }
142 }
143 Ok(StepResult::Done) => break, Ok(_) => {
145 std::thread::sleep(std::time::Duration::from_millis(50));
148 }
149 Err(e) => return Err(AppError::Database(e)),
150 }
151 }
152 let progress = backup.progress();
155 let copied = (progress.pagecount - progress.remaining).max(0);
156 Some(copied as i64)
157 };
158 drop(dst_conn);
159
160 temp.persist(&args.output)
161 .map_err(|e| AppError::Io(e.error))?;
162
163 #[cfg(unix)]
165 {
166 use std::os::unix::fs::PermissionsExt;
167 if let Ok(meta) = std::fs::metadata(&args.output) {
168 let mut perms = meta.permissions();
169 perms.set_mode(0o600);
170 if let Err(e) = std::fs::set_permissions(&args.output, perms) {
171 tracing::warn!(target: "backup",
172 path = %args.output.display(),
173 error = %e,
174 "failed to set 0600 permissions on backup file"
175 );
176 }
177 }
178 }
179 #[cfg(windows)]
180 {
181 tracing::debug!(target: "backup",
182 path = %args.output.display(),
183 "skipping Unix mode 0o600 on Windows; NTFS DACL default is private-to-user"
184 );
185 }
186
187 let size_bytes = std::fs::metadata(&args.output)
188 .map(|m| m.len())
189 .unwrap_or(0);
190
191 output::emit_json(&BackupResponse {
192 action: "backed_up".to_string(),
193 source: paths.db.display().to_string(),
194 destination: args.output.display().to_string(),
195 size_bytes,
196 elapsed_ms: start.elapsed().as_millis() as u64,
197 pages_copied,
198 step_size,
199 })?;
200
201 Ok(())
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `BackupResponse` with the fixed `"backed_up"` action and a step size of 1000,
    /// so each test only spells out the fields it cares about.
    fn response(
        source: &str,
        destination: &str,
        size_bytes: u64,
        elapsed_ms: u64,
        pages_copied: Option<i64>,
    ) -> BackupResponse {
        BackupResponse {
            action: "backed_up".to_string(),
            source: source.to_string(),
            destination: destination.to_string(),
            size_bytes,
            elapsed_ms,
            pages_copied,
            step_size: 1000,
        }
    }

    #[test]
    fn backup_response_serializes_all_fields() {
        let resp = response(
            "/data/graphrag.sqlite",
            "/backup/snapshot.sqlite",
            32768,
            42,
            Some(512),
        );
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["action"], "backed_up");
        assert_eq!(json["source"], "/data/graphrag.sqlite");
        assert_eq!(json["destination"], "/backup/snapshot.sqlite");
        assert_eq!(json["size_bytes"], 32768u64);
        assert_eq!(json["elapsed_ms"], 42u64);
        assert_eq!(json["step_size"], 1000usize);
        assert_eq!(json["pages_copied"], 512i64);
    }

    #[test]
    fn backup_response_action_is_backed_up() {
        let resp = response("/a.sqlite", "/b.sqlite", 0, 0, None);
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(
            json["action"], "backed_up",
            "action must always be 'backed_up'"
        );
    }

    #[test]
    fn backup_rejects_destination_equal_to_source() {
        // Mirrors the path-equality guard of `run` without touching the filesystem.
        let src = PathBuf::from("/tmp/graphrag.sqlite");
        let dst = PathBuf::from("/tmp/graphrag.sqlite");
        let result: Result<(), AppError> = if dst == src {
            Err(AppError::Validation(
                "destination path must differ from the source database path".to_string(),
            ))
        } else {
            Ok(())
        };
        assert!(
            result.is_err(),
            "must reject identical source and destination"
        );
        // Exhaustive match: an `if let` here would pass silently if the error were ever a
        // different variant, leaving the message unchecked.
        match result {
            Err(AppError::Validation(msg)) => {
                assert!(msg.contains("destination path must differ"));
            }
            _ => panic!("expected AppError::Validation for identical source and destination"),
        }
    }

    #[test]
    fn backup_response_size_bytes_zero_is_valid() {
        let resp = response("/a.sqlite", "/b.sqlite", 0, 1, Some(0));
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert!(json["size_bytes"].as_u64().is_some());
    }

    #[test]
    fn backup_default_step_size_is_one_thousand() {
        assert_eq!(DEFAULT_BACKUP_STEP_PAGES, 1000);
        assert_eq!(DEFAULT_BACKUP_STEP_SLEEP_MS, 5);
    }
}