use crate::errors::AppError;
use crate::output;
use crate::paths::AppPaths;
use crate::storage::connection::open_ro;
use serde::Serialize;
use std::path::PathBuf;
use tempfile::NamedTempFile;
/// Default value of `--backup-step-size`: database pages copied per backup step.
const DEFAULT_BACKUP_STEP_PAGES: usize = 1000;
/// Default value of `--backup-step-sleep-ms`: pause, in milliseconds, between backup steps.
const DEFAULT_BACKUP_STEP_SLEEP_MS: u64 = 5;
// Command-line arguments of the `backup` subcommand, consumed by `run`.
//
// NOTE: plain `//` comments are used in this struct on purpose. clap's derive
// macros turn `///` doc comments into help text, so doc comments here would
// change what `--help` prints.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
# Back up the default database to a specific path\n \
sqlite-graphrag backup --output /backup/graphrag-$(date +%F).sqlite\n\n \
# Back up a custom source database\n \
sqlite-graphrag backup --db /data/graphrag.sqlite --output /backup/snapshot.sqlite\n\n \
# Tuned for a 4.3 GB database on local SSD\n \
sqlite-graphrag backup --output /backup/snap.sqlite --backup-step-size 2000 --backup-step-sleep-ms 2\n\n \
# Maximum throughput (no sleep between steps — risks I/O contention)\n \
sqlite-graphrag backup --output /backup/snap.sqlite --backup-no-sleep\n\n \
NOTES:\n \
Uses the SQLite Online Backup API: safe to run while the database is in use.\n \
The destination is written atomically via tempfile-rename in the same directory.\n \
If the process is interrupted, the previous file (if any) remains intact.\n \
On Unix the destination is chmod 0600 after the backup completes.")]
pub struct BackupArgs {
// Destination file of the backup (required). `run` rejects it when it is
// the source database path.
#[arg(long, value_name = "PATH")]
pub output: PathBuf,
// Hidden flag that `run` never reads; per its help text it is a no-op
// because JSON is always emitted.
#[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
pub json: bool,
// Optional source database location, also settable through the
// SQLITE_GRAPHRAG_DB_PATH environment variable. Forwarded to
// `AppPaths::resolve`, which presumably picks the default when it is absent.
#[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
pub db: Option<String>,
// Number of database pages copied per backup step. `run` raises values
// below 1 to 1.
#[arg(long, value_name = "PAGES", default_value_t = DEFAULT_BACKUP_STEP_PAGES)]
pub backup_step_size: usize,
// Pause between backup steps, in milliseconds. Ignored when
// `backup_no_sleep` is set.
#[arg(long, value_name = "MILLIS", default_value_t = DEFAULT_BACKUP_STEP_SLEEP_MS)]
pub backup_step_sleep_ms: u64,
// Disables the pause between backup steps altogether.
#[arg(long, default_value_t = false)]
pub backup_no_sleep: bool,
// Page interval between the progress lines `run` writes to stderr. `run`
// raises values below 1 to 1.
#[arg(long, value_name = "PAGES", default_value_t = 100)]
pub backup_progress: i32,
}
/// JSON summary that `run` passes to `output::emit_json` after a successful backup.
///
/// Fields are serialized in declaration order.
#[derive(Serialize)]
struct BackupResponse {
/// Outcome tag; `run` always sets it to `"backed_up"`.
action: String,
/// Display form of the source database path.
source: String,
/// Display form of the destination path.
destination: String,
/// Length of the destination file in bytes; `run` reports 0 when the
/// file metadata cannot be read.
size_bytes: u64,
/// Wall-clock duration of the command in milliseconds.
elapsed_ms: u64,
/// Pages copied according to the final backup progress
/// (`pagecount - remaining`, floored at 0).
pages_copied: Option<i64>,
/// Pages-per-step setting reported by `run`.
step_size: usize,
}
/// Runs the `backup` command: snapshots the source database into `args.output`.
///
/// The copy is made step by step with rusqlite's `Backup` into a temporary
/// file created in the destination's directory; that file is then persisted
/// (renamed) onto `args.output`. While copying, progress records are written
/// to stderr as one JSON object per line. On success a `BackupResponse`
/// summary is emitted through `output::emit_json`.
///
/// # Errors
///
/// * `AppError::Validation` when the destination is the source database,
///   compared literally and, when both paths exist, by canonical path.
/// * `AppError::Io` when the temporary file cannot be created or persisted.
/// * `AppError::Database` when a backup step fails.
/// * Any error returned by `AppPaths::resolve`, `ensure_db_ready`, `open_ro`,
///   directory creation, opening the destination connection or
///   `output::emit_json`.
pub fn run(args: BackupArgs) -> Result<(), AppError> {
    let start = std::time::Instant::now();
    let paths = AppPaths::resolve(args.db.as_deref())?;
    crate::storage::connection::ensure_db_ready(&paths)?;

    // A literal comparison misses aliases of the source (relative vs. absolute
    // spelling, `./` prefixes, symlinks). Canonicalization only succeeds for
    // existing paths, which is enough: a destination that does not exist yet
    // cannot be the source file.
    let targets_source = args.output == paths.db
        || matches!(
            (
                std::fs::canonicalize(&args.output),
                std::fs::canonicalize(&paths.db),
            ),
            (Ok(dst), Ok(src)) if dst == src
        );
    if targets_source {
        return Err(AppError::Validation(
            "destination path must differ from the source database path".to_string(),
        ));
    }

    // The temp file must live next to the destination so that persisting it
    // is a rename within one directory. A bare file name has an empty parent,
    // for which there is nothing to create.
    let parent = args.output.parent().unwrap_or(std::path::Path::new("."));
    if !parent.as_os_str().is_empty() {
        std::fs::create_dir_all(parent)?;
    }
    let temp = NamedTempFile::new_in(parent).map_err(AppError::Io)?;
    let temp_path = temp.path().to_path_buf();

    let src_conn = open_ro(&paths.db)?;
    let mut dst_conn = rusqlite::Connection::open(&temp_path)?;

    // `Backup::step` takes a C int. Clamp oversized requests to the largest
    // representable step instead of silently falling back to another value,
    // so the `step_size` reported in the summary is the one actually used.
    let step_size = args.backup_step_size.clamp(1, i32::MAX as usize);
    let step_pages = i32::try_from(step_size).unwrap_or(i32::MAX);

    let sleep = if args.backup_no_sleep {
        std::time::Duration::ZERO
    } else {
        std::time::Duration::from_millis(args.backup_step_sleep_ms)
    };

    // Scoped so the mutable borrow of `dst_conn` held by `Backup` ends before
    // the connection is dropped below.
    let pages_copied: Option<i64> = {
        use rusqlite::backup::StepResult;

        let backup = rusqlite::backup::Backup::new(&src_conn, &mut dst_conn)?;
        // Intervals below 1 are treated as 1, i.e. report after every step.
        let progress_every = args.backup_progress.max(1);
        // Page count at the last progress line; 0 means nothing reported yet,
        // so the first line appears once `progress_every` pages are copied.
        let mut last_emit_pages: i32 = 0;

        loop {
            match backup.step(step_pages) {
                Ok(StepResult::More) => {
                    let progress = backup.progress();
                    let copied = progress.pagecount - progress.remaining;
                    if copied.saturating_sub(last_emit_pages) >= progress_every {
                        last_emit_pages = copied;
                        let percent = if progress.pagecount > 0 {
                            f64::from(copied) / f64::from(progress.pagecount) * 100.0
                        } else {
                            100.0
                        };
                        eprintln!(
                            "{{\"progress\":{{\"pages_copied\":{copied},\"total_pages\":{pc},\"percent\":{pct:.2}}}}}",
                            pc = progress.pagecount,
                            pct = percent
                        );
                    }
                    if !sleep.is_zero() {
                        std::thread::sleep(sleep);
                    }
                }
                Ok(StepResult::Done) => break,
                // Any other step result (busy/locked per the rusqlite docs):
                // back off briefly and retry. There is no retry limit.
                Ok(_) => {
                    std::thread::sleep(std::time::Duration::from_millis(50));
                }
                Err(e) => return Err(AppError::Database(e)),
            }
        }

        let progress = backup.progress();
        let copied = (progress.pagecount - progress.remaining).max(0);
        Some(i64::from(copied))
    };

    // Close the destination connection before moving the file into place.
    drop(dst_conn);
    temp.persist(&args.output)
        .map_err(|e| AppError::Io(e.error))?;

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        // Best effort: a failure is logged but does not fail the backup.
        let owner_only = std::fs::Permissions::from_mode(0o600);
        if let Err(e) = std::fs::set_permissions(&args.output, owner_only) {
            tracing::warn!(target: "backup",
                path = %args.output.display(),
                error = %e,
                "failed to set 0600 permissions on backup file"
            );
        }
    }
    #[cfg(windows)]
    {
        tracing::debug!(target: "backup",
            path = %args.output.display(),
            "skipping Unix mode 0o600 on Windows; NTFS DACL default is private-to-user"
        );
    }

    // Best effort as well: an unreadable size is reported as 0.
    let size_bytes = std::fs::metadata(&args.output)
        .map(|m| m.len())
        .unwrap_or(0);

    output::emit_json(&BackupResponse {
        action: "backed_up".to_string(),
        source: paths.db.display().to_string(),
        destination: args.output.display().to_string(),
        size_bytes,
        elapsed_ms: u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
        pages_copied,
        step_size,
    })?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Validation message expected when source and destination coincide.
    const SAME_PATH_MSG: &str = "destination path must differ from the source database path";

    /// Builds a response with placeholder paths and a step size of 1000.
    fn placeholder_response(
        size_bytes: u64,
        elapsed_ms: u64,
        pages_copied: Option<i64>,
    ) -> BackupResponse {
        BackupResponse {
            action: String::from("backed_up"),
            source: String::from("/a.sqlite"),
            destination: String::from("/b.sqlite"),
            size_bytes,
            elapsed_ms,
            pages_copied,
            step_size: 1000,
        }
    }

    /// Converts a response into a JSON value, panicking if serialization fails.
    fn as_json(resp: &BackupResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    /// Every field of the response shows up in the JSON under its own key.
    #[test]
    fn backup_response_serializes_all_fields() {
        let resp = BackupResponse {
            source: String::from("/data/graphrag.sqlite"),
            destination: String::from("/backup/snapshot.sqlite"),
            ..placeholder_response(32768, 42, Some(512))
        };
        let json = as_json(&resp);

        assert_eq!(json["action"], "backed_up");
        assert_eq!(json["source"], "/data/graphrag.sqlite");
        assert_eq!(json["destination"], "/backup/snapshot.sqlite");
        assert_eq!(json["size_bytes"], 32768u64);
        assert_eq!(json["elapsed_ms"], 42u64);
        assert_eq!(json["step_size"], 1000usize);
        assert_eq!(json["pages_copied"], 512i64);
    }

    /// The action tag survives serialization even when no pages were counted.
    #[test]
    fn backup_response_action_is_backed_up() {
        let json = as_json(&placeholder_response(0, 0, None));
        assert_eq!(
            json["action"], "backed_up",
            "action must always be 'backed_up'"
        );
    }

    /// Mirrors the guard used by the command: equal paths yield a validation error.
    #[test]
    fn backup_rejects_destination_equal_to_source() {
        let src = PathBuf::from("/tmp/graphrag.sqlite");
        let dst = src.clone();

        let result: Result<(), AppError> = if dst != src {
            Ok(())
        } else {
            Err(AppError::Validation(SAME_PATH_MSG.to_string()))
        };

        assert!(
            result.is_err(),
            "must reject identical source and destination"
        );
        if let Err(AppError::Validation(msg)) = result {
            assert!(msg.contains("destination path must differ"));
        }
    }

    /// A zero-byte size is still serialized as an unsigned number.
    #[test]
    fn backup_response_size_bytes_zero_is_valid() {
        let json = as_json(&placeholder_response(0, 1, Some(0)));
        assert!(json["size_bytes"].as_u64().is_some());
    }

    /// Pins the documented defaults for step size and inter-step sleep.
    #[test]
    fn backup_default_step_size_is_one_thousand() {
        assert_eq!(DEFAULT_BACKUP_STEP_PAGES, 1000);
        assert_eq!(DEFAULT_BACKUP_STEP_SLEEP_MS, 5);
    }
}