Skip to main content

ai_memory/cli/
backup.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! `cmd_backup` and `cmd_restore` migrations. See `cli::store` for the
5//! design pattern.
6
7use crate::cli::CliOutput;
8use crate::db;
9use anyhow::{Context, Result};
10use clap::Args;
11use std::path::{Path, PathBuf};
12
/// Builds the sidecar manifest file name that belongs to a snapshot.
///
/// `stem` is the snapshot file name without its extension; the result is
/// `<stem>.manifest.json` (#1558 batch 6).
fn manifest_file_name(stem: &str) -> String {
    [stem, ".manifest.json"].concat()
}
18
/// Timestamp format used for snapshot filenames. RFC 3339-like (date,
/// `T` separator, literal `Z` suffix; callers format `chrono::Utc::now()`)
/// but filesystem-safe: the colons of the time part are omitted and there
/// are no slashes. Resolution is one second, so two timestamps taken
/// within the same second render to the same string.
const BACKUP_TS_FMT: &str = "%Y-%m-%dT%H%M%SZ";
22
// Command-line arguments of the `backup` command; consumed by `run_backup`.
// This header is a plain `//` comment on purpose: clap's derive turns `///`
// doc comments into user-visible help text, so the field docs below are
// part of the CLI's `--help` output and are left untouched.
#[derive(Args)]
pub struct BackupArgs {
    /// Directory where the snapshot and manifest are written. Created if
    /// missing.
    #[arg(long, default_value = "./backups")]
    pub to: PathBuf,
    /// Retention: after writing a new snapshot, delete the oldest
    /// snapshots so that at most this many remain. 0 disables rotation.
    // `run_backup` only calls the pruning step when this value is > 0.
    #[arg(long, default_value_t = 48)]
    pub keep: usize,
}
34
// Command-line arguments of the `restore` command; consumed by
// `run_restore`. This header is a plain `//` comment on purpose: clap's
// derive turns `///` doc comments into user-visible help text, so the field
// docs below are part of the CLI's `--help` output and are left untouched.
#[derive(Args)]
pub struct RestoreArgs {
    /// Path to a snapshot file OR a backup directory. When a directory is
    /// supplied, the most recent snapshot is used.
    // "Most recent" is decided by file modification time among the
    // `ai-memory-*.db` entries of the directory (see `run_restore`).
    #[arg(long)]
    pub from: PathBuf,
    /// Skip sha256 verification against the manifest. Not recommended.
    // Without this flag `run_restore` fails when the `<stem>.manifest.json`
    // sidecar next to the snapshot is missing or its digest does not match.
    #[arg(long)]
    pub skip_verify: bool,
}
45
/// Sidecar metadata describing one snapshot.
///
/// `run_backup` writes it as pretty-printed JSON to
/// `<stem>.manifest.json` next to the snapshot (and prints it as compact
/// JSON when JSON output is requested); `run_restore` reads it back to
/// verify the snapshot's digest.
#[derive(serde::Serialize, serde::Deserialize)]
pub struct BackupManifest {
    /// File name (not the full path) of the snapshot, e.g.
    /// `ai-memory-<timestamp>.db`.
    pub snapshot: String,
    /// Lowercase hex SHA-256 digest of the snapshot file's contents.
    pub sha256: String,
    /// Size of the snapshot file in bytes, as reported by file metadata.
    pub bytes: u64,
    /// Path of the database the snapshot was taken from (lossy UTF-8
    /// rendering of the path passed to `run_backup`).
    pub source_db: String,
    /// `crate::PKG_VERSION` of the binary that produced the snapshot.
    pub version: String,
    /// RFC 3339 UTC timestamp taken when the manifest was assembled.
    pub created_at: String,
}
55
56/// `backup` handler.
57pub fn run_backup(
58    db_path: &Path,
59    args: &BackupArgs,
60    json_out: bool,
61    out: &mut CliOutput<'_>,
62) -> Result<()> {
63    use std::io::Read;
64    std::fs::create_dir_all(&args.to)
65        .with_context(|| format!("creating backup dir {}", args.to.display()))?;
66    // SQLite VACUUM INTO is hot-backup-safe and produces a defragmented
67    // file. Equivalent to `sqlite3 source '.backup dest'` in effect but
68    // runs in-process via our existing connection.
69    let conn = db::open(db_path).context("opening source DB for backup")?;
70    let ts = chrono::Utc::now().format(BACKUP_TS_FMT).to_string();
71    let snapshot_name = format!("ai-memory-{ts}.db");
72    let snapshot_path = args.to.join(&snapshot_name);
73    if snapshot_path.exists() {
74        anyhow::bail!(
75            "refusing to overwrite existing snapshot {}",
76            snapshot_path.display()
77        );
78    }
79    conn.execute(
80        "VACUUM INTO ?1",
81        rusqlite::params![snapshot_path.to_string_lossy()],
82    )
83    .context("VACUUM INTO failed")?;
84    drop(conn);
85
86    let bytes = std::fs::metadata(&snapshot_path)?.len();
87    let sha = {
88        use sha2::Digest;
89        let mut hasher = sha2::Sha256::new();
90        let mut f = std::fs::File::open(&snapshot_path)?;
91        let mut buf = vec![0u8; 64 * 1024];
92        loop {
93            let n = f.read(&mut buf)?;
94            if n == 0 {
95                break;
96            }
97            hasher.update(&buf[..n]);
98        }
99        format!("{:x}", hasher.finalize())
100    };
101
102    let manifest = BackupManifest {
103        snapshot: snapshot_name.clone(),
104        sha256: sha.clone(),
105        bytes,
106        source_db: db_path.to_string_lossy().into_owned(),
107        version: crate::PKG_VERSION.to_string(),
108        created_at: chrono::Utc::now().to_rfc3339(),
109    };
110    let manifest_path = args.to.join(format!("ai-memory-{ts}.manifest.json"));
111    let manifest_text = serde_json::to_string_pretty(&manifest)?;
112    std::fs::write(&manifest_path, manifest_text.as_bytes())?;
113
114    // Rotation — newest-first listing, drop everything past `keep`.
115    if args.keep > 0 {
116        prune_old_snapshots(&args.to, args.keep)?;
117    }
118
119    if json_out {
120        writeln!(out.stdout, "{}", serde_json::to_string(&manifest)?)?;
121    } else {
122        writeln!(out.stdout, "Snapshot: {}", snapshot_path.display())?;
123        writeln!(out.stdout, "Manifest: {}", manifest_path.display())?;
124        writeln!(out.stdout, "SHA-256 : {sha}")?;
125        writeln!(out.stdout, "Bytes   : {bytes}")?;
126    }
127    Ok(())
128}
129
130/// Enumerate existing `ai-memory-*.db` snapshot files newest-first and
131/// delete everything past `keep`. Also deletes the matching manifest
132/// for each removed snapshot.
133fn prune_old_snapshots(dir: &Path, keep: usize) -> Result<()> {
134    let mut snaps: Vec<(std::time::SystemTime, PathBuf)> = std::fs::read_dir(dir)?
135        .filter_map(std::result::Result::ok)
136        .filter_map(|entry| {
137            let path = entry.path();
138            let name = path.file_name()?.to_str()?.to_owned();
139            let is_snapshot = name.starts_with("ai-memory-")
140                && path
141                    .extension()
142                    .is_some_and(|ext| ext.eq_ignore_ascii_case("db"));
143            if is_snapshot {
144                let mtime = entry.metadata().ok()?.modified().ok()?;
145                Some((mtime, path))
146            } else {
147                None
148            }
149        })
150        .collect();
151    snaps.sort_by_key(|b| std::cmp::Reverse(b.0));
152    for (_, path) in snaps.into_iter().skip(keep) {
153        let _ = std::fs::remove_file(&path);
154        // Matching manifest (same stem, .manifest.json extension pattern)
155        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
156            let manifest = dir.join(manifest_file_name(stem));
157            let _ = std::fs::remove_file(manifest);
158        }
159    }
160    Ok(())
161}
162
163/// `restore` handler.
164pub fn run_restore(
165    db_path: &Path,
166    args: &RestoreArgs,
167    json_out: bool,
168    out: &mut CliOutput<'_>,
169) -> Result<()> {
170    use std::io::Read;
171    let (snapshot_path, manifest_path) = if args.from.is_dir() {
172        // Pick the newest snapshot in the directory.
173        let mut snaps: Vec<(std::time::SystemTime, PathBuf)> = std::fs::read_dir(&args.from)?
174            .filter_map(std::result::Result::ok)
175            .filter_map(|entry| {
176                let path = entry.path();
177                let name = path.file_name()?.to_str()?.to_owned();
178                let is_snapshot = name.starts_with("ai-memory-")
179                    && path
180                        .extension()
181                        .is_some_and(|ext| ext.eq_ignore_ascii_case("db"));
182                if is_snapshot {
183                    let mtime = entry.metadata().ok()?.modified().ok()?;
184                    Some((mtime, path))
185                } else {
186                    None
187                }
188            })
189            .collect();
190        snaps.sort_by_key(|b| std::cmp::Reverse(b.0));
191        let snap = snaps
192            .into_iter()
193            .next()
194            .map(|(_, p)| p)
195            .ok_or_else(|| anyhow::anyhow!("no snapshots found in {}", args.from.display()))?;
196        let stem = snap.file_stem().and_then(|s| s.to_str()).unwrap_or("");
197        let manifest = args.from.join(manifest_file_name(stem));
198        (snap, manifest)
199    } else {
200        // File path supplied directly.
201        let snap = args.from.clone();
202        let stem = snap.file_stem().and_then(|s| s.to_str()).unwrap_or("");
203        let parent = snap.parent().unwrap_or_else(|| Path::new("."));
204        let manifest = parent.join(manifest_file_name(stem));
205        (snap, manifest)
206    };
207
208    if !snapshot_path.exists() {
209        anyhow::bail!("snapshot {} does not exist", snapshot_path.display());
210    }
211
212    // SHA-256 verification against manifest.
213    if !args.skip_verify {
214        if !manifest_path.exists() {
215            anyhow::bail!(
216                "manifest {} not found; pass --skip-verify to restore anyway",
217                manifest_path.display()
218            );
219        }
220        let manifest_text = std::fs::read_to_string(&manifest_path)?;
221        let manifest: BackupManifest = serde_json::from_str(&manifest_text)
222            .with_context(|| format!("parsing manifest {}", manifest_path.display()))?;
223        let observed = {
224            use sha2::Digest;
225            let mut hasher = sha2::Sha256::new();
226            let mut f = std::fs::File::open(&snapshot_path)?;
227            let mut buf = vec![0u8; 64 * 1024];
228            loop {
229                let n = f.read(&mut buf)?;
230                if n == 0 {
231                    break;
232                }
233                hasher.update(&buf[..n]);
234            }
235            format!("{:x}", hasher.finalize())
236        };
237        if observed != manifest.sha256 {
238            anyhow::bail!(
239                "sha256 mismatch — manifest says {}, snapshot is {}",
240                manifest.sha256,
241                observed
242            );
243        }
244    }
245
246    // Move current DB aside as a safety net (only if it exists).
247    if db_path.exists() {
248        let ts = chrono::Utc::now().format(BACKUP_TS_FMT).to_string();
249        let aside = db_path.with_extension(format!("pre-restore-{ts}.db"));
250        std::fs::rename(db_path, &aside)
251            .with_context(|| format!("moving current DB aside to {}", aside.display()))?;
252        if !json_out {
253            writeln!(out.stdout, "Previous DB moved to {}", aside.display())?;
254        }
255    }
256
257    std::fs::copy(&snapshot_path, db_path)
258        .with_context(|| format!("copying snapshot to {}", db_path.display()))?;
259
260    if json_out {
261        writeln!(
262            out.stdout,
263            "{}",
264            serde_json::json!({
265                "status": "restored",
266                "from": snapshot_path.to_string_lossy(),
267                "to": db_path.to_string_lossy(),
268            })
269        )?;
270    } else {
271        writeln!(
272            out.stdout,
273            "Restored {} → {}",
274            snapshot_path.display(),
275            db_path.display()
276        )?;
277    }
278    Ok(())
279}
280
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cli::test_utils::{TestEnv, seed_memory};

    /// Runs `run_backup` for the environment's DB into `dir`, panicking on
    /// failure. Output is captured by `env`.
    fn backup(env: &mut TestEnv, dir: &std::path::Path, keep: usize, json: bool) {
        let db = env.db_path.clone();
        let args = BackupArgs {
            to: dir.to_path_buf(),
            keep,
        };
        let mut out = env.output();
        run_backup(&db, &args, json, &mut out).unwrap();
    }

    /// Runs `run_restore` onto the environment's DB and hands back its
    /// result so callers can assert on success or failure.
    fn restore(
        env: &mut TestEnv,
        from: std::path::PathBuf,
        skip_verify: bool,
        json: bool,
    ) -> anyhow::Result<()> {
        let db = env.db_path.clone();
        let args = RestoreArgs { from, skip_verify };
        let mut out = env.output();
        run_restore(&db, &args, json, &mut out)
    }

    /// Discards everything captured on stdout/stderr so far.
    fn clear_captured(env: &mut TestEnv) {
        env.stdout.clear();
        env.stderr.clear();
    }

    /// Path of the manifest sidecar for the snapshot file name `snapshot`
    /// inside `dir`.
    fn sidecar_path(dir: &std::path::Path, snapshot: &str) -> std::path::PathBuf {
        let stem = std::path::Path::new(snapshot)
            .file_stem()
            .unwrap()
            .to_string_lossy();
        dir.join(format!("{stem}.manifest.json"))
    }

    /// True for file names shaped like `ai-memory-*.db`.
    fn looks_like_snapshot(name: &str) -> bool {
        name.starts_with("ai-memory-") && name.ends_with(".db")
    }

    #[test]
    fn test_backup_happy_path_creates_snapshot_and_manifest() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        seed_memory(&db, "ns", "t", "c");
        let dir = db.parent().unwrap().join("backups-x1");
        backup(&mut env, &dir, 48, false);

        // At least one snapshot + manifest must exist.
        let names: Vec<String> = std::fs::read_dir(&dir)
            .unwrap()
            .flatten()
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect();
        let snap_count = names.iter().filter(|n| looks_like_snapshot(n)).count();
        let manifest_count = names
            .iter()
            .filter(|n| n.ends_with(".manifest.json"))
            .count();
        assert!(snap_count >= 1, "expected at least one snapshot");
        assert!(manifest_count >= 1, "expected at least one manifest");
        assert!(env.stdout_str().contains("Snapshot:"));
    }

    #[test]
    fn test_backup_json_emits_manifest_with_sha256() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        seed_memory(&db, "ns", "t", "c");
        let dir = db.parent().unwrap().join("backups-x2");
        backup(&mut env, &dir, 48, true);

        let v: serde_json::Value = serde_json::from_str(env.stdout_str().trim()).unwrap();
        assert!(v["sha256"].is_string());
        let digest = v["sha256"].as_str().unwrap();
        assert_eq!(digest.len(), 64); // hex sha256
    }

    #[test]
    fn test_restore_from_directory_picks_newest() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        seed_memory(&db, "ns", "before-backup", "stuff");
        let dir = db.parent().unwrap().join("backups-x3");
        backup(&mut env, &dir, 48, false);
        clear_captured(&mut env);

        restore(&mut env, dir, false, false).unwrap();
        assert!(env.stdout_str().contains("Restored"));
    }

    #[test]
    fn test_restore_from_explicit_file_path() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        seed_memory(&db, "ns", "t", "c");
        let dir = db.parent().unwrap().join("backups-x4");
        backup(&mut env, &dir, 48, true);
        let manifest: BackupManifest = serde_json::from_str(env.stdout_str().trim()).unwrap();
        let snap_path = dir.join(&manifest.snapshot);
        clear_captured(&mut env);

        restore(&mut env, snap_path, false, true).unwrap();
        let v: serde_json::Value = serde_json::from_str(env.stdout_str().trim()).unwrap();
        assert_eq!(v["status"].as_str().unwrap(), "restored");
    }

    #[test]
    fn test_restore_with_skip_verify_succeeds_without_manifest() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        seed_memory(&db, "ns", "t", "c");
        let dir = db.parent().unwrap().join("backups-x5");
        backup(&mut env, &dir, 48, true);
        let manifest: BackupManifest = serde_json::from_str(env.stdout_str().trim()).unwrap();
        let snap_path = dir.join(&manifest.snapshot);
        // Delete manifest file so verification would fail; skip_verify = true should still pass.
        std::fs::remove_file(sidecar_path(&dir, &manifest.snapshot)).unwrap();
        clear_captured(&mut env);

        restore(&mut env, snap_path, true, false).unwrap();
        assert!(env.stdout_str().contains("Restored"));
    }

    #[test]
    fn test_restore_bad_sha256_errors() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        seed_memory(&db, "ns", "t", "c");
        let dir = db.parent().unwrap().join("backups-x6");
        backup(&mut env, &dir, 48, true);
        let mut bad: BackupManifest = serde_json::from_str(env.stdout_str().trim()).unwrap();
        let manifest_path = sidecar_path(&dir, &bad.snapshot);
        // Corrupt sha in manifest.
        bad.sha256 = "0000000000000000000000000000000000000000000000000000000000000000".to_string();
        std::fs::write(&manifest_path, serde_json::to_string(&bad).unwrap()).unwrap();
        let snap_path = dir.join(&bad.snapshot);

        let res = restore(&mut env, snap_path, false, false);
        assert!(res.is_err());
        assert!(res.unwrap_err().to_string().contains("sha256 mismatch"));
    }

    #[test]
    fn test_backup_retention_prunes_old_snapshots() {
        let mut env = TestEnv::fresh();
        let db = env.db_path.clone();
        seed_memory(&db, "ns", "t", "c");
        let dir = db.parent().unwrap().join("backups-x7");
        // Take a few backups in succession; with `keep=1` only the newest must remain.
        for _ in 0..3 {
            // Sleep 1 second to avoid filename collision (BACKUP_TS_FMT is per-second).
            std::thread::sleep(std::time::Duration::from_secs(1));
            backup(&mut env, &dir, 1, true);
            clear_captured(&mut env);
        }
        let remaining = std::fs::read_dir(&dir)
            .unwrap()
            .flatten()
            .filter(|entry| looks_like_snapshot(&entry.file_name().to_string_lossy()))
            .count();
        assert_eq!(remaining, 1, "retention should keep exactly 1 snapshot");
    }
}