// claw-core 0.1.1
//
// Embedded local database engine for ClawDB — an agent-native cognitive database.
// Documentation
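//! Snapshot and restore logic for claw-core.
//!
//! This module implements point-in-time snapshots of the SQLite database.
//! Snapshots are plain copies of the database file written to a configurable
//! snapshot directory, and each one is recorded in a `manifest.json` file kept
//! in that directory.
//!
//! A WAL checkpoint must be performed before the copy so that all committed
//! data is in the main database file. [`Snapshotter::take`] only copies the
//! file and does not run a checkpoint itself, so this is the caller's
//! responsibility.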

use std::path::{Path, PathBuf};

use crate::error::{ClawError, ClawResult};
use serde::{Deserialize, Serialize};

/// Metadata describing a single snapshot file created by [`Snapshotter::take`].
///
/// One value is returned from every successful `take` call, and a copy of it
/// is appended to the snapshot directory's manifest. The type derives
/// `Serialize`/`Deserialize` so that it can be stored in that JSON manifest.
///
/// # Example
///
/// ```rust,no_run
/// # use claw_core::{ClawEngine, ClawConfig};
/// # async fn example() -> claw_core::ClawResult<()> {
/// # let config = ClawConfig::builder()
/// #     .db_path("/tmp/snap_test.db")
/// #     .snapshot_dir("/tmp/snaps")
/// #     .build()?;
/// # let engine = ClawEngine::open(config).await?;
/// let meta = engine.snapshot_create().await?;
/// println!("snapshot size: {} bytes", meta.size_bytes);
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotMeta {
    /// Path to the snapshot file: the snapshot directory joined with the
    /// generated file name. It is absolute only if the snapshot directory
    /// itself was given as an absolute path.
    pub path: PathBuf,
    /// UTC timestamp recorded when the snapshot was taken.
    pub created_at: chrono::DateTime<chrono::Utc>,
    /// Size of the snapshot file in bytes.
    pub size_bytes: u64,
    /// BLAKE3 checksum of the snapshot file, as a lower-case hex string.
    pub checksum: String,
}

/// A JSON manifest that tracks all snapshots in a snapshot directory.
///
/// Persisted to `<snapshot_dir>/manifest.json` and updated each time a new
/// snapshot is taken: [`Snapshotter::take`] appends the new snapshot's
/// [`SnapshotMeta`] and rewrites the file.
///
/// # Example
///
/// ```rust,no_run
/// # use claw_core::Snapshotter;
/// let s = Snapshotter::new("/tmp/snaps").expect("ok");
/// let manifest = s.load_manifest().expect("manifest");
/// println!("{} snapshots tracked", manifest.entries.len());
/// ```
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SnapshotManifest {
    /// Snapshot metadata entries in the order they were recorded. New entries
    /// are appended at the end, so the oldest snapshot comes first.
    pub entries: Vec<SnapshotMeta>,
}

impl SnapshotManifest {
    /// Build a manifest that does not track any snapshots yet.
    ///
    /// Produces the same value as [`SnapshotManifest::default`].
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
        }
    }
}

/// Manages snapshot creation and restoration for a claw-core engine.
///
/// A `Snapshotter` holds only the path of its snapshot directory. Construct
/// one via [`Snapshotter::new`], then call [`Snapshotter::take`],
/// [`Snapshotter::restore`], [`Snapshotter::list`] or
/// [`Snapshotter::load_manifest`] as required.
///
/// # Example
///
/// ```rust,no_run
/// # use claw_core::Snapshotter;
/// # use std::path::Path;
/// let snapper = Snapshotter::new("/tmp/snapshots").expect("snapshotter");
/// ```
#[derive(Debug)]
pub struct Snapshotter {
    /// Directory where snapshot files and `manifest.json` are written.
    snapshot_dir: PathBuf,
}

impl Snapshotter {
    /// Create a new [`Snapshotter`] that stores snapshots in `snapshot_dir`.
    ///
    /// # Errors
    ///
    /// Returns [`ClawError::Snapshot`] if `snapshot_dir` cannot be created.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use claw_core::Snapshotter;
    /// let s = Snapshotter::new("/tmp/snapshots").expect("ok");
    /// ```
    pub fn new(snapshot_dir: impl Into<PathBuf>) -> ClawResult<Self> {
        let dir = snapshot_dir.into();
        std::fs::create_dir_all(&dir).map_err(|e| {
            ClawError::Snapshot(format!(
                "cannot create snapshot directory '{}': {e}",
                dir.display()
            ))
        })?;
        Ok(Snapshotter { snapshot_dir: dir })
    }

    /// Take a snapshot of the database at `db_path` and write it to the
    /// snapshot directory. Returns [`SnapshotMeta`] describing the created file.
    ///
    /// The snapshot file name is derived from the current UTC timestamp (with
    /// microsecond precision) so that snapshots sort chronologically and two
    /// snapshots taken within the same second get distinct names. The same
    /// instant is recorded as [`SnapshotMeta::created_at`].
    ///
    /// This method only copies the file at `db_path`; it does not checkpoint
    /// the WAL. After the copy, the snapshot's size and BLAKE3 checksum are
    /// computed and the entry is appended to `<snapshot_dir>/manifest.json`.
    ///
    /// # Errors
    ///
    /// Returns [`ClawError::Snapshot`] if
    ///
    /// - the existing manifest cannot be loaded (checked before anything is
    ///   written, so a damaged manifest is never silently replaced),
    /// - a snapshot file with the generated name already exists,
    /// - the file copy, the metadata read or the checksum computation fails, or
    /// - the updated manifest cannot be written.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use claw_core::Snapshotter;
    /// # use std::path::Path;
    /// # let s = Snapshotter::new("/tmp/snaps").unwrap();
    /// let meta = s.take(Path::new("/tmp/claw.db")).expect("snapshot taken");
    /// assert!(meta.path.exists());
    /// ```
    pub fn take(&self, db_path: &Path) -> ClawResult<SnapshotMeta> {
        // Read the clock once so the file name and `created_at` always agree.
        let created_at = chrono::Utc::now();
        let file_name = format!("snapshot-{}.db", created_at.format("%Y%m%dT%H%M%S%.6fZ"));
        let dest = self.snapshot_dir.join(file_name);

        // Load the manifest before touching the disk and propagate failures.
        // Falling back to an empty manifest here would overwrite the history
        // of every earlier snapshot as soon as the manifest is saved below.
        let mut manifest = self.load_manifest()?;

        // `std::fs::copy` overwrites its destination, so never reuse a name:
        // that would destroy an earlier snapshot and leave its manifest entry
        // pointing at different contents.
        if dest.exists() {
            return Err(ClawError::Snapshot(format!(
                "snapshot '{}' already exists; refusing to overwrite it",
                dest.display()
            )));
        }

        std::fs::copy(db_path, &dest).map_err(|e| {
            ClawError::Snapshot(format!(
                "failed to copy '{}' → '{}': {e}",
                db_path.display(),
                dest.display()
            ))
        })?;

        let size_bytes = std::fs::metadata(&dest)
            .map_err(|e| {
                ClawError::Snapshot(format!(
                    "failed to read metadata for '{}': {e}",
                    dest.display()
                ))
            })?
            .len();

        tracing::info!(path = %dest.display(), "snapshot taken");

        // Compute BLAKE3 checksum.
        let checksum = blake3_file_hex(&dest)?;

        let meta = SnapshotMeta {
            path: dest,
            created_at,
            size_bytes,
            checksum,
        };

        // Record the new snapshot at the end of the manifest (oldest first).
        manifest.entries.push(meta.clone());
        self.save_manifest(&manifest)?;

        tracing::info!(path = %meta.path.display(), size_bytes = meta.size_bytes, "manifest updated");
        Ok(meta)
    }

    /// Restore the database at `db_path` from the snapshot at `snapshot_path`.
    ///
    /// **Warning:** This overwrites the live database file. The caller is
    /// responsible for ensuring the engine is shut down before restoring.
    ///
    /// The snapshot's first 16 bytes are checked against the SQLite 3 file
    /// header before anything is overwritten.
    ///
    /// Any existing WAL (`-wal`) and shared-memory (`-shm`) sidecar files at
    /// `db_path` are removed after the copy so that SQLite does not replay
    /// mutations from the pre-restore session on next open. A sidecar that
    /// does not exist is not an error.
    ///
    /// # Errors
    ///
    /// Returns [`ClawError::Snapshot`] if the snapshot fails header
    /// validation, if the file copy fails, or if an existing sidecar file
    /// cannot be removed. In the last case the database file has already been
    /// replaced, and the leftover sidecar must be removed before the database
    /// is opened again.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use claw_core::Snapshotter;
    /// # use std::path::Path;
    /// # let s = Snapshotter::new("/tmp/snaps").unwrap();
    /// # let meta = s.take(Path::new("/tmp/claw.db")).unwrap();
    /// s.restore(&meta.path, Path::new("/tmp/claw.db")).expect("restored");
    /// ```
    pub fn restore(&self, snapshot_path: &Path, db_path: &Path) -> ClawResult<()> {
        // Validate SQLite magic bytes before overwriting the live database.
        validate_sqlite_magic(snapshot_path)?;

        std::fs::copy(snapshot_path, db_path).map_err(|e| {
            ClawError::Snapshot(format!(
                "failed to restore '{}' → '{}': {e}",
                snapshot_path.display(),
                db_path.display()
            ))
        })?;

        // Remove WAL/SHM sidecars so SQLite does not replay pre-restore writes.
        for suffix in ["-wal", "-shm"] {
            // Append to the raw OS string: formatting the path via `display()`
            // is lossy for non-UTF-8 paths and would name the wrong file.
            let mut sidecar = db_path.as_os_str().to_os_string();
            sidecar.push(suffix);
            let sidecar = PathBuf::from(sidecar);

            match std::fs::remove_file(&sidecar) {
                Ok(()) => {}
                // Nothing to clean up.
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
                // A sidecar left behind would be paired with the restored
                // database on next open, so report it instead of ignoring it.
                Err(e) => {
                    return Err(ClawError::Snapshot(format!(
                        "failed to remove stale sidecar '{}': {e}",
                        sidecar.display()
                    )));
                }
            }
        }

        tracing::info!(
            from = %snapshot_path.display(),
            to = %db_path.display(),
            "snapshot restored"
        );
        Ok(())
    }

    /// Collect the paths of all `.db` files in the snapshot directory and
    /// return them in sorted order. Because snapshot file names start with
    /// their timestamp, this puts the oldest snapshot first.
    ///
    /// Directory entries that cannot be read are skipped.
    ///
    /// # Errors
    ///
    /// Returns [`ClawError::Snapshot`] if the directory cannot be read.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use claw_core::Snapshotter;
    /// # let s = Snapshotter::new("/tmp/snaps").unwrap();
    /// let snaps = s.list().expect("list ok");
    /// println!("{} snapshots found", snaps.len());
    /// ```
    pub fn list(&self) -> ClawResult<Vec<PathBuf>> {
        let dir_entries = match std::fs::read_dir(&self.snapshot_dir) {
            Ok(iter) => iter,
            Err(e) => {
                return Err(ClawError::Snapshot(format!(
                    "cannot read snapshot directory '{}': {e}",
                    self.snapshot_dir.display()
                )));
            }
        };

        let mut snapshots: Vec<PathBuf> = Vec::new();
        // `flatten` drops the `Err` entries and yields only readable ones.
        for entry in dir_entries.flatten() {
            let candidate = entry.path();
            if matches!(candidate.extension(), Some(ext) if ext == "db") {
                snapshots.push(candidate);
            }
        }

        snapshots.sort();
        Ok(snapshots)
    }

    /// Load the snapshot manifest from `<snapshot_dir>/manifest.json`.
    ///
    /// Returns an empty [`SnapshotManifest`] if the file does not yet exist.
    /// Only a "not found" error is treated this way. Any other failure to
    /// read the file (for example a permission error) is reported, so that an
    /// inaccessible manifest is never mistaken for an empty one.
    ///
    /// # Errors
    ///
    /// Returns [`ClawError::Snapshot`] if the file exists but cannot be read
    /// or parsed.
    pub fn load_manifest(&self) -> ClawResult<SnapshotManifest> {
        let path = self.snapshot_dir.join("manifest.json");

        // Read directly instead of probing with `Path::exists` first: `exists`
        // returns `false` for every metadata error, not only for a missing file.
        let bytes = match std::fs::read(&path) {
            Ok(bytes) => bytes,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                return Ok(SnapshotManifest::default());
            }
            Err(e) => {
                return Err(ClawError::Snapshot(format!(
                    "cannot read manifest '{}': {e}",
                    path.display()
                )));
            }
        };

        serde_json::from_slice(&bytes).map_err(|e| {
            ClawError::Snapshot(format!("cannot parse manifest '{}': {e}", path.display()))
        })
    }

    /// Persist `manifest` to `<snapshot_dir>/manifest.json`.
    ///
    /// The JSON is first written to `<snapshot_dir>/manifest.json.tmp` and
    /// then renamed over the real file. A write that fails part-way (for
    /// example on a full disk) therefore leaves the previous manifest intact
    /// instead of truncating it.
    ///
    /// # Errors
    ///
    /// Returns [`ClawError::Snapshot`] if the manifest cannot be serialised or
    /// the file cannot be written.
    fn save_manifest(&self, manifest: &SnapshotManifest) -> ClawResult<()> {
        let path = self.snapshot_dir.join("manifest.json");
        let tmp_path = self.snapshot_dir.join("manifest.json.tmp");

        let bytes = serde_json::to_vec_pretty(manifest)
            .map_err(|e| ClawError::Snapshot(format!("cannot serialise manifest: {e}")))?;

        // Write the complete contents to the temporary file, then swap it in.
        std::fs::write(&tmp_path, bytes)
            .and_then(|()| std::fs::rename(&tmp_path, &path))
            .map_err(|e| {
                // Best-effort cleanup; the error worth reporting is `e`.
                let _ = std::fs::remove_file(&tmp_path);
                ClawError::Snapshot(format!("cannot write manifest '{}': {e}", path.display()))
            })
    }
}

// ── helpers ───────────────────────────────────────────────────────────────────

/// Compute the BLAKE3 checksum of a file and return it as a lower-case hex string.
///
/// The file is streamed through the hasher in 64 KiB chunks, so it is never
/// loaded into memory as a whole.
///
/// # Errors
///
/// Returns [`ClawError::Snapshot`] if the file cannot be opened or a read
/// fails. Reads that fail with [`std::io::ErrorKind::Interrupted`] are retried
/// rather than reported.
fn blake3_file_hex(path: &Path) -> ClawResult<String> {
    use std::io::{ErrorKind, Read};

    let mut hasher = blake3::Hasher::new();
    let mut file = std::fs::File::open(path).map_err(|e| {
        ClawError::Snapshot(format!("cannot open '{}' for hashing: {e}", path.display()))
    })?;

    let mut buf = vec![0u8; 65536];
    loop {
        let n = match file.read(&mut buf) {
            // End of file.
            Ok(0) => break,
            Ok(n) => n,
            // `Read::read` documents `Interrupted` as non-fatal: retry.
            Err(e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => {
                return Err(ClawError::Snapshot(format!(
                    "read error while hashing '{}': {e}",
                    path.display()
                )));
            }
        };
        hasher.update(&buf[..n]);
    }

    Ok(hasher.finalize().to_hex().to_string())
}

/// Check that the file at `path` begins with the 16-byte SQLite 3 header
/// string (`"SQLite format 3"` followed by a NUL byte).
///
/// # Errors
///
/// Returns [`ClawError::Snapshot`] if the file cannot be opened, if fewer than
/// 16 bytes can be read from it, or if those bytes are not the SQLite 3
/// header.
fn validate_sqlite_magic(path: &Path) -> ClawResult<()> {
    use std::io::Read;

    const SQLITE_MAGIC: [u8; 16] = *b"SQLite format 3\0";

    let mut file = match std::fs::File::open(path) {
        Ok(handle) => handle,
        Err(e) => {
            return Err(ClawError::Snapshot(format!(
                "cannot open snapshot for validation: {e}"
            )));
        }
    };

    let mut header = [0u8; 16];
    if let Err(e) = file.read_exact(&mut header) {
        return Err(ClawError::Snapshot(format!(
            "cannot read snapshot header: {e}"
        )));
    }

    if header == SQLITE_MAGIC {
        Ok(())
    } else {
        Err(ClawError::Snapshot(
            "snapshot file does not have a valid SQLite 3 header".to_string(),
        ))
    }
}