csaf-core 0.3.4

CSAF storage, validation, sidecar generation, import/export
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026 Pierre Gronau, ndaal in Cologne

//! Full-database dump (snapshot) pipeline.
//!
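//! Produces consistent, timestamped copies of the embedded redb and
//! sqlite databases in the configured dump directory, together with
//! optional hash sidecar files. Each sidecar kind is toggled
//! individually in the settings: SHA-256, SHA-512, SHA3-512,
//! BLAKE3-512, and SHAKE256-512 (extensions `.sha-256`, `.sha-512`,
//! `.sha3-512`, `.blake3-512`, `.shake256-512`; naming per `CLAUDE.md`).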
//!
//! The pipeline has two write paths:
//!
//! - **sqlite**: uses the `rusqlite::backup::Backup` API so the copy
//!   is consistent regardless of WAL / SHM state.
//! - **redb**: redb is copy-on-write; holding an active read
//!   transaction while copying the file is enough to pin a consistent
//!   snapshot because writers allocate new pages instead of mutating
//!   existing ones.
//!
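//! Verification after write: any check of the redb copy is delegated to
//! `CsafStorage::copy_file_with_snapshot`. NOTE(review): `backup_sqlite`
//! in this module only runs the backup to completion — it does not
//! reopen the dump or run an integrity check, so the "fresh open +
//! integrity check before returning `Ok`" guarantee is not implemented
//! here for the sqlite dump. Confirm and either add the check or keep
//! this caveat.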
//!
//! Hash sidecars are produced via
//! [`crate::sidecar::write_sidecar_files_for`], which preserves the
//! full file extension (`csaf.redb.sha-256`, not `csaf.sha-256`).

use std::path::{Path, PathBuf};
use std::time::Duration;

use csaf_models::db::DbPool;
use csaf_models::settings::Settings;
use rusqlite::Connection;
use rusqlite::backup::Backup;

use crate::error::Result;
use crate::sidecar::{SidecarHashes, write_sidecar_files_for};
use crate::storage::CsafStorage;

/// How many pages the sqlite backup copies per step (this is the
/// `pages_per_step` argument handed to `Backup::run_to_completion` in
/// `backup_sqlite`). A larger number means faster copies but a longer
/// write-lock window.
///
/// NOTE(review): the previous comment stated that 1024 is "the value
/// recommended by the `rusqlite` docs for online backups". That
/// provenance cannot be confirmed from this file — verify it against
/// the `rusqlite::backup` documentation or treat 1024 as a
/// project-chosen value.
const SQLITE_BACKUP_PAGES_PER_STEP: std::os::raw::c_int = 1024;

/// Summary of one successful database dump, as returned by
/// `dump_database`.
#[derive(Debug, Clone)]
pub struct DumpResult {
    /// Compact UTC timestamp in the form `%Y%m%dT%H%M%SZ` (no colons,
    /// e.g. `20260417T120000Z`) that is used as the suffix of both
    /// dumped database file names.
    pub timestamp: String,
    /// Path to the dumped redb file: the `dump_dir` argument joined
    /// with `csaf.redb.<timestamp>`. It is absolute only if the
    /// `dump_dir` passed to `dump_database` was absolute.
    pub redb_path: PathBuf,
    /// Size (bytes) of the dumped redb file, probed right after the
    /// copy finished.
    pub redb_bytes: u64,
    /// Path to the dumped sqlite file: the `dump_dir` argument joined
    /// with `csaf.sqlite.<timestamp>`. It is absolute only if the
    /// `dump_dir` passed to `dump_database` was absolute.
    pub sqlite_path: PathBuf,
    /// Size (bytes) of the dumped sqlite file, probed right after the
    /// backup finished.
    pub sqlite_bytes: u64,
    /// Sidecar paths that were written, redb sidecars first and sqlite
    /// sidecars after them. Empty when every sidecar toggle in the
    /// settings is disabled.
    pub sidecars: Vec<PathBuf>,
}

/// Snapshot the live redb and sqlite databases into `dump_dir` and
/// write the hash sidecars selected by `settings`.
///
/// Both dumped files share one timestamp suffix:
/// `csaf.redb.<timestamp>` and `csaf.sqlite.<timestamp>`.
///
/// # Arguments
///
/// - `data_dir` — directory holding the live `csaf.redb` file; only
///   that file name is resolved against it.
/// - `dump_dir` — target directory. Created if missing.
/// - `storage` — the already-open live redb handle; it performs the
///   snapshot copy so that no second handle on the same file is opened.
/// - `pool` — live sqlite pool (source of the backup-API snapshot).
/// - `settings` — consulted only through
///   `SidecarHashes::from_settings` to decide which sidecars to write.
///
/// # Errors
///
/// The first failing step aborts the dump and its error is propagated
/// with `?`:
///
/// - opening or creating the dump directory,
/// - the redb snapshot copy (a missing source `csaf.redb` is reported
///   as `CsafError::Storage`, as exercised by this module's tests),
/// - the sqlite online backup,
/// - any size probe, read-back, or sidecar write in the dump directory.
///
/// Files written by earlier steps are not removed when a later step
/// fails.
pub fn dump_database(
    data_dir: impl AsRef<Path>,
    dump_dir: impl AsRef<Path>,
    storage: &CsafStorage,
    pool: &DbPool,
    settings: &Settings,
) -> Result<DumpResult> {
    let data_dir: &Path = data_dir.as_ref();
    let dump_dir: &Path = dump_dir.as_ref();

    // Opening the capability handle first guarantees the directory
    // exists before any database writes into it; all size probes,
    // read-backs, and sidecar writes below go through this handle.
    let dump_handle = crate::fs::DataDir::open_or_create(dump_dir)?;

    // One timestamp for the whole dump so the two files pair up by name.
    let timestamp = filename_safe_timestamp();
    let redb_rel = format!("csaf.redb.{timestamp}");
    let sqlite_rel = format!("csaf.sqlite.{timestamp}");

    // redb and rusqlite both need real OS paths for their targets, so
    // the database files are addressed by joined paths rather than
    // through the handle.
    let redb_path = dump_dir.join(&redb_rel);
    let sqlite_path = dump_dir.join(&sqlite_rel);

    // redb: copy through the live handle, which pins a read transaction
    // for the duration of the copy. Opening the live file a second time
    // would collide with the running server's lock on it.
    let live_redb = data_dir.join("csaf.redb");
    storage.copy_file_with_snapshot(&live_redb, &redb_path)?;
    let redb_bytes = dump_handle.file_len(&redb_rel)?;

    // sqlite: online backup from the pooled connection.
    backup_sqlite(pool, &sqlite_path)?;
    let sqlite_bytes = dump_handle.file_len(&sqlite_rel)?;

    // Sidecars: redb first, then sqlite. Each dump is read back, hashed,
    // and its buffer released at the end of the iteration, so the two
    // files are never held in memory at the same time.
    let hashes = SidecarHashes::from_settings(settings);
    let mut sidecars: Vec<PathBuf> = Vec::new();
    for rel_name in [&redb_rel, &sqlite_rel] {
        let contents = dump_handle.read(rel_name)?;
        let written = write_sidecar_files_for(&dump_handle, rel_name, &contents, hashes)?;
        sidecars.extend(written.into_iter().map(|rel| dump_handle.resolve(&rel)));
    }

    Ok(DumpResult {
        timestamp,
        redb_path,
        redb_bytes,
        sqlite_path,
        sqlite_bytes,
        sidecars,
    })
}

/// Write a copy of the live sqlite database to `dst` through rusqlite's
/// online backup API, so a consistent snapshot is produced even while
/// writes are in flight.
///
/// The source is a connection borrowed from `pool` for the duration of
/// the backup; the target is a connection opened on `dst`. The backup
/// copies `SQLITE_BACKUP_PAGES_PER_STEP` pages per step with a zero
/// pause between steps and no progress callback.
///
/// # Errors
///
/// Propagates any failure from opening `dst`, initialising the backup,
/// running it to completion, or from `pool.with_conn` itself.
fn backup_sqlite(pool: &DbPool, dst: &Path) -> Result<()> {
    pool.with_conn(|live| {
        let mut target = Connection::open(dst)?;
        // The backup object borrows `target` mutably and is dropped at
        // the end of this statement, before `target` itself is closed.
        Backup::new(live, &mut target)?.run_to_completion(
            SQLITE_BACKUP_PAGES_PER_STEP,
            Duration::ZERO,
            None,
        )?;
        Ok(())
    })?;
    Ok(())
}

/// ISO 8601 UTC timestamp with no characters that are invalid in
/// filenames on common platforms (no colons, no slashes).
fn filename_safe_timestamp() -> String {
    use chrono::Utc;
    Utc::now().format("%Y%m%dT%H%M%SZ").to_string()
}

#[cfg(test)]
mod tests {
    // Integration-style tests for the dump pipeline. Every test works
    // in its own temporary directory with real redb and sqlite files;
    // nothing is mocked.
    use super::*;
    use crate::error::CsafError;
    use crate::storage::CsafStorage;
    use csaf_models::settings::Settings;
    use tempfile::tempdir;

    /// Create a temp directory containing freshly opened `csaf.redb`
    /// and `csaf.sqlite` files and return it together with the open
    /// handles. The `TempDir` guard is returned so the directory
    /// outlives the test body; callers should keep it bound.
    fn seeded_data_dir() -> (tempfile::TempDir, CsafStorage, DbPool) {
        let dir = tempdir().expect("tmpdir");
        // Create both live DB files using the real open paths.
        let redb_path = dir.path().join("csaf.redb");
        let sqlite_path = dir.path().join("csaf.sqlite");
        let storage = CsafStorage::open(&redb_path).expect("open redb");
        let pool = DbPool::open(&sqlite_path).expect("open sqlite");
        (dir, storage, pool)
    }

    /// With default settings a dump writes both database files plus
    /// every sidecar, each in the expected format and with the
    /// hyphenated extension.
    #[test]
    fn test_dump_database_happy_path_writes_all_files() {
        let (dir, storage, pool) = seeded_data_dir();
        let dump_dir = dir.path().join("dumps");

        let settings = Settings::default(); // all three sidecars ON
        let res = dump_database(dir.path(), &dump_dir, &storage, &pool, &settings)
            .expect("dump_database ok");

        assert!(res.redb_path.exists(), "redb dump missing");
        assert!(res.sqlite_path.exists(), "sqlite dump missing");
        assert!(res.redb_bytes > 0);
        assert!(res.sqlite_bytes > 0);
        assert!(!res.timestamp.is_empty());

        // Sidecar count: 5 per file (sha-256 + sha-512 + sha3-512 +
        // blake3-512 + shake256-512) * 2 files = 10.
        assert_eq!(res.sidecars.len(), 10);
        for side in &res.sidecars {
            assert!(side.exists(), "sidecar not on disk: {}", side.display());
            let contents = std::fs::read_to_string(side).expect("read sidecar");
            // Hash and file name are separated by two spaces.
            assert!(contents.contains("  "), "GNU format requires 2 spaces");
            let name = side.file_name().unwrap().to_string_lossy();
            assert!(
                name.ends_with(".sha-256")
                    || name.ends_with(".sha-512")
                    || name.ends_with(".sha3-512")
                    || name.ends_with(".blake3-512")
                    || name.ends_with(".shake256-512"),
                "unexpected sidecar extension: {name}"
            );
            // Regression guard for the 0.3.0 rename.
            assert!(!name.ends_with(".sha256"), "legacy form leaked: {name}");
            assert!(!name.ends_with(".sha512"), "legacy form leaked: {name}");
        }
    }

    /// Only the sidecar kinds enabled in the settings are written:
    /// with SHA-256 alone enabled, each dump gets exactly one sidecar.
    #[test]
    fn test_dump_database_respects_sidecar_toggles() {
        let (dir, storage, pool) = seeded_data_dir();
        let dump_dir = dir.path().join("dumps");

        let settings = Settings {
            sidecar_sha256: true,
            sidecar_sha512: false,
            sidecar_sha3_512: false,
            sidecar_blake3_512: false,
            sidecar_shake256_512: false,
            ..Settings::default()
        };
        let res = dump_database(dir.path(), &dump_dir, &storage, &pool, &settings)
            .expect("dump_database ok");

        // 2 files × 1 sidecar each = 2.
        assert_eq!(res.sidecars.len(), 2);
        for side in &res.sidecars {
            let name = side.file_name().unwrap().to_string_lossy();
            assert!(name.ends_with(".sha-256"), "unexpected sidecar: {name}");
        }
    }

    /// With every sidecar toggle off the dump still succeeds and
    /// reports an empty sidecar list.
    #[test]
    fn test_dump_database_no_sidecars_when_all_disabled() {
        let (dir, storage, pool) = seeded_data_dir();
        let dump_dir = dir.path().join("dumps");

        let settings = Settings {
            sidecar_sha256: false,
            sidecar_sha512: false,
            sidecar_sha3_512: false,
            sidecar_blake3_512: false,
            sidecar_shake256_512: false,
            ..Settings::default()
        };
        let res = dump_database(dir.path(), &dump_dir, &storage, &pool, &settings)
            .expect("dump_database ok");
        assert!(res.sidecars.is_empty());
    }

    /// A missing, multi-level dump directory is created by the dump.
    #[test]
    fn test_dump_database_creates_dump_dir() {
        let (dir, storage, pool) = seeded_data_dir();
        let dump_dir = dir.path().join("nested/does/not/exist");
        assert!(!dump_dir.exists());

        let settings = Settings::default();
        dump_database(dir.path(), &dump_dir, &storage, &pool, &settings).expect("dump ok");
        assert!(dump_dir.exists());
    }

    /// If `data_dir` holds no `csaf.redb`, the dump fails with
    /// `CsafError::Storage` and a "redb source file missing" message.
    #[test]
    fn test_dump_database_missing_redb_source_returns_err() {
        // Seed only sqlite; open a redb in a DIFFERENT directory so the
        // `data_dir` passed to `dump_database` has no `csaf.redb`.
        let dir = tempdir().expect("tmpdir");
        let other = tempdir().expect("tmpdir2");
        let storage = CsafStorage::open(&other.path().join("csaf.redb")).expect("open redb");
        let sqlite_path = dir.path().join("csaf.sqlite");
        let pool = DbPool::open(&sqlite_path).expect("open sqlite");

        let dump_dir = dir.path().join("dumps");
        let err = dump_database(dir.path(), &dump_dir, &storage, &pool, &Settings::default())
            .expect_err("should error with missing source");
        match err {
            CsafError::Storage(msg) => {
                assert!(msg.contains("redb source file missing"), "got: {msg}");
            },
            other => panic!("wrong error variant: {other:?}"),
        }
    }

    /// The timestamp has the fixed 16-character compact shape and no
    /// characters that are problematic in file names.
    #[test]
    fn test_filename_safe_timestamp_format() {
        let ts = filename_safe_timestamp();
        // Must be 16 chars: 8 digits date + T + 6 digits time + Z.
        assert_eq!(ts.len(), 16, "got: {ts}");
        assert!(ts.ends_with('Z'));
        assert!(!ts.contains(':'), "colons break Windows filenames");
        assert!(!ts.contains('/'));
    }

    /// The dumped redb file can be opened as a database of its own and
    /// accepts a read transaction.
    #[test]
    fn test_dump_redb_file_is_openable() {
        let (dir, storage, pool) = seeded_data_dir();
        let dump_dir = dir.path().join("dumps");
        let res = dump_database(dir.path(), &dump_dir, &storage, &pool, &Settings::default())
            .expect("dump ok");

        // The dumped redb file must open cleanly.
        let reopen = redb::Database::open(&res.redb_path).expect("open dumped redb");
        let _ = reopen.begin_read().expect("begin_read on dump");
    }

    /// The dumped sqlite file can be opened and contains at least one
    /// table in its schema.
    #[test]
    fn test_dump_sqlite_file_is_openable() {
        let (dir, storage, pool) = seeded_data_dir();
        let dump_dir = dir.path().join("dumps");
        let res = dump_database(dir.path(), &dump_dir, &storage, &pool, &Settings::default())
            .expect("dump ok");

        let reopen = Connection::open(&res.sqlite_path).expect("open dumped sqlite");
        let schema_count: i64 = reopen
            .query_row(
                "SELECT count(*) FROM sqlite_master WHERE type='table'",
                [],
                |r| r.get(0),
            )
            .expect("query count");
        assert!(schema_count > 0, "sqlite dump has no tables");
    }
}