aerovault 0.6.0

Military-grade encrypted vault format and CLI: AES-256-GCM-SIV, Argon2id, AES-KW, HMAC-SHA512, plus detached Reed-Solomon .aerocorrect error-correction sidecars
Documentation
use sha2::{Digest, Sha256};
use std::fs::File;
use std::io::{Read, Write};
use std::path::Path;

use super::sidecar::{
    aerocorrect_windows, validate_window_tiling_iter, AeroCorrectSegment, AeroCorrectSidecar,
    AeroCorrectSidecarReader, AEROCORRECT_WINDOW_SIZE,
};
use super::{
    compute_error_correction_shards_grid, error_correction_grid, reconstruct_from_error_correction,
};

/// Default per-file cap (1 GiB) for standalone Error Correction. Generation and repair
/// stream in windows, but the cap keeps accidental huge explicit runs bounded for CLI users.
pub(crate) const STANDALONE_EC_MAX_FILE_SIZE: u64 = 1024 * 1024 * 1024;

/// Size (4 MiB) of the read buffer that `hash_file_streaming` allocates once and
/// reuses for every read while hashing a file.
const HASH_READ_CHUNK: usize = 4 * 1024 * 1024;

/// A freshly generated sidecar together with summary figures about the run.
#[derive(Debug, Clone)]
pub(crate) struct StandaloneEcGeneratedSidecar {
    /// Serialized sidecar, as returned by `AeroCorrectSidecar::to_bytes`.
    pub(crate) sidecar_bytes: Vec<u8>,
    /// Length in bytes of the source file, as reported by its metadata.
    pub(crate) file_size: u64,
    /// Sum of the shard counts reported for every window of the file.
    pub(crate) shards: u64,
    /// Total error-correction payload length as a percentage of `file_size`;
    /// `0.0` when the file is empty.
    pub(crate) overhead_pct: f64,
    /// Length in bytes of `sidecar_bytes`.
    pub(crate) sidecar_len: u64,
}

/// Outcome of a capped sidecar generation request.
#[derive(Debug, Clone)]
pub(crate) enum StandaloneEcGenerateResult {
    /// A sidecar was produced for the file.
    Generated(StandaloneEcGeneratedSidecar),
    /// The file was larger than the cap, so it was not read and no sidecar was
    /// produced. Carries the observed size and the cap that was applied.
    SkippedTooLarge { file_size: u64, max_file_size: u64 },
}

/// Outcome of a successful verify-and-repair run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum StandaloneEcRepairResult {
    /// The file already hashed to the sidecar's content SHA-256; it was left untouched.
    Verified,
    /// The file was replaced with a repaired copy. `recovered_shards` is the sum of
    /// the counts returned by the reconstruction step for each window.
    Repaired { recovered_shards: usize },
}

/// Outcome of a read-only verification run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum StandaloneVerifyResult {
    /// The file's SHA-256 equals the content hash recorded in the sidecar.
    Verified,
    /// The file's SHA-256 differs from the content hash recorded in the sidecar.
    NeedsRepair,
}

/// Consume `hasher` and return its 32-byte SHA-256 digest as a plain array.
fn finalize_sha256(hasher: Sha256) -> [u8; 32] {
    hasher.finalize().into()
}

/// Compute the SHA-256 of the file at `path` without loading it fully into memory.
///
/// The file is read through a single reusable buffer of `HASH_READ_CHUNK` bytes.
///
/// # Errors
///
/// Returns a message naming `path` when the file cannot be opened or a read fails.
/// A read that reports `ErrorKind::Interrupted` is retried rather than treated as a
/// failure, matching the behaviour of `read_exact` used by the other read paths.
fn hash_file_streaming(path: &Path) -> Result<[u8; 32], String> {
    let mut file =
        File::open(path).map_err(|e| format!("open {} for hashing: {e}", path.display()))?;
    let mut hasher = Sha256::new();
    let mut buf = vec![0u8; HASH_READ_CHUNK];
    loop {
        let n = match file.read(&mut buf) {
            // A zero-length read marks end of file.
            Ok(0) => break,
            Ok(n) => n,
            // Interrupted reads are non-fatal; no bytes were consumed, so just retry.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(format!("read {} for hashing: {e}", path.display())),
        };
        hasher.update(&buf[..n]);
    }
    Ok(finalize_sha256(hasher))
}

/// Serialize `sidecar` and bundle it with the generation statistics.
///
/// `overhead_pct` is `avec_payload_len` expressed as a percentage of `file_size`,
/// and is reported as `0.0` for an empty file to avoid dividing by zero.
fn generated_from(
    sidecar: AeroCorrectSidecar,
    file_size: u64,
    shards: u64,
    avec_payload_len: u64,
) -> StandaloneEcGeneratedSidecar {
    let overhead_pct = match file_size {
        0 => 0.0,
        size => (avec_payload_len as f64 / size as f64) * 100.0,
    };
    let sidecar_bytes = sidecar.to_bytes();
    let sidecar_len = sidecar_bytes.len() as u64;
    StandaloneEcGeneratedSidecar {
        sidecar_bytes,
        file_size,
        shards,
        overhead_pct,
        sidecar_len,
    }
}

/// Generate a sidecar for `path` using the standard `AEROCORRECT_WINDOW_SIZE` window.
///
/// Thin wrapper around `generate_sidecar_for_file_capped_windowed`; all arguments
/// are forwarded unchanged.
pub(crate) fn generate_sidecar_for_file_capped(
    rel_path: &str,
    path: &Path,
    pct: u32,
    max_file_size: u64,
) -> Result<StandaloneEcGenerateResult, String> {
    let window = AEROCORRECT_WINDOW_SIZE;
    generate_sidecar_for_file_capped_windowed(rel_path, path, pct, max_file_size, window)
}

/// Build a deterministic `.aerocorrect` sidecar for `path`, reading the file one
/// window at a time.
///
/// Files longer than `max_file_size` are not read; the call reports
/// `SkippedTooLarge` instead. The relative path is accepted for caller diagnostics
/// only; the sidecar binds solely to content SHA-256.
///
/// # Errors
///
/// Returns a message when the metadata lookup, the open, or any window read fails,
/// or when a window length does not fit in `usize`.
fn generate_sidecar_for_file_capped_windowed(
    _rel_path: &str,
    path: &Path,
    pct: u32,
    max_file_size: u64,
    window: u64,
) -> Result<StandaloneEcGenerateResult, String> {
    let file_size = match std::fs::metadata(path) {
        Ok(meta) => meta.len(),
        Err(e) => return Err(format!("read metadata for {}: {e}", path.display())),
    };
    if file_size > max_file_size {
        let skipped = StandaloneEcGenerateResult::SkippedTooLarge {
            file_size,
            max_file_size,
        };
        return Ok(skipped);
    }

    let mut source = match File::open(path) {
        Ok(handle) => handle,
        Err(e) => return Err(format!("open {}: {e}", path.display())),
    };
    let (k, p) = error_correction_grid(pct);
    let mut content_hasher = Sha256::new();
    let mut segments = Vec::new();
    let mut shard_total = 0u64;
    let mut avec_total = 0u64;

    for (window_offset, window_len) in aerocorrect_windows(file_size, window) {
        let byte_count = match usize::try_from(window_len) {
            Ok(count) => count,
            Err(_) => return Err(format!("window length {window_len} exceeds usize")),
        };
        let mut window_bytes = vec![0u8; byte_count];
        if let Err(e) = source.read_exact(&mut window_bytes) {
            return Err(format!(
                "read Error Correction source window at {window_offset} (+{window_len}) of {}: {e}",
                path.display()
            ));
        }
        // The content hash is fed window by window, in file order.
        content_hasher.update(&window_bytes);
        let (avec_bytes, shards, _protected, _overhead) =
            compute_error_correction_shards_grid(&[&window_bytes], k, p);
        shard_total += shards;
        avec_total += avec_bytes.len() as u64;
        segments.push(AeroCorrectSegment {
            window_offset,
            window_len,
            avec_bytes,
        });
    }

    let sidecar = AeroCorrectSidecar::new(finalize_sha256(content_hasher), file_size, segments);
    let generated = generated_from(sidecar, file_size, shard_total, avec_total);
    Ok(StandaloneEcGenerateResult::Generated(generated))
}

/// Check that an opened sidecar is usable for the file at `path` and return the
/// content SHA-256 it records.
///
/// Three checks run in order: the reader's binding check against its own recorded
/// hash, equality of the recorded total length with the file's current length, and
/// the window tiling check over the recorded segments.
///
/// # Errors
///
/// Returns a message mentioning `rel_path` (or `path` for the stat failure) for the
/// first check that fails.
fn validate_reader_for_path(
    rel_path: &str,
    path: &Path,
    reader: &AeroCorrectSidecarReader,
) -> Result<[u8; 32], String> {
    let expected = reader.content_sha256;
    if let Err(e) = reader.verify_binding(&expected) {
        return Err(format!(
            "aerocorrect sidecar for {rel_path} is internally inconsistent: {e}"
        ));
    }

    let file_size = match std::fs::metadata(path) {
        Ok(meta) => meta.len(),
        Err(e) => return Err(format!("stat {}: {e}", path.display())),
    };
    if reader.total_len != file_size {
        return Err(format!(
            "aerocorrect sidecar total length {} != file length {file_size} for {rel_path}",
            reader.total_len
        ));
    }

    match validate_window_tiling_iter(
        reader
            .segments()
            .iter()
            .map(|seg| (seg.window_offset, seg.window_len)),
        file_size,
    ) {
        Ok(_) => Ok(expected),
        Err(e) => Err(format!("aerocorrect sidecar for {rel_path}: {e}")),
    }
}

/// Check a standalone file against its on-disk `.aerocorrect` sidecar without
/// modifying anything.
///
/// Returns `Verified` when the file's streamed SHA-256 equals the hash recorded in
/// the sidecar and `NeedsRepair` otherwise.
///
/// # Errors
///
/// Returns a message when the sidecar cannot be opened, fails validation against
/// the file, or the file cannot be hashed.
pub(crate) fn verify_standalone_file_streamed(
    rel_path: &str,
    path: &Path,
    sidecar_path: &Path,
) -> Result<StandaloneVerifyResult, String> {
    let reader = AeroCorrectSidecarReader::open(sidecar_path)?;
    let expected = validate_reader_for_path(rel_path, path, &reader)?;
    let actual = hash_file_streaming(path)?;
    let verdict = if actual == expected {
        StandaloneVerifyResult::Verified
    } else {
        StandaloneVerifyResult::NeedsRepair
    };
    Ok(verdict)
}

/// Repair a standalone file from an on-disk `.aerocorrect` sidecar.
///
/// The sidecar's content SHA-256 is the expected good hash. If the file already
/// hashes to it, `Verified` is returned and nothing is written. Otherwise each
/// window is read, passed through reconstruction, and written to a temporary file
/// created next to the target. The original file is replaced only after every
/// window has been repaired, the full repaired stream hashes back to that expected
/// value, and the temporary file has been synced to disk.
///
/// # Errors
///
/// Returns a message when the sidecar cannot be opened or validated, when any
/// read, reconstruction, write, flush, or sync step fails, when the repaired stream
/// does not hash to the expected value, or when the final replace fails. In all of
/// these cases the original file is left as it was.
pub(crate) fn verify_repair_standalone_file_streamed(
    rel_path: &str,
    path: &Path,
    sidecar_path: &Path,
) -> Result<StandaloneEcRepairResult, String> {
    let mut reader = AeroCorrectSidecarReader::open(sidecar_path)?;
    let expected = validate_reader_for_path(rel_path, path, &reader)?;
    if hash_file_streaming(path)? == expected {
        return Ok(StandaloneEcRepairResult::Verified);
    }

    // `Path::parent` yields `Some("")` for a bare relative file name, so an empty
    // parent is mapped to "." explicitly; this keeps diagnostics readable.
    let parent = path
        .parent()
        .filter(|dir| !dir.as_os_str().is_empty())
        .unwrap_or_else(|| Path::new("."));
    // The temp file lives in the target's directory so the final persist is a
    // same-directory rename.
    let tmp = tempfile::NamedTempFile::new_in(parent).map_err(|e| {
        format!(
            "create Error Correction repair temp in {}: {e}",
            parent.display()
        )
    })?;
    let mut src = File::open(path)
        .map_err(|e| format!("open Error Correction target {}: {e}", path.display()))?;
    let mut hasher = Sha256::new();
    let mut recovered_shards = 0usize;

    {
        let mut out = std::io::BufWriter::new(tmp.as_file());
        let segment_count = reader.segments().len();
        for idx in 0..segment_count {
            let window_len = usize::try_from(reader.segments()[idx].window_len)
                .map_err(|_| format!("window {idx} length exceeds usize"))?;
            let avec = reader.read_segment_avec(idx)?;
            let mut buf = vec![0u8; window_len];
            src.read_exact(&mut buf).map_err(|e| {
                format!(
                    "read Error Correction target window {idx} of {}: {e}",
                    path.display()
                )
            })?;
            let mut blocks = vec![buf];
            recovered_shards += reconstruct_from_error_correction(&mut blocks, &avec)?;
            // Hash exactly what is written so the post-repair check covers the output.
            hasher.update(&blocks[0]);
            out.write_all(&blocks[0]).map_err(|e| {
                format!("write Error Correction repair temp {}: {e}", path.display())
            })?;
        }
        out.flush()
            .map_err(|e| format!("flush Error Correction repair temp {}: {e}", path.display()))?;
    }

    if finalize_sha256(hasher) != expected {
        // Dropping `tmp` on return discards the failed repair attempt.
        return Err("Error Correction repair failed post-repair SHA-256 verification".to_string());
    }
    // Best effort: carry the original file's permissions over to the replacement.
    if let Ok(meta) = std::fs::metadata(path) {
        let _ = std::fs::set_permissions(tmp.path(), meta.permissions());
    }
    // Make the repaired bytes durable before they take the original's place, so a
    // crash right after the rename cannot leave a truncated or empty target.
    tmp.as_file()
        .sync_all()
        .map_err(|e| format!("sync Error Correction repair temp {}: {e}", path.display()))?;
    tmp.persist(path).map_err(|e| {
        format!(
            "persist repaired Error Correction target {}: {e}",
            path.display()
        )
    })?;
    Ok(StandaloneEcRepairResult::Repaired { recovered_shards })
}

#[cfg(test)]
mod tests {
    use super::super::sidecar::estimate_windowed_sidecar_len;
    use super::*;

    /// Produce `len` reproducible pseudo-random bytes by repeatedly hashing a fixed
    /// seed with BLAKE3 and concatenating the digests.
    fn sample_data(len: usize) -> Vec<u8> {
        let mut seed = *blake3::hash(b"aerovault-standalone-ec-seed").as_bytes();
        let mut out = Vec::with_capacity(len);
        while out.len() < len {
            seed = *blake3::hash(&seed).as_bytes();
            out.extend_from_slice(&seed);
        }
        // The last digest may overshoot; trim to the requested length.
        out.truncate(len);
        out
    }

    /// Generates a sidecar with a small window so the file spans several windows,
    /// corrupts bytes at several offsets, and checks that streamed repair restores
    /// the original content and that a subsequent verify passes.
    #[test]
    fn generation_repairs_multiple_windows_streaming() {
        let window = 40_000u64;
        let data = sample_data(135_000);
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("payload.bin");
        let sidecar_path = dir.path().join("payload.bin.aerocorrect");
        std::fs::write(&path, &data).unwrap();

        let generated = match generate_sidecar_for_file_capped_windowed(
            "payload.bin",
            &path,
            20,
            STANDALONE_EC_MAX_FILE_SIZE,
            window,
        )
        .unwrap()
        {
            StandaloneEcGenerateResult::Generated(g) => g,
            other => panic!("should generate, got {other:?}"),
        };
        // The generated length must agree with the size estimator for the same inputs.
        assert_eq!(
            generated.sidecar_len,
            estimate_windowed_sidecar_len(data.len() as u64, 20, window)
        );
        assert!(generated.sidecar_len > data.len() as u64 / 10);
        std::fs::write(&sidecar_path, &generated.sidecar_bytes).unwrap();

        // Flip one byte at each offset; presumably one per 40_000-byte window,
        // assuming the windows tile the file contiguously from offset 0.
        let mut corrupt = data.clone();
        for off in [5_000usize, 50_000, 100_000, 130_000] {
            corrupt[off] ^= 0xA5;
        }
        std::fs::write(&path, &corrupt).unwrap();

        let result = verify_repair_standalone_file_streamed("payload.bin", &path, &sidecar_path)
            .expect("streamed repair should succeed");
        assert!(matches!(result, StandaloneEcRepairResult::Repaired { .. }));
        assert_eq!(std::fs::read(&path).unwrap(), data);

        assert_eq!(
            verify_standalone_file_streamed("payload.bin", &path, &sidecar_path).unwrap(),
            StandaloneVerifyResult::Verified
        );
    }

    /// Applies a sidecar generated for a different file of the same length and
    /// checks that repair is rejected by the post-repair hash check while the
    /// target file stays byte-for-byte unchanged.
    #[test]
    fn foreign_sidecar_repair_fails_closed() {
        let data = sample_data(80_000);
        // Same length as `data` but every byte differs, so the length check passes
        // and only the content hash can tell the files apart.
        let other = sample_data(80_000)
            .into_iter()
            .map(|b| b ^ 0x33)
            .collect::<Vec<_>>();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("payload.bin");
        let other_path = dir.path().join("other.bin");
        let sidecar_path = dir.path().join("other.bin.aerocorrect");
        std::fs::write(&path, &data).unwrap();
        std::fs::write(&other_path, &other).unwrap();

        let generated = match generate_sidecar_for_file_capped(
            "other.bin",
            &other_path,
            20,
            STANDALONE_EC_MAX_FILE_SIZE,
        )
        .unwrap()
        {
            StandaloneEcGenerateResult::Generated(g) => g,
            other => panic!("should generate, got {other:?}"),
        };
        std::fs::write(&sidecar_path, &generated.sidecar_bytes).unwrap();

        // Snapshot the target so the "left untouched" claim can be asserted.
        let before = std::fs::read(&path).unwrap();
        let err = verify_repair_standalone_file_streamed("payload.bin", &path, &sidecar_path)
            .expect_err("foreign sidecar must fail closed");
        assert!(err.contains("post-repair SHA-256"));
        assert_eq!(std::fs::read(&path).unwrap(), before);
    }
}