// lcpfs 2026.1.102
//
// LCP File System - A ZFS-inspired copy-on-write filesystem for Rust
// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0

//! # Checksum Computation
//!
//! This module provides cryptographic hash functions for data integrity
//! verification in LCPFS.
//!
//! ## Overview
//!
//! LCPFS uses BLAKE3 as its primary checksum algorithm. BLAKE3 provides:
//! - 256-bit security level
//! - ~3x faster than SHA-256 on modern CPUs
//! - SIMD-optimized implementations (AVX-512, AVX2, SSE4.1)
//! - Streaming and incremental hashing support
//!
//! ## Implementation Notes
//!
//! The checksum is stored as four 64-bit values for efficient comparison
//! and storage in on-disk structures.

use core::convert::TryInto;

/// 256-bit BLAKE3 checksum held as four 64-bit words.
///
/// [`Checksum::calculate`] fills `value` from the 32-byte BLAKE3 digest:
/// word `i` is the little-endian decoding of digest bytes `8 * i .. 8 * i + 8`.
/// [`Checksum::as_bytes`] performs the reverse conversion.
pub struct Checksum {
    /// The four 64-bit words of the hash, in digest order
    /// (`value[0]` covers the first eight digest bytes).
    pub value: [u64; 4], // 4 x 64 bits = 256-bit hash
}

/// Trait for types that can produce a checksum over their own structure.
///
/// NOTE(review): no implementers are visible in this part of the file, so
/// which fields feed the checksum and which algorithm is used are up to each
/// implementation — confirm against the implementing types.
pub trait StructuralIntegrity {
    /// Returns the structural checksum of `self` as a single 64-bit value.
    fn calculate_structural_checksum(&self) -> u64;
}

impl Checksum {
    /// Calculate BLAKE3 hash of data
    pub fn calculate(data: &[u8]) -> Self {
        let hash = blake3::hash(data);
        // SAFETY INVARIANT: BLAKE3 always produces exactly 32 bytes.
        // split_at(8) on 32 bytes produces: a=8, rest=24, b=8, rest=16, c=8, d=8.
        // Each chunk is guaranteed to be exactly 8 bytes.
        let bytes = hash.as_bytes();

        let (a, rest) = bytes.split_at(8);
        let (b, rest) = rest.split_at(8);
        let (c, d) = rest.split_at(8);

        // SAFETY: BLAKE3 produces exactly 32 bytes, split_at(8) guarantees 8-byte chunks
        debug_assert_eq!(a.len(), 8);
        debug_assert_eq!(b.len(), 8);
        debug_assert_eq!(c.len(), 8);
        debug_assert_eq!(d.len(), 8);

        let v0 = u64::from_le_bytes([a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]]);
        let v1 = u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]);
        let v2 = u64::from_le_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]]);
        let v3 = u64::from_le_bytes([d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]);

        Checksum {
            value: [v0, v1, v2, v3],
        }
    }

    /// Legacy compatibility
    pub fn calculate_fletcher4(data: &[u8]) -> Self {
        Self::calculate(data)
    }

    /// Check if checksum matches another checksum
    pub fn matches(&self, other: &[u64; 4]) -> bool {
        self.value == *other
    }

    /// Get first hash value
    pub fn first(&self) -> u64 {
        self.value.first().copied().unwrap_or(0)
    }

    /// Get second hash value
    pub fn second(&self) -> u64 {
        self.value.get(1).copied().unwrap_or(0)
    }

    /// Get third hash value
    pub fn third(&self) -> u64 {
        self.value.get(2).copied().unwrap_or(0)
    }

    /// Get fourth hash value
    pub fn fourth(&self) -> u64 {
        self.value.get(3).copied().unwrap_or(0)
    }

    /// Convert to u64 array (for storage in block pointers)
    pub fn to_u64_array(&self) -> [u64; 4] {
        self.value
    }

    /// Get raw hash bytes
    pub fn as_bytes(&self) -> [u8; 32] {
        let mut bytes = [0u8; 32];

        // Use split_at_mut to avoid direct indexing while remaining clean
        let (chunk0, rest) = bytes.split_at_mut(8);
        let (chunk1, rest) = rest.split_at_mut(8);
        let (chunk2, chunk3) = rest.split_at_mut(8);

        chunk0.copy_from_slice(&self.first().to_le_bytes());
        chunk1.copy_from_slice(&self.second().to_le_bytes());
        chunk2.copy_from_slice(&self.third().to_le_bytes());
        chunk3.copy_from_slice(&self.fourth().to_le_bytes());

        bytes
    }
}