lcpfs 2026.1.102

// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0
//
// Galois Field Arithmetic
// AVX-512 optimized GF(2^8) operations for erasure coding.

// STATUS: CLEAN - Assembly extracted to arch/x86_64/gf_avx512.asm
//
// Note: External assembly is available in gf_avx512.asm for when the build
// system is configured to link it. Currently uses optimized pure Rust.

/// Primitive polynomial for GF(2^8): x^8 + x^4 + x^3 + x^2 + 1 = 0x11D
pub const GF_PRIMITIVE_POLY: u16 = 0x11D;

// ═══════════════════════════════════════════════════════════════════════════════
// TABLE-BASED GALOIS FIELD (for repeated operations)
// ═══════════════════════════════════════════════════════════════════════════════

/// Table-based GF(2^8) implementation for high-performance erasure coding.
///
/// Uses log/exp tables for O(1) multiplication and division after initialization.
/// This is the preferred implementation for Reed-Solomon encoding/decoding.
#[derive(Clone)]
pub struct GaloisField {
    /// Exponential table (gf_exp): exp_table[log_a + log_b] = a * b
    exp_table: [u8; 512],
    /// Logarithm table (gf_log): log_table[a] = discrete log of a
    log_table: [u8; 256],
}

impl Default for GaloisField {
    fn default() -> Self {
        Self::new()
    }
}

impl GaloisField {
    /// Create GF(2^8) with primitive polynomial 0x11D (x^8 + x^4 + x^3 + x^2 + 1).
    ///
    /// Initializes log/exp lookup tables for fast field operations.
    pub fn new() -> Self {
        let mut exp_table = [0u8; 512];
        let mut log_table = [0u8; 256];

        let mut x = 1u16;
        for (i, exp) in exp_table.iter_mut().take(255).enumerate() {
            *exp = x as u8;
            log_table[x as usize] = i as u8;

            // Multiply by generator (2)
            x <<= 1;
            if x & 0x100 != 0 {
                x ^= GF_PRIMITIVE_POLY;
            }
        }

        // Duplicate for wraparound (handles log_a + log_b > 255)
        for i in 255..512 {
            exp_table[i] = exp_table[i - 255];
        }

        Self {
            exp_table,
            log_table,
        }
    }

    /// Multiply two elements in GF(2^8).
    ///
    /// Uses log/exp tables: a * b = exp(log(a) + log(b))
    #[inline]
    pub fn mul(&self, a: u8, b: u8) -> u8 {
        if a == 0 || b == 0 {
            return 0;
        }
        let log_sum = self.log_table[a as usize] as usize + self.log_table[b as usize] as usize;
        self.exp_table[log_sum]
    }

    /// Divide two elements in GF(2^8).
    ///
    /// Uses log/exp tables: a / b = exp(log(a) - log(b))
    ///
    /// # Panics
    ///
    /// Panics if `b == 0` (division by zero).
    #[inline]
    pub fn div(&self, a: u8, b: u8) -> u8 {
        if a == 0 {
            return 0;
        }
        if b == 0 {
            panic!("Division by zero in GF(2^8)");
        }
        let log_diff =
            255 + self.log_table[a as usize] as usize - self.log_table[b as usize] as usize;
        self.exp_table[log_diff]
    }

    /// Raise element to power in GF(2^8).
    ///
    /// Uses log/exp tables: a^n = exp(n * log(a))
    #[inline]
    pub fn pow(&self, a: u8, n: usize) -> u8 {
        if a == 0 {
            return 0;
        }
        let log_prod = (self.log_table[a as usize] as usize * n) % 255;
        self.exp_table[log_prod]
    }

    /// Get multiplicative inverse: a^(-1) such that a * a^(-1) = 1.
    ///
    /// # Panics
    ///
    /// Panics if `a == 0` (zero has no inverse).
    #[inline]
    pub fn inv(&self, a: u8) -> u8 {
        if a == 0 {
            panic!("Zero has no inverse in GF(2^8)");
        }
        // a^(-1) = a^254 in GF(2^8) since a^255 = 1
        self.exp_table[255 - self.log_table[a as usize] as usize]
    }

    /// Add two elements in GF(2^8) (XOR).
    #[inline]
    pub fn add(&self, a: u8, b: u8) -> u8 {
        a ^ b
    }

    /// Subtract two elements in GF(2^8) (same as add, since -1 = 1 in GF(2)).
    #[inline]
    pub fn sub(&self, a: u8, b: u8) -> u8 {
        a ^ b
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// SCALAR GALOIS FIELD (for one-off operations, no tables needed)
// ═══════════════════════════════════════════════════════════════════════════════

/// Galois Field arithmetic implementation (scalar, no lookup tables).
///
/// Use this for one-off operations where table initialization overhead
/// is not justified. For repeated operations, use [`GaloisField`] instead.
pub struct GfAlgo;

impl GfAlgo {
    /// Scalar GF(2^8) multiplication with primitive polynomial 0x11D.
    /// Used for metadata operations and scalar cleanup.
    #[inline]
    pub fn multiply(a: u8, b: u8) -> u8 {
        let mut p: u16 = 0;
        let mut a_copy = a as u16;
        let mut b_copy = b as u16;

        for _ in 0..8 {
            if (a_copy & 1) != 0 {
                p ^= b_copy;
            }
            a_copy >>= 1;
            let carry = (b_copy & 0x80) != 0;
            b_copy <<= 1;
            if carry {
                b_copy ^= 0x11D; // Primitive polynomial for GF(2^8)
            }
        }
        p as u8
    }

    /// Vectorized XOR (RAID-Z1).
    /// Uses optimized word-sized operations for better performance.
    ///
    /// # Safety
    ///
    /// SAFETY INVARIANTS:
    /// 1. dest and src slices are valid and point to allocated memory
    /// 2. dest is writable (mutable borrow)
    /// 3. No aliasing: dest and src do not overlap
    /// 4. Pointer arithmetic (ptr.add(i)) stays within slice bounds
    /// 5. Loop index i never exceeds min(dest.len(), src.len())
    /// 6. Word-aligned reads/writes use read_unaligned/write_unaligned
    ///
    /// VERIFICATION: TODO - Prove loop bounds prevent buffer overflow
    ///
    /// JUSTIFICATION:
    /// RAID-Z1 parity calculation requires XOR of data columns. Word-sized
    /// operations (u64) provide 8x performance vs byte-by-byte. Unsafe needed
    /// for raw pointer arithmetic. Safe fallback (xor_vectors_safe) available.
    #[inline]
    pub unsafe fn xor_vectors(dest: &mut [u8], src: &[u8]) {
        let len = core::cmp::min(dest.len(), src.len());
        if len == 0 {
            return;
        }

        let dest_ptr = dest.as_mut_ptr();
        let src_ptr = src.as_ptr();
        let mut i = 0;

        // Process 8 bytes at a time using u64
        unsafe {
            while i + 8 <= len {
                let d = (dest_ptr.add(i) as *mut u64).read_unaligned();
                let s = (src_ptr.add(i) as *const u64).read_unaligned();
                (dest_ptr.add(i) as *mut u64).write_unaligned(d ^ s);
                i += 8;
            }

            // Byte cleanup for remaining bytes
            while i < len {
                *dest_ptr.add(i) ^= *src_ptr.add(i);
                i += 1;
            }
        }
    }

    /// Vectorized GF Multiplication + XOR (RAID-Z2/Z3).
    /// Computes: `dest[i] ^= gf_multiply(src[i], coefficient)`
    ///
    /// # Safety
    ///
    /// SAFETY INVARIANTS:
    /// 1. dest and src slices are valid and point to allocated memory
    /// 2. dest is writable (mutable borrow)
    /// 3. No aliasing: dest and src do not overlap
    /// 4. Pointer arithmetic (ptr.add(i)) stays within slice bounds
    /// 5. Loop index i never exceeds min(dest.len(), src.len())
    /// 6. GF(2^8) multiplication produces valid u8 values
    ///
    /// VERIFICATION: TODO - Prove Galois field arithmetic correctness
    ///
    /// JUSTIFICATION:
    /// RAID-Z2/Z3 requires Reed-Solomon erasure coding over GF(2^8). Each
    /// byte multiplied by parity coefficient, then XORed. Unsafe needed for
    /// raw pointer arithmetic. Safe fallback (mul_xor_vectors_safe) available.
    #[inline]
    pub unsafe fn mul_xor_vectors(dest: &mut [u8], src: &[u8], c: u8) {
        if c == 0 {
            return;
        }

        let len = core::cmp::min(dest.len(), src.len());
        if len == 0 {
            return;
        }

        if c == 1 {
            // When coefficient is 1, GF multiplication is identity
            unsafe {
                Self::xor_vectors(dest, src);
            }
            return;
        }

        // For non-trivial coefficients, use scalar GF multiplication
        unsafe {
            let dest_ptr = dest.as_mut_ptr();
            let src_ptr = src.as_ptr();
            for i in 0..len {
                *dest_ptr.add(i) ^= Self::multiply(*src_ptr.add(i), c);
            }
        }
    }

    /// Pure Rust fallback XOR (safe version)
    #[inline]
    pub fn xor_vectors_safe(dest: &mut [u8], src: &[u8]) {
        let len = core::cmp::min(dest.len(), src.len());
        for i in 0..len {
            dest[i] ^= src[i];
        }
    }

    /// Pure Rust fallback GF multiply-XOR (safe version)
    #[inline]
    pub fn mul_xor_vectors_safe(dest: &mut [u8], src: &[u8], c: u8) {
        if c == 0 {
            return;
        }
        if c == 1 {
            Self::xor_vectors_safe(dest, src);
            return;
        }

        let len = core::cmp::min(dest.len(), src.len());
        for i in 0..len {
            dest[i] ^= Self::multiply(src[i], c);
        }
    }
}