nitro-hash 1.5.0

A hasing algorithm written in rust that cares about performance.
Documentation
//! # nitro_hash
//!
//! A lightweight algebraic fingerprinting / hash system based on
//! polynomial interpolation over a finite field.
//!
//! ## Overview
//!
//! This crate builds a configurable hash by:
//! - Interpreting input as field elements
//! - Constructing a randomized polynomial
//! - Evaluating via Lagrange interpolation
//! - Encoding outputs as bytes
//!
//! ## Output sizes
//!
//! Each configuration uses `K` evaluation points:
//!
//! - 2 → 122-bit hash
//! - 3 → 183-bit hash
//! - 4 → 244-bit hash
//! - 5 → 305-bit hash
//!

#[derive(Copy, Clone)]
/// Configuration for the hashing system.
///
/// Contains the modulus used for finite field arithmetic.
/// The default is a 61-bit Mersenne prime.
pub struct HashConfig {
    /// Prime modulus used for all field operations.
    pub salt: u64,
    p: u64
}

/// Converts a vector of `u64` field elements into raw bytes.
///
/// Each `u64` is encoded in little-endian format (8 bytes).
///
/// # Arguments
/// - `data`: field elements to serialize
///
/// # Returns
/// A byte vector of length `8 * data.len()`.
pub fn to_vec_u8(data: &[u64]) -> Vec<u8> {
    let mut v: Vec<u8> = vec![];

    for val in data {
        v.extend_from_slice(&val.to_le_bytes());
    }

    v
}

/// SplitMix64 PRNG.
///
/// Used to generate pseudo-random evaluation points for interpolation.
///
/// This is a fast, high-quality integer mixer.
fn splitmix64(mut x: u64) -> u64 {
    x = x.wrapping_add(0x9e3779b97f4a7c15);
    x = (x ^ (x >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
    x = (x ^ (x >> 27)).wrapping_mul(0x94d049bb133111eb);
    x ^ (x >> 31)
}

/// Modular exponentiation: computes `a^e mod m`.
///
/// Used internally for computing modular inverses via Fermat's little theorem.
///
/// # Panics
/// None (safe under overflow using `u128` intermediates).
fn pow_mod(mut a: u64, mut e: u64, m: u64) -> u64 {
    let mut r: u64 = 1;

    while e > 0 {
        if e & 1 == 1 {
            r = ((r as u128 * a as u128) % m as u128) as u64;
        }
        a = ((a as u128 * a as u128) % m as u128) as u64;
        e >>= 1;
    }

    r
}

/// Computes modular multiplicative inverse in a prime field.
///
/// Uses Fermat's little theorem:
/// ```text
/// a⁻¹ ≡ a^(p−2) mod p
/// ```
fn inverse_mod(a: u64, p: u64) -> u64 {
    pow_mod(a, p - 2, p)
}

fn seed_from_data(data: &[u64], salt: u64) -> u64 {
    let mut seed: u64 = 0x9e3779b97f4a7c15 ^ salt; // golden ratio constant

    for &x in data {
        seed = seed.wrapping_add(x);
        seed = splitmix64(seed);
    }

    seed
}

/// Performs Lagrange interpolation over a finite field.
///
/// Evaluates the interpolating polynomial defined by `points`
/// at multiple evaluation points.
///
/// # Arguments
/// - `points`: (x, y) samples defining the polynomial
/// - `x`: evaluation points
/// - `k`: number of evaluation outputs used
/// - `p`: modulus of the finite field
///
/// # Returns
/// A vector of evaluated polynomial values in `[0, p)`.
fn interpolate(
    points: Vec<(u64, u64)>,
    x: [u64; 5],
    k: u8,
    p: u64
) -> [u64; 5] {
    let mut evals: [u64; 5] = [0; 5];

    for (idx, x_val) in x.iter().enumerate() {
        if idx > k as usize {
            break;
        }

        let mut eval: u128 = 0;

        for point in points.iter() {
            let mut basis: u128 = point.1 as u128;

            for point_ in points.iter() {
                if point_ == point {
                    continue;
                }

                basis *= ((p as i128 + *x_val as i128 - point.0 as i128) % p as i128) as u128;
                basis %= p as u128;

                basis *= inverse_mod(
                    ((point_.0 as i128 - point.0 as i128) % p as i128) as u64,
                    p
                ) as u128;
                basis %= p as u128;
            }

            eval = (eval + basis) % p as u128;
        }

        evals[idx] = eval as u64;
    }

    evals
}

/// Converts input data into `(index, value)` pairs.
///
/// Each `u64` is treated as a coefficient in the interpolation domain.
fn x_vals(data: &Vec<u64>) -> Vec<(u64, u64)> {
    data.iter()
        .enumerate()
        .map(|(i, y)| (i as u64, *y))
        .collect()
}

/// Generates deterministic pseudo-random evaluation points.
///
/// The points depend on:
/// - input data
/// - hash strength `K`
/// - modulus `p`
fn x_vec(data: &Vec<u64>, k: u8, p: u64, salt: u64) -> [u64; 5] {
    let seed = seed_from_data(&data[0..], salt);
    let mut out: [u64; 5] = [0; 5];

    out[0] = splitmix64(seed) % p;
    out[1] = splitmix64(out[0]) % p;
    out[2] = splitmix64(out[1]) % p;

    if k >= 4 {out[3] = splitmix64(out[2]) % p;}
    if k == 5 {out[4] = splitmix64(out[3]) % p;}

    out
}

/// Converts a 32-byte array into a hex string.
pub fn hex(bytes: Vec<u8>) -> String {
    bytes.iter().map(|b| format!("{:02x}", b)).collect()
}

/// Converts a string into a vector of `u64` blocks.
///
/// Each block contains up to 8 bytes in little-endian format.
pub fn str_to_vec_i64(s: &str) -> Vec<u64> {
    let bytes = s.as_bytes();
    let mut out = Vec::with_capacity((bytes.len() + 7) / 8);

    let mut i = 0;
    while i < bytes.len() {
        let mut arr = [0u8; 8];
        let end = (i + 8).min(bytes.len());

        arr[..end - i].copy_from_slice(&bytes[i..end]);

        out.push(u64::from_le_bytes(arr));
        i += 8;
    }

    out
}

impl HashConfig {
    /// Internal generic hash function.
    ///
    /// Computes a polynomial fingerprint of the input using `K` evaluation points.
    ///
    /// # Note
    /// Returns raw byte representation of field outputs.
    pub fn _hash<const K: usize>(self, s: &Vec<u64>) -> [u64; 5] {
        interpolate(
            x_vals(s),
            x_vec(s, K as u8, self.p, self.salt),
            K as u8,
            self.p
        )
    }

    /// 122-bit hash (K = 2 evaluations & very small entropy for real usage)
    pub fn hash122(self, s: &Vec<u64>) -> Vec<u8> {
        to_vec_u8(&self._hash::<4>(s)[0..2])
    }

    /// 183-bit hash (K = 3 evaluations & recommended using for better performance)
    pub fn hash183(self, s: &Vec<u64>) -> Vec<u8> {
        to_vec_u8(&self._hash::<4>(s)[0..3])
    }

    /// 244-bit hash (K = 4 evaluations & recommended for most usages)
    pub fn hash244(self, s: &Vec<u64>) -> Vec<u8> {
        to_vec_u8(&self._hash::<4>(s)[0..4])
    }

    /// 305-bit hash (K = 5 evaluations & not recommended for most usages)
    pub fn hash305(self, s: &Vec<u64>) -> Vec<u8> {
        to_vec_u8(&self._hash::<4>(s))
    }

    /// Creates a new hash configuration using a 61-bit Mersenne prime field.
    pub fn new(salt: u64) -> HashConfig {
        HashConfig {
            p: (1 << 61) - 1,
            salt
        }
    }
}