seedfaker-core 0.1.0-alpha.2

Core library for seedfaker — deterministic synthetic generator for realistic, correlated, and noisy test records
Documentation
// SplitMix64 — fast, deterministic, sufficient for synthetic data.
// Passes BigCrush. Seeding is ~2ns vs ~120ns for ChaCha12.
// Algorithm: https://prng.di.unimi.it/splitmix64.c

/// FNV-1a hash.
fn mix(mut h: u64, bytes: &[u8]) -> u64 {
    for &b in bytes {
        h ^= b as u64;
        h = h.wrapping_mul(0x0100_0000_01b3);
    }
    h
}

/// Pre-compute domain hash: `mix(master, domain_bytes)`. Call once per field at startup.
pub fn domain_hash(master: u64, domain: &str) -> u64 {
    mix(master, domain.as_bytes())
}

/// Derive a sub-seed from pre-computed domain hash and record number.
/// Only mixes 8 bytes per call instead of N domain bytes.
pub fn sub_seed(dh: u64, record: u64) -> u64 {
    mix(dh, &record.to_le_bytes())
}

/// Cryptographically random seed from OS entropy.
pub fn random_seed() -> u64 {
    let mut buf = [0u8; 8];
    if getrandom::getrandom(&mut buf).is_err() {
        // Early boot: getrandom may fail before urandom is ready
        let t = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(42u64, |d| d.as_nanos() as u64);
        buf = t.to_le_bytes();
    }
    u64::from_le_bytes(buf)
}

pub struct Rng {
    state: u64,
    record: u64,
}

impl Rng {
    pub fn new(seed: u64) -> Self {
        Self { state: seed, record: 0 }
    }

    pub fn derive(master: u64, record: u64, domain: &str) -> Self {
        Self { state: sub_seed(domain_hash(master, domain), record), record }
    }

    /// Fast derive from pre-computed domain hash. Use in hot loops.
    pub fn derive_fast(dh: u64, record: u64) -> Self {
        Self { state: sub_seed(dh, record), record }
    }

    /// Current record number (0-based).
    pub fn record(&self) -> u64 {
        self.record
    }

    /// Set the record number (used by library bindings).
    pub fn set_record(&mut self, r: u64) {
        self.record = r;
    }

    #[inline]
    fn next_u64(&mut self) -> u64 {
        self.state = self.state.wrapping_add(0x9E37_79B9_7F4A_7C15);
        let mut z = self.state;
        z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
        z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
        z ^ (z >> 31)
    }

    /// Uniform random in `[0, n)`. Lemire's nearly divisionless method.
    #[inline]
    fn bounded(&mut self, n: u64) -> u64 {
        let mut x = self.next_u64();
        let mut m = (x as u128).wrapping_mul(n as u128);
        let mut l = m as u64;
        if l < n {
            let t = n.wrapping_neg() % n;
            while l < t {
                x = self.next_u64();
                m = (x as u128).wrapping_mul(n as u128);
                l = m as u64;
            }
        }
        (m >> 64) as u64
    }

    pub fn choice<'a, T>(&mut self, items: &'a [T]) -> &'a T {
        &items[self.bounded(items.len() as u64) as usize]
    }

    pub fn maybe(&mut self, p: f64) -> bool {
        ((self.next_u64() >> 11) as f64 / (1u64 << 53) as f64) < p
    }

    pub fn range(&mut self, lo: i64, hi: i64) -> i64 {
        let span = (hi - lo + 1) as u64;
        // bounded(span) < span ≤ i64::MAX, so this cast is always safe
        #[allow(clippy::cast_possible_wrap)]
        let offset = self.bounded(span) as i64;
        lo + offset
    }

    pub fn urange(&mut self, lo: usize, hi: usize) -> usize {
        let span = (hi - lo + 1) as u64;
        lo + self.bounded(span) as usize
    }

    pub fn digits(&mut self, n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_digits(&mut s, n);
        s
    }

    pub fn alnum(&mut self, n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_alnum(&mut s, n);
        s
    }

    pub fn hex_str(&mut self, n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_hex(&mut s, n);
        s
    }

    pub fn lower(&mut self, n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_lower(&mut s, n);
        s
    }

    pub fn lower_digit(&mut self, n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_lower_digit(&mut s, n);
        s
    }

    pub fn upper(&mut self, n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_upper(&mut s, n);
        s
    }

    pub fn upper_digit(&mut self, n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_upper_digit(&mut s, n);
        s
    }

    pub fn charset_string(&mut self, charset: &[u8], n: usize) -> String {
        let mut s = String::with_capacity(n);
        self.push_charset(&mut s, charset, n);
        s
    }

    pub fn push_digits(&mut self, buf: &mut String, n: usize) {
        buf.reserve(n);
        for _ in 0..n {
            buf.push((b'0' + self.bounded(10) as u8) as char);
        }
    }

    pub fn push_hex(&mut self, buf: &mut String, n: usize) {
        const HEX: &[u8] = b"0123456789abcdef";
        buf.reserve(n);
        for _ in 0..n {
            buf.push(HEX[self.bounded(16) as usize] as char);
        }
    }

    pub fn push_lower(&mut self, buf: &mut String, n: usize) {
        buf.reserve(n);
        for _ in 0..n {
            buf.push((b'a' + self.bounded(26) as u8) as char);
        }
    }

    pub fn push_alnum(&mut self, buf: &mut String, n: usize) {
        const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
        buf.reserve(n);
        for _ in 0..n {
            buf.push(CHARS[self.bounded(CHARS.len() as u64) as usize] as char);
        }
    }

    pub fn push_upper(&mut self, buf: &mut String, n: usize) {
        buf.reserve(n);
        for _ in 0..n {
            buf.push((b'A' + self.bounded(26) as u8) as char);
        }
    }

    pub fn push_upper_digit(&mut self, buf: &mut String, n: usize) {
        const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        buf.reserve(n);
        for _ in 0..n {
            buf.push(CHARS[self.bounded(CHARS.len() as u64) as usize] as char);
        }
    }

    pub fn push_lower_digit(&mut self, buf: &mut String, n: usize) {
        const CHARS: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
        buf.reserve(n);
        for _ in 0..n {
            buf.push(CHARS[self.bounded(CHARS.len() as u64) as usize] as char);
        }
    }

    pub fn push_charset(&mut self, buf: &mut String, charset: &[u8], n: usize) {
        buf.reserve(n);
        for _ in 0..n {
            buf.push(charset[self.bounded(charset.len() as u64) as usize] as char);
        }
    }

    pub fn sample<T: Clone>(&mut self, items: &[T], k: usize) -> Vec<T> {
        let mut indices: Vec<usize> = (0..items.len()).collect();
        let k = k.min(items.len());
        for i in 0..k {
            let j = i + self.bounded((items.len() - i) as u64) as usize;
            indices.swap(i, j);
        }
        indices[..k].iter().map(|&i| items[i].clone()).collect()
    }
}