prime-formula 0.3.1

High-performance prime number generation and constellation finding using novel wheel factorization
Documentation
//! Provides a sieve based on prime-formula
//!
//! Use for generating large numbers of primes in the range 2 to 10^18.
//! Engine uses a novel prime-formula to select prime candidates, and then
//! applies an optimized sieve algorithm to reject factors of primes using
//! the Periodic Table of Primes. Prime numbers up to 10^9 can be quickly
//! sieved using the formula, and for smaller windows factors up to 10^147
//! can be quickly found too.
//!
//! All primes found by this method have been exhaustively checked, and so
//! this method is not suitable for large primes unless a small window is
//! chosen.
//
// Copyright: Adam Cottrell (cottrela@gmail.com)

use std::collections::HashMap;

use bitvec::prelude::*;
use rayon::prelude::*;

use crate::common::{
    PRIME_ROOTS, align_to_cycle, find_inv_prime_root_vec, get_cyclic_composite_vec,
};

/// Error types for sieve operations
#[derive(Debug, PartialEq)]
pub enum SieveError {
    EmptyRange,
    InvalidSize,
}

/// Operational mode for the sieve algorithm
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum SieveMode {
    /// Novel sieve method - best for medium ranges
    Sieve,
    /// Windowed search - optimized for small number ranges
    Window,
    //     /// Use Miller-Rabin test with deterministic witnesses
    //     MillerRabinDeterministic,
    //     /// Use Miller-Rabin test with random witnesses
    //     MillerRabinProbabilistic,
}

/// Core sieve structure managing prime detection for a single prime root
///
/// Each instance handles numbers of the form: root + k×210
///
/// # Fields
/// - root: Base prime root value (one of PRIME_ROOTS)
/// - root_idx: Base prime root index (0..47)
/// - mode: Search strategy to use
/// - q_hat: Vector of inverse primes used in primal checks
/// - l: Vector of remainders used in primal checks
/// - composites: Bit vector tracking eliminated numbers
/// - b_start: Cycle aligned start index
/// - b_end: Cycle aligned end index
/// - i_limit: Maximum multiplier to check (sqrt(n)/210)
pub struct Sieve {
    root: u8,        // Absolute value of the prime root
    mode: SieveMode, // 1 = sieve, 2=window, 3=best-effort
    // Generated
    q_hat: Vec<u8>, // Inverse primes
    l: Vec<u8>,     // Cyclic composites
    composites: BitVec,
    b_start: u128,
    b_end: u128,
    i_limit: u128,
}

// The sieve implementation takes numbers from a given
// prime root in the form root + i * 210, and then
// sets bits in a bitvec to remove any that happen to
// be composite.
//
// The net effect is similar to the Sieve of Eratosthenes
// but the difference being the novel approach of using
// the prime roots instead of simply guessing the next
// prime number (like the Sieve of Eratosthenes proposes).
impl Sieve {
    /// Create a new Sieve instance for a prime root and number range
    ///
    /// # Arguments
    /// - root_idx: Index in PRIME_ROOTS array (0-47)
    /// - start: Inclusive lower bound of search range
    /// - end: Exclusive upper bound of search range
    /// - mode: Sieve strategy to employ
    ///
    /// # Errors
    /// Returns SieveError if range is invalid or too large
    pub fn new(root_idx: u8, start: u128, end: u128, mode: SieveMode) -> Result<Self, SieveError> {
        let root = PRIME_ROOTS[root_idx as usize];
        let (b_start, b_end) = align_to_cycle(start, end);
        if b_end <= b_start {
            return Err(SieveError::EmptyRange);
        }
        let storage_size = (b_end - b_start)
            .try_into()
            .map_err(|_| SieveError::InvalidSize)?;

        // Generate inverse prime root values and cyclic composite values
        let q_hat = find_inv_prime_root_vec(root_idx as usize);
        let l = get_cyclic_composite_vec(root_idx as usize, &q_hat);
        let mut composites = bitvec![0; storage_size];
        let i_limit = end.isqrt().div_ceil(210);

        // Poison first and/or last entries to keep composites aligned for all roots
        let (b_start_max, b_end_min) = Self::align_to_root(root, start, end);
        Self::poison_storage(
            &mut composites,
            (b_start_max - b_start) as usize,
            (b_end - b_end_min) as usize,
        );

        Ok(Self {
            root,
            mode,
            // Generated
            l,
            q_hat,
            composites,
            b_start,
            b_end,
            i_limit,
        })
    }

    fn align_to_root(root: u8, start: u128, end: u128) -> (u128, u128) {
        // Find aligned range for a given start/end window
        //
        // For any prime number (P):
        //    P = r + b * 210
        //    b = (P - r) / 210
        //
        // Where:
        //   r is the prime root (one of 48)
        //   b is the multiplier (1..N)
        let r = root as u128; // 2..211

        // Get ceil(b_start) and floor(b_end) to find the tightest range
        let b_start = (210 - 1 + start).saturating_sub(r) / 210; // Use integer ceil
        let b_end = (210 + end - r) / 210; // Use integer floor

        // If b_start == b_end, then there is nothing to do
        (b_start, b_end)
    }

    fn poison_storage(composites: &mut BitVec, num_start: usize, num_end: usize) {
        // Poison the start of range
        for i in 0..num_start {
            composites.set(i, true);
        }
        // Poison the end of range
        let sz = composites.len() - 1;
        for i in 0..num_end {
            composites.set(sz - i, true);
        }
    }

    /// Retrieve primes found by this sieve instance
    ///
    /// Returns numbers of the form root + k×210 that passed all composite checks.
    /// This function will not return primes below 11.
    ///
    /// Primes will be sorted, but by root.
    pub fn get_primes(&mut self) -> Vec<u128> {
        self.get_composites();

        self.hydrate_primes()
    }

    /// Count primes in range without storing their values
    ///
    /// More memory-efficient than get_primes() for large ranges
    pub fn count_primes(&mut self) -> usize {
        self.get_composites();

        self.count_primes_in_storage()
    }

    fn find_simple_composites(&mut self) {
        // Case 1: j=0 - find i directly
        //
        // Complexity O~(48m) where:
        //     m is window size (b_end - b_start)
        for (idx, root) in PRIME_ROOTS.iter().enumerate() {
            let q = *root as u128;
            let offset = self.l[idx] as u128 - 1;
            let i = (self.b_start + q - 1).saturating_sub(offset) / q; // using integer ceil
            let mut b_val = offset + i * q;
            // Paint entire window
            while b_val < self.b_end {
                self.composites.set((b_val - self.b_start) as usize, true); // Mark composite
                b_val += q;
            }
        }
    }

    fn get_complex_composites(&mut self, i_start: u128, i_end: u128) {
        // Case 2: j>0 - mark all composites of i, j
        // for a given range of i.
        //
        // Complexity O~(48*sqrt(n)*m) where:
        //     n is highest prime (b_end)
        //     m is windows size (b_end - b_start)
        for (idx, root) in PRIME_ROOTS.iter().enumerate() {
            let q = *root as u128;
            let l = self.l[idx] as u128;

            for i in i_start..i_end {
                let offset = l + i * q - 1;
                let scalar = self.q_hat[idx] as u128 + i * 210;
                let j = ((self.b_start + scalar - 1).saturating_sub(offset)) / scalar; // using integer ceil
                let mut b_val = offset + j * scalar;
                // Paint entire window
                while b_val < self.b_end {
                    self.composites.set((b_val - self.b_start) as usize, true); // Mark composite
                    b_val += scalar;
                }
            }
        }
    }

    fn get_composites_by_window(&mut self, i_start: u128, i_end: u128) -> bool {
        // Case 2b: j>0 - find composites of i, j
        // by checking range i_start..i_end for each candidate
        //
        // This is slower than the sieve method, but for small window
        // sizes this approach is often faster.
        //
        // Complexity O~(48*m*sqrt(n)) where:
        //     n is b_end (highest prime / 210)
        //     m is b_end - b_start (window size)
        let mut num_composites = 0;
        for b in self.b_start..self.b_end {
            if self.composites[(b - self.b_start) as usize] {
                num_composites += 1;
                continue;
            }
            let mut found = false;
            for i in i_start..i_end {
                for (idx, root) in PRIME_ROOTS.iter().enumerate() {
                    let q = *root as u128;
                    let q_hat = self.q_hat[idx] as u128;
                    let l = self.l[idx] as u128;
                    let i_max = (b + q + 210 - 1).saturating_sub(l) / (q + 210);
                    if i < i_max {
                        let offset = l + i * q - 1;
                        let scalar = q_hat + i * 210;
                        if (((b + scalar).saturating_sub(offset)) % scalar) == 0 {
                            self.composites.set((b - self.b_start) as usize, true); // Mark composite
                            found = true;
                            break;
                        }
                    }
                }
                if found {
                    num_composites += 1;
                    break;
                }
            }
        }
        num_composites != self.composites.len() as u128
    }

    // Perform novel sieve function to efficiently find all of the composites
    //
    // Equation performed is equivalent to:
    //   b_val = set(l + iq + jq_hat + 210ij - 1), for all 48 l, q & q_hat
    //
    //  Limited to the window size:
    //    b_start <= L < b_end
    fn get_composites(&mut self) {
        // Regardless of mode, always remove the
        // first order divisors
        self.find_simple_composites();
        const SMALL_LIMIT: u128 = 50_000; // Very fast
        const BIG_LIMIT: u128 = 5_000_000; // A bit slower

        // Select based on preferred mode
        if self.mode == SieveMode::Window {
            // For small windows of interest
            let mut offset = 1;
            while offset < self.i_limit {
                if !self.get_composites_by_window(offset, (offset + SMALL_LIMIT).min(self.i_limit))
                {
                    // No more candidates remain, so exit early
                    return;
                }
                offset += SMALL_LIMIT
            }
        } else {
            // For all others, just sieve to to the limit
            let mut offset = 1;
            while offset < self.i_limit {
                self.get_complex_composites(offset, (offset + BIG_LIMIT).min(self.i_limit));
                offset += BIG_LIMIT;
            }
        }
    }

    // Re-hydrate primes from the bit vector
    // Each bit corresponds to a number in the form
    //  p = root + i * 210
    // Where i is the index, and root is the starting
    // prime root. If the bit vector is not set then
    // by definition the number is a prime.
    fn hydrate_primes(&self) -> Vec<u128> {
        //         }, bitwise_composites: &BitVec, b_start: u128)  {
        let mut primes = Vec::new();
        let root = self.root as u128;

        for (i, val) in self.composites.iter().enumerate() {
            if !val {
                primes.push(root + (self.b_start + i as u128) * 210);
            }
        }

        primes
    }

    // Count primes directly from the bitwise vector
    // We do this with minimal effort
    fn count_primes_in_storage(&self) -> usize {
        self.composites.count_zeros()
    }
}

// Helper function to get raw composites
fn get_composites(root_idx: u8, start: u128, end: u128) -> BitVec {
    match Sieve::new(root_idx, start, end, SieveMode::Sieve) {
        Ok(mut sieve) => {
            sieve.get_composites();
            sieve.composites
        }
        Err(e) => {
            panic!("{:?}", e)
        }
    }
}

/// Returns the raw composite bit vectors for a given set
/// of root indices. Raw mode indicates the positions of
/// composites (1) and primes (0) for each of the roots.
///
/// To obtain the prime number use the equation:
///      p = root + k×210
/// Where k is the absolute position of the prime (0)
pub fn get_raw_composites(
    root_indices: Vec<u8>,
    start: u128,
    end: u128,
    mode: &SieveMode,
) -> HashMap<u8, BitVec> {
    // Parallel computation of composites
    root_indices
        .into_par_iter()
        .map(|i| (i, get_composites(i, start, end)))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sieve_primes() {
        let primes;
        let mut inst = Sieve::new(0, 0, 1_000_000, SieveMode::Sieve).unwrap();

        // Check the first million primes
        primes = inst.get_primes();
        assert_eq!(primes.len(), 1649); // Number of primes on root 11 below 1e6
        assert_eq!(primes[0], 11); // First prime from root 11
        assert_eq!(primes[1649 - 1], 999_611); // Last prime on root 11 before 1e6
    }

    #[test]
    fn test_windowed_primes() {
        let primes;
        let mut inst = Sieve::new(1, 0, 1_000, SieveMode::Window).unwrap();

        // Check the first million primes
        primes = inst.get_primes();
        assert_eq!(primes.len(), 5); // Number of primes on root 13 below 1e3
        assert_eq!(primes[0], 13); // First prime from root 13
        assert_eq!(primes[5 - 1], 853); // Last prime on root 13 before 1e3
    }
}