prime_formula/sieve/
mod.rs

1//! Provides a sieve based on prime-formula
2//!
3//! Use for generating large numbers of primes in the range 2 to 10^18.
4//! Engine uses a novel prime-formula to select prime candidates, and then
5//! applies an optimized sieve algorithm to reject factors of primes using
6//! the Periodic Table of Primes. Prime numbers up to 10^9 can be quickly
7//! sieved using the formula, and for smaller windows factors up to 10^147
8//! can be quickly found too.
9//!
10//! All primes found by this method have been exhaustively checked, and so
11//! this method is not suitable for large primes unless a small window is
12//! chosen.
13//
14// Copyright: Adam Cottrell (cottrela@gmail.com)
15
16use std::collections::HashMap;
17
18use bitvec::prelude::*;
19use rayon::prelude::*;
20
21use crate::common::{
22    PRIME_ROOTS, align_to_cycle, find_inv_prime_root_vec, get_cyclic_composite_vec,
23};
24
25/// Error types for sieve operations
26#[derive(Debug, PartialEq)]
27pub enum SieveError {
28    EmptyRange,
29    InvalidSize,
30}
31
32/// Operational mode for the sieve algorithm
33#[derive(Debug, Copy, Clone, PartialEq)]
34pub enum SieveMode {
35    /// Novel sieve method - best for medium ranges
36    Sieve,
37    /// Windowed search - optimized for small number ranges
38    Window,
39    //     /// Use Miller-Rabin test with deterministic witnesses
40    //     MillerRabinDeterministic,
41    //     /// Use Miller-Rabin test with random witnesses
42    //     MillerRabinProbabilistic,
43}
44
45/// Core sieve structure managing prime detection for a single prime root
46///
47/// Each instance handles numbers of the form: root + k×210
48///
49/// # Fields
50/// - root: Base prime root value (one of PRIME_ROOTS)
51/// - root_idx: Base prime root index (0..47)
52/// - mode: Search strategy to use
53/// - q_hat: Vector of inverse primes used in primal checks
54/// - l: Vector of remainders used in primal checks
55/// - composites: Bit vector tracking eliminated numbers
56/// - b_start: Cycle aligned start index
57/// - b_end: Cycle aligned end index
58/// - i_limit: Maximum multiplier to check (sqrt(n)/210)
59pub struct Sieve {
60    root: u8,        // Absolute value of the prime root
61    mode: SieveMode, // 1 = sieve, 2=window, 3=best-effort
62    // Generated
63    q_hat: Vec<u8>, // Inverse primes
64    l: Vec<u8>,     // Cyclic composites
65    composites: BitVec,
66    b_start: u128,
67    b_end: u128,
68    i_limit: u128,
69}
70
71// The sieve implementation takes numbers from a given
72// prime root in the form root + i * 210, and then
73// sets bits in a bitvec to remove any that happen to
74// be composite.
75//
76// The net effect is similar to the Sieve of Eratosthenes
77// but the difference being the novel approach of using
78// the prime roots instead of simply guessing the next
79// prime number (like the Sieve of Eratosthenes proposes).
80impl Sieve {
81    /// Create a new Sieve instance for a prime root and number range
82    ///
83    /// # Arguments
84    /// - root_idx: Index in PRIME_ROOTS array (0-47)
85    /// - start: Inclusive lower bound of search range
86    /// - end: Exclusive upper bound of search range
87    /// - mode: Sieve strategy to employ
88    ///
89    /// # Errors
90    /// Returns SieveError if range is invalid or too large
91    pub fn new(root_idx: u8, start: u128, end: u128, mode: SieveMode) -> Result<Self, SieveError> {
92        let root = PRIME_ROOTS[root_idx as usize];
93        let (b_start, b_end) = align_to_cycle(start, end);
94        if b_end <= b_start {
95            return Err(SieveError::EmptyRange);
96        }
97        let storage_size = (b_end - b_start)
98            .try_into()
99            .map_err(|_| SieveError::InvalidSize)?;
100
101        // Generate inverse prime root values and cyclic composite values
102        let q_hat = find_inv_prime_root_vec(root_idx as usize);
103        let l = get_cyclic_composite_vec(root_idx as usize, &q_hat);
104        let mut composites = bitvec![0; storage_size];
105        let i_limit = end.isqrt().div_ceil(210);
106
107        // Poison first and/or last entries to keep composites aligned for all roots
108        let (b_start_max, b_end_min) = Self::align_to_root(root, start, end);
109        Self::poison_storage(
110            &mut composites,
111            (b_start_max - b_start) as usize,
112            (b_end - b_end_min) as usize,
113        );
114
115        Ok(Self {
116            root,
117            mode,
118            // Generated
119            l,
120            q_hat,
121            composites,
122            b_start,
123            b_end,
124            i_limit,
125        })
126    }
127
128    fn align_to_root(root: u8, start: u128, end: u128) -> (u128, u128) {
129        // Find aligned range for a given start/end window
130        //
131        // For any prime number (P):
132        //    P = r + b * 210
133        //    b = (P - r) / 210
134        //
135        // Where:
136        //   r is the prime root (one of 48)
137        //   b is the multiplier (1..N)
138        let r = root as u128; // 2..211
139
140        // Get ceil(b_start) and floor(b_end) to find the tightest range
141        let b_start = (210 - 1 + start).saturating_sub(r) / 210; // Use integer ceil
142        let b_end = (210 + end - r) / 210; // Use integer floor
143
144        // If b_start == b_end, then there is nothing to do
145        (b_start, b_end)
146    }
147
148    fn poison_storage(composites: &mut BitVec, num_start: usize, num_end: usize) {
149        // Poison the start of range
150        for i in 0..num_start {
151            composites.set(i, true);
152        }
153        // Poison the end of range
154        let sz = composites.len() - 1;
155        for i in 0..num_end {
156            composites.set(sz - i, true);
157        }
158    }
159
160    /// Retrieve primes found by this sieve instance
161    ///
162    /// Returns numbers of the form root + k×210 that passed all composite checks.
163    /// This function will not return primes below 11.
164    ///
165    /// Primes will be sorted, but by root.
166    pub fn get_primes(&mut self) -> Vec<u128> {
167        self.get_composites();
168
169        self.hydrate_primes()
170    }
171
172    /// Count primes in range without storing their values
173    ///
174    /// More memory-efficient than get_primes() for large ranges
175    pub fn count_primes(&mut self) -> usize {
176        self.get_composites();
177
178        self.count_primes_in_storage()
179    }
180
181    fn find_simple_composites(&mut self) {
182        // Case 1: j=0 - find i directly
183        //
184        // Complexity O~(48m) where:
185        //     m is window size (b_end - b_start)
186        for (idx, root) in PRIME_ROOTS.iter().enumerate() {
187            let q = *root as u128;
188            let offset = self.l[idx] as u128 - 1;
189            let i = (self.b_start + q - 1).saturating_sub(offset) / q; // using integer ceil
190            let mut b_val = offset + i * q;
191            // Paint entire window
192            while b_val < self.b_end {
193                self.composites.set((b_val - self.b_start) as usize, true); // Mark composite
194                b_val += q;
195            }
196        }
197    }
198
199    fn get_complex_composites(&mut self, i_start: u128, i_end: u128) {
200        // Case 2: j>0 - mark all composites of i, j
201        // for a given range of i.
202        //
203        // Complexity O~(48*sqrt(n)*m) where:
204        //     n is highest prime (b_end)
205        //     m is windows size (b_end - b_start)
206        for (idx, root) in PRIME_ROOTS.iter().enumerate() {
207            let q = *root as u128;
208            let l = self.l[idx] as u128;
209
210            for i in i_start..i_end {
211                let offset = l + i * q - 1;
212                let scalar = self.q_hat[idx] as u128 + i * 210;
213                let j = ((self.b_start + scalar - 1).saturating_sub(offset)) / scalar; // using integer ceil
214                let mut b_val = offset + j * scalar;
215                // Paint entire window
216                while b_val < self.b_end {
217                    self.composites.set((b_val - self.b_start) as usize, true); // Mark composite
218                    b_val += scalar;
219                }
220            }
221        }
222    }
223
224    fn get_composites_by_window(&mut self, i_start: u128, i_end: u128) -> bool {
225        // Case 2b: j>0 - find composites of i, j
226        // by checking range i_start..i_end for each candidate
227        //
228        // This is slower than the sieve method, but for small window
229        // sizes this approach is often faster.
230        //
231        // Complexity O~(48*m*sqrt(n)) where:
232        //     n is b_end (highest prime / 210)
233        //     m is b_end - b_start (window size)
234        let mut num_composites = 0;
235        for b in self.b_start..self.b_end {
236            if self.composites[(b - self.b_start) as usize] {
237                num_composites += 1;
238                continue;
239            }
240            let mut found = false;
241            for i in i_start..i_end {
242                for (idx, root) in PRIME_ROOTS.iter().enumerate() {
243                    let q = *root as u128;
244                    let q_hat = self.q_hat[idx] as u128;
245                    let l = self.l[idx] as u128;
246                    let i_max = (b + q + 210 - 1).saturating_sub(l) / (q + 210);
247                    if i < i_max {
248                        let offset = l + i * q - 1;
249                        let scalar = q_hat + i * 210;
250                        if (((b + scalar).saturating_sub(offset)) % scalar) == 0 {
251                            self.composites.set((b - self.b_start) as usize, true); // Mark composite
252                            found = true;
253                            break;
254                        }
255                    }
256                }
257                if found {
258                    num_composites += 1;
259                    break;
260                }
261            }
262        }
263        num_composites != self.composites.len() as u128
264    }
265
266    // Perform novel sieve function to efficiently find all of the composites
267    //
268    // Equation performed is equivalent to:
269    //   b_val = set(l + iq + jq_hat + 210ij - 1), for all 48 l, q & q_hat
270    //
271    //  Limited to the window size:
272    //    b_start <= L < b_end
273    fn get_composites(&mut self) {
274        // Regardless of mode, always remove the
275        // first order divisors
276        self.find_simple_composites();
277        const SMALL_LIMIT: u128 = 50_000; // Very fast
278        const BIG_LIMIT: u128 = 5_000_000; // A bit slower
279
280        // Select based on preferred mode
281        if self.mode == SieveMode::Window {
282            // For small windows of interest
283            let mut offset = 1;
284            while offset < self.i_limit {
285                if !self.get_composites_by_window(offset, (offset + SMALL_LIMIT).min(self.i_limit))
286                {
287                    // No more candidates remain, so exit early
288                    return;
289                }
290                offset += SMALL_LIMIT
291            }
292        } else {
293            // For all others, just sieve to to the limit
294            let mut offset = 1;
295            while offset < self.i_limit {
296                self.get_complex_composites(offset, (offset + BIG_LIMIT).min(self.i_limit));
297                offset += BIG_LIMIT;
298            }
299        }
300    }
301
302    // Re-hydrate primes from the bit vector
303    // Each bit corresponds to a number in the form
304    //  p = root + i * 210
305    // Where i is the index, and root is the starting
306    // prime root. If the bit vector is not set then
307    // by definition the number is a prime.
308    fn hydrate_primes(&self) -> Vec<u128> {
309        //         }, bitwise_composites: &BitVec, b_start: u128)  {
310        let mut primes = Vec::new();
311        let root = self.root as u128;
312
313        for (i, val) in self.composites.iter().enumerate() {
314            if !val {
315                primes.push(root + (self.b_start + i as u128) * 210);
316            }
317        }
318
319        primes
320    }
321
322    // Count primes directly from the bitwise vector
323    // We do this with minimal effort
324    fn count_primes_in_storage(&self) -> usize {
325        self.composites.count_zeros()
326    }
327}
328
329// Helper function to get raw composites
330fn get_composites(root_idx: u8, start: u128, end: u128) -> BitVec {
331    match Sieve::new(root_idx, start, end, SieveMode::Sieve) {
332        Ok(mut sieve) => {
333            sieve.get_composites();
334            sieve.composites
335        }
336        Err(e) => {
337            panic!("{:?}", e)
338        }
339    }
340}
341
342/// Returns the raw composite bit vectors for a given set
343/// of root indices. Raw mode indicates the positions of
344/// composites (1) and primes (0) for each of the roots.
345///
346/// To obtain the prime number use the equation:
347///      p = root + k×210
348/// Where k is the absolute position of the prime (0)
349pub fn get_raw_composites(
350    root_indices: Vec<u8>,
351    start: u128,
352    end: u128,
353    mode: &SieveMode,
354) -> HashMap<u8, BitVec> {
355    // Parallel computation of composites
356    root_indices
357        .into_par_iter()
358        .map(|i| (i, get_composites(i, start, end)))
359        .collect()
360}
361
362#[cfg(test)]
363mod tests {
364    use super::*;
365
366    #[test]
367    fn test_sieve_primes() {
368        let primes;
369        let mut inst = Sieve::new(0, 0, 1_000_000, SieveMode::Sieve).unwrap();
370
371        // Check the first million primes
372        primes = inst.get_primes();
373        assert_eq!(primes.len(), 1649); // Number of primes on root 11 below 1e6
374        assert_eq!(primes[0], 11); // First prime from root 11
375        assert_eq!(primes[1649 - 1], 999_611); // Last prime on root 11 before 1e6
376    }
377
378    #[test]
379    fn test_windowed_primes() {
380        let primes;
381        let mut inst = Sieve::new(1, 0, 1_000, SieveMode::Window).unwrap();
382
383        // Check the first million primes
384        primes = inst.get_primes();
385        assert_eq!(primes.len(), 5); // Number of primes on root 13 below 1e3
386        assert_eq!(primes[0], 13); // First prime from root 13
387        assert_eq!(primes[5 - 1], 853); // Last prime on root 13 before 1e3
388    }
389}