coreutils_rs/factor/
core.rs

1/// Prime factorization using trial division for small factors and
2/// Pollard's rho algorithm with Miller-Rabin primality testing for larger factors.
3/// Supports numbers up to u128.
4
5/// Modular multiplication for u128 that avoids overflow.
6/// Computes (a * b) % m using the Russian peasant (binary) method.
7fn mod_mul(mut a: u128, mut b: u128, m: u128) -> u128 {
8    // If the product fits in u128, use direct multiplication
9    if a.leading_zeros() + b.leading_zeros() >= 128 {
10        return (a * b) % m;
11    }
12    a %= m;
13    b %= m;
14    let mut result: u128 = 0;
15    while b > 0 {
16        if b & 1 == 1 {
17            result = result.wrapping_add(a);
18            if result >= m {
19                result -= m;
20            }
21        }
22        a <<= 1;
23        if a >= m {
24            a -= m;
25        }
26        b >>= 1;
27    }
28    result
29}
30
31/// Modular exponentiation: computes (base^exp) % m using repeated squaring.
32fn mod_pow(mut base: u128, mut exp: u128, m: u128) -> u128 {
33    if m == 1 {
34        return 0;
35    }
36    let mut result: u128 = 1;
37    base %= m;
38    while exp > 0 {
39        if exp & 1 == 1 {
40            result = mod_mul(result, base, m);
41        }
42        exp >>= 1;
43        base = mod_mul(base, base, m);
44    }
45    result
46}
47
48/// Deterministic Miller-Rabin primality test.
49/// For n < 3,317,044,064,679,887,385,961,981 the witnesses below are sufficient
50/// for a deterministic result.
51fn is_prime_miller_rabin(n: u128) -> bool {
52    if n < 2 {
53        return false;
54    }
55    if n == 2 || n == 3 {
56        return true;
57    }
58    if n.is_multiple_of(2) || n.is_multiple_of(3) {
59        return false;
60    }
61
62    // Small primes check
63    const SMALL_PRIMES: [u128; 15] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
64    for &p in &SMALL_PRIMES {
65        if n == p {
66            return true;
67        }
68        if n.is_multiple_of(p) {
69            return false;
70        }
71    }
72
73    // Write n-1 as 2^r * d where d is odd
74    let mut d = n - 1;
75    let mut r = 0u32;
76    while d.is_multiple_of(2) {
77        d /= 2;
78        r += 1;
79    }
80
81    // Witnesses sufficient for deterministic results up to very large numbers.
82    // These cover all composites below 3,317,044,064,679,887,385,961,981.
83    let witnesses: &[u128] = &[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
84
85    'witness: for &a in witnesses {
86        if a >= n {
87            continue;
88        }
89        let mut x = mod_pow(a, d, n);
90        if x == 1 || x == n - 1 {
91            continue;
92        }
93        for _ in 0..r - 1 {
94            x = mod_mul(x, x, n);
95            if x == n - 1 {
96                continue 'witness;
97            }
98        }
99        return false;
100    }
101    true
102}
103
104/// Compute gcd using the binary GCD algorithm.
105fn gcd(mut a: u128, mut b: u128) -> u128 {
106    if a == 0 {
107        return b;
108    }
109    if b == 0 {
110        return a;
111    }
112    let shift = (a | b).trailing_zeros();
113    a >>= a.trailing_zeros();
114    loop {
115        b >>= b.trailing_zeros();
116        if a > b {
117            std::mem::swap(&mut a, &mut b);
118        }
119        b -= a;
120        if b == 0 {
121            break;
122        }
123    }
124    a << shift
125}
126
127/// Pollard's rho algorithm to find a non-trivial factor of n.
128/// Uses Brent's cycle detection variant with batch GCD for efficiency.
129/// Batch GCD reduces the number of expensive GCD operations by ~100x.
130fn pollard_rho(n: u128) -> u128 {
131    if n.is_multiple_of(2) {
132        return 2;
133    }
134
135    for c_offset in 1u128..n {
136        let c = c_offset;
137        let mut x: u128 = c_offset.wrapping_mul(6364136223846793005).wrapping_add(1) % n;
138        let mut y = x;
139
140        // Batch GCD: accumulate differences and check GCD every 128 iterations
141        let mut ys = x;
142        let mut q: u128 = 1;
143        let mut r: u128 = 1;
144        let mut d: u128 = 1;
145
146        while d == 1 {
147            x = y;
148            for _ in 0..r {
149                y = mod_mul(y, y, n).wrapping_add(c) % n;
150            }
151            let mut k: u128 = 0;
152            while k < r && d == 1 {
153                ys = y;
154                let m = (r - k).min(128);
155                for _ in 0..m {
156                    y = mod_mul(y, y, n).wrapping_add(c) % n;
157                    q = mod_mul(q, x.abs_diff(y), n);
158                }
159                d = gcd(q, n);
160                k += m;
161            }
162            r *= 2;
163        }
164
165        if d == n {
166            // Backtrack: find the exact factor
167            loop {
168                ys = mod_mul(ys, ys, n).wrapping_add(c) % n;
169                d = gcd(x.abs_diff(ys), n);
170                if d > 1 {
171                    break;
172                }
173            }
174        }
175
176        if d != n {
177            return d;
178        }
179    }
180    n
181}
182
183/// Recursively factor n, appending prime factors to the vector.
184fn factor_recursive(n: u128, factors: &mut Vec<u128>) {
185    if n <= 1 {
186        return;
187    }
188    if is_prime_miller_rabin(n) {
189        factors.push(n);
190        return;
191    }
192
193    // Find a non-trivial factor
194    let mut d = pollard_rho(n);
195    // In rare cases, pollard_rho may return n itself; try again with trial division fallback
196    if d == n {
197        // Brute force trial division for this case
198        d = 2;
199        while d * d <= n {
200            if n.is_multiple_of(d) {
201                break;
202            }
203            d += 1;
204        }
205        if d * d > n {
206            // n is prime after all
207            factors.push(n);
208            return;
209        }
210    }
211    factor_recursive(d, factors);
212    factor_recursive(n / d, factors);
213}
214
215/// Return the sorted list of prime factors of n (with repetition).
216/// For n <= 1, returns an empty vector.
217///
218/// Uses trial division for small primes, then Pollard's rho for large factors.
219pub fn factorize(n: u128) -> Vec<u128> {
220    if n <= 1 {
221        return vec![];
222    }
223
224    let mut factors = Vec::new();
225    let mut n = n;
226
227    // Trial division by small primes
228    const SMALL_PRIMES: [u128; 15] = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47];
229    for &p in &SMALL_PRIMES {
230        while n.is_multiple_of(p) {
231            factors.push(p);
232            n /= p;
233        }
234    }
235
236    // Continue trial division with 6k +/- 1 up to a threshold
237    let mut i: u128 = 53;
238    while i * i <= n && i < 10000 {
239        while n.is_multiple_of(i) {
240            factors.push(i);
241            n /= i;
242        }
243        i += 2;
244        while n.is_multiple_of(i) {
245            factors.push(i);
246            n /= i;
247        }
248        i += 4;
249    }
250
251    // Use Pollard's rho for whatever remains
252    if n > 1 {
253        factor_recursive(n, &mut factors);
254        factors.sort();
255    }
256
257    factors
258}
259
260/// Format a factorization result as "NUMBER: FACTOR FACTOR ..." matching GNU factor output.
261/// Uses stack-allocated buffer to avoid heap allocations for the common case.
262pub fn format_factors(n: u128) -> String {
263    let factors = factorize(n);
264    // Pre-allocate with estimated capacity to avoid reallocation
265    let mut result = String::with_capacity(64);
266    // Use itoa for fast u64 formatting when possible, fall back to Display for u128
267    if n <= u64::MAX as u128 {
268        let mut buf = itoa::Buffer::new();
269        result.push_str(buf.format(n as u64));
270    } else {
271        use std::fmt::Write;
272        let _ = write!(result, "{}", n);
273    }
274    result.push(':');
275    for f in &factors {
276        result.push(' ');
277        if *f <= u64::MAX as u128 {
278            let mut buf = itoa::Buffer::new();
279            result.push_str(buf.format(*f as u64));
280        } else {
281            use std::fmt::Write;
282            let _ = write!(result, "{}", f);
283        }
284    }
285    result
286}
coreutils_rs/factor/core.rs

coreutils_rs/factor/
core.rs