sql_cli/sql/functions/
mathematics.rs

1use anyhow::{anyhow, Result};
2use lazy_static::lazy_static;
3use std::collections::HashSet;
4use std::sync::RwLock;
5
6use super::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
7use crate::data::datatable::DataValue;
8
9// Include the generated prime data
10include!(concat!(env!("OUT_DIR"), "/primes_data.rs"));
11
12// Create a HashSet for O(1) primality testing up to 100K-th prime
13lazy_static! {
14    static ref PRIME_SET: HashSet<u32> = PRIMES_100K.iter().copied().collect();
15    static ref EXTENDED_PRIME_CACHE: RwLock<Vec<u64>> = RwLock::new(Vec::new());
16}
17
18/// Engine for prime number operations
19pub struct PrimeEngine;
20
21impl PrimeEngine {
22    /// Get the nth prime number (1-indexed)
23    pub fn nth_prime(n: usize) -> Result<u64> {
24        if n == 0 {
25            return Err(anyhow!("Prime index must be >= 1"));
26        }
27
28        // Fast path: use pre-computed tables
29        if n <= 1_000 {
30            return Ok(u64::from(PRIMES_1K[n - 1]));
31        }
32        if n <= 10_000 {
33            return Ok(u64::from(PRIMES_10K[n - 1]));
34        }
35        if n <= 100_000 {
36            return Ok(u64::from(PRIMES_100K[n - 1]));
37        }
38
39        // Slow path: generate on demand and cache
40        Self::generate_nth_prime(n)
41    }
42
43    /// Check if a number is prime
44    #[must_use]
45    pub fn is_prime(n: u64) -> bool {
46        if n < 2 {
47            return false;
48        }
49        if n == 2 {
50            return true;
51        }
52        if n % 2 == 0 {
53            return false;
54        }
55
56        // Fast path: check pre-computed set (up to 1,299,709)
57        if n <= 1_299_709 {
58            return PRIME_SET.contains(&(n as u32));
59        }
60
61        // Medium numbers: trial division with pre-computed primes
62        if n < 1_000_000_000_000 {
63            let sqrt_n = (n as f64).sqrt() as u64;
64
65            // Use our pre-computed primes for trial division
66            for &p in PRIMES_100K {
67                let p64 = u64::from(p);
68                if p64 > sqrt_n {
69                    return true;
70                }
71                if n % p64 == 0 {
72                    return false;
73                }
74            }
75
76            // If we've exhausted our prime list and haven't found a factor,
77            // continue with wheel factorization
78            Self::is_prime_wheel(n, u64::from(PRIMES_100K[PRIMES_100K.len() - 1]))
79        } else {
80            // Very large numbers: Miller-Rabin test
81            Self::miller_rabin(n)
82        }
83    }
84
85    /// Count primes up to n (prime counting function π(n))
86    #[must_use]
87    pub fn prime_count(n: u64) -> usize {
88        if n < 2 {
89            return 0;
90        }
91
92        // Fast path: binary search in pre-computed list
93        if n <= 1_299_709 {
94            match PRIMES_100K.binary_search(&(n as u32)) {
95                Ok(idx) => idx + 1, // Found exact match
96                Err(idx) => idx,    // Found insertion point
97            }
98        } else {
99            // For large n, use approximation or actual counting
100            // For now, use prime number theorem approximation
101            Self::approximate_prime_count(n)
102        }
103    }
104
105    /// Find the next prime >= n
106    #[must_use]
107    pub fn next_prime(n: u64) -> u64 {
108        if n <= 2 {
109            return 2;
110        }
111
112        // Fast path: binary search in pre-computed primes
113        if n <= 1_299_709 {
114            let target = n as u32;
115            match PRIMES_100K.binary_search(&target) {
116                Ok(_) => n, // n is prime
117                Err(idx) => {
118                    if idx < PRIMES_100K.len() {
119                        u64::from(PRIMES_100K[idx])
120                    } else {
121                        // n is larger than our biggest pre-computed prime
122                        Self::find_next_prime_slow(n)
123                    }
124                }
125            }
126        } else {
127            Self::find_next_prime_slow(n)
128        }
129    }
130
131    /// Find the previous prime <= n
132    #[must_use]
133    pub fn prev_prime(n: u64) -> Option<u64> {
134        if n < 2 {
135            return None;
136        }
137        if n == 2 {
138            return Some(2);
139        }
140
141        // Fast path: binary search in pre-computed primes
142        if n <= 1_299_709 {
143            let target = n as u32;
144            match PRIMES_100K.binary_search(&target) {
145                Ok(_) => Some(n), // n is prime
146                Err(idx) => {
147                    if idx > 0 {
148                        Some(u64::from(PRIMES_100K[idx - 1]))
149                    } else {
150                        None // n < 2
151                    }
152                }
153            }
154        } else {
155            Self::find_prev_prime_slow(n)
156        }
157    }
158
159    /// Get prime factorization of n
160    #[must_use]
161    pub fn factor(mut n: u64) -> Vec<(u64, u32)> {
162        if n <= 1 {
163            return vec![];
164        }
165
166        let mut factors = Vec::new();
167
168        // Trial division with pre-computed primes
169        for &p in PRIMES_10K {
170            let p64 = u64::from(p);
171            if p64 * p64 > n {
172                break;
173            }
174
175            let mut count = 0;
176            while n % p64 == 0 {
177                n /= p64;
178                count += 1;
179            }
180
181            if count > 0 {
182                factors.push((p64, count));
183            }
184        }
185
186        // If n > 1 at this point, it's either prime or has large prime factors
187        if n > 1 {
188            // Check if it's prime
189            if Self::is_prime(n) {
190                factors.push((n, 1));
191            } else {
192                // Try to factor using Pollard's rho for large composites
193                // For now, just add as prime (simplified)
194                factors.push((n, 1));
195            }
196        }
197
198        factors
199    }
200
201    // Helper functions
202
203    /// Generate the nth prime for n > 100,000
204    fn generate_nth_prime(n: usize) -> Result<u64> {
205        // Check cache first
206        let cache = EXTENDED_PRIME_CACHE.read().unwrap();
207        let cache_start = 100_001;
208        let cache_idx = n - cache_start;
209
210        if cache_idx < cache.len() {
211            return Ok(cache[cache_idx]);
212        }
213        drop(cache);
214
215        // Generate primes beyond our pre-computed range
216        let mut cache = EXTENDED_PRIME_CACHE.write().unwrap();
217
218        // Start from the last pre-computed prime
219        let mut candidate = u64::from(PRIMES_100K[PRIMES_100K.len() - 1]) + 2;
220        let mut count = 100_000 + cache.len();
221
222        while count < n {
223            if Self::is_prime(candidate) {
224                cache.push(candidate);
225                count += 1;
226            }
227            candidate += 2;
228        }
229
230        Ok(cache[cache_idx])
231    }
232
233    /// Wheel factorization for primality testing
234    fn is_prime_wheel(n: u64, start: u64) -> bool {
235        // Use wheel factorization mod 30 (2*3*5)
236        const WHEEL: &[u64] = &[1, 7, 11, 13, 17, 19, 23, 29];
237
238        let sqrt_n = (n as f64).sqrt() as u64;
239        let mut base = ((start / 30) + 1) * 30;
240
241        while base <= sqrt_n {
242            for &offset in WHEEL {
243                let candidate = base + offset;
244                if candidate > sqrt_n {
245                    return true;
246                }
247                if candidate > start && n % candidate == 0 {
248                    return false;
249                }
250            }
251            base += 30;
252        }
253        true
254    }
255
256    /// Miller-Rabin primality test for very large numbers
257    fn miller_rabin(n: u64) -> bool {
258        // Witnesses for deterministic test up to 2^64
259        const WITNESSES: &[u64] = &[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37];
260
261        // Write n-1 as 2^r * d
262        let mut d = n - 1;
263        let mut r = 0;
264        while d % 2 == 0 {
265            d /= 2;
266            r += 1;
267        }
268
269        'witness: for &a in WITNESSES {
270            if a >= n {
271                continue;
272            }
273
274            let mut x = Self::mod_pow(a, d, n);
275            if x == 1 || x == n - 1 {
276                continue;
277            }
278
279            for _ in 0..r - 1 {
280                x = Self::mod_mul(x, x, n);
281                if x == n - 1 {
282                    continue 'witness;
283                }
284            }
285
286            return false;
287        }
288
289        true
290    }
291
292    /// Modular exponentiation: base^exp mod m
293    fn mod_pow(mut base: u64, mut exp: u64, m: u64) -> u64 {
294        let mut result = 1;
295        base %= m;
296
297        while exp > 0 {
298            if exp % 2 == 1 {
299                result = Self::mod_mul(result, base, m);
300            }
301            base = Self::mod_mul(base, base, m);
302            exp /= 2;
303        }
304
305        result
306    }
307
308    /// Modular multiplication avoiding overflow
309    fn mod_mul(a: u64, b: u64, m: u64) -> u64 {
310        ((u128::from(a) * u128::from(b)) % u128::from(m)) as u64
311    }
312
313    /// Find next prime slowly (for n > largest pre-computed)
314    fn find_next_prime_slow(mut n: u64) -> u64 {
315        if n % 2 == 0 {
316            n += 1;
317        }
318
319        while !Self::is_prime(n) {
320            n += 2;
321        }
322
323        n
324    }
325
326    /// Find previous prime slowly
327    fn find_prev_prime_slow(mut n: u64) -> Option<u64> {
328        if n % 2 == 0 {
329            n -= 1;
330        }
331
332        while n > 2 {
333            if Self::is_prime(n) {
334                return Some(n);
335            }
336            n -= 2;
337        }
338
339        if n == 2 {
340            Some(2)
341        } else {
342            None
343        }
344    }
345
346    /// Approximate prime count using prime number theorem
347    fn approximate_prime_count(n: u64) -> usize {
348        if n < 2 {
349            return 0;
350        }
351
352        let n_f = n as f64;
353        let ln_n = n_f.ln();
354
355        // Use improved approximation
356        let approx = n_f / (ln_n - 1.0);
357        approx as usize
358    }
359}
360
361// SQL Function implementations
362
363/// PRIME(n) - Returns the nth prime number
364pub struct PrimeFunction;
365
366impl SqlFunction for PrimeFunction {
367    fn signature(&self) -> FunctionSignature {
368        FunctionSignature {
369            name: "PRIME",
370            category: FunctionCategory::Mathematical,
371            arg_count: ArgCount::Fixed(1),
372            description: "Returns the Nth prime number (1-indexed)",
373            returns: "INTEGER",
374            examples: vec![
375                "SELECT PRIME(1)",     // Returns 2
376                "SELECT PRIME(100)",   // Returns 541
377                "SELECT PRIME(10000)", // Returns 104729
378            ],
379        }
380    }
381
382    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
383        self.validate_args(args)?;
384
385        let n = match &args[0] {
386            DataValue::Integer(i) if *i > 0 => *i as usize,
387            DataValue::Integer(_) => return Err(anyhow!("PRIME index must be positive")),
388            DataValue::Float(f) if *f > 0.0 => *f as usize,
389            _ => return Err(anyhow!("PRIME requires a positive integer argument")),
390        };
391
392        let prime = PrimeEngine::nth_prime(n)?;
393        Ok(DataValue::Integer(prime as i64))
394    }
395}
396
397/// `NTH_PRIME(n)` - Alias for PRIME function (more descriptive name)
398pub struct NthPrimeFunction;
399
400impl SqlFunction for NthPrimeFunction {
401    fn signature(&self) -> FunctionSignature {
402        FunctionSignature {
403            name: "NTH_PRIME",
404            category: FunctionCategory::Mathematical,
405            arg_count: ArgCount::Fixed(1),
406            description: "Returns the Nth prime number (1-indexed) - alias for PRIME",
407            returns: "INTEGER",
408            examples: vec![
409                "SELECT NTH_PRIME(1)",     // Returns 2
410                "SELECT NTH_PRIME(100)",   // Returns 541
411                "SELECT NTH_PRIME(10000)", // Returns 104729
412            ],
413        }
414    }
415
416    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
417        // Delegate to PRIME implementation
418        PrimeFunction.evaluate(args)
419    }
420}
421
422/// `IS_PRIME(n)` - Check if a number is prime
423pub struct IsPrimeFunction;
424
425impl SqlFunction for IsPrimeFunction {
426    fn signature(&self) -> FunctionSignature {
427        FunctionSignature {
428            name: "IS_PRIME",
429            category: FunctionCategory::Mathematical,
430            arg_count: ArgCount::Fixed(1),
431            description: "Returns true if the number is prime, false otherwise",
432            returns: "BOOLEAN",
433            examples: vec![
434                "SELECT IS_PRIME(17)",     // Returns true
435                "SELECT IS_PRIME(100)",    // Returns false
436                "SELECT IS_PRIME(104729)", // Returns true
437            ],
438        }
439    }
440
441    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
442        self.validate_args(args)?;
443
444        let n = match &args[0] {
445            DataValue::Integer(i) if *i >= 0 => *i as u64,
446            DataValue::Integer(_) => return Ok(DataValue::Boolean(false)),
447            DataValue::Float(f) if *f >= 0.0 => *f as u64,
448            _ => return Err(anyhow!("IS_PRIME requires a non-negative integer argument")),
449        };
450
451        Ok(DataValue::Boolean(PrimeEngine::is_prime(n)))
452    }
453}
454
455/// `PRIME_COUNT(n)` - Count primes up to n
456pub struct PrimeCountFunction;
457
458impl SqlFunction for PrimeCountFunction {
459    fn signature(&self) -> FunctionSignature {
460        FunctionSignature {
461            name: "PRIME_COUNT",
462            category: FunctionCategory::Mathematical,
463            arg_count: ArgCount::Fixed(1),
464            description: "Returns the count of prime numbers up to n (π(n))",
465            returns: "INTEGER",
466            examples: vec![
467                "SELECT PRIME_COUNT(10)",   // Returns 4 (2,3,5,7)
468                "SELECT PRIME_COUNT(100)",  // Returns 25
469                "SELECT PRIME_COUNT(1000)", // Returns 168
470            ],
471        }
472    }
473
474    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
475        self.validate_args(args)?;
476
477        let n = match &args[0] {
478            DataValue::Integer(i) if *i >= 0 => *i as u64,
479            DataValue::Integer(_) => return Ok(DataValue::Integer(0)),
480            DataValue::Float(f) if *f >= 0.0 => *f as u64,
481            _ => {
482                return Err(anyhow!(
483                    "PRIME_COUNT requires a non-negative integer argument"
484                ))
485            }
486        };
487
488        Ok(DataValue::Integer(PrimeEngine::prime_count(n) as i64))
489    }
490}
491
492/// `PRIME_PI(n)` - Alias for PRIME_COUNT (standard mathematical notation π(n))
493pub struct PrimePiFunction;
494
495impl SqlFunction for PrimePiFunction {
496    fn signature(&self) -> FunctionSignature {
497        FunctionSignature {
498            name: "PRIME_PI",
499            category: FunctionCategory::Mathematical,
500            arg_count: ArgCount::Fixed(1),
501            description:
502                "Returns the count of prime numbers up to n (π(n)) - alias for PRIME_COUNT",
503            returns: "INTEGER",
504            examples: vec![
505                "SELECT PRIME_PI(10)",   // Returns 4 (2,3,5,7)
506                "SELECT PRIME_PI(100)",  // Returns 25
507                "SELECT PRIME_PI(1000)", // Returns 168
508            ],
509        }
510    }
511
512    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
513        // Delegate to PRIME_COUNT implementation
514        PrimeCountFunction.evaluate(args)
515    }
516}
517
518/// `NEXT_PRIME(n)` - Find the next prime >= n
519pub struct NextPrimeFunction;
520
521impl SqlFunction for NextPrimeFunction {
522    fn signature(&self) -> FunctionSignature {
523        FunctionSignature {
524            name: "NEXT_PRIME",
525            category: FunctionCategory::Mathematical,
526            arg_count: ArgCount::Fixed(1),
527            description: "Returns the smallest prime number >= n",
528            returns: "INTEGER",
529            examples: vec![
530                "SELECT NEXT_PRIME(100)",  // Returns 101
531                "SELECT NEXT_PRIME(97)",   // Returns 97 (already prime)
532                "SELECT NEXT_PRIME(1000)", // Returns 1009
533            ],
534        }
535    }
536
537    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
538        self.validate_args(args)?;
539
540        let n = match &args[0] {
541            DataValue::Integer(i) if *i >= 0 => *i as u64,
542            DataValue::Integer(_) => return Ok(DataValue::Integer(2)),
543            DataValue::Float(f) if *f >= 0.0 => *f as u64,
544            _ => {
545                return Err(anyhow!(
546                    "NEXT_PRIME requires a non-negative integer argument"
547                ))
548            }
549        };
550
551        Ok(DataValue::Integer(PrimeEngine::next_prime(n) as i64))
552    }
553}
554
555/// `PREV_PRIME(n)` - Find the previous prime <= n
556pub struct PrevPrimeFunction;
557
558impl SqlFunction for PrevPrimeFunction {
559    fn signature(&self) -> FunctionSignature {
560        FunctionSignature {
561            name: "PREV_PRIME",
562            category: FunctionCategory::Mathematical,
563            arg_count: ArgCount::Fixed(1),
564            description: "Returns the largest prime number <= n",
565            returns: "INTEGER",
566            examples: vec![
567                "SELECT PREV_PRIME(100)",  // Returns 97
568                "SELECT PREV_PRIME(97)",   // Returns 97 (already prime)
569                "SELECT PREV_PRIME(1000)", // Returns 997
570            ],
571        }
572    }
573
574    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
575        self.validate_args(args)?;
576
577        let n = match &args[0] {
578            DataValue::Integer(i) if *i >= 0 => *i as u64,
579            DataValue::Integer(_) => return Ok(DataValue::Null),
580            DataValue::Float(f) if *f >= 0.0 => *f as u64,
581            _ => {
582                return Err(anyhow!(
583                    "PREV_PRIME requires a non-negative integer argument"
584                ))
585            }
586        };
587
588        match PrimeEngine::prev_prime(n) {
589            Some(p) => Ok(DataValue::Integer(p as i64)),
590            None => Ok(DataValue::Null),
591        }
592    }
593}
594
595#[cfg(test)]
596mod tests {
597    use super::*;
598
599    #[test]
600    fn test_nth_prime() {
601        assert_eq!(PrimeEngine::nth_prime(1).unwrap(), 2);
602        assert_eq!(PrimeEngine::nth_prime(10).unwrap(), 29);
603        assert_eq!(PrimeEngine::nth_prime(100).unwrap(), 541);
604        assert_eq!(PrimeEngine::nth_prime(1000).unwrap(), 7919);
605        assert_eq!(PrimeEngine::nth_prime(10000).unwrap(), 104729);
606    }
607
608    #[test]
609    fn test_is_prime() {
610        assert!(!PrimeEngine::is_prime(0));
611        assert!(!PrimeEngine::is_prime(1));
612        assert!(PrimeEngine::is_prime(2));
613        assert!(PrimeEngine::is_prime(17));
614        assert!(!PrimeEngine::is_prime(100));
615        assert!(PrimeEngine::is_prime(104729));
616        assert!(PrimeEngine::is_prime(1299709)); // 100,000th prime
617    }
618
619    #[test]
620    fn test_prime_count() {
621        assert_eq!(PrimeEngine::prime_count(10), 4); // 2, 3, 5, 7
622        assert_eq!(PrimeEngine::prime_count(100), 25);
623        assert_eq!(PrimeEngine::prime_count(1000), 168);
624    }
625
626    #[test]
627    fn test_next_prev_prime() {
628        assert_eq!(PrimeEngine::next_prime(100), 101);
629        assert_eq!(PrimeEngine::next_prime(97), 97);
630
631        assert_eq!(PrimeEngine::prev_prime(100), Some(97));
632        assert_eq!(PrimeEngine::prev_prime(97), Some(97));
633        assert_eq!(PrimeEngine::prev_prime(1), None);
634    }
635
636    #[test]
637    fn test_factorization() {
638        let factors = PrimeEngine::factor(60);
639        assert_eq!(factors, vec![(2, 2), (3, 1), (5, 1)]);
640
641        let factors = PrimeEngine::factor(97);
642        assert_eq!(factors, vec![(97, 1)]);
643
644        let factors = PrimeEngine::factor(1001);
645        assert_eq!(factors, vec![(7, 1), (11, 1), (13, 1)]);
646    }
647}