m61_modulus/
lib.rs

1//! Functions for performing arithmetic modulo the 61st Mersenne number.
2//! Aimed at testing bignum implementations.
3//!
4//! ## Usage
5//!
6//! The crate comes with a trait [`M61Reduction`] and a type [`M61`].
7//! `M61` is an integer in which all arithmetic is performed the
8//! 61st Mersenne number, `2^61 - 1`.
9//!
10//! ```
11//! use m61_modulus::*;
12//!
13//! let x = M61::from(1u64 << 61);
14//! let y = M61::from(1u64);
15//!
16//! assert_eq!(x, y);
17//! ```
18//!
19//! The trait `M61Reduction` is implemented for unsigned integer slices,
20//! providing two functions for reducing the modulo `2^61 - 1`,
21//! as if they were digits in a bignum implementation.
22//!
23//! ```
24//! use m61_modulus::*;
25//!
26//! let x = [1u16, 734u16, 24u16].reduce_m61();
27//! let y = M61::from(1) + M61::from(734 << 16) + M61::from(24u64 << 32);
28//!
29//! assert_eq!(x, y);
30//! ```
31//!
32//! The functions are `reduce_m61`, which is single-threaded, and `reduce_m61_parallelized`,
33//! which may spawn additional threads.
34//!
35//! This crate comes with two features:
36//! * `nightly`, which enables support for additional nightly-only ISA extensions
37//!   like AVX512. Disabled by default.
38//! * `std`, which provides access to the `reduce_m61_parallelized` function,
39//!   which requires the Rust standard library. If disabled, this crate will
40//!   also work on `no-std` targets. Enabled by default.
41//!
42//! ## Background
43//!
44//! This crate is designed around verifying the results of bignum implementations
45//! (like `num-bigint`) in a cheap manner. By repeating an operation
46//! using modular arithmetic one can test the results without having to
47//! resort to simpler, but slower implementations involving arbitrary-precision
48//! arithmetic.
49//!
50//! Arithetic modulo the 61st Mersenne number is particulary suitable for this:
51//! * It is a prime number, which means the results distribute well given random input.
52//! * Its difference of one to the next power of two makes calcuations incredibly cheap.
53
54#![allow(unsafe_op_in_unsafe_fn)]
55
56#![cfg_attr(feature = "nightly", feature(avx512_target_feature, stdsimd))]
57#![cfg_attr(not(feature = "std"), no_std)]
58
59mod definition;
60
61cfg_if::cfg_if! {
62    if #[cfg(all(
63        not(miri),
64        target_endian = "little",
65        any(
66            target_arch = "x86_64",
67            target_arch = "aarch64",
68            all(target_family = "wasm", target_feature = "simd128"),
69        ),
70    ))] {
71        #[path = "./simd/mod.rs"]
72        mod implementation;
73    } else {
74        #[path = "./fallback.rs"]
75        mod implementation;
76    }
77}
78
79#[cfg(all(test, not(miri)))]
80mod fallback;
81
82#[cfg(feature = "std")]
83mod parallelized;
84
85pub use crate::definition::M61;
86
87/// A helper trait for making the functions accessible using the dot operator.
88pub trait M61Reduction {
89    /// Calculates `self mod (2^61 - 1)`, assuming `self` is a number
90    /// base `2^Self::BITS`, with digits stored in little-endian ordering.
91    #[must_use]
92    fn reduce_m61(&self) -> M61;
93
94    /// Calculates `self mod (2^61 - 1)`, assuming `self` is a number
95    /// base `2^Self::BITS`, with digits stored in little-endian ordering.
96    ///
97    /// This function is parallelized, using at most `max_thread_count`
98    /// threads to calculate the result.
99    #[cfg(feature = "std")]
100    #[must_use]
101    fn reduce_m61_parallelized(&self, max_thread_count: usize) -> M61;
102}
103
104impl M61Reduction for [u8] {
105    #[inline(always)]
106    fn reduce_m61(&self) -> M61 {
107        // SAFETY: The `implementation` module only defers to unsafe
108        // versions if their safety conditions are met.
109        #[allow(unused_unsafe)]
110        unsafe {
111            implementation::reduce_u8(self)
112        }
113    }
114
115    #[cfg(feature = "std")]
116    #[inline(always)]
117    fn reduce_m61_parallelized(&self, max_thread_count: usize) -> M61 {
118        parallelized::reduce_u8(self, max_thread_count)
119    }
120}
121
122impl M61Reduction for [u16] {
123    #[inline(always)]
124    fn reduce_m61(&self) -> M61 {
125        // SAFETY: The `implementation` module only defers to unsafe
126        // versions if their safety conditions are met.
127        #[allow(unused_unsafe)]
128        unsafe {
129            implementation::reduce_u16(self)
130        }
131    }
132
133    #[cfg(feature = "std")]
134    #[inline(always)]
135    fn reduce_m61_parallelized(&self, max_thread_count: usize) -> M61 {
136        parallelized::reduce_u16(self, max_thread_count)
137    }
138}
139
140impl M61Reduction for [u32] {
141    #[inline(always)]
142    fn reduce_m61(&self) -> M61 {
143        // SAFETY: The `implementation` module only defers to unsafe
144        // versions if their safety conditions are met.
145        #[allow(unused_unsafe)]
146        unsafe {
147            implementation::reduce_u32(self)
148        }
149    }
150
151    #[cfg(feature = "std")]
152    #[inline(always)]
153    fn reduce_m61_parallelized(&self, max_thread_count: usize) -> M61 {
154        parallelized::reduce_u32(self, max_thread_count)
155    }
156}
157
158impl M61Reduction for [u64] {
159    #[inline(always)]
160    fn reduce_m61(&self) -> M61 {
161        // SAFETY: The `implementation` module only defers to unsafe
162        // versions if their safety conditions are met.
163        #[allow(unused_unsafe)]
164        unsafe {
165            implementation::reduce_u64(self)
166        }
167    }
168
169    #[cfg(feature = "std")]
170    #[inline(always)]
171    fn reduce_m61_parallelized(&self, max_thread_count: usize) -> M61 {
172        parallelized::reduce_u64(self, max_thread_count)
173    }
174}
175
176impl M61Reduction for [usize] {
177    #[inline(always)]
178    fn reduce_m61(&self) -> M61 {
179        // SAFETY: Within the body, we turn the input slice into a
180        // slice of of the same length and with a identically sized type.
181        // Thus, the memory regions are the same.
182        unsafe {
183            use core::slice::from_raw_parts;
184            let (ptr, len) = (self.as_ptr(), self.len());
185            match core::mem::size_of::<usize>() {
186                2 => from_raw_parts(ptr as *const u16, len).reduce_m61(),
187                4 => from_raw_parts(ptr as *const u32, len).reduce_m61(),
188                8 => from_raw_parts(ptr as *const u64, len).reduce_m61(),
189                _ => unreachable!("an address has only 16, 32 or 64 bits"),
190            }
191        }
192    }
193
194    #[cfg(feature = "std")]
195    #[inline(always)]
196    fn reduce_m61_parallelized(&self, max_thread_count: usize) -> M61 {
197        // SAFETY: Within the body, we turn the input slice into a
198        // slice of of the same length and with a identically sized type.
199        // Thus, the memory regions are the same.
200        unsafe {
201            use core::slice::from_raw_parts;
202            let (ptr, len) = (self.as_ptr(), self.len());
203            match core::mem::size_of::<usize>() {
204                2 => {
205                    from_raw_parts(ptr as *const u16, len).reduce_m61_parallelized(max_thread_count)
206                }
207                4 => {
208                    from_raw_parts(ptr as *const u32, len).reduce_m61_parallelized(max_thread_count)
209                }
210                8 => {
211                    from_raw_parts(ptr as *const u64, len).reduce_m61_parallelized(max_thread_count)
212                }
213                _ => unreachable!("an address has only 16, 32 or 64 bits"),
214            }
215        }
216    }
217}