1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
//! Variable-time 512-bit modular reduction for sparse primes.
use super::U512;
use crate::u256::U256;
impl U512 {
    /// Reduces this 512-bit value modulo a sparse prime whose complement
    /// `2^256 − modulus` fits in a single `u64`.
    ///
    /// Relies on the congruence `2^256 ≡ comp (mod modulus)`: the high 256
    /// bits are folded into the low 256 bits using only 5 `u64 × u64`
    /// multiplies, avoiding a full multi-precision division (Knuth
    /// Algorithm D).
    ///
    /// The first folding pass fuses the high-half multiply (`hi × comp`)
    /// with the low-half addition in one carry chain. A second pass absorbs
    /// the leftover overflow (roughly 33 bits for secp256k1), and the tail
    /// applies at most 2 conditional subtractions to land in `[0, modulus)`.
    ///
    /// # Examples
    ///
    /// ```
    /// use cnfy_uint::u256::U256;
    /// use cnfy_uint::u512::U512;
    /// let P = U256::from_be_limbs([
    /// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
    /// 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFEFFFFFC2F,
    /// ]);
    ///
    /// let a = U256::from_be_limbs([0, 0, 0, 7]);
    /// let product = a * a;
    /// assert_eq!(product.sparse_reduce_vt(&P, 0x1000003D1), U256::from_be_limbs([0, 0, 0, 49]));
    /// ```
    #[inline]
    pub fn sparse_reduce_vt(&self, modulus: &U256, comp: u64) -> U256 {
        let c = comp as u128;
        // Pass 1: limbs[i] = self.0[i] + comp * self.0[i+4] + carry.
        // LE layout: low half = [0..4], high half = [4..8].
        // Worst case per step: (2^64-1) + (2^64-1)^2 + (2^64-1) = 2^128-1,
        // which exactly fits a u128, so the chain can never drop a carry.
        let mut limbs = [0u64; 4];
        let mut carry: u128 = 0;
        for i in 0..4 {
            let acc = (self.0[i] as u128) + c * (self.0[i + 4] as u128) + carry;
            limbs[i] = acc as u64;
            carry = acc >> 64;
        }
        let overflow = carry as u64;
        // Fast path: nothing spilled past limb 3; one normalization
        // against the modulus finishes the job.
        if overflow == 0 {
            return U256(limbs).modulo(modulus);
        }
        // Pass 2: fold the spilled limb back in. For secp256k1 (33-bit comp)
        // the overflow is ~33 bits, so `overflow * comp` spans at most two
        // limbs; positions 2 and 3 only ever receive a propagated carry.
        let correction = (overflow as u128) * c;
        let addend = [correction as u64, (correction >> 64) as u64, 0, 0];
        let mut carry: u128 = 0;
        for i in 0..4 {
            let acc = (limbs[i] as u128) + (addend[i] as u128) + carry;
            limbs[i] = acc as u64;
            carry = acc >> 64;
        }
        let mut result = U256(limbs);
        // A wrap past 2^256 (carry != 0) or a value at/above the modulus
        // both call for one subtraction; `modulo` settles any residue.
        if carry != 0 || result >= *modulus {
            result = result.overflowing_sub(modulus).0;
        }
        result.modulo(modulus)
    }
}
#[cfg(test)]
mod ai_tests {
    use super::*;
    /// The secp256k1 field prime, big-endian limbs.
    const P: U256 = U256::from_be_limbs([
        0xFFFFFFFFFFFFFFFF,
        0xFFFFFFFFFFFFFFFF,
        0xFFFFFFFFFFFFFFFF,
        0xFFFFFFFEFFFFFC2F,
    ]);
    /// Single-limb complement of the prime: 2^256 − P.
    const COMP: u64 = 0x1000003D1;
    /// 7^2 = 49 is far below P, so it passes through reduction unchanged.
    #[test]
    fn small_product() {
        let seven = U256::from_be_limbs([0, 0, 0, 7]);
        let squared = seven * seven;
        let expected = U256::from_be_limbs([0, 0, 0, 49]);
        assert_eq!(squared.sparse_reduce_vt(&P, COMP), expected);
    }
    /// The all-zero product reduces to zero.
    #[test]
    fn zero_product() {
        let zero = U512::from_be_limbs([0; 8]);
        assert_eq!(zero.sparse_reduce_vt(&P, COMP), U256::ZERO);
    }
    /// The largest possible square, (2^256−1)^2, must agree with mul_mod.
    #[test]
    fn max_product() {
        let max = U256::from_be_limbs([u64::MAX; 4]);
        let expected = max.mul_mod(&max, &P);
        assert_eq!((max * max).sparse_reduce_vt(&P, COMP), expected);
    }
    /// (P−1)^2 ≡ 1 (mod P), since P−1 ≡ −1.
    #[test]
    fn p_minus_one_squared() {
        let p_minus_1 = U256::from_be_limbs([
            0xFFFFFFFFFFFFFFFF,
            0xFFFFFFFFFFFFFFFF,
            0xFFFFFFFFFFFFFFFF,
            0xFFFFFFFEFFFFFC2E,
        ]);
        let squared = p_minus_1 * p_minus_1;
        assert_eq!(squared.sparse_reduce_vt(&P, COMP), U256::ONE);
    }
    /// Cross-check the sparse path against the generic mul_mod path.
    #[test]
    fn matches_mul_mod() {
        let pairs = [
            (
                U256::from_be_limbs([0, 0, 0, 1]),
                U256::from_be_limbs([0, 0, 0, 1]),
            ),
            (
                U256::from_be_limbs([
                    0x123456789ABCDEF0,
                    0xFEDCBA9876543210,
                    0x1111222233334444,
                    0x5555666677778888,
                ]),
                U256::from_be_limbs([
                    0xAAAABBBBCCCCDDDD,
                    0xEEEEFFFF00001111,
                    0x2222333344445555,
                    0x6666777788889999,
                ]),
            ),
        ];
        for (a, b) in &pairs {
            let product = *a * *b;
            let expected = a.mul_mod(b, &P);
            assert_eq!(
                product.sparse_reduce_vt(&P, COMP),
                expected,
                "mismatch for {a:?} * {b:?}"
            );
        }
    }
}