1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
use ark_ff::prelude::*;
use ark_std::vec::Vec;
use crate::{AffineCurve, ProjectiveCurve};
#[cfg(feature = "parallel")]
use rayon::prelude::*;
pub struct VariableBaseMSM;
impl VariableBaseMSM {
pub fn multi_scalar_mul<G: AffineCurve>(
bases: &[G],
scalars: &[<G::ScalarField as PrimeField>::BigInt],
) -> G::Projective {
let size = ark_std::cmp::min(bases.len(), scalars.len());
let scalars = &scalars[..size];
let bases = &bases[..size];
let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero());
let c = if size < 32 {
3
} else {
super::ln_without_floats(size) + 2
};
let num_bits = <G::ScalarField as PrimeField>::Params::MODULUS_BITS as usize;
let fr_one = G::ScalarField::one().into_repr();
let zero = G::Projective::zero();
let window_starts: Vec<_> = (0..num_bits).step_by(c).collect();
// Each window is of size `c`.
// We divide up the bits 0..num_bits into windows of size `c`, and
// in parallel process each such window.
let window_sums: Vec<_> = ark_std::cfg_into_iter!(window_starts)
.map(|w_start| {
let mut res = zero;
// We don't need the "zero" bucket, so we only have 2^c - 1 buckets.
let mut buckets = vec![zero; (1 << c) - 1];
// This clone is cheap, because the iterator contains just a
// pointer and an index into the original vectors.
scalars_and_bases_iter.clone().for_each(|(&scalar, base)| {
if scalar == fr_one {
// We only process unit scalars once in the first window.
if w_start == 0 {
res.add_assign_mixed(base);
}
} else {
let mut scalar = scalar;
// We right-shift by w_start, thus getting rid of the
// lower bits.
scalar.divn(w_start as u32);
// We mod the remaining bits by 2^{window size}, thus taking `c` bits.
let scalar = scalar.as_ref()[0] % (1 << c);
// If the scalar is non-zero, we update the corresponding
// bucket.
// (Recall that `buckets` doesn't have a zero bucket.)
if scalar != 0 {
buckets[(scalar - 1) as usize].add_assign_mixed(base);
}
}
});
// Compute sum_{i in 0..num_buckets} (sum_{j in i..num_buckets} bucket[j])
// This is computed below for b buckets, using 2b curve additions.
//
// We could first normalize `buckets` and then use mixed-addition
// here, but that's slower for the kinds of groups we care about
// (Short Weierstrass curves and Twisted Edwards curves).
// In the case of Short Weierstrass curves,
// mixed addition saves ~4 field multiplications per addition.
// However normalization (with the inversion batched) takes ~6
// field multiplications per element,
// hence batch normalization is a slowdown.
// `running_sum` = sum_{j in i..num_buckets} bucket[j],
// where we iterate backward from i = num_buckets to 0.
let mut running_sum = G::Projective::zero();
buckets.into_iter().rev().for_each(|b| {
running_sum += &b;
res += &running_sum;
});
res
})
.collect();
// We store the sum for the lowest window.
let lowest = *window_sums.first().unwrap();
// We're traversing windows from high to low.
lowest
+ &window_sums[1..]
.iter()
.rev()
.fold(zero, |mut total, sum_i| {
total += sum_i;
for _ in 0..c {
total.double_in_place();
}
total
})
}
}