use crate::pairing::{
CurveAffine,
CurveProjective,
Engine
};
use crate::pairing::ff::{
PrimeField,
Field,
PrimeFieldRepr,
ScalarEngine};
use std::sync::Arc;
use super::source::*;
use std::future::{Future};
use std::task::{Context, Poll};
use std::pin::{Pin};
extern crate futures;
use self::futures::future::{join_all, JoinAll};
use self::futures::executor::block_on;
use super::worker::{Worker, WorkerFuture};
use super::SynthesisError;
use cfg_if;
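/// Computes one `c`-bit window of a multiexponentiation with the bucket (Pippenger)
/// method. For every exponent whose density flag is set, the window digit at bit offset
/// `skip` selects one of `2^c - 1` buckets and the next base from the source is added
/// into it (a zero digit just advances the source). Exponents equal to one are folded
/// straight into the accumulator, but only when `handle_trivial` is set, i.e. for the
/// lowest window. Finally the buckets are combined with a running sum so that bucket `d`
/// contributes `d` times.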
fn multiexp_inner<Q, D, G, S>(
pool: &Worker,
bases: S,
density_map: D,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
skip: u32,
c: u32,
handle_trivial: bool
) -> WorkerFuture< <G as CurveAffine>::Projective, SynthesisError>
where for<'a> &'a Q: QueryDensity,
D: Send + Sync + 'static + Clone + AsRef<Q>,
G: CurveAffine,
S: SourceBuilder<G>
{
let this = {
pool.compute(move || {
let mut acc = G::Projective::zero();
let mut bases = bases.new();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); (1 << c) - 1];
let zero = <G::Engine as ScalarEngine>::Fr::zero().into_repr();
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
for (&exp, density) in exponents.iter().zip(density_map.as_ref().iter()) {
if density {
if exp == zero {
bases.skip(1)?;
} else if exp == one {
if handle_trivial {
bases.add_assign_mixed(&mut acc)?;
} else {
bases.skip(1)?;
}
} else {
let mut exp = exp;
exp.shr(skip);
let exp = exp.as_ref()[0] % (1 << c);
if exp != 0 {
bases.add_assign_mixed(&mut buckets[(exp - 1) as usize])?;
} else {
bases.skip(1)?;
}
}
}
}
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
Ok(acc)
})
};
this
}
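/// Thin `#[inline(always)]` wrapper around `multiexp_inner`, presumably kept as a single
/// switch point so an alternative inner implementation (such as the prefetching variant
/// below) can be substituted without touching the callers.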
#[inline(always)]
fn multiexp_inner_impl<Q, D, G, S>(
pool: &Worker,
bases: S,
density_map: D,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
skip: u32,
c: u32,
handle_trivial: bool
) -> WorkerFuture< <G as CurveAffine>::Projective, SynthesisError>
where for<'a> &'a Q: QueryDensity,
D: Send + Sync + 'static + Clone + AsRef<Q>,
G: CurveAffine,
S: SourceBuilder<G>
{
multiexp_inner(pool, bases, density_map, exponents, skip, c, handle_trivial)
}
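/// Same window computation as `multiexp_inner`, but while handling the current exponent it
/// prefetches the bucket that the next exponent will hit into L1 cache, hiding part of the
/// latency of the scattered bucket accesses.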
fn multiexp_inner_with_prefetch_stable<Q, D, G, S>(
pool: &Worker,
bases: S,
density_map: D,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
skip: u32,
c: u32,
handle_trivial: bool
) -> WorkerFuture< <G as CurveAffine>::Projective, SynthesisError>
where for<'a> &'a Q: QueryDensity,
D: Send + Sync + 'static + Clone + AsRef<Q>,
G: CurveAffine,
S: SourceBuilder<G>
{
let this = {
let bases = bases.clone();
let exponents = exponents.clone();
let density_map = density_map.clone();
pool.compute(move || {
let mut acc = G::Projective::zero();
let mut bases = bases.new();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); (1 << c) - 1];
let zero = <G::Engine as ScalarEngine>::Fr::zero().into_repr();
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
let padding = Arc::new(vec![zero]);
let mask = (1u64 << c) - 1;
for ((&exp, &next_exp), density) in exponents.iter()
.zip(exponents.iter().skip(1).chain(padding.iter()))
.zip(density_map.as_ref().iter()) {
if next_exp != zero && next_exp != one {
let mut next_exp = next_exp;
next_exp.shr(skip);
let next_exp = next_exp.as_ref()[0] & mask;
if next_exp != 0 {
let p: *const <G as CurveAffine>::Projective = &buckets[(next_exp - 1) as usize];
crate::prefetch::prefetch_l1_pointer(p);
}
}
if density {
if exp == zero {
bases.skip(1)?;
} else if exp == one {
if handle_trivial {
bases.add_assign_mixed(&mut acc)?;
} else {
bases.skip(1)?;
}
} else {
let mut exp = exp;
exp.shr(skip);
let exp = exp.as_ref()[0] & mask;
if exp != 0 {
bases.add_assign_mixed(&mut buckets[(exp - 1) as usize])?;
} else {
bases.skip(1)?;
}
}
}
}
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
Ok(acc)
})
};
this
}
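/// Dense multiexponentiation of `bases` by `exponents`, returned as a future. The window
/// width `c` is chosen as roughly `ln(n)` and adjusted so that there is at least roughly one
/// window per worker CPU; one future is spawned per window and the partial results are
/// recombined by `ChunksJoiner`.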
pub fn future_based_multiexp<G: CurveAffine>(
pool: &Worker,
bases: Arc<Vec<G>>,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>
) -> ChunksJoiner< <G as CurveAffine>::Projective >
{
assert!(exponents.len() <= bases.len());
let c = if exponents.len() < 32 {
3u32
} else {
let mut width = (f64::from(exponents.len() as u32)).ln().ceil() as u32;
let mut num_chunks = <G::Scalar as PrimeField>::NUM_BITS / width;
if <G::Scalar as PrimeField>::NUM_BITS % width != 0 {
num_chunks += 1;
}
if num_chunks < pool.cpus as u32 {
width = <G::Scalar as PrimeField>::NUM_BITS / (pool.cpus as u32);
if <G::Scalar as PrimeField>::NUM_BITS % (pool.cpus as u32) != 0 {
width += 1;
}
}
width
};
let mut skip = 0;
let mut futures = Vec::with_capacity((<G::Engine as ScalarEngine>::Fr::NUM_BITS / c + 1) as usize);
while skip < <G::Engine as ScalarEngine>::Fr::NUM_BITS {
let chunk_future = if skip == 0 {
future_based_dense_multiexp_impl(pool, bases.clone(), exponents.clone(), 0, c, true)
} else {
future_based_dense_multiexp_impl(pool, bases.clone(), exponents.clone(), skip, c, false)
};
futures.push(chunk_future);
skip += c;
}
let join = join_all(futures);
ChunksJoiner {
join,
c
}
}
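/// Same as `future_based_multiexp`, but with a caller-supplied window width `c` instead of
/// the automatically derived one.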
pub fn future_based_dense_multiexp_over_fixed_width_windows<G: CurveAffine>(
pool: &Worker,
bases: Arc<Vec<G>>,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
c: u32
) -> ChunksJoiner< <G as CurveAffine>::Projective >
{
assert!(exponents.len() <= bases.len());
let mut skip = 0;
let mut futures = Vec::with_capacity((<G::Engine as ScalarEngine>::Fr::NUM_BITS / c + 1) as usize);
while skip < <G::Engine as ScalarEngine>::Fr::NUM_BITS {
let chunk_future = if skip == 0 {
future_based_dense_multiexp_impl(pool, bases.clone(), exponents.clone(), 0, c, true)
} else {
future_based_dense_multiexp_impl(pool, bases.clone(), exponents.clone(), skip, c, false)
};
futures.push(chunk_future);
skip += c;
}
let join = join_all(futures);
ChunksJoiner {
join,
c
}
}
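/// Worker for a single window of the dense future-based multiexp (no density map). Bases are
/// read directly from the shared vector, the bucket of the next exponent is prefetched one
/// iteration ahead, and exponents equal to one are only folded into the accumulator when
/// `handle_trivial` is set.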
fn future_based_dense_multiexp_impl<G: CurveAffine>(
pool: &Worker,
bases: Arc<Vec<G>>,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
skip: u32,
c: u32,
handle_trivial: bool
) -> WorkerFuture< <G as CurveAffine>::Projective, SynthesisError>
{
let this = {
let bases = bases.clone();
let exponents = exponents.clone();
pool.compute(move || {
let mut acc = G::Projective::zero();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); (1 << c) - 1];
let zero = <G::Engine as ScalarEngine>::Fr::zero().into_repr();
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
let padding = Arc::new(vec![zero]);
let mask = 1 << c;
for ((&exp, base), &next_exp) in exponents.iter()
.zip(bases.iter())
.zip(exponents.iter().skip(1).chain(padding.iter())) {
if next_exp != zero && next_exp != one {
let mut next_exp = next_exp;
next_exp.shr(skip);
let next_exp = next_exp.as_ref()[0] % mask;
if next_exp != 0 {
let p: *const <G as CurveAffine>::Projective = &buckets[(next_exp - 1) as usize];
crate::prefetch::prefetch_l1_pointer(p);
}
}
if exp == zero {
continue
} else if exp == one {
if handle_trivial {
acc.add_assign_mixed(base);
} else {
continue
}
} else {
let mut exp = exp;
exp.shr(skip);
let exp = exp.as_ref()[0] % mask;
if exp != 0 {
(&mut buckets[(exp - 1) as usize]).add_assign_mixed(base);
} else {
continue;
}
}
}
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
Ok(acc)
})
};
this
}
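/// Variant of the dense window worker that stages bases per bucket: every bucket owns a small
/// buffer of up to `BUFFER_SIZE` affine points that is flushed into the projective bucket when
/// it fills up (and once more at the end), so the mixed additions happen in batches.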
fn future_based_buffered_dense_multiexp_impl<G: CurveAffine>(
pool: &Worker,
bases: Arc<Vec<G>>,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>,
skip: u32,
c: u32,
handle_trivial: bool
) -> WorkerFuture< <G as CurveAffine>::Projective, SynthesisError>
{
let this = {
let bases = bases.clone();
let exponents = exponents.clone();
pool.compute(move || {
let mut acc = G::Projective::zero();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); (1 << c) - 1];
let zero = <G::Engine as ScalarEngine>::Fr::zero().into_repr();
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
let mask = 1 << c;
const BUFFER_SIZE: usize = 64;
let mut buffers: Vec<Vec<G>> = vec![Vec::with_capacity(BUFFER_SIZE); (1 << c) - 1];
for (&exp, &base) in exponents.iter()
.zip(bases.iter()) {
if exp == zero {
continue
} else if exp == one {
if handle_trivial {
acc.add_assign_mixed(&base);
} else {
continue
}
} else {
let mut exp = exp;
exp.shr(skip);
let exp = exp.as_ref()[0] % mask;
if exp != 0 {
let idx = (exp - 1) as usize;
if buffers[idx].len() == BUFFER_SIZE {
let mut el = buckets[idx];
for b in buffers[idx].iter(){
el.add_assign_mixed(&b);
}
buffers[idx].truncate(0);
buckets[idx] = el;
}
buffers[idx].push(base);
} else {
continue;
}
}
}
for (idx, buffer) in buffers.into_iter().enumerate() {
let mut el = buckets[idx];
for b in buffer.into_iter() {
el.add_assign_mixed(&b);
}
buckets[idx] = el;
}
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
Ok(acc)
})
};
this
}
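/// Public entry point for the density-map driven multiexponentiation. The window width is
/// picked as roughly `ln(n)`, one future is spawned per `c`-bit window of the scalar, and the
/// per-window results are recombined by `ChunksJoiner`.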
pub fn multiexp<Q, D, G, S>(
pool: &Worker,
bases: S,
density_map: D,
exponents: Arc<Vec<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>>
) -> ChunksJoiner< <G as CurveAffine>::Projective >
where for<'a> &'a Q: QueryDensity,
D: Send + Sync + 'static + Clone + AsRef<Q>,
G: CurveAffine,
S: SourceBuilder<G>
{
let c = if exponents.len() < 32 {
3u32
} else {
(f64::from(exponents.len() as u32)).ln().ceil() as u32
};
if let Some(query_size) = density_map.as_ref().get_query_size() {
assert!(query_size == exponents.len());
}
let mut skip = 0;
let mut futures = Vec::with_capacity((<G::Engine as ScalarEngine>::Fr::NUM_BITS / c + 1) as usize);
while skip < <G::Engine as ScalarEngine>::Fr::NUM_BITS {
let chunk_future = if skip == 0 {
multiexp_inner_impl(pool, bases.clone(), density_map.clone(), exponents.clone(), 0, c, true)
} else {
multiexp_inner_impl(pool, bases.clone(), density_map.clone(), exponents.clone(), skip, c, false)
};
futures.push(chunk_future);
skip += c;
}
let join = join_all(futures);
ChunksJoiner {
join,
c
}
}
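/// Same as `multiexp`, but with a caller-supplied window width `c`.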
pub(crate) fn multiexp_with_fixed_width<Q, D, G, S>(
pool: &Worker,
bases: S,
density_map: D,
exponents: Arc<Vec<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>>,
c: u32
) -> ChunksJoiner< <G as CurveAffine>::Projective >
where for<'a> &'a Q: QueryDensity,
D: Send + Sync + 'static + Clone + AsRef<Q>,
G: CurveAffine,
S: SourceBuilder<G>
{
if let Some(query_size) = density_map.as_ref().get_query_size() {
assert!(query_size == exponents.len());
}
let mut skip = 0;
let mut futures = Vec::with_capacity((<G::Engine as ScalarEngine>::Fr::NUM_BITS / c + 1) as usize);
while skip < <G::Engine as ScalarEngine>::Fr::NUM_BITS {
let chunk_future = if skip == 0 {
multiexp_inner_impl(pool, bases.clone(), density_map.clone(), exponents.clone(), 0, c, true)
} else {
multiexp_inner_impl(pool, bases.clone(), density_map.clone(), exponents.clone(), skip, c, false)
};
futures.push(chunk_future);
skip += c;
}
let join = join_all(futures);
ChunksJoiner {
join,
c
}
}
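/// Future that drives all per-window multiexp futures to completion and then folds their
/// results together, doubling `c` times between adjacent windows (see `join_chunks`).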
pub struct ChunksJoiner<G: CurveProjective> {
join: JoinAll< WorkerFuture<G, SynthesisError> >,
c: u32
}
impl<G: CurveProjective> Future for ChunksJoiner<G> {
type Output = Result<G, SynthesisError>;
fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output>
{
let c = self.as_ref().c;
let join = unsafe { self.map_unchecked_mut(|s| &mut s.join) };
match join.poll(cx) {
Poll::Ready(v) => {
let v = join_chunks(v, c);
return Poll::Ready(v);
},
Poll::Pending => {
return Poll::Pending;
}
}
}
}
impl<G: CurveProjective> ChunksJoiner<G> {
pub fn wait(self) -> <Self as Future>::Output {
block_on(self)
}
}
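/// Folds the per-window results starting from the highest window: the running total is doubled
/// `c` times before the next lower window is added, i.e. a Horner evaluation of the windowed
/// decomposition.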
fn join_chunks<G: CurveProjective>
(chunks: Vec<Result<G, SynthesisError>>, c: u32) -> Result<G, SynthesisError> {
if chunks.len() == 0 {
return Ok(G::zero());
}
let mut iter = chunks.into_iter().rev();
let higher = iter.next().expect("is some chunk result");
let mut higher = higher?;
for chunk in iter {
let this = chunk?;
for _ in 0..c {
higher.double();
}
higher.add_assign(&this);
}
Ok(higher)
}
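/// Synchronous dense multiexponentiation over equal-length `bases` and `exponents` slices,
/// recursing over the scalar windows via `dense_multiexp_inner`.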
#[allow(dead_code)]
pub fn dense_multiexp<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let c = if exponents.len() < 32 {
3u32
} else {
let chunk_len = pool.get_chunk_size(exponents.len());
(f64::from(chunk_len as u32)).ln().ceil() as u32
};
dense_multiexp_inner(pool, bases, exponents, 0, c, true)
}
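/// Processes one `c`-bit window (starting at bit `skip`): the input is split into per-thread
/// chunks, every chunk fills its own bucket set and adds its partial sum into a shared
/// `Mutex`-protected accumulator, and the function then recurses into the next window, doubling
/// the recursive result `c` times before adding this window's contribution.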
fn dense_multiexp_inner<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
mut skip: u32,
c: u32,
handle_trivial: bool
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
use std::sync::{Mutex};
let this = {
let this_region = Mutex::new(<G as CurveAffine>::Projective::zero());
let arc = Arc::new(this_region);
pool.scope(bases.len(), |scope, chunk| {
for (base, exp) in bases.chunks(chunk).zip(exponents.chunks(chunk)) {
let this_region_rwlock = arc.clone();
scope.spawn(move |_| {
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); (1 << c) - 1];
let mut acc = G::Projective::zero();
let zero = <G::Engine as ScalarEngine>::Fr::zero().into_repr();
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
for (base, &exp) in base.iter().zip(exp.iter()) {
if exp != zero {
if exp == one {
if handle_trivial {
acc.add_assign_mixed(base);
}
} else {
let mut exp = exp;
exp.shr(skip);
let exp = exp.as_ref()[0] % (1 << c);
if exp != 0 {
buckets[(exp - 1) as usize].add_assign_mixed(base);
}
}
}
}
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
let mut guard = match this_region_rwlock.lock() {
Ok(guard) => guard,
Err(_) => {
panic!("poisoned!");
}
};
(*guard).add_assign(&acc);
});
}
});
let this_region = Arc::try_unwrap(arc).unwrap();
let this_region = this_region.into_inner().unwrap();
this_region
};
skip += c;
if skip >= <G::Engine as ScalarEngine>::Fr::NUM_BITS {
return Ok(this);
} else {
let mut next_region = dense_multiexp_inner(
pool, bases, exponents, skip, c, false).unwrap();
for _ in 0..c {
next_region.double();
}
next_region.add_assign(&this);
return Ok(next_region);
}
}
fn get_window_size_for_length(length: usize, chunk_length: usize) -> u32 {
if length < 32 {
return 3u32;
}
// Round ln(chunk_length) to the nearest integer rather than always rounding up.
let exact = (f64::from(chunk_length as u32)).ln();
let floor = exact.floor();
if exact > floor + 0.5f64 {
exact.ceil() as u32
} else {
floor as u32
}
}
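/// Experimental dense multiexp in which every thread walks the whole input: threads pick
/// interleaved `READ_BY`-sized sub-chunks, process all scalar windows one after another over
/// their stride, and synchronize on a barrier after every synchronization chunk; the
/// per-thread partial results are summed at the end. Threads are spawned with an enlarged
/// stack because the read buffers live on the stack.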
#[allow(dead_code)]
pub fn dense_multiexp_uniform<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let c = if exponents.len() < 32 {
3u32
} else {
let chunk_len = pool.get_chunk_size(exponents.len());
(f64::from(chunk_len as u32)).ln().ceil() as u32
};
let num_threads = pool.cpus;
let mut subresults = vec![<G as CurveAffine>::Projective::zero(); num_threads];
use std::sync::{Arc, Barrier};
const SYNCHRONIZATION_STEP: usize = 1 << 17;
const READ_BY: usize = 1 << 7;
const MIN_STACK_SIZE: usize = 1 << 21;
assert!(SYNCHRONIZATION_STEP % READ_BY == 0);
// Round up so that a trailing partial synchronization chunk still has a barrier to wait on.
let num_rounds = (bases.len() + SYNCHRONIZATION_STEP - 1) / SYNCHRONIZATION_STEP;
let limit = ((<G::Engine as ScalarEngine>::Fr::NUM_BITS / c) + 1) as usize;
let mut buckets_schedule = Vec::with_capacity(limit);
let mut tmp = <G::Engine as ScalarEngine>::Fr::NUM_BITS;
for _ in 0..limit {
if tmp != 0 {
let bits = if tmp >= c {
tmp -= c;
c
} else {
let tt = tmp;
tmp = 0;
tt
};
buckets_schedule.push(bits);
} else {
break;
}
}
let mut barriers = Vec::with_capacity(buckets_schedule.len());
for _ in 0..buckets_schedule.len() {
let mut tt = Vec::with_capacity(num_rounds);
for _ in 0..num_rounds {
let t = Barrier::new(num_threads);
tt.push(t);
}
barriers.push(tt);
}
let barrs = &barriers;
let b_schedule = &buckets_schedule;
let g1_proj_size = std::mem::size_of::<<G as CurveAffine>::Projective>();
let scalar_size = std::mem::size_of::<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>();
let _space_to_keep_bases_on_stack = (1 << c) * g1_proj_size;
let mut stack_size = (g1_proj_size + scalar_size) * READ_BY;
if stack_size < MIN_STACK_SIZE {
stack_size = MIN_STACK_SIZE;
}
let thread_step = num_threads * READ_BY;
use crossbeam::thread;
thread::scope(|s| {
for (thread_idx, subresult) in subresults.iter_mut().enumerate() {
let builder = s.builder().stack_size(stack_size);
builder.spawn(move |_| {
let limit = bases.len();
let bases = &bases[..limit];
let exponents = &exponents[..limit];
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); 1 << c];
let mut skip_bits = 0;
for (chunk_schedule_idx, window_size) in b_schedule.iter().enumerate() {
let mask = (1 << window_size) - 1;
if chunk_schedule_idx != 0 {
(&mut buckets).truncate(0);
(&mut buckets).resize(1 << window_size, <G as CurveAffine>::Projective::zero());
}
for (i, (bases, exponents)) in bases.chunks(SYNCHRONIZATION_STEP)
.zip(exponents.chunks(SYNCHRONIZATION_STEP))
.enumerate() {
let num_subchunks = bases.len() / thread_step;
let remainder_start = num_subchunks * thread_step;
let remainder_end = bases.len();
let mut bases_holder = [G::zero(); READ_BY];
let mut exponents_holder = [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr::default(); READ_BY];
for subchunk_start_idx in ((thread_idx*READ_BY)..remainder_start).step_by(thread_step) {
bases_holder.copy_from_slice(&bases[subchunk_start_idx..(subchunk_start_idx+READ_BY)]);
exponents_holder.copy_from_slice(&exponents[subchunk_start_idx..(subchunk_start_idx+READ_BY)]);
let mut exps_iter = exponents_holder.iter();
let mut bases_iter = bases_holder.iter();
let mut tmp = *exps_iter.next().unwrap();
tmp.shr(skip_bits);
let mut this_exp = (tmp.as_ref()[0] & mask) as usize;
let mut base_prefetch_counter = 0;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
for (&next_exp, this_base) in exps_iter.zip(&mut bases_iter) {
if this_exp != 0 {
buckets[this_exp].add_assign_mixed(this_base);
}
let mut next_exp = next_exp;
next_exp.shr(skip_bits);
this_exp = (next_exp.as_ref()[0] & mask) as usize;
base_prefetch_counter += 1;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
}
if this_exp != 0 {
let last_base = bases_iter.next().unwrap();
buckets[this_exp].add_assign_mixed(last_base);
}
}
let remainder_start = remainder_start + (thread_idx*READ_BY);
if remainder_start < remainder_end {
let remainder_end = if remainder_start + READ_BY > remainder_end {
remainder_end
} else {
remainder_start + READ_BY
};
let exponents_holder = &exponents[remainder_start..remainder_end];
let bases_holder = &bases[remainder_start..remainder_end];
let mut exps_iter = exponents_holder.iter();
let mut bases_iter = bases_holder.iter();
let mut tmp = *exps_iter.next().unwrap();
tmp.shr(skip_bits);
let mut this_exp = (tmp.as_ref()[0] & mask) as usize;
let mut base_prefetch_counter = 0;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
for (&next_exp, this_base) in exps_iter.zip(&mut bases_iter) {
if this_exp != 0 {
buckets[this_exp].add_assign_mixed(this_base);
}
let mut next_exp = next_exp;
next_exp.shr(skip_bits);
this_exp = (next_exp.as_ref()[0] & mask) as usize;
base_prefetch_counter += 1;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
}
if this_exp != 0 {
let last_base = bases_iter.next().unwrap();
buckets[this_exp].add_assign_mixed(last_base);
}
}
(&barrs[chunk_schedule_idx][i]).wait();
}
// Reduce the buckets with the running-sum trick: iterating from the highest digit down,
// bucket `d` ends up contributing `d` times (bucket 0 is never filled and is skipped).
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.iter().skip(1).rev() {
running_sum.add_assign(exp);
acc.add_assign(&running_sum);
}
for _ in 0..skip_bits {
acc.double();
}
subresult.add_assign(&acc);
skip_bits += window_size;
}
}).unwrap();
}
}).unwrap();
let mut result = <G as CurveAffine>::Projective::zero();
for t in subresults.into_iter() {
result.add_assign(&t);
}
Ok(result)
}
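/// Dispatches to one of the monomorphized fixed-window kernels generated by
/// `construct_stack_multiexp!`, with the window width derived from the per-thread chunk length.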
pub fn stack_allocated_dense_multiexp<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let chunk_len = pool.get_chunk_size(exponents.len());
let c = get_window_size_for_length(exponents.len(), chunk_len);
match c {
12 => stack_allocated_dense_multiexp_12(pool, bases, exponents),
13 => stack_allocated_dense_multiexp_13(pool, bases, exponents),
14 => stack_allocated_dense_multiexp_14(pool, bases, exponents),
15 => stack_allocated_dense_multiexp_15(pool, bases, exponents),
16 => stack_allocated_dense_multiexp_16(pool, bases, exponents),
17 => stack_allocated_dense_multiexp_17(pool, bases, exponents),
18 => stack_allocated_dense_multiexp_18(pool, bases, exponents),
_ => unimplemented!("not implemented for windows = {}", c)
}
}
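/// Same dispatch as `stack_allocated_dense_multiexp`, but the window width is derived from the
/// full input length rather than from the per-thread chunk length.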
pub fn stack_allocated_uncompensated_dense_multiexp<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let c = if exponents.len() < 32 {
3u32
} else {
(f64::from(exponents.len() as u32)).ln().ceil() as u32
};
match c {
12 => stack_allocated_dense_multiexp_12(pool, bases, exponents),
13 => stack_allocated_dense_multiexp_13(pool, bases, exponents),
14 => stack_allocated_dense_multiexp_14(pool, bases, exponents),
15 => stack_allocated_dense_multiexp_15(pool, bases, exponents),
16 => stack_allocated_dense_multiexp_16(pool, bases, exponents),
17 => stack_allocated_dense_multiexp_17(pool, bases, exponents),
18 => stack_allocated_dense_multiexp_18(pool, bases, exponents),
_ => unimplemented!("not implemented for windows = {}", c)
}
}
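/// Reference version of the stack-allocated kernel with a hard-coded 13-bit window; it mirrors
/// the body generated by `construct_stack_multiexp!` and asserts that the requested `c` matches
/// `WINDOW_SIZE`.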
fn stack_allocated_dense_multiexp_inner<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
c: u32
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
const WINDOW_SIZE: usize = 13;
const SYNCHRONIZATION_STEP: usize = 1 << 17;
const READ_BY: usize = 1 << 7;
const MIN_STACK_SIZE: usize = 1 << 21;
const NUM_BUCKETS: usize = 1 << WINDOW_SIZE;
const MASK: u64 = (1 << WINDOW_SIZE) - 1;
assert!(SYNCHRONIZATION_STEP % READ_BY == 0);
assert_eq!(c as usize, WINDOW_SIZE);
let num_threads = pool.cpus;
let mut subresults = vec![<G as CurveAffine>::Projective::zero(); num_threads];
use std::sync::{Arc, Barrier};
// Round up so that a trailing partial synchronization chunk still has a barrier to wait on.
let num_rounds = (bases.len() + SYNCHRONIZATION_STEP - 1) / SYNCHRONIZATION_STEP;
let mut num_windows = (<G::Engine as ScalarEngine>::Fr::NUM_BITS / c) as usize;
if <G::Engine as ScalarEngine>::Fr::NUM_BITS % c != 0 {
num_windows += 1;
}
let mut barriers = Vec::with_capacity(num_windows);
for _ in 0..num_windows {
let mut tt = Vec::with_capacity(num_rounds);
for _ in 0..num_rounds {
let t = Barrier::new(num_threads);
tt.push(t);
}
barriers.push(tt);
}
let barrs = &barriers;
let g1_projective_size = std::mem::size_of::<<G as CurveAffine>::Projective>();
let g1_affine_size = std::mem::size_of::<G>();
let scalar_size = std::mem::size_of::<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>();
let usize_size = std::mem::size_of::<usize>();
let space_to_keep_buckets_on_stack = (1 << WINDOW_SIZE) * g1_projective_size;
let mut stack_size = (g1_affine_size + scalar_size + usize_size) * READ_BY + space_to_keep_buckets_on_stack + (1<<16);
if stack_size < MIN_STACK_SIZE {
stack_size = MIN_STACK_SIZE;
}
let thread_step = num_threads * READ_BY;
use crossbeam::thread;
thread::scope(|s| {
for (thread_idx, subresult) in subresults.iter_mut().enumerate() {
let builder = s.builder().stack_size(stack_size);
builder.spawn(move |_| {
let limit = bases.len();
let bases = &bases[..limit];
let exponents = &exponents[..limit];
let mut buckets = [<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let mut skip_bits = 0;
for chunk_schedule_idx in 0..num_windows {
if chunk_schedule_idx != 0 {
buckets = [<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
}
for (i, (bases, exponents)) in bases.chunks(SYNCHRONIZATION_STEP)
.zip(exponents.chunks(SYNCHRONIZATION_STEP))
.enumerate() {
let num_subchunks = bases.len() / thread_step;
let remainder_start = num_subchunks * thread_step;
let remainder_end = bases.len();
let mut bases_holder = [G::zero(); READ_BY];
let mut exponents_holder = [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr::default(); READ_BY];
let mut indexes_holder = [0usize; READ_BY];
for subchunk_start_idx in ((thread_idx*READ_BY)..remainder_start).step_by(thread_step) {
exponents_holder.copy_from_slice(&exponents[subchunk_start_idx..(subchunk_start_idx+READ_BY)]);
for (index, &exp) in indexes_holder.iter_mut().zip(exponents_holder.iter()) {
let mut exp = exp;
exp.shr(skip_bits);
*index = (exp.as_ref()[0] & MASK) as usize;
}
bases_holder.copy_from_slice(&bases[subchunk_start_idx..(subchunk_start_idx+READ_BY)]);
let mut bases_iter = bases_holder.iter();
let mut indexes_iter = indexes_holder.iter();
let mut this_exp = *(&mut indexes_iter).next().unwrap();
let mut base_prefetch_counter = 0;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
for (&index, this_base) in indexes_iter.zip(&mut bases_iter) {
if this_exp != 0 {
buckets[this_exp].add_assign_mixed(&this_base);
}
this_exp = index;
base_prefetch_counter += 1;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
}
if this_exp != 0 {
let last_base = bases_iter.next().unwrap();
buckets[this_exp].add_assign_mixed(&last_base);
}
}
let remainder_start = remainder_start + (thread_idx*READ_BY);
if remainder_start < remainder_end {
let remainder_end = if remainder_start + READ_BY > remainder_end {
remainder_end
} else {
remainder_start + READ_BY
};
let limit = remainder_end - remainder_start;
exponents_holder[..limit].copy_from_slice(&exponents[remainder_start..remainder_end]);
for (index, &exp) in indexes_holder.iter_mut().zip(exponents_holder.iter()) {
let mut exp = exp;
exp.shr(skip_bits);
*index = (exp.as_ref()[0] & MASK) as usize;
}
bases_holder[..limit].copy_from_slice(&bases[remainder_start..remainder_end]);
let mut bases_iter = bases_holder[..limit].iter();
let mut indexes_iter = indexes_holder[..limit].iter();
let mut this_exp = *indexes_iter.next().unwrap();
let mut base_prefetch_counter = 0;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
for (&index, this_base) in indexes_iter.zip(&mut bases_iter) {
if this_exp != 0 {
buckets[this_exp].add_assign_mixed(this_base);
}
this_exp = index;
base_prefetch_counter += 1;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
}
if this_exp != 0 {
let last_base = bases_iter.next().unwrap();
buckets[this_exp].add_assign_mixed(last_base);
}
}
(&barrs[chunk_schedule_idx][i]).wait();
}
// Reduce the buckets with the running-sum trick: iterating from the highest digit down,
// bucket `d` ends up contributing `d` times (bucket 0 is never filled and is skipped).
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.iter().skip(1).rev() {
running_sum.add_assign(exp);
acc.add_assign(&running_sum);
}
for _ in 0..skip_bits {
acc.double();
}
subresult.add_assign(&acc);
skip_bits += WINDOW_SIZE as u32;
}
}).unwrap();
}
}).unwrap();
let mut result = <G as CurveAffine>::Projective::zero();
for t in subresults.into_iter() {
result.add_assign(&t);
}
Ok(result)
}
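/// Benchmark scaffolding for the map step only: every worker runs `serial_multiexp_inner` on its
/// chunk, but the per-chunk results are discarded and the function always returns the zero point.
/// The accumulating variant is `map_reduce_multiexp_over_fixed_window` below.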
pub fn map_reduce_multiexp<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let c = if exponents.len() < 32 {
3u32
} else {
let chunk_len = pool.get_chunk_size(exponents.len());
(f64::from(chunk_len as u32)).ln().ceil() as u32
};
let chunk_len = pool.get_chunk_size(exponents.len());
pool.scope(0, |scope, _| {
for (b, e) in bases.chunks(chunk_len).zip(exponents.chunks(chunk_len)) {
scope.spawn(move |_| {
serial_multiexp_inner(b, e, c).unwrap();
});
}
});
Ok(<G as CurveAffine>::Projective::zero())
}
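/// Map-reduce multiexp with a caller-supplied window width: every worker computes a full serial
/// multiexp over its chunk of bases and exponents, and the per-chunk results are summed. Fields
/// with 254-bit scalars are routed to the monomorphized inner kernels.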
pub fn map_reduce_multiexp_over_fixed_window<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
c: u32
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if <G::Engine as ScalarEngine>::Fr::NUM_BITS == 254 {
return map_reduce_multiexp_over_fixed_window_254(pool, bases, exponents, c);
}
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let chunk_len = pool.get_chunk_size(exponents.len());
let num_threads = pool.cpus;
let mut subresults = vec![<G as CurveAffine>::Projective::zero(); num_threads];
pool.scope(0, |scope, _| {
for ((b, e), s) in bases.chunks(chunk_len).zip(exponents.chunks(chunk_len)).zip(subresults.iter_mut()) {
scope.spawn(move |_| {
*s = serial_multiexp_inner(b, e, c).unwrap();
});
}
});
let mut result = <G as CurveAffine>::Projective::zero();
for s in subresults.into_iter() {
result.add_assign(&s);
}
Ok(result)
}
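/// Spawns one worker per `c`-bit scalar window; each worker runs `buffered_multiexp_inner` over
/// the full input for its window, and the per-window results (already shifted into place by the
/// inner routine) are summed.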
pub fn buffered_multiexp_over_fixed_window_and_buffer_size<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
c: u32,
buffer_size: usize
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let mut num_runs = (<G::Engine as ScalarEngine>::Fr::NUM_BITS / c) as usize;
if <G::Engine as ScalarEngine>::Fr::NUM_BITS % c != 0 {
num_runs += 1;
}
let mut subresults = vec![<G as CurveAffine>::Projective::zero(); num_runs];
pool.scope(0, |scope, _| {
let mut skip = 0u32;
for s in subresults.iter_mut() {
scope.spawn(move |_| {
*s = buffered_multiexp_inner(bases, exponents, c, skip, buffer_size).unwrap();
});
skip += c;
}
});
let mut result = <G as CurveAffine>::Projective::zero();
for s in subresults.into_iter() {
result.add_assign(&s);
}
Ok(result)
}
fn map_reduce_multiexp_over_fixed_window_254<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
c: u32
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let chunk_len = pool.get_chunk_size(exponents.len());
let num_threads = pool.cpus;
let mut subresults = vec![<G as CurveAffine>::Projective::zero(); num_threads];
pool.scope(0, |scope, _| {
for ((b, e), s) in bases.chunks(chunk_len).zip(exponents.chunks(chunk_len)).zip(subresults.iter_mut()) {
scope.spawn(move |_| {
let subres = match c {
7 => map_reduce_multiexp_254_inner_7(b, e),
8 => map_reduce_multiexp_254_inner_8(b, e),
9 => map_reduce_multiexp_254_inner_9(b, e),
10 => map_reduce_multiexp_254_inner_10(b, e),
11 => map_reduce_multiexp_254_inner_11(b, e),
12 => map_reduce_multiexp_254_inner_12(b, e),
13 => map_reduce_multiexp_254_inner_13(b, e),
14 => map_reduce_multiexp_254_inner_14(b, e),
15 => map_reduce_multiexp_254_inner_15(b, e),
16 => map_reduce_multiexp_254_inner_16(b, e),
17 => map_reduce_multiexp_254_inner_17(b, e),
18 => map_reduce_multiexp_254_inner_18(b, e),
_ => unimplemented!("window size is not supported"),
};
*s = subres.expect("must calculate contribution from serial multiexp");
});
}
});
let mut result = <G as CurveAffine>::Projective::zero();
for s in subresults.into_iter() {
result.add_assign(&s);
}
Ok(result)
}
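/// Single-threaded multiexp over one chunk: a single pass over the input fills the buckets of
/// every scalar window at once (one bucket vector per window), then each window is reduced with
/// the running-sum trick and shifted into place by repeated doubling.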
fn serial_multiexp_inner<G: CurveAffine>(
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
c: u32
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
let num_buckets = (1 << c) - 1;
let mask: u64 = (1u64 << c) - 1u64;
let mut result = <G as CurveAffine>::Projective::zero();
let mut num_runs = (<G::Engine as ScalarEngine>::Fr::NUM_BITS / c) as usize;
if <G::Engine as ScalarEngine>::Fr::NUM_BITS % c != 0 {
num_runs += 1;
}
let mut buckets = vec![vec![<G as CurveAffine>::Projective::zero(); num_buckets]; num_runs as usize];
for (&base, &exp) in bases.into_iter().zip(exponents.into_iter()) {
// Walk the windows of this scalar: the low `c` bits give the bucket index for the
// current window, then the scalar is shifted down by `c` for the next window.
let mut exp = exp;
for window_index in 0..num_runs {
let index = (exp.as_ref()[0] & mask) as usize;
exp.shr(c);
if index != 0 {
buckets[window_index][index - 1].add_assign_mixed(&base);
}
}
}
let mut skip_bits = 0;
for b in buckets.into_iter() {
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in b.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..skip_bits {
acc.double();
}
result.add_assign(&acc);
skip_bits += c as u32;
}
Ok(result)
}
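/// Synthetic memory-access benchmark rather than a correct multiexp: there is only one bucket
/// per window, the same reduction is repeated `num_runs` times over those buckets, and the
/// accumulator is doubled a fixed 100 times, so the returned value is only meaningful as a
/// timing probe.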
fn test_memory_serial<G: CurveAffine>(
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
c: u32
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
let num_buckets = (1 << c) - 1;
let mask: u64 = (1u64 << c) - 1u64;
let mut result = <G as CurveAffine>::Projective::zero();
let mut num_runs = (<G::Engine as ScalarEngine>::Fr::NUM_BITS / c) as usize;
if <G::Engine as ScalarEngine>::Fr::NUM_BITS % c != 0 {
num_runs += 1;
}
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); num_runs as usize];
for (&base, &exp) in bases.into_iter().zip(exponents.into_iter()) {
for window_index in 0..num_runs {
let mut exp = exp;
exp.shr(c);
let index = (exp.as_ref()[0] & mask) as usize;
if index != 0 {
buckets[window_index].add_assign_mixed(&base);
}
}
}
for _ in 0..num_runs {
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..100 {
acc.double();
}
result.add_assign(&acc);
}
Ok(result)
}
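/// Serial worker for one scalar window: the bucket index comes from the `c` bits at offset
/// `skip`, bases are staged in per-bucket buffers of `buffer_size` affine points that are
/// flushed into the projective buckets when full, and the accumulator is doubled `skip` times
/// at the end so it can be added directly to the other windows' results.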
fn buffered_multiexp_inner<G: CurveAffine>(
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
c: u32,
skip: u32,
buffer_size: usize
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
let num_buckets = (1 << c) - 1;
let mask: u64 = (1u64 << c) - 1u64;
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); num_buckets];
let mut buffers: Vec<Vec<G>> = vec![Vec::with_capacity(buffer_size); num_buckets];
for (&base, &exp) in bases.into_iter().zip(exponents.into_iter()) {
let mut exp = exp;
exp.shr(skip);
let index = (exp.as_ref()[0] & mask) as usize;
if index != 0 {
let idx = index - 1;
if buffers[idx].len() == buffer_size {
let mut el = buckets[idx];
for b in buffers[idx].iter() {
el.add_assign_mixed(&b);
}
buckets[idx] = el;
buffers[idx].truncate(0);
}
buffers[idx].push(base);
}
}
for (idx, buffer) in buffers.into_iter().enumerate() {
let mut el = buckets[idx];
for b in buffer.into_iter() {
el.add_assign_mixed(&b);
}
buckets[idx] = el;
}
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..skip {
acc.double();
}
Ok(acc)
}
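/// Generates a dense multiexp kernel with a compile-time window size. Keeping the window
/// constant lets the bucket array and the per-thread read buffers live on the (enlarged) thread
/// stack, and the bucket indices of a read batch are precomputed into a small index buffer so
/// that buckets and bases can be prefetched ahead of the additions.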
macro_rules! construct_stack_multiexp {
( $visibility:vis fn $name:ident ( $n_words:tt ); ) => {
$visibility fn $name<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
const WINDOW_SIZE: usize = $n_words;
const SYNCHRONIZATION_STEP: usize = 1 << 17;
const READ_BY: usize = 1 << 7;
const MIN_STACK_SIZE: usize = 1 << 21;
const NUM_BUCKETS: usize = 1 << WINDOW_SIZE;
const MASK: u64 = (1 << WINDOW_SIZE) - 1;
assert!(SYNCHRONIZATION_STEP % READ_BY == 0);
let num_threads = pool.cpus;
let mut subresults = vec![<G as CurveAffine>::Projective::zero(); num_threads];
use std::sync::{Arc, Barrier};
// Round up so that a trailing partial synchronization chunk still has a barrier to wait on.
let num_rounds = (bases.len() + SYNCHRONIZATION_STEP - 1) / SYNCHRONIZATION_STEP;
let mut num_windows = (<G::Engine as ScalarEngine>::Fr::NUM_BITS as usize) / WINDOW_SIZE;
if (<G::Engine as ScalarEngine>::Fr::NUM_BITS as usize) % WINDOW_SIZE != 0 {
num_windows += 1;
}
let mut barriers = Vec::with_capacity(num_windows);
for _ in 0..num_windows {
let mut tt = Vec::with_capacity(num_rounds);
for _ in 0..num_rounds {
let t = Barrier::new(num_threads);
tt.push(t);
}
barriers.push(tt);
}
let barrs = &barriers;
let g1_projective_size = std::mem::size_of::<<G as CurveAffine>::Projective>();
let g1_affine_size = std::mem::size_of::<G>();
let scalar_size = std::mem::size_of::<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>();
let usize_size = std::mem::size_of::<usize>();
let space_to_keep_buckets_on_stack = (1 << WINDOW_SIZE) * g1_projective_size;
let mut stack_size = (g1_affine_size + scalar_size + usize_size) * READ_BY + space_to_keep_buckets_on_stack + (1<<16);
if stack_size < MIN_STACK_SIZE {
stack_size = MIN_STACK_SIZE;
}
let thread_step = num_threads * READ_BY;
use crossbeam::thread;
thread::scope(|s| {
for (thread_idx, subresult) in subresults.iter_mut().enumerate() {
let builder = s.builder().stack_size(stack_size);
builder.spawn(move |_| {
let limit = bases.len();
let bases = &bases[..limit];
let exponents = &exponents[..limit];
let mut buckets = [<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let mut skip_bits = 0;
for chunk_schedule_idx in 0..num_windows {
if chunk_schedule_idx != 0 {
buckets = [<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
}
for (i, (bases, exponents)) in bases.chunks(SYNCHRONIZATION_STEP)
.zip(exponents.chunks(SYNCHRONIZATION_STEP))
.enumerate() {
let num_subchunks = bases.len() / thread_step;
let remainder_start = num_subchunks * thread_step;
let remainder_end = bases.len();
let mut bases_holder = [G::zero(); READ_BY];
let mut exponents_holder = [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr::default(); READ_BY];
let mut indexes_holder = [0usize; READ_BY];
for subchunk_start_idx in ((thread_idx*READ_BY)..remainder_start).step_by(thread_step) {
exponents_holder.copy_from_slice(&exponents[subchunk_start_idx..(subchunk_start_idx+READ_BY)]);
for (index, &exp) in indexes_holder.iter_mut().zip(exponents_holder.iter()) {
let mut exp = exp;
exp.shr(skip_bits);
*index = (exp.as_ref()[0] & MASK) as usize;
}
bases_holder.copy_from_slice(&bases[subchunk_start_idx..(subchunk_start_idx+READ_BY)]);
let mut bases_iter = bases_holder.iter();
let mut indexes_iter = indexes_holder.iter();
let mut this_exp = *(&mut indexes_iter).next().unwrap();
let mut base_prefetch_counter = 0;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
for (&index, this_base) in indexes_iter.zip(&mut bases_iter) {
if this_exp != 0 {
buckets[this_exp].add_assign_mixed(&this_base);
}
this_exp = index;
base_prefetch_counter += 1;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
}
if this_exp != 0 {
let last_base = bases_iter.next().unwrap();
buckets[this_exp].add_assign_mixed(&last_base);
}
}
let remainder_start = remainder_start + (thread_idx*READ_BY);
if remainder_start < remainder_end {
let remainder_end = if remainder_start + READ_BY > remainder_end {
remainder_end
} else {
remainder_start + READ_BY
};
let limit = remainder_end - remainder_start;
exponents_holder[..limit].copy_from_slice(&exponents[remainder_start..remainder_end]);
for (index, &exp) in indexes_holder.iter_mut().zip(exponents_holder.iter()) {
let mut exp = exp;
exp.shr(skip_bits);
*index = (exp.as_ref()[0] & MASK) as usize;
}
bases_holder[..limit].copy_from_slice(&bases[remainder_start..remainder_end]);
let mut bases_iter = bases_holder[..limit].iter();
let mut indexes_iter = indexes_holder[..limit].iter();
let mut this_exp = *indexes_iter.next().unwrap();
let mut base_prefetch_counter = 0;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
for (&index, this_base) in indexes_iter.zip(&mut bases_iter) {
if this_exp != 0 {
buckets[this_exp].add_assign_mixed(this_base);
}
this_exp = index;
base_prefetch_counter += 1;
crate::prefetch::prefetch_l1_pointer(&buckets[this_exp]);
crate::prefetch::prefetch_l1_pointer(&bases_holder[base_prefetch_counter]);
}
if this_exp != 0 {
let last_base = bases_iter.next().unwrap();
buckets[this_exp].add_assign_mixed(last_base);
}
}
(&barrs[chunk_schedule_idx][i]).wait();
}
// Reduce the buckets with the running-sum trick: iterating from the highest digit down,
// bucket `d` ends up contributing `d` times (bucket 0 is never filled and is skipped).
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.iter().skip(1).rev() {
running_sum.add_assign(exp);
acc.add_assign(&running_sum);
}
for _ in 0..skip_bits {
acc.double();
}
subresult.add_assign(&acc);
skip_bits += WINDOW_SIZE as u32;
}
}).unwrap();
}
}).unwrap();
let mut result = <G as CurveAffine>::Projective::zero();
for t in subresults.into_iter() {
result.add_assign(&t);
}
Ok(result)
}
}
}
construct_stack_multiexp!(pub fn stack_allocated_dense_multiexp_12(12););
construct_stack_multiexp!(pub fn stack_allocated_dense_multiexp_13(13););
construct_stack_multiexp!(pub fn stack_allocated_dense_multiexp_14(14););
construct_stack_multiexp!(pub fn stack_allocated_dense_multiexp_15(15););
construct_stack_multiexp!(pub fn stack_allocated_dense_multiexp_16(16););
construct_stack_multiexp!(pub fn stack_allocated_dense_multiexp_17(17););
construct_stack_multiexp!(pub fn stack_allocated_dense_multiexp_18(18););
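/// Generates a serial per-chunk kernel for a fixed window size and a fixed scalar bit length:
/// for every base the bucket indices of all windows are computed (and the touched buckets
/// prefetched) before the mixed additions are performed. The compile-time window count is
/// cross-checked against the field's `NUM_BITS` at runtime.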
macro_rules! construct_map_reduce_multiexp_inner {
( $visibility:vis fn $name:ident ( $n_window: tt, $n_bits: tt); ) => {
$visibility fn $name<G: CurveAffine>(
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
const WINDOW: u32 = $n_window;
const NUM_BUCKETS: usize = (1 << WINDOW) - 1;
const MASK: u64 = (1u64 << WINDOW) - 1u64;
const NUM_RUNS: usize = ($n_bits / $n_window) + (($n_bits % $n_window > 0) as u32) as usize;
let mut num_runs_runtime = (<G::Engine as ScalarEngine>::Fr::NUM_BITS / WINDOW) as usize;
if <G::Engine as ScalarEngine>::Fr::NUM_BITS % WINDOW != 0 {
num_runs_runtime += 1;
}
assert_eq!(NUM_RUNS, num_runs_runtime, "invalid number of windows in runtime");
let mut result = <G as CurveAffine>::Projective::zero();
let mut buckets = vec![vec![<G as CurveAffine>::Projective::zero(); NUM_BUCKETS]; NUM_RUNS as usize];
let mut bucket_indexes = [0usize; NUM_RUNS];
for (&base, &exp) in bases.into_iter().zip(exponents.into_iter()) {
let mut exp = exp;
for (window_index, bucket_index) in bucket_indexes.iter_mut().enumerate() {
let index = (exp.as_ref()[0] & MASK) as usize;
exp.shr(WINDOW);
if index != 0 {
crate::prefetch::prefetch_l1_pointer(&buckets[window_index][index - 1]);
}
*bucket_index = index;
}
for (window_index, &index) in bucket_indexes.iter().enumerate() {
if index != 0 {
buckets[window_index][index - 1].add_assign_mixed(&base);
}
}
}
let mut skip_bits = 0;
for b in buckets.into_iter() {
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in b.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..skip_bits {
acc.double();
}
result.add_assign(&acc);
skip_bits += WINDOW;
}
Ok(result)
}
}
}
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_7(7, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_8(8, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_9(9, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_10(10, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_11(11, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_12(12, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_13(13, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_14(14, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_15(15, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_16(16, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_17(17, 254););
construct_map_reduce_multiexp_inner!(pub fn map_reduce_multiexp_254_inner_18(18, 254););
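/// Experimental benchmark for several multiexponentiations sharing one set of bases: one thread
/// is spawned per (exponent set, window) pair, the thread for the first window of the first
/// exponent set additionally streams the shared bases and exponents into L3 for everyone, and
/// barriers keep the threads roughly in step so the bases stay cache-resident. The per-thread
/// accumulators are dropped and the function always returns the zero point, so it is only
/// useful for timing.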
pub fn l3_shared_multexp<G: CurveAffine>(
pool: &Worker,
common_bases: & [G],
exponents_set: &[&[<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]],
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
for exponents in exponents_set.iter() {
if exponents.len() != common_bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
}
const NUM_WINDOWS: usize = 22;
const WINDOW_SIZE: u32 = 12;
const MASK: u64 = (1u64 << WINDOW_SIZE) - 1;
const NUM_BUCKETS: usize = 1 << WINDOW_SIZE;
const SYNCHRONIZATION_STEP: usize = 1 << 14;
assert!((WINDOW_SIZE as usize) * NUM_WINDOWS >= 254);
fn get_bits<Repr: PrimeFieldRepr>(el: Repr, start: usize) -> u64 {
const WINDOW_SIZE: u32 = 12;
const MASK: u64 = (1u64 << WINDOW_SIZE) - 1;
let end = (start + (WINDOW_SIZE as usize)) % 256;
let word_begin = start / 64;
let word_end = end / 64;
let result: u64;
if word_begin == word_end {
let shift = start % 64;
result = (el.as_ref()[word_begin] >> shift) & MASK;
} else {
let shift_low = start % 64;
let shift_high = 64 - shift_low;
result = ((el.as_ref()[word_begin] >> shift_low) | (el.as_ref()[word_end] << shift_high)) & MASK;
}
result
}
let limit = common_bases.len() - 2;
use std::sync::Barrier;
let num_threads = NUM_WINDOWS * exponents_set.len();
let num_sync_rounds = limit / SYNCHRONIZATION_STEP;
let mut barriers = Vec::with_capacity(num_sync_rounds);
for _ in 0..num_sync_rounds {
let t = Barrier::new(num_threads);
barriers.push(t);
}
let barrs = &barriers;
pool.scope(0, |scope, _| {
for (exponents_idx, exponents) in exponents_set.iter().enumerate() {
let mut start = 0;
if exponents_idx == 0 {
scope.spawn(move |_| {
let mut barriers_it = barrs.iter();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let tmp = exponents[0];
let mut this_index = get_bits(tmp, start) as usize;
for i in 0..limit {
unsafe { crate::prefetch::prefetch_l3(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l3(common_bases.get_unchecked(i+1)) };
unsafe { crate::prefetch::prefetch_l1(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l1(common_bases.get_unchecked(i+1)) };
let tmp = unsafe { *exponents.get_unchecked(i+1) };
let base = unsafe { *common_bases.get_unchecked(i) };
let next_index = get_bits(tmp, start) as usize;
if this_index != 0 {
unsafe { buckets.get_unchecked_mut(this_index-1).add_assign_mixed(&base) };
}
if next_index != 0 {
unsafe { crate::prefetch::prefetch_l1(buckets.get_unchecked(next_index-1)) };
this_index = next_index;
}
if i !=0 && (i & (SYNCHRONIZATION_STEP - 1)) == 0 {
barriers_it.next().unwrap().wait();
}
}
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..start {
acc.double();
}
});
} else {
scope.spawn(move |_| {
let mut barriers_it = barrs.iter();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let tmp = exponents[0];
let mut this_index = get_bits(tmp, start) as usize;
for i in 0..limit {
unsafe { crate::prefetch::prefetch_l3(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l1(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l1(common_bases.get_unchecked(i+1)) };
let tmp = unsafe { *exponents.get_unchecked(i+1) };
let base = unsafe { *common_bases.get_unchecked(i) };
let next_index = get_bits(tmp, start) as usize;
if this_index != 0 {
unsafe { buckets.get_unchecked_mut(this_index-1).add_assign_mixed(&base) };
}
if next_index != 0 {
unsafe { crate::prefetch::prefetch_l1(buckets.get_unchecked(next_index-1)) };
this_index = next_index;
}
if i !=0 && (i & (SYNCHRONIZATION_STEP - 1)) == 0 {
barriers_it.next().unwrap().wait();
}
}
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..start {
acc.double();
}
});
}
for _ in 1..NUM_WINDOWS {
start += WINDOW_SIZE as usize;
scope.spawn(move |_| {
let mut barriers_it = barrs.iter();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let tmp = exponents[0];
let mut this_index = get_bits(tmp, start) as usize;
for i in 0..limit {
unsafe { crate::prefetch::prefetch_l1(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l1(common_bases.get_unchecked(i+1)) };
let tmp = unsafe { *exponents.get_unchecked(i+1) };
let base = unsafe { *common_bases.get_unchecked(i) };
let next_index = get_bits(tmp, start) as usize;
if this_index != 0 {
unsafe { buckets.get_unchecked_mut(this_index-1).add_assign_mixed(&base) };
}
if next_index != 0 {
unsafe { crate::prefetch::prefetch_l1(buckets.get_unchecked(next_index-1)) };
this_index = next_index;
}
if i !=0 && (i & (SYNCHRONIZATION_STEP - 1)) == 0 {
barriers_it.next().unwrap().wait();
}
}
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..start {
acc.double();
}
});
}
}
});
Ok(<G as CurveAffine>::Projective::zero())
}
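/// Variant of `l3_shared_multexp` without barriers or shared L3 prefetching (every thread only
/// prefetches into its own L1). Like its sibling it drops the per-thread accumulators and
/// returns the zero point, so it only serves as a timing comparison.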
pub fn l3_shared_multexp_with_local_access<G: CurveAffine>(
pool: &Worker,
common_bases: & [G],
exponents_set: &[&[<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]],
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
use std::sync::atomic::{AtomicUsize, Ordering};
static GLOBAL_THREAD_COUNT: AtomicUsize = AtomicUsize::new(0);
assert_eq!(exponents_set.len(), 2, "only support unroll by 2 for now");
for exponents in exponents_set.iter() {
if exponents.len() != common_bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
}
const NUM_WINDOWS: usize = 22;
const WINDOW_SIZE: u32 = 12;
const MASK: u64 = (1u64 << WINDOW_SIZE) - 1;
const NUM_BUCKETS: usize = 1 << WINDOW_SIZE;
assert!((WINDOW_SIZE as usize) * NUM_WINDOWS >= 254);
fn get_bits<Repr: PrimeFieldRepr>(el: Repr, start: usize) -> u64 {
const WINDOW_SIZE: u32 = 12;
const MASK: u64 = (1u64 << WINDOW_SIZE) - 1;
let end = (start + (WINDOW_SIZE as usize)) % 256;
let word_begin = start / 64;
let word_end = end / 64;
let result: u64;
if word_begin == word_end {
let shift = start % 64;
result = (el.as_ref()[word_begin] >> shift) & MASK;
} else {
let shift_low = start % 64;
let shift_high = 64 - shift_low;
result = ((el.as_ref()[word_begin] >> shift_low) | (el.as_ref()[word_end] << shift_high)) & MASK;
}
result
}
let limit = common_bases.len() - 2;
pool.scope(0, |scope, _| {
for (exponents_idx, exponents) in exponents_set.iter().enumerate() {
let mut start = 0;
if exponents_idx == 0 {
scope.spawn(move |_| {
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let tmp = exponents[0];
let mut this_index = get_bits(tmp, start) as usize;
for i in 0..limit {
unsafe { crate::prefetch::prefetch_l1(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l1(common_bases.get_unchecked(i+1)) };
let tmp = unsafe { *exponents.get_unchecked(i+1) };
let base = unsafe { *common_bases.get_unchecked(i) };
let next_index = get_bits(tmp, start) as usize;
unsafe { buckets.get_unchecked_mut(this_index).add_assign_mixed(&base) };
unsafe { crate::prefetch::prefetch_l1(buckets.get_unchecked(next_index)) };
this_index = next_index;
}
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..start {
acc.double();
}
});
} else {
scope.spawn(move |_| {
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let tmp = exponents[0];
let mut this_index = get_bits(tmp, start) as usize;
for i in 0..limit {
unsafe { crate::prefetch::prefetch_l1(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l1(common_bases.get_unchecked(i+1)) };
let tmp = unsafe { *exponents.get_unchecked(i+1) };
let base = unsafe { *common_bases.get_unchecked(i) };
let next_index = get_bits(tmp, start) as usize;
unsafe { buckets.get_unchecked_mut(this_index).add_assign_mixed(&base) };
unsafe { crate::prefetch::prefetch_l1(buckets.get_unchecked(next_index)) };
this_index = next_index;
}
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..start {
acc.double();
}
});
}
for _ in 0..128 {
let _ = GLOBAL_THREAD_COUNT.fetch_add(1, Ordering::SeqCst);
}
for _ in 1..NUM_WINDOWS {
start += WINDOW_SIZE as usize;
scope.spawn(move |_| {
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); NUM_BUCKETS];
let tmp = exponents[0];
let mut this_index = get_bits(tmp, start) as usize;
for i in 0..limit {
unsafe { crate::prefetch::prefetch_l1(exponents.get_unchecked(i+2)) };
unsafe { crate::prefetch::prefetch_l1(common_bases.get_unchecked(i+1)) };
let tmp = unsafe { *exponents.get_unchecked(i+1) };
let base = unsafe { *common_bases.get_unchecked(i) };
let next_index = get_bits(tmp, start) as usize;
unsafe { buckets.get_unchecked_mut(this_index).add_assign_mixed(&base) };
unsafe { crate::prefetch::prefetch_l1(buckets.get_unchecked(next_index)) };
this_index = next_index;
}
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..start {
acc.double();
}
});
}
}
});
Ok(<G as CurveAffine>::Projective::zero())
}
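/// Dense multiexp that processes each per-thread chunk in groups of `UNROLL_BY` bases,
/// prefetching the group's bases and exponents (and the next exponent's bucket) before the
/// additions; otherwise it follows the same recursive window scheme as `dense_multiexp`.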
#[allow(dead_code)]
pub fn dense_unrolled_multiexp_with_prefetch<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let c = if exponents.len() < 32 {
3u32
} else {
let chunk_len = pool.get_chunk_size(exponents.len());
(f64::from(chunk_len as u32)).ln().ceil() as u32
};
dense_multiexp_inner_unrolled_with_prefetch(pool, bases, exponents, 0, c, true)
}
#[allow(dead_code)]
fn dense_multiexp_inner_unrolled_with_prefetch<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
mut skip: u32,
c: u32,
handle_trivial: bool
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
const UNROLL_BY: usize = 8;
use std::sync::{Mutex};
let this = {
let mask = (1u64 << c) - 1u64;
let this_region = Mutex::new(<G as CurveAffine>::Projective::zero());
let arc = Arc::new(this_region);
pool.scope(bases.len(), |scope, chunk| {
for (bases, exp) in bases.chunks(chunk).zip(exponents.chunks(chunk)) {
let this_region_rwlock = arc.clone();
scope.spawn(move |_| {
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); (1 << c) - 1];
let mut acc = G::Projective::zero();
let zero = <G::Engine as ScalarEngine>::Fr::zero().into_repr();
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
let unrolled_steps = bases.len() / UNROLL_BY;
let remainder = bases.len() % UNROLL_BY;
let mut offset = 0;
for _ in 0..unrolled_steps {
for i in 0..UNROLL_BY {
crate::prefetch::prefetch_l3_pointer(&bases[offset+i] as *const _);
crate::prefetch::prefetch_l3_pointer(&exp[offset+i] as *const _);
}
for i in 0..(UNROLL_BY-1) {
let this_exp = exp[offset+i];
let mut next_exp = exp[offset+i+1];
let base = &bases[offset+i];
if this_exp != zero {
if this_exp == one {
if handle_trivial {
acc.add_assign_mixed(base);
}
} else {
let mut this_exp = this_exp;
this_exp.shr(skip);
let this_exp = this_exp.as_ref()[0] & mask;
if this_exp != 0 {
buckets[(this_exp - 1) as usize].add_assign_mixed(base);
}
}
}
{
next_exp.shr(skip);
let next_exp = next_exp.as_ref()[0] & mask;
if next_exp != 0 {
crate::prefetch::prefetch_l3_pointer(&buckets[(next_exp - 1) as usize] as *const _);
}
}
}
let this_exp = exp[offset+(UNROLL_BY-1)];
let base = &bases[offset+(UNROLL_BY-1)];
if this_exp != zero {
if this_exp == one {
if handle_trivial {
acc.add_assign_mixed(base);
}
} else {
let mut this_exp = this_exp;
this_exp.shr(skip);
let this_exp = this_exp.as_ref()[0] & mask;
if this_exp != 0 {
buckets[(this_exp - 1) as usize].add_assign_mixed(base);
}
}
}
offset += UNROLL_BY;
}
for _ in 0..remainder {
let this_exp = exp[offset];
let base = &bases[offset];
if this_exp != zero {
if this_exp == one {
if handle_trivial {
acc.add_assign_mixed(base);
}
} else {
let mut this_exp = this_exp;
this_exp.shr(skip);
let this_exp = this_exp.as_ref()[0] & mask;
if this_exp != 0 {
buckets[(this_exp - 1) as usize].add_assign_mixed(base);
}
}
}
offset += 1;
}
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
let mut guard = match this_region_rwlock.lock() {
Ok(guard) => guard,
Err(_) => {
panic!("poisoned!");
}
};
(*guard).add_assign(&acc);
});
}
});
let this_region = Arc::try_unwrap(arc).unwrap();
let this_region = this_region.into_inner().unwrap();
this_region
};
skip += c;
if skip >= <G::Engine as ScalarEngine>::Fr::NUM_BITS {
return Ok(this);
} else {
let mut next_region = dense_multiexp_inner_unrolled_with_prefetch(
pool, bases, exponents, skip, c, false).unwrap();
for _ in 0..c {
next_region.double();
}
next_region.add_assign(&this);
return Ok(next_region);
}
}
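/// Dense multiexp experiment that double-buffers fixed 1024-element blocks of bases
/// and exponents so the inner loop always works on freshly copied, cache-resident
/// data. Each worker chunk must be a multiple of 1024 elements and contain at least
/// two such blocks (see the asserts in the implementation).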
#[allow(dead_code)]
pub fn dense_multiexp_with_manual_unrolling<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr]
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let c = if exponents.len() < 32 {
3u32
} else {
let chunk_len = pool.get_chunk_size(exponents.len());
(f64::from(chunk_len as u32)).ln().ceil() as u32
};
dense_multiexp_with_manual_unrolling_impl(pool, bases, exponents, 0, c, true)
}
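/// Window-by-window implementation behind `dense_multiexp_with_manual_unrolling`.
/// Buckets are indexed directly by the window value; bucket 0 acts as a discard
/// bucket for zero windows.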
#[allow(dead_code)]
fn dense_multiexp_with_manual_unrolling_impl<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
mut skip: u32,
c: u32,
handle_trivial: bool
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
const UNROLL_BY: usize = 1024;
use std::sync::{Mutex};
let this = {
let mask = (1u64 << c) - 1u64;
let this_region = Mutex::new(<G as CurveAffine>::Projective::zero());
let arc = Arc::new(this_region);
pool.scope(bases.len(), |scope, chunk| {
for (bases, exp) in bases.chunks(chunk).zip(exponents.chunks(chunk)) {
let this_region_rwlock = arc.clone();
scope.spawn(move |_| {
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); 1 << c];
let one = <G::Engine as ScalarEngine>::Fr::one().into_repr();
let mut this_chunk_exponents = [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr::default(); UNROLL_BY];
let mut next_chunk_exponents = [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr::default(); UNROLL_BY];
let mut this_chunk_bases = [G::zero(); UNROLL_BY];
let mut next_chunk_bases = [G::zero(); UNROLL_BY];
let unrolled_steps = bases.len() / UNROLL_BY;
assert!(unrolled_steps >= 2);
let remainder = bases.len() % UNROLL_BY;
assert_eq!(remainder, 0);
let mut start_idx = 0;
let mut end_idx = UNROLL_BY;
this_chunk_exponents.copy_from_slice(&exp[start_idx..end_idx]);
this_chunk_bases.copy_from_slice(&bases[start_idx..end_idx]);
start_idx += UNROLL_BY;
end_idx += UNROLL_BY;
next_chunk_exponents.copy_from_slice(&exp[start_idx..end_idx]);
next_chunk_bases.copy_from_slice(&bases[start_idx..end_idx]);
let mut intra_chunk_idx = 0;
let mut previous_exponent_index = 0;
let mut previous_base = G::zero();
let mut this_exponent_index = 0;
let mut this_base = G::zero();
let this_exp = this_chunk_exponents[intra_chunk_idx];
if this_exp == one {
// a "one" exponent only contributes to the lowest window; in later windows it
// maps to the discard bucket
this_exponent_index = if handle_trivial { 1 } else { 0 };
} else {
let mut this_exp = this_exp;
this_exp.shr(skip);
let this_exp = this_exp.as_ref()[0] & mask;
this_exponent_index = this_exp as usize;
}
this_base = this_chunk_bases[intra_chunk_idx];
previous_base = this_base;
previous_exponent_index = this_exponent_index;
crate::prefetch::prefetch_l2_pointer(&buckets[previous_exponent_index] as *const _);
intra_chunk_idx += 1;
for _ in 1..(unrolled_steps-1) {
while intra_chunk_idx < UNROLL_BY {
(&mut buckets[previous_exponent_index]).add_assign_mixed(&previous_base);
let this_exp = this_chunk_exponents[intra_chunk_idx];
if this_exp == one {
this_exponent_index = if handle_trivial { 1 } else { 0 };
} else {
let mut this_exp = this_exp;
this_exp.shr(skip);
let this_exp = this_exp.as_ref()[0] & mask;
this_exponent_index = this_exp as usize;
}
this_base = this_chunk_bases[intra_chunk_idx];
previous_base = this_base;
previous_exponent_index = this_exponent_index;
crate::prefetch::prefetch_l2_pointer(&buckets[previous_exponent_index] as *const _);
intra_chunk_idx += 1;
}
this_chunk_bases = next_chunk_bases;
this_chunk_exponents = next_chunk_exponents;
start_idx += UNROLL_BY;
end_idx += UNROLL_BY;
next_chunk_exponents.copy_from_slice(&exp[start_idx..end_idx]);
next_chunk_bases.copy_from_slice(&bases[start_idx..end_idx]);
intra_chunk_idx = 0;
}
// drain the two blocks that are still buffered: the current one and the last
// prefetched one
for last_pass in 0..2 {
while intra_chunk_idx < UNROLL_BY {
(&mut buckets[previous_exponent_index]).add_assign_mixed(&previous_base);
let this_exp = this_chunk_exponents[intra_chunk_idx];
if this_exp == one {
this_exponent_index = if handle_trivial { 1 } else { 0 };
} else {
let mut this_exp = this_exp;
this_exp.shr(skip);
let this_exp = this_exp.as_ref()[0] & mask;
this_exponent_index = this_exp as usize;
}
this_base = this_chunk_bases[intra_chunk_idx];
previous_base = this_base;
previous_exponent_index = this_exponent_index;
crate::prefetch::prefetch_l2_pointer(&buckets[previous_exponent_index] as *const _);
intra_chunk_idx += 1;
}
if last_pass == 0 {
this_chunk_bases = next_chunk_bases;
this_chunk_exponents = next_chunk_exponents;
intra_chunk_idx = 0;
}
}
// flush the last pending addition
(&mut buckets[previous_exponent_index]).add_assign_mixed(&previous_base);
// bucket 0 only collects zero windows and is discarded; the running-sum reduction
// below weights bucket `j` by `j`
let _: Vec<_> = buckets.drain(..1).collect();
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
let mut guard = match this_region_rwlock.lock() {
Ok(guard) => guard,
Err(_) => {
panic!("poisoned!");
}
};
(*guard).add_assign(&acc);
});
}
});
let this_region = Arc::try_unwrap(arc).unwrap();
let this_region = this_region.into_inner().unwrap();
this_region
};
skip += c;
if skip >= <G::Engine as ScalarEngine>::Fr::NUM_BITS {
return Ok(this);
} else {
let mut next_region = dense_multiexp_with_manual_unrolling_impl(
pool, bases, exponents, skip, c, false).unwrap();
for _ in 0..c {
next_region.double();
}
next_region.add_assign(&this);
return Ok(next_region);
}
}
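/// Second manual-unrolling experiment: for each cached block it first collects raw
/// pointers to the target buckets, prefetches them, and only then performs the
/// mixed additions in a second pass.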
#[allow(dead_code)]
fn dense_multiexp_with_manual_unrolling_impl_2<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: & [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr],
mut skip: u32,
c: u32,
_handle_trivial: bool
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
const CACHE_BY: usize = 1024;
use std::sync::{Mutex};
let this = {
let mask = (1u64 << c) - 1u64;
let this_region = Mutex::new(<G as CurveAffine>::Projective::zero());
let arc = Arc::new(this_region);
pool.scope(bases.len(), |scope, chunk| {
for (bases, exp) in bases.chunks(chunk).zip(exponents.chunks(chunk)) {
let this_region_rwlock = arc.clone();
scope.spawn(move |_| {
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); 1 << c];
let mut exponents_chunk = [<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr::default(); CACHE_BY];
let mut bases_chunk = [G::zero(); CACHE_BY];
let unrolled_steps = bases.len() / CACHE_BY;
assert!(unrolled_steps >= 2);
let remainder = bases.len() % CACHE_BY;
assert_eq!(remainder, 0);
use std::ptr::NonNull;
let mut basket_pointers_to_process = [(NonNull::< <G as CurveAffine>::Projective>::dangling(), G::zero()); CACHE_BY];
let basket_pointer = buckets.as_mut_ptr();
let mut start_idx = 0;
let mut end_idx = CACHE_BY;
for _ in 0..unrolled_steps {
exponents_chunk.copy_from_slice(&exp[start_idx..end_idx]);
bases_chunk.copy_from_slice(&bases[start_idx..end_idx]);
let mut bucket_idx = 0;
for (e, b) in exponents_chunk.iter().zip(bases_chunk.iter()) {
let mut this_exp = *e;
this_exp.shr(skip);
let this_exp = (this_exp.as_ref()[0] & mask) as usize;
if this_exp != 0 {
let ptr = unsafe { NonNull::new_unchecked(basket_pointer.add(this_exp)) };
basket_pointers_to_process[bucket_idx] = (ptr, *b);
bucket_idx += 1;
}
}
for i in 0..bucket_idx {
crate::prefetch::prefetch_l1_pointer(basket_pointers_to_process[i].0.as_ptr() as *const _);
}
if end_idx < bases.len() {
crate::prefetch::prefetch_l2_pointer(&bases[end_idx] as *const _);
crate::prefetch::prefetch_l2_pointer(&bases[end_idx+1] as *const _);
}
for i in 0..bucket_idx {
let (mut ptr, to_add) = basket_pointers_to_process[i];
let point_ref: &mut _ = unsafe { ptr.as_mut()};
point_ref.add_assign_mixed(&to_add);
}
start_idx += CACHE_BY;
end_idx += CACHE_BY;
}
drop(basket_pointer);
// bucket 0 only collects zero windows and is discarded; the running-sum reduction
// below weights bucket `j` by `j`
let _: Vec<_> = buckets.drain(..1).collect();
let mut acc = G::Projective::zero();
let mut running_sum = G::Projective::zero();
for exp in buckets.into_iter().rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
let mut guard = match this_region_rwlock.lock() {
Ok(guard) => guard,
Err(_) => {
panic!("poisoned!");
}
};
(*guard).add_assign(&acc);
});
}
});
let this_region = Arc::try_unwrap(arc).unwrap();
let this_region = this_region.into_inner().unwrap();
this_region
};
skip += c;
if skip >= <G::Engine as ScalarEngine>::Fr::NUM_BITS {
return Ok(this);
} else {
let mut next_region = dense_multiexp_with_manual_unrolling_impl_2(
pool, bases, exponents, skip, c, false).unwrap();
for _ in 0..c {
next_region.double();
}
next_region.add_assign(&this);
return Ok(next_region);
}
}
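/// Dense multiexp that takes ownership of the exponent vector and has every worker
/// send its partial result over a channel instead of updating a shared,
/// mutex-protected accumulator.
///
/// A minimal usage sketch (assuming `pool`, `bases` and an owned `scalars` vector
/// prepared as in the benchmarks below):
///
/// ```ignore
/// let result = dense_multiexp_consume(&pool, &bases, scalars)?;
/// ```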
#[allow(dead_code)]
pub fn dense_multiexp_consume<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: Vec<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
if exponents.len() != bases.len() {
return Err(SynthesisError::AssignmentMissing);
}
let c = if exponents.len() < 32 {
3u32
} else {
(f64::from(exponents.len() as u32)).ln().ceil() as u32
};
dense_multiexp_inner_consume(pool, bases, exponents, c)
}
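/// Each worker walks all `c`-bit windows of its chunk inside a single spawned
/// closure, prefetching the bucket of the next exponent while the current one is
/// being added.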
fn dense_multiexp_inner_consume<G: CurveAffine>(
pool: &Worker,
bases: & [G],
exponents: Vec<<<G::Engine as ScalarEngine>::Fr as PrimeField>::Repr>,
c: u32,
) -> Result<<G as CurveAffine>::Projective, SynthesisError>
{
use std::sync::mpsc::{channel};
let (tx, rx) = channel();
pool.scope(bases.len(), |scope, chunk| {
for (base, exp) in bases.chunks(chunk).zip(exponents.chunks(chunk)) {
let tx = tx.clone();
scope.spawn(move |_| {
let mut skip = 0;
let mut result = G::Projective::zero();
let mut buckets = vec![<G as CurveAffine>::Projective::zero(); 1 << c];
let zero = <G::Engine as ScalarEngine>::Fr::zero().into_repr();
let padding = Some(<G::Engine as ScalarEngine>::Fr::zero().into_repr());
let mask: u64 = (1 << c) - 1;
loop {
let mut next_bucket_index = {
let mut first_exp = exp[0];
first_exp.shr(skip);
(first_exp.as_ref()[0] & mask) as usize
};
let exp_next_constant_iter = exp.iter().skip(1);
let mut acc = G::Projective::zero();
for (base, &next_exp_to_prefetch) in base.iter()
.zip(exp_next_constant_iter.chain(padding.iter()))
{
let this_bucket_index = next_bucket_index;
{
if next_exp_to_prefetch != zero {
let mut e = next_exp_to_prefetch;
e.shr(skip);
next_bucket_index = (e.as_ref()[0] & mask) as usize;
if next_bucket_index > 0 {
crate::prefetch::prefetch_l3_pointer(&buckets[next_bucket_index] as *const _);
}
} else {
next_bucket_index = 0;
}
}
if this_bucket_index > 0 {
buckets[this_bucket_index].add_assign_mixed(base);
}
}
let mut running_sum = G::Projective::zero();
for exp in buckets.iter().skip(1).rev() {
running_sum.add_assign(&exp);
acc.add_assign(&running_sum);
}
for _ in 0..skip {
acc.double();
}
result.add_assign(&acc);
skip += c;
if skip >= <G::Engine as ScalarEngine>::Fr::NUM_BITS {
tx.send(result).unwrap();
break;
} else {
buckets.truncate(0);
buckets.resize(1 << c, <G as CurveAffine>::Projective::zero());
}
}
});
}
});
let mut result = <G as CurveAffine>::Projective::zero();
for value in rx.try_iter() {
result.add_assign(&value);
}
Ok(result)
}
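// Correctness checks against a naive multiexp, plus micro-benchmarks comparing the
// strategies implemented above.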
#[cfg(test)]
mod test {
use super::*;
fn naive_multiexp<G: CurveAffine>(
bases: Arc<Vec<G>>,
exponents: Arc<Vec<<G::Scalar as PrimeField>::Repr>>
) -> G::Projective
{
assert_eq!(bases.len(), exponents.len());
let mut acc = G::Projective::zero();
for (base, exp) in bases.iter().zip(exponents.iter()) {
acc.add_assign(&base.mul(*exp));
}
acc
}
#[test]
fn test_new_multiexp_with_bls12() {
use rand::{self, Rand};
use crate::pairing::bls12_381::Bls12;
use self::futures::executor::block_on;
const SAMPLES: usize = 1 << 14;
let rng = &mut rand::thread_rng();
let v = Arc::new((0..SAMPLES).map(|_| <Bls12 as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>());
let g = Arc::new((0..SAMPLES).map(|_| <Bls12 as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>());
let naive = naive_multiexp(g.clone(), v.clone());
let pool = Worker::new();
let fast = block_on(
multiexp(
&pool,
(g, 0),
FullDensity,
v
)
).unwrap();
assert_eq!(naive, fast);
}
#[test]
fn test_valid_bn254_multiexp() {
use rand::{self, Rand};
use crate::pairing::bn256::Bn256;
const SAMPLES: usize = 1 << 14;
let pool = Worker::new();
let rng = &mut rand::thread_rng();
let v = (0..SAMPLES).map(|_| <Bn256 as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>();
let g = (0..SAMPLES).map(|_| <Bn256 as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>();
let dense = dense_multiexp(
&pool,
&g,
&v,
).unwrap();
let v = Arc::new(v);
let g = Arc::new(g);
let naive = naive_multiexp(g.clone(), v.clone());
assert_eq!(dense, naive);
use self::futures::executor::block_on;
let fast_dense = block_on(
future_based_multiexp(
&pool,
g.clone(),
v.clone()
)
).unwrap();
assert_eq!(naive, fast_dense);
let fast = block_on(
multiexp(
&pool,
(g, 0),
FullDensity,
v
)
).unwrap();
assert_eq!(naive, fast);
}
#[test]
#[ignore]
fn test_new_multiexp_speed_with_bn256() {
use rand::{self, Rand};
use crate::pairing::bn256::Bn256;
use num_cpus;
let cpus = num_cpus::get();
const SAMPLES: usize = 1 << 22;
let rng = &mut rand::thread_rng();
let v = Arc::new((0..SAMPLES).map(|_| <Bn256 as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>());
let g = Arc::new((0..SAMPLES).map(|_| <Bn256 as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>());
let pool = Worker::new();
use self::futures::executor::block_on;
let start = std::time::Instant::now();
let _fast = block_on(
multiexp(
&pool,
(g, 0),
FullDensity,
v
)
).unwrap();
let duration_ns = start.elapsed().as_nanos() as f64;
println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
let time_per_sample = duration_ns/(SAMPLES as f64);
println!("Tested on {} samples on {} CPUs with {} ns per multiplication", SAMPLES, cpus, time_per_sample);
}
#[test]
fn test_dense_multiexp_vs_new_multiexp() {
use rand::{XorShiftRng, SeedableRng, Rand, Rng};
use crate::pairing::bn256::Bn256;
const SAMPLES: usize = 1 << 16;
let rng = &mut XorShiftRng::from_seed([0x3dbe6259, 0x8d313d76, 0x3237db17, 0xe5bc0654]);
let v = (0..SAMPLES).map(|_| <Bn256 as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>();
let g = (0..SAMPLES).map(|_| <Bn256 as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>();
println!("Done generating test points and scalars");
let pool = Worker::new();
let start = std::time::Instant::now();
let dense = dense_multiexp(
&pool, &g, &v).unwrap();
let duration_ns = start.elapsed().as_nanos() as f64;
println!("{} ns for dense for {} samples", duration_ns, SAMPLES);
let start = std::time::Instant::now();
let _map_reduce = map_reduce_multiexp_over_fixed_window(
&pool,
&g,
&v,
11
).unwrap();
let duration_ns = start.elapsed().as_nanos() as f64;
println!("{} ns for map reduce for {} samples", duration_ns, SAMPLES);
let start = std::time::Instant::now();
let buffered = buffered_multiexp_over_fixed_window_and_buffer_size(
&pool,
&g,
&v,
11,
64,
).unwrap();
let duration_ns = start.elapsed().as_nanos() as f64;
println!("{} ns for buffered multiexp for {} samples", duration_ns, SAMPLES);
assert_eq!(dense, buffered);
use self::futures::executor::block_on;
let start = std::time::Instant::now();
let sparse = block_on(
multiexp(
&pool,
(Arc::new(g), 0),
FullDensity,
Arc::new(v)
)
).unwrap();
let duration_ns = start.elapsed().as_nanos() as f64;
println!("{} ns for sparse for {} samples", duration_ns, SAMPLES);
assert_eq!(dense, sparse);
}
#[test]
fn test_bench_sparse_multiexp() {
use rand::{XorShiftRng, SeedableRng, Rand, Rng};
use self::futures::executor::block_on;
type Eng = crate::pairing::bls12_381::Bls12;
const SAMPLES: usize = 1 << 22;
let rng = &mut XorShiftRng::from_seed([0x3dbe6259, 0x8d313d76, 0x3237db17, 0xe5bc0654]);
let v = (0..SAMPLES).map(|_| <Eng as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>();
let g = (0..SAMPLES).map(|_| <Eng as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>();
println!("Done generating test points and scalars");
let pool = Worker::new();
let start = std::time::Instant::now();
let _sparse = block_on(
multiexp(
&pool,
(Arc::new(g), 0),
FullDensity,
Arc::new(v)
)
).unwrap();
let duration_ns = start.elapsed().as_nanos() as f64;
println!("{} ms for sparse for {} samples on {:?}", duration_ns/1000.0f64, SAMPLES, Eng{});
}
#[test]
fn test_bench_dense_consuming_multiexp() {
use rand::{XorShiftRng, SeedableRng, Rand, Rng};
type Eng = crate::pairing::bls12_381::Bls12;
const SAMPLES: usize = 1 << 22;
let rng = &mut XorShiftRng::from_seed([0x3dbe6259, 0x8d313d76, 0x3237db17, 0xe5bc0654]);
let v = (0..SAMPLES).map(|_| <Eng as ScalarEngine>::Fr::rand(rng).into_repr()).collect::<Vec<_>>();
let g = (0..SAMPLES).map(|_| <Eng as Engine>::G1::rand(rng).into_affine()).collect::<Vec<_>>();
let pool = Worker::new();
let start = std::time::Instant::now();
let _dense = dense_multiexp_consume(
&pool,
&g,
v
).unwrap();
println!("{:?} for dense for {} samples", start.elapsed(), SAMPLES);
}
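// Prints the chunk length, window size and window count that the heuristics above
// would choose for a given problem size, thread count and scalar bit length.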
fn calculate_parameters(size: usize, threads: usize, bits: u32) {
let mut chunk_len = size / threads;
if size % threads != 0 {
chunk_len += 1;
}
let raw_size = (f64::from(chunk_len as u32)).ln();
let new_window_size = if raw_size.floor() + 0.5 < raw_size {
raw_size.ceil() as u32
} else {
raw_size.floor() as u32
};
let window_size = (f64::from(chunk_len as u32)).ln().ceil() as u32;
let mut num_windows = bits / window_size;
let leftover = bits % window_size;
if leftover != 0 {
num_windows += 1;
}
let uncompensated_window = (f64::from(size as u32)).ln().ceil() as u32;
let mut num_uncompensated_windows = bits / uncompensated_window;
let uncompensated_leftover = bits % uncompensated_window;
if uncompensated_leftover != 0 {
num_uncompensated_windows += 1;
}
println!("For size {} and {} cores: chunk len {}, {} windows, average window {} bits, leftover {} bits. Alternative window size = {}", size, threads, chunk_len, num_windows, window_size, leftover, new_window_size);
}
#[test]
fn test_sizes_for_bn254() {
let sizes = vec![1<<23, 1<<24];
let cores = vec![8, 12, 16, 24, 32, 48];
for size in sizes {
for &core in &cores {
calculate_parameters(size, core, 254);
}
}
}
}