Skip to content
This repository has been archived by the owner on Aug 15, 2024. It is now read-only.

Commit

Permalink
- add some benches
Browse files Browse the repository at this point in the history
- cleanup dependencies
- will work on WASM and GM17
  • Loading branch information
shamatar committed Jan 23, 2019
1 parent 6e5cfe2 commit e775b47
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 178 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
target
Cargo.lock
pkg
7 changes: 4 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ homepage = "https://github.com/matterinc/bellman"
license = "MIT/Apache-2.0"
name = "bellman"
repository = "https://github.com/matterinc/bellman"
version = "0.1.2"
version = "0.1.3"

[lib]
crate-type = ["cdylib", "lib", "staticlib"]

[dependencies]
rand = "0.4"
Expand All @@ -18,8 +21,6 @@ crossbeam = "0.3"
pairing = { git = 'https://github.com/matterinc/pairing' }
byteorder = "1"
ff = { git = 'https://github.com/matterinc/ff', features = ["derive"] }
pbr = "1.0.1"
time = "0.1"

[features]
default = []
56 changes: 56 additions & 0 deletions src/domain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,3 +509,59 @@ fn parallel_fft_consistency() {

test_consistency::<Bls12, _>(rng);
}

/// Ad-hoc benchmark: multiplies two BN256 evaluation domains element-wise
/// and prints the wall-clock cost per element.
///
/// Only the `mul_assign` call is timed; drawing the random inputs and
/// wrapping them into domains happens before the clock starts.
#[test]
fn test_field_element_multiplication_bn256() {
    use std::time::Instant;
    use num_cpus;
    use pairing::bn256::{Bn256, Fr};
    use rand::{self, Rand};

    const SAMPLES: usize = 1 << 27;

    let cpus = num_cpus::get();
    let rng = &mut rand::thread_rng();

    // Draw the left operand first, then the right one, from the same RNG.
    let lhs_coeffs: Vec<_> = (0..SAMPLES)
        .map(|_| Scalar::<Bn256>(Fr::rand(rng)))
        .collect();
    let rhs_coeffs: Vec<_> = (0..SAMPLES)
        .map(|_| Scalar::<Bn256>(Fr::rand(rng)))
        .collect();

    let mut lhs = EvaluationDomain::from_coeffs(lhs_coeffs).unwrap();
    let rhs = EvaluationDomain::from_coeffs(rhs_coeffs).unwrap();

    let worker = Worker::new();

    // Timed section: the multiplication only.
    let timer = Instant::now();
    lhs.mul_assign(&worker, &rhs);
    let duration_ns = timer.elapsed().as_nanos() as f64;

    println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
    println!(
        "Tested on {} samples on {} CPUs with {} ns per field element multiplication",
        SAMPLES,
        cpus,
        duration_ns / (SAMPLES as f64)
    );
}

/// Ad-hoc benchmark: runs an inverse FFT over an evaluation domain of
/// random BN256 scalars and prints the wall-clock cost per element.
///
/// Only the `ifft` call is timed; drawing the `1 << 27` random scalars and
/// building the domain happens before the clock starts.
#[test]
fn test_fft_bn256() {
    use rand::{self, Rand};
    use pairing::bn256::Bn256;
    use pairing::bn256::Fr;
    use num_cpus;

    let cpus = num_cpus::get();
    const SAMPLES: usize = 1 << 27;

    let rng = &mut rand::thread_rng();
    let v1 = (0..SAMPLES).map(|_| Scalar::<Bn256>(Fr::rand(rng))).collect::<Vec<_>>();

    let mut v1 = EvaluationDomain::from_coeffs(v1).unwrap();

    let pool = Worker::new();

    let start = std::time::Instant::now();

    v1.ifft(&pool);

    let duration_ns = start.elapsed().as_nanos() as f64;
    println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
    let time_per_sample = duration_ns/(SAMPLES as f64);
    // The timed operation is the inverse FFT, so report it as such (the
    // message used to be a copy of the multiplication benchmark's text).
    println!("Tested on {} samples on {} CPUs with {} ns per element of the inverse FFT", SAMPLES, cpus, time_per_sample);
}
25 changes: 13 additions & 12 deletions src/groth16/generator.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
extern crate time;

use super::super::verbose_flag;

use self::time::PreciseTime;

use rand::Rng;

use std::sync::Arc;
Expand Down Expand Up @@ -255,7 +251,9 @@ pub fn generate_parameters<E, C>(
{
// Compute powers of tau
if verbose {eprintln!("computing powers of tau...")};
let start = PreciseTime::now();

let start = std::time::Instant::now();

{
let powers_of_tau = powers_of_tau.as_mut();
worker.scope(powers_of_tau.len(), |scope, chunk| {
Expand All @@ -272,14 +270,16 @@ pub fn generate_parameters<E, C>(
}
});
}
if verbose {eprintln!("powers of tau stage 1 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
if verbose {eprintln!("powers of tau stage 1 done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};

// coeff = t(x) / delta
let mut coeff = powers_of_tau.z(&tau);
coeff.mul_assign(&delta_inverse);

if verbose {eprintln!("computing the H query with multiple threads...")};
let start = PreciseTime::now();

let start = std::time::Instant::now();

// Compute the H query with multiple threads
worker.scope(h.len(), |scope, chunk| {
for (h, p) in h.chunks_mut(chunk).zip(powers_of_tau.as_ref().chunks(chunk))
Expand All @@ -302,17 +302,18 @@ pub fn generate_parameters<E, C>(
});
}
});
if verbose {eprintln!("computing the H query done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
if verbose {eprintln!("computing the H query done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};
}

if verbose {eprintln!("using inverse FFT to convert powers of tau to Lagrange coefficients...")};
let start = PreciseTime::now();

let start = std::time::Instant::now();

// Use inverse FFT to convert powers of tau to Lagrange coefficients
powers_of_tau.ifft(&worker);
let powers_of_tau = powers_of_tau.into_coeffs();

if verbose {eprintln!("powers of tau stage 2 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0)};
if verbose {eprintln!("powers of tau stage 2 done in {} s", start.elapsed().as_millis() as f64 / 1000.0)};

let mut a = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux];
let mut b_g1 = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux];
Expand All @@ -321,7 +322,7 @@ pub fn generate_parameters<E, C>(
let mut l = vec![E::G1::zero(); assembly.num_aux];

if verbose {eprintln!("evaluating polynomials...")};
let start = PreciseTime::now();
let start = std::time::Instant::now();

fn eval<E: Engine>(
// wNAF window tables
Expand Down Expand Up @@ -474,7 +475,7 @@ pub fn generate_parameters<E, C>(
&worker
);

if verbose {eprintln!("evaluating polynomials done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);};
if verbose {eprintln!("evaluating polynomials done in {} s", start.elapsed().as_millis() as f64 / 1000.0);};

// Don't allow any elements be unconstrained, so that
// the L query is always fully dense.
Expand Down
23 changes: 8 additions & 15 deletions src/groth16/prover.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
extern crate time;
use self::time::PreciseTime;

use super::super::verbose_flag;

use rand::Rng;
Expand Down Expand Up @@ -173,7 +170,7 @@ impl<E:Engine> PreparedProver<E> {

let vk = params.get_vk(self.assignment.input_assignment.len())?;

let h_start = PreciseTime::now();
let start = std::time::Instant::now();

let h = {
let mut a = EvaluationDomain::from_coeffs(prover.a)?;
Expand Down Expand Up @@ -209,10 +206,9 @@ impl<E:Engine> PreparedProver<E> {
multiexp(&worker, params.get_h(a.len())?, FullDensity, a)
};

let h_end = PreciseTime::now();
if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))};
if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())};

let points_start = PreciseTime::now();
let start = std::time::Instant::now();

// TODO: Check that difference in operations for different chunks is small

Expand Down Expand Up @@ -283,8 +279,7 @@ impl<E:Engine> PreparedProver<E> {
g_c.add_assign(&h.wait()?);
g_c.add_assign(&l.wait()?);

let points_end = PreciseTime::now();
if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))};
if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())};

Ok(Proof {
a: g_a.into_affine(),
Expand Down Expand Up @@ -437,7 +432,7 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(

let vk = params.get_vk(prover.input_assignment.len())?;

let h_start = PreciseTime::now();
let start = std::time::Instant::now();

let h = {
let mut a = EvaluationDomain::from_coeffs(prover.a)?;
Expand Down Expand Up @@ -473,10 +468,9 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
multiexp(&worker, params.get_h(a.len())?, FullDensity, a)
};

let h_end = PreciseTime::now();
if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))};
if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())};

let points_start = PreciseTime::now();
let start = std::time::Instant::now();

// TODO: Check that difference in operations for different chunks is small

Expand Down Expand Up @@ -547,8 +541,7 @@ pub fn create_proof<E, C, P: ParameterSource<E>>(
g_c.add_assign(&h.wait()?);
g_c.add_assign(&l.wait()?);

let points_end = PreciseTime::now();
if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))};
if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())};

Ok(Proof {
a: g_a.into_affine(),
Expand Down
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ pub mod multicore;
mod multiexp;
pub mod domain;
pub mod groth16;
pub mod progress_bar;

use pairing::{Engine};
use ff::Field;
Expand Down
63 changes: 62 additions & 1 deletion src/multiexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,36 @@ impl DensityTracker {
}
}

/// This ingenious piece of code works in the following way:
/// - choose `c` - the bit length of the region that one thread works on
/// - make `2^c - 1` buckets and initialize them with `G = infinity` (that's equivalent of zero)
/// - there is no bucket for "zero" because it's not necessary
/// - go over the pairs `(base, scalar)`
/// - for each scalar calculate `scalar % 2^c` and add the base (without any multiplications!) to the
/// corresponding bucket
/// - at the end each bucket will have an accumulated value that should be multiplied by the corresponding factor
/// between `1` and `2^c - 1` to get the right value
/// - here comes the first trick - you don't need to do multiplications at all, just add all the buckets together
/// starting from the first one `(a + b + c + ...)` and then add to the first sum another sum of the form
/// `(b + c + d + ...)`, and then the third one `(c + d + ...)`, that will result in the proper prefactor in front of every
/// accumulator, without any multiplication operations at all
/// - that's of course not enough, so spawn the next thread
/// - this thread works with the same bit width `c`, but SKIPS lower bits completely, so it actually takes values
/// in the form `(scalar >> c) % 2^c`, so works on the next region
/// - spawn more threads until you exhaust all the bit length
/// - you will get roughly `[bitlength / c] + 1` accumulators
/// - double the highest accumulator enough times, add to the next one, double the result, add the next accumulator, continue
///
/// Demo why it works:
/// ```text
/// a * G + b * H = (a_2 * (2^c)^2 + a_1 * (2^c)^1 + a_0) * G + (b_2 * (2^c)^2 + b_1 * (2^c)^1 + b_0) * H
/// ```
/// - make buckets over `0` labeled coefficients
/// - make buckets over `1` labeled coefficients
/// - make buckets over `2` labeled coefficients
/// - accumulators over each set of buckets will have an implicit factor of `(2^c)^i`, so before summing them up
/// "higher" accumulators must be doubled `c` times
///
fn multiexp_inner<Q, D, G, S>(
pool: &Worker,
bases: S,
Expand Down Expand Up @@ -195,7 +225,7 @@ fn multiexp_inner<Q, D, G, S>(
} else {
// Place multiplication into the bucket: Separate s * P as
// (s/2^c) * P + (s mod 2^c) P
// First multiplication is c bits less, do one can do it,
// First multiplication is c bits less, so one can do it,
// sum results from different buckets and double it c times,
// then add with (s mod 2^c) P parts
let mut exp = exp;
Expand Down Expand Up @@ -317,3 +347,34 @@ fn test_with_bls12() {

assert_eq!(naive, fast);
}

/// Ad-hoc benchmark: times one `multiexp` call over random BN256 G1 bases
/// and scalar representations, then prints the cost per (base, scalar) pair.
///
/// Input generation is excluded from the measurement; the clock covers the
/// `multiexp` call together with waiting for its result.
#[test]
fn test_speed_with_bn256() {
    use std::time::Instant;
    use num_cpus;
    use pairing::bn256::Bn256;
    use rand::{self, Rand};

    const SAMPLES: usize = 1 << 22;

    let cpus = num_cpus::get();
    let rng = &mut rand::thread_rng();

    // Scalars are drawn before the bases, both from the same RNG.
    let scalars: Vec<_> = (0..SAMPLES)
        .map(|_| <Bn256 as ScalarEngine>::Fr::rand(rng).into_repr())
        .collect();
    let scalars = Arc::new(scalars);

    let bases: Vec<_> = (0..SAMPLES)
        .map(|_| <Bn256 as Engine>::G1::rand(rng).into_affine())
        .collect();
    let bases = Arc::new(bases);

    let worker = Worker::new();

    // Timed section: the multi-exponentiation and the wait for its result.
    let timer = Instant::now();
    let _fast = multiexp(&worker, (bases, 0), FullDensity, scalars)
        .wait()
        .unwrap();
    let duration_ns = timer.elapsed().as_nanos() as f64;

    println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES);
    println!(
        "Tested on {} samples on {} CPUs with {} ns per multiplication",
        SAMPLES,
        cpus,
        duration_ns / (SAMPLES as f64)
    );
}
Loading

0 comments on commit e775b47

Please sign in to comment.