diff --git a/.gitignore b/.gitignore index a9d37c560..9c858b5d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target Cargo.lock +pkg \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 29fb129c3..641a667c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,10 @@ homepage = "https://github.com/matterinc/bellman" license = "MIT/Apache-2.0" name = "bellman" repository = "https://github.com/matterinc/bellman" -version = "0.1.2" +version = "0.1.3" + +[lib] +crate-type = ["cdylib", "lib", "staticlib"] [dependencies] rand = "0.4" @@ -18,8 +21,6 @@ crossbeam = "0.3" pairing = { git = 'https://github.com/matterinc/pairing' } byteorder = "1" ff = { git = 'https://github.com/matterinc/ff', features = ["derive"] } -pbr = "1.0.1" -time = "0.1" [features] default = [] diff --git a/src/domain.rs b/src/domain.rs index e7b9cc35a..43f3d38ed 100644 --- a/src/domain.rs +++ b/src/domain.rs @@ -509,3 +509,59 @@ fn parallel_fft_consistency() { test_consistency::(rng); } + +#[test] +fn test_field_element_multiplication_bn256() { + use rand::{self, Rand}; + use pairing::bn256::Bn256; + use pairing::bn256::Fr; + use num_cpus; + + let cpus = num_cpus::get(); + const SAMPLES: usize = 1 << 27; + + let rng = &mut rand::thread_rng(); + let v1 = (0..SAMPLES).map(|_| Scalar::(Fr::rand(rng))).collect::>(); + let v2 = (0..SAMPLES).map(|_| Scalar::(Fr::rand(rng))).collect::>(); + + let mut v1 = EvaluationDomain::from_coeffs(v1).unwrap(); + let v2 = EvaluationDomain::from_coeffs(v2).unwrap(); + + let pool = Worker::new(); + + let start = std::time::Instant::now(); + + v1.mul_assign(&pool, &v2); + + let duration_ns = start.elapsed().as_nanos() as f64; + println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES); + let time_per_sample = duration_ns/(SAMPLES as f64); + println!("Tested on {} samples on {} CPUs with {} ns per field element multiplication", SAMPLES, cpus, time_per_sample); +} + +#[test] +fn test_fft_bn256() { + use rand::{self, Rand}; + use 
pairing::bn256::Bn256; + use pairing::bn256::Fr; + use num_cpus; + + let cpus = num_cpus::get(); + const SAMPLES: usize = 1 << 27; + + let rng = &mut rand::thread_rng(); + let v1 = (0..SAMPLES).map(|_| Scalar::(Fr::rand(rng))).collect::>(); + + let mut v1 = EvaluationDomain::from_coeffs(v1).unwrap(); + + let pool = Worker::new(); + + let start = std::time::Instant::now(); + + v1.ifft(&pool); + + let duration_ns = start.elapsed().as_nanos() as f64; + println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES); + let time_per_sample = duration_ns/(SAMPLES as f64); + println!("Tested on {} samples on {} CPUs with {} ns per field element multiplication", SAMPLES, cpus, time_per_sample); +} \ No newline at end of file diff --git a/src/groth16/generator.rs b/src/groth16/generator.rs index 98a384cb1..bb72ef0f1 100644 --- a/src/groth16/generator.rs +++ b/src/groth16/generator.rs @@ -1,9 +1,5 @@ -extern crate time; - use super::super::verbose_flag; -use self::time::PreciseTime; - use rand::Rng; use std::sync::Arc; @@ -255,7 +251,9 @@ pub fn generate_parameters( { // Compute powers of tau if verbose {eprintln!("computing powers of tau...")}; - let start = PreciseTime::now(); + + let start = std::time::Instant::now(); + { let powers_of_tau = powers_of_tau.as_mut(); worker.scope(powers_of_tau.len(), |scope, chunk| { @@ -272,14 +270,16 @@ pub fn generate_parameters( } }); } - if verbose {eprintln!("powers of tau stage 1 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);}; + if verbose {eprintln!("powers of tau stage 1 done in {} s", start.elapsed().as_millis() as f64 / 1000.0);}; // coeff = t(x) / delta let mut coeff = powers_of_tau.z(&tau); coeff.mul_assign(&delta_inverse); if verbose {eprintln!("computing the H query with multiple threads...")}; - let start = PreciseTime::now(); + + let start = std::time::Instant::now(); + // Compute the H query with multiple threads worker.scope(h.len(), |scope, chunk| { for (h, p) in 
h.chunks_mut(chunk).zip(powers_of_tau.as_ref().chunks(chunk)) @@ -302,17 +302,18 @@ pub fn generate_parameters( }); } }); - if verbose {eprintln!("computing the H query done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);}; + if verbose {eprintln!("computing the H query done in {} s", start.elapsed().as_millis() as f64 / 1000.0);}; } if verbose {eprintln!("using inverse FFT to convert powers of tau to Lagrange coefficients...")}; - let start = PreciseTime::now(); + + let start = std::time::Instant::now(); // Use inverse FFT to convert powers of tau to Lagrange coefficients powers_of_tau.ifft(&worker); let powers_of_tau = powers_of_tau.into_coeffs(); - if verbose {eprintln!("powers of tau stage 2 done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0)}; + if verbose {eprintln!("powers of tau stage 2 done in {} s", start.elapsed().as_millis() as f64 / 1000.0)}; let mut a = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux]; let mut b_g1 = vec![E::G1::zero(); assembly.num_inputs + assembly.num_aux]; @@ -321,7 +322,7 @@ pub fn generate_parameters( let mut l = vec![E::G1::zero(); assembly.num_aux]; if verbose {eprintln!("evaluating polynomials...")}; - let start = PreciseTime::now(); + let start = std::time::Instant::now(); fn eval( // wNAF window tables @@ -474,7 +475,7 @@ pub fn generate_parameters( &worker ); - if verbose {eprintln!("evaluating polynomials done in {} s", start.to(PreciseTime::now()).num_milliseconds() as f64 / 1000.0);}; + if verbose {eprintln!("evaluating polynomials done in {} s", start.elapsed().as_millis() as f64 / 1000.0);}; // Don't allow any elements be unconstrained, so that // the L query is always fully dense. 
diff --git a/src/groth16/prover.rs b/src/groth16/prover.rs index ef19ebf6d..98f2b628e 100644 --- a/src/groth16/prover.rs +++ b/src/groth16/prover.rs @@ -1,6 +1,3 @@ -extern crate time; -use self::time::PreciseTime; - use super::super::verbose_flag; use rand::Rng; @@ -173,7 +170,7 @@ impl PreparedProver { let vk = params.get_vk(self.assignment.input_assignment.len())?; - let h_start = PreciseTime::now(); + let start = std::time::Instant::now(); let h = { let mut a = EvaluationDomain::from_coeffs(prover.a)?; @@ -209,10 +206,9 @@ impl PreparedProver { multiexp(&worker, params.get_h(a.len())?, FullDensity, a) }; - let h_end = PreciseTime::now(); - if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))}; + if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())}; - let points_start = PreciseTime::now(); + let start = std::time::Instant::now(); // TODO: Check that difference in operations for different chunks is small @@ -283,8 +279,7 @@ impl PreparedProver { g_c.add_assign(&h.wait()?); g_c.add_assign(&l.wait()?); - let points_end = PreciseTime::now(); - if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))}; + if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())}; Ok(Proof { a: g_a.into_affine(), @@ -437,7 +432,7 @@ pub fn create_proof>( let vk = params.get_vk(prover.input_assignment.len())?; - let h_start = PreciseTime::now(); + let start = std::time::Instant::now(); let h = { let mut a = EvaluationDomain::from_coeffs(prover.a)?; @@ -473,10 +468,9 @@ pub fn create_proof>( multiexp(&worker, params.get_h(a.len())?, FullDensity, a) }; - let h_end = PreciseTime::now(); - if verbose {eprintln!("{} seconds for prover for H evaluation", h_start.to(h_end))}; + if verbose {eprintln!("{} seconds for prover for H evaluation", start.elapsed().as_secs())}; - let points_start = PreciseTime::now(); + let start = 
std::time::Instant::now(); // TODO: Check that difference in operations for different chunks is small @@ -547,8 +541,7 @@ pub fn create_proof>( g_c.add_assign(&h.wait()?); g_c.add_assign(&l.wait()?); - let points_end = PreciseTime::now(); - if verbose {eprintln!("{} seconds for prover for point multiplication", points_start.to(points_end))}; + if verbose {eprintln!("{} seconds for prover for point multiplication", start.elapsed().as_secs())}; Ok(Proof { a: g_a.into_affine(), diff --git a/src/lib.rs b/src/lib.rs index 53ccdbbf7..c43cdd1a6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,6 @@ pub mod multicore; mod multiexp; pub mod domain; pub mod groth16; -pub mod progress_bar; use pairing::{Engine}; use ff::Field; diff --git a/src/multiexp.rs b/src/multiexp.rs index b32327e33..e834f4c8b 100644 --- a/src/multiexp.rs +++ b/src/multiexp.rs @@ -142,6 +142,36 @@ impl DensityTracker { } } +/// This ingenious piece of code works in the following way: +/// - choose `c` - the bit length of the region that one thread works on +/// - make `2^c - 1` buckets and initialize them with `G = infinity` (that's the equivalent of zero) +/// - there is no bucket for "zero" because it's not necessary +/// - go over the pairs `(base, scalar)` +/// - for each scalar calculate `scalar % 2^c` and add the base (without any multiplications!)
to the +/// corresponding bucket +/// - at the end each bucket will have an accumulated value that should be multiplied by the corresponding factor +/// between `1` and `2^c - 1` to get the right value +/// - here comes the first trick - you don't need to do multiplications at all, just add all the buckets together +/// starting from the first one `(a + b + c + ...)` and then add to the first sum another sum of the form +/// `(b + c + d + ...)`, and then the third one `(c + d + ...)`, that will result in the proper prefactor in front of every +/// accumulator, without any multiplication operations at all +/// - that's of course not enough, so spawn the next thread +/// - this thread works with the same bit width `c`, but SKIPS lower bits completely, so it actually takes values +/// in the form `(scalar >> c) % 2^c`, so works on the next region +/// - spawn more threads until you exhaust the whole bit length +/// - you will get roughly `[bitlength / c] + 1` accumulators +/// - double the highest accumulator enough times, add to the next one, double the result, add the next accumulator, continue +/// +/// Demo why it works: +/// ```text +/// a * G + b * H = (a_2 * (2^c)^2 + a_1 * (2^c)^1 + a_0) * G + (b_2 * (2^c)^2 + b_1 * (2^c)^1 + b_0) * H +/// ``` +/// - make buckets over `0` labeled coefficients +/// - make buckets over `1` labeled coefficients +/// - make buckets over `2` labeled coefficients +/// - accumulators over each set of buckets will have an implicit factor of `(2^c)^i`, so before summing them up +/// "higher" accumulators must be doubled `c` times +/// fn multiexp_inner( pool: &Worker, bases: S, @@ -195,7 +225,7 @@ fn multiexp_inner( } else { // Place multiplication into the bucket: Separate s * P as // (s/2^c) * P + (s mod 2^c) P - // First multiplication is c bits less, do one can do it, + // First multiplication is c bits less, so one can do it, // sum results from different buckets and double it c times, // then add with (s mod 2^c) P parts let mut exp
= exp; @@ -317,3 +347,34 @@ fn test_with_bls12() { assert_eq!(naive, fast); } + +#[test] +fn test_speed_with_bn256() { + use rand::{self, Rand}; + use pairing::bn256::Bn256; + use num_cpus; + + let cpus = num_cpus::get(); + const SAMPLES: usize = 1 << 22; + + let rng = &mut rand::thread_rng(); + let v = Arc::new((0..SAMPLES).map(|_| ::Fr::rand(rng).into_repr()).collect::>()); + let g = Arc::new((0..SAMPLES).map(|_| ::G1::rand(rng).into_affine()).collect::>()); + + let pool = Worker::new(); + + let start = std::time::Instant::now(); + + let _fast = multiexp( + &pool, + (g, 0), + FullDensity, + v + ).wait().unwrap(); + + + let duration_ns = start.elapsed().as_nanos() as f64; + println!("Elapsed {} ns for {} samples", duration_ns, SAMPLES); + let time_per_sample = duration_ns/(SAMPLES as f64); + println!("Tested on {} samples on {} CPUs with {} ns per multiplication", SAMPLES, cpus, time_per_sample); +} diff --git a/src/progress_bar.rs b/src/progress_bar.rs deleted file mode 100644 index f8574c0bf..000000000 --- a/src/progress_bar.rs +++ /dev/null @@ -1,146 +0,0 @@ -extern crate time; - -use std::io::{Write}; -use std::sync::{ - mpsc::{channel, Sender, Receiver}, - Arc, - atomic::{AtomicUsize, Ordering} -}; -use self::time::precise_time_ns; -use std::time::Duration; - -static UPDATE_INTERVAL: u64 = 1000_000 * 1000; // ms - -pub struct MultiBar { - n_workers: u64, - - total: u64, - cur: u64, - - prev: u64, - prev_time: u64, - - total_elapsed: u64, - - step: Arc, - tx: Sender, - rx: Receiver, -} - -pub struct ProgressBar { - //chunk: u64, - acc: u64, - step: Arc, - tx: Option>, -} - -/// Simple efficient thread-safe progress indicator -/// It follows the interface of [https://github.com/a8m/pb](https://github.com/a8m/pb) -impl MultiBar { - - /// Create a new MultiBar for stdout - pub fn new() -> Self { - let (tx, rx) = channel(); - Self{ - n_workers: 0, - total: 0, - cur: 0, - prev: 0, - prev_time: precise_time_ns(), - total_elapsed: 0, - step: 
Arc::new(AtomicUsize::new(1)), - tx, - rx, - } - } - - // Create a ProgressBar for a process of `total` steps - pub fn create_bar(&mut self, chunk: u64) -> ProgressBar { - self.n_workers += 1; - self.total += chunk; - //println!("step 0 of {}", chunk); - ProgressBar{ - //chunk, - acc: 0, - tx: Some(Sender::clone(&self.tx)), - step: Arc::clone(&self.step), - } - } - - /// Start listening for updates from ProgressBars in different threads - pub fn listen(&mut self) { - //println!(""); - for d in &self.rx { - if d == 0 { - self.n_workers -= 1; - } - if self.n_workers == 0 { - break; - } - - self.cur += d; - let processed = self.cur - self.prev; - if processed > self.step.load(Ordering::Acquire) as u64 * self.n_workers { - let now = time::precise_time_ns(); - let elapsed = now - self.prev_time; - - if elapsed > UPDATE_INTERVAL { - self.prev = self.cur; - self.prev_time = now; - self.total_elapsed += elapsed; - - print!("\rprocessed {:2}%: {} of {}.", self.cur * 100 / self.total, self.cur, self.total); - - let r = Duration::from_nanos((self.total - self.cur) * self.total_elapsed / self.cur).as_secs(); - print!(" Remaining estimated: {} h {} min {} s", r / 3600, r % 3600 / 60, r % 60); - - let new_step = (self.cur * UPDATE_INTERVAL / self.total_elapsed) / self.n_workers; - self.step.store(new_step as usize, Ordering::Release); - - std::io::stdout().flush().unwrap(); - } - } - - } - println!("\rdone "); - } -} - -impl ProgressBar { - - /// Increment progress by `d` steps - pub fn add(&mut self, d: u64) { - self.acc += d; - if self.acc > (self.step.load(Ordering::Relaxed) as u64) { - if let Some(tx) = &self.tx { - tx.send(self.acc).unwrap(); - } - self.acc = 0; - } - } - - /// Finish the process - pub fn finish(&mut self) { - let tx = self.tx.take().unwrap(); - tx.send(0).unwrap(); - drop(tx); - } -} - -#[test] -fn test_progress_display() { - - let mut mb = MultiBar::new(); - - for _j in 1..=0 { - let mut pb = mb.create_bar(3600000); - std::thread::spawn(move || { - for _i 
in 0..3600000 { - std::thread::sleep(Duration::from_millis(1)); - pb.add(1); - } - pb.finish(); - }); - }; - //mb.listen(); -}