diff --git a/crypto/src/commitments/kzg.rs b/crypto/src/commitments/kzg.rs index 2ecbb483c..c65d64d8c 100644 --- a/crypto/src/commitments/kzg.rs +++ b/crypto/src/commitments/kzg.rs @@ -1,11 +1,16 @@ use super::traits::IsCommitmentScheme; use alloc::{borrow::ToOwned, vec::Vec}; use core::{marker::PhantomData, mem}; +use lambdaworks_math::gpu::icicle::GpuMSMPoint; +use lambdaworks_math::traits::ByteConversion; use lambdaworks_math::{ cyclic_group::IsGroup, elliptic_curve::traits::IsPairing, errors::DeserializationError, - field::{element::FieldElement, traits::IsPrimeField}, + field::{ + element::FieldElement, + traits::{IsField, IsPrimeField}, + }, msm::pippenger::msm, polynomial::Polynomial, traits::{AsBytes, Deserializable}, @@ -136,12 +141,12 @@ where } #[derive(Clone)] -pub struct KateZaveruchaGoldberg { +pub struct KateZaveruchaGoldberg { srs: StructuredReferenceString, phantom: PhantomData, } -impl KateZaveruchaGoldberg { +impl KateZaveruchaGoldberg { pub fn new(srs: StructuredReferenceString) -> Self { Self { srs, @@ -150,20 +155,22 @@ impl KateZaveruchaGoldberg { } } -impl>, P: IsPairing> - IsCommitmentScheme for KateZaveruchaGoldberg +impl< + const N: usize, + F: IsField> + + IsPrimeField>, + P: IsPairing, + > IsCommitmentScheme for KateZaveruchaGoldberg +where + FieldElement: ByteConversion, + P::G1Point: GpuMSMPoint, { type Commitment = P::G1Point; fn commit(&self, p: &Polynomial>) -> Self::Commitment { - let coefficients: Vec<_> = p - .coefficients - .iter() - .map(|coefficient| coefficient.representative()) - .collect(); msm( - &coefficients, - &self.srs.powers_main_group[..coefficients.len()], + &p.coefficients, + &self.srs.powers_main_group[..p.coefficients.len()], ) .expect("`points` is sliced by `cs`'s length") } diff --git a/math/Cargo.toml b/math/Cargo.toml index 02df065d4..847afb37b 100644 --- a/math/Cargo.toml +++ b/math/Cargo.toml @@ -24,11 +24,17 @@ objc = { version = "0.2.7", optional = true } # cuda cudarc = { version = "0.9.7", optional = true } +# Icicle integration +icicle-cuda-runtime = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true } +icicle-core = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true } +icicle-bls12-377 = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true} +icicle-bls12-381 = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true } +icicle-bn254 = { git = "https://github.com/ingonyama-zk/icicle.git", tag = "v1.4.0", optional = true } lambdaworks-gpu = { workspace = true, optional = true } [dev-dependencies] -rand = { version = "0.8.5", default-features = false } +rand = { version = "0.8.5" } rand_chacha = "0.3.1" criterion = "0.5.1" const-random = "0.1.15" @@ -45,6 +51,7 @@ lambdaworks-serde-binary = ["dep:serde", "alloc"] lambdaworks-serde-string = ["dep:serde", "dep:serde_json", "alloc"] proptest = ["dep:proptest"] winter_compatibility = ["winter-math", "miden-core"] +icicle = ["dep:icicle-cuda-runtime", "dep:icicle-core", "dep:icicle-bls12-377", "dep:icicle-bls12-381", "dep:icicle-bn254"] # gpu metal = [ @@ -84,6 +91,11 @@ name = "criterion_msm" harness = false required-features = ["parallel"] +[[bench]] +name = "criterion_icicle" +harness = false +required-features = ["icicle"] + [[bench]] name = "criterion_fft" harness = false diff --git a/math/benches/criterion_icicle.rs b/math/benches/criterion_icicle.rs new file mode 100644 index 000000000..07a271220 --- /dev/null +++ b/math/benches/criterion_icicle.rs @@ -0,0 +1,71 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use lambdaworks_math::{ + cyclic_group::IsGroup, + elliptic_curve::{ + short_weierstrass::curves::{ + bls12_377::curve::BLS12377Curve, bls12_381::curve::BLS12381Curve, + bn_254::curve::BN254Curve, + }, + traits::IsEllipticCurve, + }, + field::{element::FieldElement, traits::IsField}, +}; + +use lambdaworks_math::gpu::icicle::{ + bls12_377::bls12_377_g1_msm, bls12_381::bls12_381_g1_msm, bn254::bn254_g1_msm, +}; +use rand::{rngs::StdRng, Rng, SeedableRng}; + +pub fn generate_cs_and_points( + msm_size: usize, +) -> (Vec>, Vec) +where + ::BaseType: From, +{ + // We use a seeded rng so the benchmarks are reproducible. + let mut rng = StdRng::seed_from_u64(42); + + let g = C::generator(); + + let cs: Vec<_> = (0..msm_size) + .map(|_| FieldElement::::new(rng.gen::().into())) + .collect(); + + let points: Vec<_> = (0..msm_size) + .map(|_| g.operate_with_self(rng.gen::())) + .collect(); + + (cs, points) +} + +pub fn msm_benchmarks_with_size(c: &mut Criterion, msm_size: usize) { + let mut group = c.benchmark_group(format!("MSM benchmarks with size {msm_size}")); + + let (cs, points) = generate_cs_and_points::(msm_size); + group.bench_function("BLS12_381", |bench| { + bench.iter(|| black_box(bls12_381_g1_msm(&cs, &points, None))); + }); + + let (cs, points) = generate_cs_and_points::(msm_size); + group.bench_function("BLS12_377", |bench| { + bench.iter(|| black_box(bls12_377_g1_msm(&cs, &points, None))); + }); + + let (cs, points) = generate_cs_and_points::(msm_size); + group.bench_function("BN_254", |bench| { + bench.iter(|| black_box(bn254_g1_msm(&cs, &points, None))); + }); +} + +pub fn run_benchmarks(c: &mut Criterion) { + let exponents = 1..=18; + + for exp in exponents { + let msm_size = 1 << exp; + + msm_benchmarks_with_size(c, msm_size); + } +} + +criterion_group!(icicle_msm, run_benchmarks); +criterion_main!(icicle_msm); diff --git a/math/src/elliptic_curve/short_weierstrass/curves/bls12_377/curve.rs b/math/src/elliptic_curve/short_weierstrass/curves/bls12_377/curve.rs index c324b35de..a9b2958b1 100644 --- a/math/src/elliptic_curve/short_weierstrass/curves/bls12_377/curve.rs +++ b/math/src/elliptic_curve/short_weierstrass/curves/bls12_377/curve.rs @@ -5,6 +5,8 @@ use crate::{ elliptic_curve::short_weierstrass::traits::IsShortWeierstrass, field::element::FieldElement, }; +pub type BLS12377FieldElement = FieldElement; + /// The description of the curve. #[derive(Clone, Debug)] pub struct BLS12377Curve; diff --git a/math/src/fft/errors.rs b/math/src/fft/errors.rs index 4bbec91c2..e27e7d74d 100644 --- a/math/src/fft/errors.rs +++ b/math/src/fft/errors.rs @@ -8,6 +8,9 @@ use lambdaworks_gpu::metal::abstractions::errors::MetalError; #[cfg(feature = "cuda")] use lambdaworks_gpu::cuda::abstractions::errors::CudaError; +#[cfg(feature = "icicle")] +use icicle_core::error::IcicleError; + #[derive(Debug)] pub enum FFTError { RootOfUnityError(u64), @@ -17,6 +20,8 @@ pub enum FFTError { MetalError(MetalError), #[cfg(feature = "cuda")] CudaError(CudaError), + #[cfg(feature = "icicle")] + IcicleError(IcicleError), } impl Display for FFTError { @@ -37,6 +42,8 @@ impl Display for FFTError { FFTError::CudaError(_) => { write!(f, "A CUDA related error has ocurred") } + #[cfg(feature = "icicle")] + FFTError::IcicleError(e) => write!(f, "Icicle GPU backend failure. {:?}", e), } } } diff --git a/math/src/fft/polynomial.rs b/math/src/fft/polynomial.rs index a27811d31..70ad7cabc 100644 --- a/math/src/fft/polynomial.rs +++ b/math/src/fft/polynomial.rs @@ -7,6 +7,7 @@ use crate::{ traits::{IsFFTField, RootsConfig}, }, polynomial::Polynomial, + traits::ByteConversion, }; use alloc::{vec, vec::Vec}; @@ -14,6 +15,13 @@ use alloc::{vec, vec::Vec}; use crate::fft::gpu::cuda::polynomial::{evaluate_fft_cuda, interpolate_fft_cuda}; #[cfg(feature = "metal")] use crate::fft::gpu::metal::polynomial::{evaluate_fft_metal, interpolate_fft_metal}; +#[cfg(feature = "icicle")] +use crate::gpu::icicle::{evaluate_fft_icicle, IcicleFFT}; +#[cfg(feature = "icicle")] +use icicle_core::{ + ntt::NTT, + traits::FieldImpl, +}; use super::cpu::{ops, roots_of_unity}; @@ -26,7 +34,8 @@ impl Polynomial> { poly: &Polynomial>, blowup_factor: usize, domain_size: Option, - ) -> Result>, FFTError> { + ) -> Result>, FFTError> + { let domain_size = domain_size.unwrap_or(0); let len = core::cmp::max(poly.coeff_len(), domain_size).next_power_of_two() * blowup_factor; @@ -51,6 +60,21 @@ impl Polynomial> { } } + /* + #[cfg(feature = "icicle")] + { + if !F::field_name().is_empty() { + Ok(evaluate_fft_icicle::(&coeffs)?) + } else { + println!( + "GPU evaluation failed for field {}. Program will fallback to CPU.", + core::any::type_name::() + ); + evaluate_fft_cpu::(&coeffs) + } + } + */ + #[cfg(feature = "cuda")] { // TODO: support multiple fields with CUDA @@ -134,6 +158,7 @@ pub fn compose_fft( where F: IsFFTField + IsSubFieldOf, E: IsField, + FieldElement: ByteConversion { let poly_2_evaluations = Polynomial::evaluate_fft::(poly_2, 1, None).unwrap(); diff --git a/math/src/gpu/icicle/bls12_377.rs b/math/src/gpu/icicle/bls12_377.rs new file mode 100644 index 000000000..2381e11be --- /dev/null +++ b/math/src/gpu/icicle/bls12_377.rs @@ -0,0 +1,156 @@ +use icicle_bls12_377::curve; +use icicle_core::{error::IcicleError, msm, traits::FieldImpl}; +use icicle_cuda_runtime::{memory::HostOrDeviceSlice, stream::CudaStream}; + +use crate::{ + elliptic_curve::short_weierstrass::{ + curves::bls12_377::{ + curve::{BLS12377Curve, BLS12377FieldElement}, + field_extension::BLS12377PrimeField, + }, + point::ShortWeierstrassProjectivePoint, + }, + errors::ByteConversionError, + field::element::FieldElement, + traits::ByteConversion, +}; + +impl BLS12377FieldElement { + fn to_icicle(&self) -> curve::BaseField { + curve::BaseField::from_bytes_le(&self.to_bytes_le()) + } + + fn to_icicle_scalar(&self) -> curve::ScalarField { + curve::ScalarField::from_bytes_le(&self.to_bytes_le()) + } + + fn from_icicle(icicle: &curve::BaseField) -> Result { + Self::from_bytes_le(&icicle.to_bytes_le()) + } +} + +impl ShortWeierstrassProjectivePoint { + fn to_icicle(&self) -> curve::G1Affine { + let s = self.to_affine(); + curve::G1Affine { + x: s.x().to_icicle(), + y: s.y().to_icicle(), + } + } + + fn from_icicle(icicle: &curve::G1Projective) -> Result { + Ok(Self::new([ + FieldElement::::from_icicle(&icicle.x).unwrap(), + FieldElement::::from_icicle(&icicle.y).unwrap(), + FieldElement::::from_icicle(&icicle.z).unwrap(), + ])) + } +} + +pub fn bls12_377_g1_msm( + scalars: &[BLS12377FieldElement], + points: &[ShortWeierstrassProjectivePoint], + config: Option, +) -> Result, IcicleError> { + let mut cfg = config.unwrap_or(msm::MSMConfig::default()); + let scalars = HostOrDeviceSlice::Host( + scalars + .iter() + .map(|scalar| scalar.to_icicle_scalar()) + .collect::>(), + ); + let points = HostOrDeviceSlice::Host( + points + .iter() + .map(|point| point.to_icicle()) + .collect::>(), + ); + let mut msm_results = HostOrDeviceSlice::cuda_malloc(1).unwrap(); + let stream = CudaStream::create().unwrap(); + cfg.ctx.stream = &stream; + cfg.is_async = true; + msm::msm(&scalars, &points, &cfg, &mut msm_results).unwrap(); + let mut msm_host_result = vec![curve::G1Projective::zero(); 1]; + stream.synchronize().unwrap(); + msm_results.copy_to_host(&mut msm_host_result[..]).unwrap(); + stream.destroy().unwrap(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle(&msm_host_result[0]).unwrap(); + Ok(res) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + elliptic_curve::{ + short_weierstrass::curves::bls12_377::curve::BLS12377FieldElement, + traits::IsEllipticCurve, + }, + field::element::FieldElement, + msm::pippenger::msm, + }; + + impl ShortWeierstrassProjectivePoint { + fn from_icicle_affine( + icicle: &curve::G1Affine, + ) -> Result, ByteConversionError> { + Ok(Self::new([ + FieldElement::::from_icicle(&icicle.x).unwrap(), + FieldElement::::from_icicle(&icicle.y).unwrap(), + FieldElement::one(), + ])) + } + } + + fn point_times_5() -> ShortWeierstrassProjectivePoint { + let x = BLS12377FieldElement::from_hex_unchecked( + "3c852d5aab73fbb51e57fbf5a0a8b5d6513ec922b2611b7547bfed74cba0dcdfc3ad2eac2733a4f55d198ec82b9964", + ); + let y = BLS12377FieldElement::from_hex_unchecked( + "a71425e68e55299c64d7eada9ae9c3fb87a9626b941d17128b64685fc07d0e635f3c3a512903b4e0a43e464045967b", + ); + BLS12377Curve::create_point_from_affine(x, y).unwrap() + } + + #[test] + fn to_from_icicle() { + // convert value of 5 to icicle and back again and that icicle 5 matches + let point = point_times_5(); + let icicle_point = point.to_icicle(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn to_from_icicle_generator() { + // Convert generator and see that it matches + let point = BLS12377Curve::generator(); + let icicle_point = point.to_icicle(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn icicle_g1_msm() { + const LEN: usize = 20; + let eight: BLS12377FieldElement = FieldElement::from(8); + let lambda_scalars = vec![eight; LEN]; + let lambda_points = (0..LEN).map(|_| point_times_5()).collect::>(); + let expected = msm( + &lambda_scalars + .clone() + .into_iter() + .map(|x| x.representative()) + .collect::>(), + &lambda_points, + ) + .unwrap(); + let res = bls12_377_g1_msm(&lambda_scalars, &lambda_points, None).unwrap(); + assert_eq!(res, expected); + } +} diff --git a/math/src/gpu/icicle/bls12_381.rs b/math/src/gpu/icicle/bls12_381.rs new file mode 100644 index 000000000..ce1f123b8 --- /dev/null +++ b/math/src/gpu/icicle/bls12_381.rs @@ -0,0 +1,152 @@ +use icicle_bls12_381::curve; +use icicle_core::{error::IcicleError, msm, traits::FieldImpl}; +use icicle_cuda_runtime::{memory::HostOrDeviceSlice, stream::CudaStream}; + +use crate::{ + elliptic_curve::short_weierstrass::{ + curves::bls12_381::{ + curve::{BLS12381Curve, BLS12381FieldElement}, + field_extension::BLS12381PrimeField, + }, + point::ShortWeierstrassProjectivePoint, + }, + errors::ByteConversionError, + field::element::FieldElement, + traits::ByteConversion, +}; + +impl BLS12381FieldElement { + fn to_icicle(&self) -> curve::BaseField { + curve::BaseField::from_bytes_le(&self.to_bytes_le()) + } + + fn to_icicle_scalar(&self) -> curve::ScalarField { + curve::ScalarField::from_bytes_le(&self.to_bytes_le()) + } + + fn from_icicle(icicle: &curve::BaseField) -> Result { + Self::from_bytes_le(&icicle.to_bytes_le()) + } +} + +impl ShortWeierstrassProjectivePoint { + fn to_icicle(&self) -> curve::G1Affine { + let s = self.to_affine(); + curve::G1Affine { + x: s.x().to_icicle(), + y: s.y().to_icicle(), + } + } + + fn from_icicle(icicle: &curve::G1Projective) -> Result { + Ok(Self::new([ + FieldElement::::from_icicle(&icicle.x).unwrap(), + FieldElement::::from_icicle(&icicle.y).unwrap(), + FieldElement::::from_icicle(&icicle.z).unwrap(), + ])) + } +} + +pub fn bls12_381_g1_msm( + scalars: &[BLS12381FieldElement], + points: &[ShortWeierstrassProjectivePoint], + config: Option, +) -> Result, IcicleError> { + let mut cfg = config.unwrap_or(msm::MSMConfig::default()); + let scalars = HostOrDeviceSlice::Host( + scalars + .iter() + .map(|scalar| scalar.to_icicle_scalar()) + .collect::>(), + ); + let points = HostOrDeviceSlice::Host( + points + .iter() + .map(|point| point.to_icicle()) + .collect::>(), + ); + let mut msm_results = HostOrDeviceSlice::cuda_malloc(1).unwrap(); + let stream = CudaStream::create().unwrap(); + cfg.ctx.stream = &stream; + cfg.is_async = true; + msm::msm(&scalars, &points, &cfg, &mut msm_results).unwrap(); + let mut msm_host_result = vec![curve::G1Projective::zero(); 1]; + stream.synchronize().unwrap(); + msm_results.copy_to_host(&mut msm_host_result[..]).unwrap(); + stream.destroy().unwrap(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle(&msm_host_result[0]).unwrap(); + Ok(res) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + elliptic_curve::{ + short_weierstrass::curves::bls12_381::curve::BLS12381FieldElement, + traits::IsEllipticCurve, + }, + field::element::FieldElement, + msm::pippenger::msm, + }; + + impl ShortWeierstrassProjectivePoint { + fn from_icicle_affine( + icicle: &curve::G1Affine, + ) -> Result, ByteConversionError> { + Ok(Self::new([ + FieldElement::::from_icicle(&icicle.x).unwrap(), + FieldElement::::from_icicle(&icicle.y).unwrap(), + FieldElement::one(), + ])) + } + } + + fn point_times_5() -> ShortWeierstrassProjectivePoint { + let x = BLS12381FieldElement::from_hex_unchecked( + "32bcce7e71eb50384918e0c9809f73bde357027c6bf15092dd849aa0eac274d43af4c68a65fb2cda381734af5eecd5c", + ); + let y = BLS12381FieldElement::from_hex_unchecked( + "11e48467b19458aabe7c8a42dc4b67d7390fdf1e150534caadddc7e6f729d8890b68a5ea6885a21b555186452b954d88", + ); + BLS12381Curve::create_point_from_affine(x, y).unwrap() + } + + #[test] + fn to_from_icicle() { + // convert value of 5 to icicle and back again and that icicle 5 matches + let point = point_times_5(); + let icicle_point = point.to_icicle(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn to_from_icicle_generator() { + // Convert generator and see that it matches + let point = BLS12381Curve::generator(); + let icicle_point = point.to_icicle(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn icicle_g1_msm() { + const LEN: usize = 20; + let eight: BLS12381FieldElement = FieldElement::from(8); + let lambda_scalars = vec![eight; LEN]; + let lambda_points = (0..LEN).map(|_| point_times_5()).collect::>(); + let expected = msm( + &lambda_scalars, + &lambda_points, + ) + .unwrap(); + let res = bls12_381_g1_msm(&lambda_scalars, &lambda_points, None).unwrap(); + assert_eq!(res, expected); + } +} diff --git a/math/src/gpu/icicle/bn254.rs b/math/src/gpu/icicle/bn254.rs new file mode 100644 index 000000000..daf91a61f --- /dev/null +++ b/math/src/gpu/icicle/bn254.rs @@ -0,0 +1,153 @@ +use icicle_bn254::curve; +use icicle_core::{error::IcicleError, msm, traits::FieldImpl}; +use icicle_cuda_runtime::{memory::HostOrDeviceSlice, stream::CudaStream}; + +use crate::{ + elliptic_curve::short_weierstrass::{ + curves::bn_254::{ + curve::{BN254Curve, BN254FieldElement}, + field_extension::BN254PrimeField, + }, + point::ShortWeierstrassProjectivePoint, + }, + errors::ByteConversionError, + field::element::FieldElement, + traits::ByteConversion, +}; + +impl BN254FieldElement { + fn to_icicle(&self) -> curve::BaseField { + curve::BaseField::from_bytes_le(&self.to_bytes_le()) + } + + fn to_icicle_scalar(&self) -> curve::ScalarField { + curve::ScalarField::from_bytes_le(&self.to_bytes_le()) + } + + fn from_icicle(icicle: &curve::BaseField) -> Result { + Self::from_bytes_le(&icicle.to_bytes_le()) + } +} + +impl ShortWeierstrassProjectivePoint { + fn to_icicle(&self) -> curve::G1Affine { + let s = self.to_affine(); + curve::G1Affine { + x: s.x().to_icicle(), + y: s.y().to_icicle(), + } + } + + fn from_icicle(icicle: &curve::G1Projective) -> Result { + Ok(Self::new([ + FieldElement::::from_icicle(&icicle.x).unwrap(), + FieldElement::::from_icicle(&icicle.y).unwrap(), + FieldElement::::from_icicle(&icicle.z).unwrap(), + ])) + } +} + +pub fn bn254_g1_msm( + scalars: &[BN254FieldElement], + points: &[ShortWeierstrassProjectivePoint], + config: Option, +) -> Result, IcicleError> { + let mut cfg = config.unwrap_or(msm::MSMConfig::default()); + let scalars = HostOrDeviceSlice::Host( + scalars + .iter() + .map(|scalar| scalar.to_icicle_scalar()) + .collect::>(), + ); + let points = HostOrDeviceSlice::Host( + points + .iter() + .map(|point| point.to_icicle()) + .collect::>(), + ); + let mut msm_results = HostOrDeviceSlice::cuda_malloc(1).unwrap(); + let stream = CudaStream::create().unwrap(); + cfg.ctx.stream = &stream; + cfg.is_async = true; + msm::msm(&scalars, &points, &cfg, &mut msm_results).unwrap(); + let mut msm_host_result = vec![curve::G1Projective::zero(); 1]; + stream.synchronize().unwrap(); + msm_results.copy_to_host(&mut msm_host_result[..]).unwrap(); + stream.destroy().unwrap(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle(&msm_host_result[0]).unwrap(); + Ok(res) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + elliptic_curve::{ + short_weierstrass::curves::bn_254::curve::BN254FieldElement, traits::IsEllipticCurve, + }, + field::element::FieldElement, + msm::pippenger::msm, + }; + + impl ShortWeierstrassProjectivePoint { + fn from_icicle_affine( + icicle: &curve::G1Affine, + ) -> Result, ByteConversionError> { + Ok(Self::new([ + FieldElement::::from_icicle(&icicle.x).unwrap(), + FieldElement::::from_icicle(&icicle.y).unwrap(), + FieldElement::one(), + ])) + } + } + + fn point_times_5() -> ShortWeierstrassProjectivePoint { + let x = BN254FieldElement::from_hex_unchecked( + "16ab03b69dfb4f870b0143ebf6a71b7b2e4053ca7a4421d09a913b8b834bbfa3", + ); + let y = BN254FieldElement::from_hex_unchecked( + "2512347279ba1049ef97d4ec348d838f939d2b7623e88f4826643cf3889599b2", + ); + BN254Curve::create_point_from_affine(x, y).unwrap() + } + + #[test] + fn to_from_icicle() { + // convert value of 5 to icicle and back again and that icicle 5 matches + let point = point_times_5(); + let icicle_point = point.to_icicle(); + let res = ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn to_from_icicle_generator() { + // Convert generator and see that it matches + let point = BN254Curve::generator(); + let icicle_point = point.to_icicle(); + let res = ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn icicle_g1_msm() { + const LEN: usize = 20; + let eight: BN254FieldElement = FieldElement::from(8); + let lambda_scalars = vec![eight; LEN]; + let lambda_points = (0..LEN).map(|_| point_times_5()).collect::>(); + let expected = msm( + &lambda_scalars + .clone() + .into_iter() + .map(|x| x.representative()) + .collect::>(), + &lambda_points, + ) + .unwrap(); + let res = bn254_g1_msm(&lambda_scalars, &lambda_points, None).unwrap(); + assert_eq!(res, expected); + } +} diff --git a/math/src/gpu/icicle/mod.rs b/math/src/gpu/icicle/mod.rs new file mode 100644 index 000000000..1eb113d53 --- /dev/null +++ b/math/src/gpu/icicle/mod.rs @@ -0,0 +1,380 @@ +use crate::{ + cyclic_group::IsGroup, + elliptic_curve::{ + short_weierstrass::{ + curves::{ + bls12_377::curve::BLS12377Curve, + bls12_381::{curve::BLS12381Curve, twist::BLS12381TwistCurve, field_extension::BLS12381PrimeField}, + bn_254::{curve::BN254Curve, field_extension::BN254PrimeField} + }, + point::ShortWeierstrassProjectivePoint, + }, + traits::IsEllipticCurve, + }, + errors::ByteConversionError, + field::{element::FieldElement, traits::{IsField, IsSubFieldOf, IsFFTField}}, + msm::naive::MSMError, + fft::errors::FFTError, + traits::ByteConversion, +}; +use icicle_bls12_377::curve::CurveCfg as IcicleBLS12377Curve; +use icicle_bls12_381::curve::{ + CurveCfg as IcicleBLS12381Curve, + ScalarCfg as IcicleBLS12381ScalarCfg +}; +use icicle_bn254::curve::{ + CurveCfg as IcicleBN254Curve, + ScalarCfg as IcicleBN254ScalarCfg +}; +use icicle_core::{ + curve::{Affine, Curve, Projective}, + msm, + ntt::{ntt, NTT, NTTConfig, NTTDir}, + traits::FieldImpl, +}; +use icicle_cuda_runtime::{memory::HostOrDeviceSlice, stream::CudaStream}; + +use std::fmt::Debug; + +impl GpuMSMPoint for ShortWeierstrassProjectivePoint { + type LambdaCurve = BLS12381Curve; + type GpuCurve = IcicleBLS12381Curve; + + fn curve_name() -> &'static str { + "BLS12381" + } + + fn to_icicle_affine(point: &Self) -> Affine { + let s = Self::to_affine(point); + Affine:: { + x: Self::to_icicle_field(s.x()), + y: Self::to_icicle_field(s.y()), + } + } + + fn from_icicle_projective( + icicle: &Projective, + ) -> Result { + Ok(Self::new([ + Self::from_icicle_field(&icicle.x).unwrap(), + Self::from_icicle_field(&icicle.y).unwrap(), + Self::from_icicle_field(&icicle.z).unwrap(), + ])) + } +} + +//NOTE THIS IS A PLACEHOLDER COMPILING ICICLE G2 TOOK TO LONG! +impl GpuMSMPoint for ShortWeierstrassProjectivePoint { + type LambdaCurve = BLS12381Curve; + type GpuCurve = IcicleBLS12381Curve; + + fn curve_name() -> &'static str { + "" + } + + fn to_icicle_affine(point: &Self) -> Affine { + let s = Self::to_affine(point); + Affine:: { + x: Self::to_icicle_field(s.x()), + y: Self::to_icicle_field(s.y()), + } + } + + fn from_icicle_projective( + icicle: &Projective, + ) -> Result { + Ok(Self::new([ + Self::from_icicle_field(&icicle.x).unwrap(), + Self::from_icicle_field(&icicle.y).unwrap(), + Self::from_icicle_field(&icicle.z).unwrap(), + ])) + } +} + +impl GpuMSMPoint for ShortWeierstrassProjectivePoint { + type LambdaCurve = BLS12377Curve; + type GpuCurve = IcicleBLS12377Curve; + fn curve_name() -> &'static str { + "BLS12377" + } + + fn to_icicle_affine(point: &Self) -> Affine { + let s = Self::to_affine(point); + Affine:: { + x: Self::to_icicle_field(s.x()), + y: Self::to_icicle_field(s.y()), + } + } + + fn from_icicle_projective( + icicle: &Projective, + ) -> Result { + Ok(Self::new([ + Self::from_icicle_field(&icicle.x).unwrap(), + Self::from_icicle_field(&icicle.y).unwrap(), + Self::from_icicle_field(&icicle.z).unwrap(), + ])) + } +} + +impl GpuMSMPoint for ShortWeierstrassProjectivePoint { + type LambdaCurve = BN254Curve; + type GpuCurve = IcicleBN254Curve; + fn curve_name() -> &'static str { + "BN254" + } + + fn to_icicle_affine(point: &Self) -> Affine { + let s = Self::to_affine(point); + Affine:: { + x: Self::to_icicle_field(s.x()), + y: Self::to_icicle_field(s.y()), + } + } + + fn from_icicle_projective( + icicle: &Projective, + ) -> Result { + Ok(Self::new([ + Self::from_icicle_field(&icicle.x).unwrap(), + Self::from_icicle_field(&icicle.y).unwrap(), + Self::from_icicle_field(&icicle.z).unwrap(), + ])) + } +} + +pub trait GpuMSMPoint: IsGroup { + type LambdaCurve: IsEllipticCurve + Clone + Debug; + type GpuCurve: Curve + msm::MSM; + //type FE: ByteConversion; + /// Used for searching this field's implementation in other languages, e.g in MSL + /// for executing parallel operations with the Metal API. + fn curve_name() -> &'static str { + "" + } + + fn to_icicle_affine(point: &Self) -> Affine; + + fn from_icicle_projective( + icicle: &Projective, + ) -> Result; + + fn to_icicle_field(element: &FE) -> ::BaseField { + ::BaseField::from_bytes_le(&element.to_bytes_le()) + } + + fn to_icicle_scalar( + element: &FE, + ) -> ::ScalarField { + ::ScalarField::from_bytes_le(&element.to_bytes_le()) + } + + fn from_icicle_field( + icicle: &::BaseField, + ) -> Result { + FE::from_bytes_le(&icicle.to_bytes_le()) + } +} + +pub trait IcicleFFT: IsField +where + FieldElement: ByteConversion, + ::Config: NTT +{ + type ScalarField: FieldImpl; + + fn to_icicle_scalar(element: &FieldElement) -> Self::ScalarField { + Self::ScalarField::from_bytes_le(&element.to_bytes_le()) + } + + fn from_icicle_scalar( + icicle: &Self::ScalarField, + ) -> Result, ByteConversionError> { + FieldElement::::from_bytes_le(&icicle.to_bytes_le()) + } +} + +impl IcicleFFT for BLS12381PrimeField { + type ScalarField = ::ScalarField; +} + +impl IcicleFFT for BN254PrimeField { + type ScalarField = ::ScalarField; +} + +pub fn icicle_msm( + cs: &[FieldElement], + points: &[G], +) -> Result +where + FieldElement: ByteConversion, +{ + let mut cfg = msm::MSMConfig::default(); + let scalars = HostOrDeviceSlice::Host( + cs.iter() + .map(|scalar| G::to_icicle_scalar(scalar)) + .collect::>(), + ); + let points = HostOrDeviceSlice::Host( + points + .iter() + .map(|point| G::to_icicle_affine(point)) + .collect::>(), + ); + let mut msm_results = HostOrDeviceSlice::cuda_malloc(1).unwrap(); + let stream = CudaStream::create().unwrap(); + cfg.ctx.stream = &stream; + cfg.is_async = true; + msm::msm(&scalars, &points, &cfg, &mut msm_results).unwrap(); + let mut msm_host_result = [Projective::::zero(); 1]; + stream.synchronize().unwrap(); + msm_results.copy_to_host(&mut msm_host_result[..]).unwrap(); + stream.destroy().unwrap(); + let res = G::from_icicle_projective(&msm_host_result[0]).unwrap(); + Ok(res) +} + +pub fn evaluate_fft_icicle( + coeffs: &Vec>, +) -> Result>, FFTError> +where + E: IsSubFieldOf, + FieldElement: ByteConversion, + <::ScalarField as FieldImpl>::Config: NTT<::ScalarField>, + E: IsField + IcicleFFT + IsFFTField, +{ + let size = coeffs.len(); + let mut cfg = NTTConfig::default(); + let order = coeffs.len() as u64; + let dir = NTTDir::kForward; + let scalars = HostOrDeviceSlice::Host( + coeffs + .iter() + .map(|scalar| E::to_icicle_scalar(&scalar)) + .collect::>(), + ); + let mut ntt_results = HostOrDeviceSlice::cuda_malloc(size).unwrap(); + let stream = CudaStream::create().unwrap(); + cfg.ctx.stream = &stream; + cfg.is_async = true; + let root_of_unity = E::to_icicle_scalar(&E::get_primitive_root_of_unity(order).unwrap()); + ::Config::initialize_domain(root_of_unity, &cfg.ctx).unwrap(); + ntt(&scalars, dir, &cfg, &mut ntt_results).unwrap(); + stream.synchronize().unwrap(); + let mut ntt_host_results = vec![E::ScalarField::zero(); size]; + ntt_results.copy_to_host(&mut ntt_host_results[..]).unwrap(); + stream.destroy().unwrap(); + let res = ntt_host_results + .as_slice() + .iter() + .map(|scalar| E::from_icicle_scalar(&scalar).unwrap()) + .collect::>(); + Ok(res) +} + +pub fn interpolate_fft_icicle( + coeffs: &Vec>, +) -> Result>, FFTError> +where + F: IsSubFieldOf, + FieldElement: ByteConversion, + <::ScalarField as FieldImpl>::Config: NTT<::ScalarField>, + E: IsField + IcicleFFT + IsFFTField, +{ + let size = coeffs.len(); + let mut cfg = NTTConfig::default(); + let order = coeffs.len() as u64; + let dir = NTTDir::kInverse; + let scalars = HostOrDeviceSlice::Host( + coeffs + .iter() + .map(|scalar| E::to_icicle_scalar(scalar)) + .collect::>(), + ); + let mut ntt_results = HostOrDeviceSlice::cuda_malloc(size).unwrap(); + let stream = CudaStream::create().unwrap(); + cfg.ctx.stream = &stream; + cfg.is_async = true; + let root_of_unity = E::to_icicle_scalar(&E::get_primitive_root_of_unity(order).unwrap()); + ::Config::initialize_domain(root_of_unity, &cfg.ctx).unwrap(); + ntt(&scalars, dir, &cfg, &mut ntt_results).unwrap(); + stream.synchronize().unwrap(); + let mut ntt_host_results = vec![E::ScalarField::zero(); size]; + ntt_results.copy_to_host(&mut ntt_host_results[..]).unwrap(); + stream.destroy().unwrap(); + let res = ntt_host_results + .as_slice() + .iter() + .map(|scalar| E::from_icicle_scalar(&scalar).unwrap()) + .collect::>(); + Ok(res) +} + + +#[cfg(test)] +mod test { + use super::*; + use crate::{ + elliptic_curve::{ + short_weierstrass::curves::bls12_381::curve::BLS12381FieldElement, + traits::IsEllipticCurve, + }, + field::element::FieldElement, + msm::pippenger::msm, + }; + + impl ShortWeierstrassProjectivePoint { + fn from_icicle_affine( + icicle: &curve::G1Affine, + ) -> Result, ByteConversionError> { + Ok(Self::new([ + FieldElement::::from_icicle(&icicle.x).unwrap(), + FieldElement::::from_icicle(&icicle.y).unwrap(), + FieldElement::one(), + ])) + } + } + + fn point_times_5() -> ShortWeierstrassProjectivePoint { + let x = BLS12381FieldElement::from_hex_unchecked( + "32bcce7e71eb50384918e0c9809f73bde357027c6bf15092dd849aa0eac274d43af4c68a65fb2cda381734af5eecd5c", + ); + let y = BLS12381FieldElement::from_hex_unchecked( + "11e48467b19458aabe7c8a42dc4b67d7390fdf1e150534caadddc7e6f729d8890b68a5ea6885a21b555186452b954d88", + ); + BLS12381Curve::create_point_from_affine(x, y).unwrap() + } + + #[test] + fn to_from_icicle() { + // convert value of 5 to icicle and back again and that icicle 5 matches + let point = point_times_5(); + let icicle_point = point.to_icicle(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn to_from_icicle_generator() { + // Convert generator and see that it matches + let point = BLS12381Curve::generator(); + let icicle_point = point.to_icicle(); + let res = + ShortWeierstrassProjectivePoint::::from_icicle_affine(&icicle_point) + .unwrap(); + assert_eq!(point, res) + } + + #[test] + fn icicle_g1_msm() { + const LEN: usize = 20; + let eight: BLS12381FieldElement = FieldElement::from(8); + let lambda_scalars = vec![eight; LEN]; + let lambda_points = (0..LEN).map(|_| point_times_5()).collect::>(); + let expected = msm(&lambda_scalars, &lambda_points).unwrap(); + let res = bls12_381_g1_msm(&lambda_scalars, &lambda_points, None).unwrap(); + assert_eq!(res, expected); + } +} diff --git a/math/src/gpu/mod.rs b/math/src/gpu/mod.rs index ee1867d80..a19c098b2 100644 --- a/math/src/gpu/mod.rs +++ b/math/src/gpu/mod.rs @@ -6,3 +6,6 @@ If you were using the `--all-features` flag please read this crate's Cargo.toml" #[cfg(feature = "cuda")] pub mod cuda; + +#[cfg(feature = "icicle")] +pub mod icicle; diff --git a/math/src/msm/naive.rs b/math/src/msm/naive.rs index fec773073..50a1bad9b 100644 --- a/math/src/msm/naive.rs +++ b/math/src/msm/naive.rs @@ -2,16 +2,22 @@ use core::fmt::Display; use crate::cyclic_group::IsGroup; use crate::unsigned_integer::traits::IsUnsignedInteger; +#[cfg(feature = "icicle")] +use icicle_core::error::IcicleError; #[derive(Debug)] pub enum MSMError { LengthMismatch(usize, usize), + #[cfg(feature = "icicle")] + Icicle(IcicleError), } impl Display for MSMError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { MSMError::LengthMismatch(cs, points) => write!(f, "`cs` and `points` must be of the same length to compute `msm`. Got: {cs} and {points}"), + #[cfg(feature = "icicle")] + MSMError::Icicle(e) => write!(f, "Icicle GPU backend failure. {:?}", e), } } } diff --git a/math/src/msm/pippenger.rs b/math/src/msm/pippenger.rs index 1ccfce9c3..99c7dc759 100644 --- a/math/src/msm/pippenger.rs +++ b/math/src/msm/pippenger.rs @@ -1,7 +1,13 @@ -use crate::{cyclic_group::IsGroup, unsigned_integer::element::UnsignedInteger}; +use crate::{ + cyclic_group::IsGroup, + field::{element::FieldElement, traits::IsField}, + unsigned_integer::element::UnsignedInteger, +}; use super::naive::MSMError; - +#[cfg(feature = "icicle")] +use crate::gpu::icicle::{icicle_msm, GpuMSMPoint}; +use crate::traits::ByteConversion; use alloc::vec; /// This function computes the multiscalar multiplication (MSM). @@ -15,20 +21,39 @@ use alloc::vec; /// If `points` and `cs` are empty, then `msm` returns the zero element of the group. /// /// Panics if `cs` and `points` have different lengths. -pub fn msm( - cs: &[UnsignedInteger], +pub fn msm>, G>( + cs: &[FieldElement], points: &[G], ) -> Result where - G: IsGroup, + G: IsGroup + GpuMSMPoint, + FieldElement: ByteConversion, { if cs.len() != points.len() { return Err(MSMError::LengthMismatch(cs.len(), points.len())); } - let window_size = optimum_window_size(cs.len()); + #[cfg(feature = "icicle")] + { + if !G::curve_name().is_empty() { + icicle_msm(cs, points) + } else { + println!( + "Icicle msm failed for field {}. Program will fallback to CPU.", + core::any::type_name::() + ); + let window_size = optimum_window_size(cs.len()); + let cs = cs.iter().map(|cs| *cs.value()).collect::>(); + Ok(msm_with(&cs, points, window_size)) + } + } - Ok(msm_with(cs, points, window_size)) + #[cfg(not(feature = "icicle"))] + { + let window_size = optimum_window_size(cs.len()); + let cs = cs.iter().map(|cs| *cs.value()).collect::>(); + Ok(msm_with(&cs, points, window_size)) + } } fn optimum_window_size(data_length: usize) -> usize { diff --git a/provers/groth16/src/prover.rs b/provers/groth16/src/prover.rs index b0dbeb579..559bfb5f5 100644 --- a/provers/groth16/src/prover.rs +++ b/provers/groth16/src/prover.rs @@ -66,29 +66,20 @@ impl Proof { pub struct Prover; impl Prover { pub fn prove(w: &[FrElement], qap: &QuadraticArithmeticProgram, pk: &ProvingKey) -> Proof { - let h_coefficients = qap - .calculate_h_coefficients(w) - .iter() - .map(|elem| elem.representative()) - .collect::>(); - - let w = w - .iter() - .map(|elem| elem.representative()) - .collect::>(); + let h_coefficients = qap.calculate_h_coefficients(w); // Sample randomness for hiding let r = sample_fr_elem(); let s = sample_fr_elem(); // [π_1]_1 - let pi1 = msm(&w, &pk.l_tau_g1) + let pi1 = msm(w, &pk.l_tau_g1) .unwrap() .operate_with(&pk.alpha_g1) .operate_with(&pk.delta_g1.operate_with_self(r.representative())); // [π_2]_2 - let pi2 = msm(&w, &pk.r_tau_g2) + let pi2 = msm(w, &pk.r_tau_g2) .unwrap() .operate_with(&pk.beta_g2) .operate_with(&pk.delta_g2.operate_with_self(s.representative())); @@ -108,7 +99,7 @@ impl Prover { .unwrap(); // [π_2]_1 - let pi2_g1 = msm(&w, &pk.r_tau_g1) + let pi2_g1 = msm(w, &pk.r_tau_g1) .unwrap() .operate_with(&pk.beta_g1) .operate_with(&pk.delta_g1.operate_with_self(s.representative())); diff --git a/provers/groth16/src/verifier.rs b/provers/groth16/src/verifier.rs index b83c4b215..0c0728475 100644 --- a/provers/groth16/src/verifier.rs +++ b/provers/groth16/src/verifier.rs @@ -6,14 +6,7 @@ use crate::setup::VerifyingKey; pub fn verify(vk: &VerifyingKey, proof: &Proof, pub_inputs: &[FrElement]) -> bool { // [γ^{-1} * (β*l(τ) + α*r(τ) + o(τ))]_1 - let k_tau_assigned_verifier_g1 = msm( - &pub_inputs - .iter() - .map(|elem| elem.representative()) - .collect::>(), - &vk.verifier_k_tau_g1, - ) - .unwrap(); + let k_tau_assigned_verifier_g1 = msm(pub_inputs, &vk.verifier_k_tau_g1).unwrap(); Pairing::compute(&proof.pi3, &vk.delta_g2).unwrap() * vk.alpha_g1_times_beta_g2.clone()