Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix typos #44

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions src/goldilocks_base_field_avx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// _8 variable can be expressed in 8 bits (<256)

// OBSERVATIONS:
// 1. a + b overflows iff (a + b) < a (AVX does not suport carry, this is the way to check)
// 2. a - b underflows iff (a - b) > a (AVX does not suport carry, this is the way to check)
// 1. a + b overflows iff (a + b) < a (AVX does not support carry, this is the way to check)
// 2. a - b underflows iff (a - b) > a (AVX does not support carry, this is the way to check)
// 3. (unsigned) a < (unsigned) b iff (signed) a_s < (signed) b_s (AVX2 does not support unsigned 64-bit comparisons)
// 4. a_s + b = (a+b)_s. Dem: a+(1<<63)+b = a+b+(1<<63)

Expand Down Expand Up @@ -64,7 +64,7 @@ inline void Goldilocks::toCanonical_avx(__m256i &a_c, const __m256i &a)
shift_avx(a_c, a_sc);
}

// Obtain cannonical representative of a_s,
// Obtain canonical representative of a_s,
// We assume a <= a_c+P
// a_sc a shifted canonical
// a_s a shifted
Expand All @@ -84,7 +84,7 @@ inline void Goldilocks::add_avx(__m256i &c, const __m256i &a, const __m256i &b)
add_avx_a_sc(c, a_sc, b);
}

// we assume a given in shifted cannonical form (a_sc)
// we assume a given in shifted canonical form (a_sc)
inline void Goldilocks::add_avx_a_sc(__m256i &c, const __m256i &a_sc, const __m256i &b)
{
// addition (if only one of the arguments is shifted the summation is shifted)
Expand All @@ -105,7 +105,7 @@ inline void Goldilocks::add_avx_s_b_small(__m256i &c_s, const __m256i &a_s, cons
const __m256i c0_s = _mm256_add_epi64(a_s, b_small);
// We can use 32-bit comparison that is faster, let's see:
// 1) a_s > c0_s => a_sh >= c0_sh
// 2) If a_sh = c0_sh => there is no overlow (demonstration bellow)
// 2) If a_sh = c0_sh => there is no overflow (demonstration below)
// 3) Therefore: overflow iff a_sh > c0_sh
// Dem item 2:
// c0_sh=a_sh+b_h+carry=a_sh
Expand All @@ -125,7 +125,7 @@ inline void Goldilocks::add_avx_b_small(__m256i &c, const __m256i &a, const __m2
const __m256i c0_s = _mm256_add_epi64(a_s, b_small);
// We can use 32-bit comparison that is faster, let's see:
// 1) a_s > c0_s => a_sh >= c0_sh
// 2) If a_sh = c0_sh => there is no overlow (demonstration bellow)
// 2) If a_sh = c0_sh => there is no overflow (demonstration below)
// 3) Therefore: overflow iff a_sh > c0_sh
// Dem item 2:
// c0_sh=a_sh+b_h+carry=a_sh
Expand Down Expand Up @@ -180,7 +180,7 @@ inline void Goldilocks::mult_avx(__m256i &c, const __m256i &a, const __m256i &b)
reduce_avx_128_64(c, c_h, c_l);
}

// We assume coeficients of b_8 can be expressed with 8 bits (<256)
// We assume coefficients of b_8 can be expressed with 8 bits (<256)
inline void Goldilocks::mult_avx_8(__m256i &c, const __m256i &a, const __m256i &b_8)
{
__m256i c_h, c_l;
Expand Down Expand Up @@ -422,7 +422,7 @@ inline void Goldilocks::spmv_avx_4x12_a(__m256i &c, const __m256i &a0, const __m
// Sparse matrix-vector product (4x12 sparse matrix formed of four diagonal blocks 4x5 stored in a0...a3)
// c[i]=Sum_j(aj[i]*b[j*4+i]) 0<=i<4 0<=j<3
// We assume b_a aligned on a 32-byte boundary
// We assume coeficients of b_8 can be expressed with 8 bits (<256)
// We assume coefficients of b_8 can be expressed with 8 bits (<256)
inline void Goldilocks::spmv_avx_4x12_8(__m256i &c, const __m256i &a0, const __m256i &a1, const __m256i &a2, const Goldilocks::Element b_8[12])
{

Expand Down Expand Up @@ -512,7 +512,7 @@ inline void Goldilocks::mmult_avx_4x12_a(__m256i &b, const __m256i &a0, const __
}

// Dense matrix-vector product
// We assume coeficients of M_8 can be expressed with 8 bits (<256)
// We assume coefficients of M_8 can be expressed with 8 bits (<256)
inline void Goldilocks::mmult_avx_4x12_8(__m256i &b, const __m256i &a0, const __m256i &a1, const __m256i &a2, const Goldilocks::Element M_8[48])
{
// Generate matrix 4x4
Expand Down Expand Up @@ -560,7 +560,7 @@ inline void Goldilocks::mmult_avx_a(__m256i &a0, __m256i &a1, __m256i &a2, const
a1 = b1;
a2 = b2;
}
// We assume coeficients of M_8 can be expressed with 8 bits (<256)
// We assume coefficients of M_8 can be expressed with 8 bits (<256)
inline void Goldilocks::mmult_avx_8(__m256i &a0, __m256i &a1, __m256i &a2, const Goldilocks::Element M_8[144])
{
__m256i b0, b1, b2;
Expand Down
14 changes: 7 additions & 7 deletions src/goldilocks_base_field_avx512.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// _8 variable can be expressed in 8 bits (<256)

// OBSERVATIONS:
// 1. a + b overflows iff (a + b) < a (AVX does not suport carry, this is the way to check)
// 2. a - b underflows iff (a - b) > a (AVX does not suport carry, this is the way to check)
// 1. a + b overflows iff (a + b) < a (AVX does not support carry, this is the way to check)
// 2. a - b underflows iff (a - b) > a (AVX does not support carry, this is the way to check)

const __m512i P8 = _mm512_set_epi64(GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME);
const __m512i P8_n = _mm512_set_epi64(GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG);
Expand All @@ -40,7 +40,7 @@ inline void Goldilocks::store_avx512_a(Goldilocks::Element *a8_a, const __m512i
_mm512_store_si512((__m512i *)a8_a, a);
}

// Obtain cannonical representative of a,
// Obtain canonical representative of a,
// We assume a <= a_c+P
inline void Goldilocks::toCanonical_avx512(__m512i &a_c, const __m512i &a)
{
Expand Down Expand Up @@ -98,7 +98,7 @@ inline void Goldilocks::mult_avx512(__m512i &c, const __m512i &a, const __m512i
reduce_avx512_128_64(c, c_h, c_l);
}

// We assume coeficients of b_8 can be expressed with 8 bits (<256)
// We assume coefficients of b_8 can be expressed with 8 bits (<256)
inline void Goldilocks::mult_avx512_8(__m512i &c, const __m512i &a, const __m512i &b_8)
{
__m512i c_h, c_l;
Expand Down Expand Up @@ -269,7 +269,7 @@ inline void Goldilocks::square_avx512_128(__m512i &c_h, __m512i &c_l, const __m5
c_h = _mm512_add_epi64(c_hh, r0_h);
}

// Data for two arrays of 12 compoments is interleaved: b1=[b[0..3]|b[8..11]|b[16..18]], b2=[b[4..7]|b[12..15]|b[19..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
// Data for two arrays of 12 components is interleaved: b1=[b[0..3]|b[8..11]|b[16..18]], b2=[b[4..7]|b[12..15]|b[19..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
inline void Goldilocks::dot_avx512(Element c[2], const __m512i &a0, const __m512i &a1, const __m512i &a2, const Element b[12])
{
__m512i c_;
Expand All @@ -282,7 +282,7 @@ inline void Goldilocks::dot_avx512(Element c[2], const __m512i &a0, const __m512

// Sparse matrix-vector product (8x24 sparse matrix formed of three diagonal blocks of size 8x8)
// c[i]=Sum_j(aj[i]*b[j*4+i]) 0<=i<8 0<=j<3
// Data for two arrays of 12 compoments is interleaved: b1=[b[0..3]|b[8..11]|b[16..18]], b2=[b[4..7]|b[12..15]|b[19..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
// Data for two arrays of 12 components is interleaved: b1=[b[0..3]|b[8..11]|b[16..18]], b2=[b[4..7]|b[12..15]|b[19..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
inline void Goldilocks::spmv_avx512_4x12(__m512i &c, const __m512i &a0, const __m512i &a1, const __m512i &a2, const Goldilocks::Element b[12])
{

Expand Down Expand Up @@ -400,7 +400,7 @@ inline void Goldilocks::mmult_avx512(__m512i &a0, __m512i &a1, __m512i &a2, cons
a2 = b2;
}

// We assume coeficients of M_8 can be expressed with 8 bits (<256)
// We assume coefficients of M_8 can be expressed with 8 bits (<256)
inline void Goldilocks::mmult_avx512_8(__m512i &a0, __m512i &a1, __m512i &a2, const Goldilocks::Element M_8[144])
{
__m512i b0, b1, b2;
Expand Down