0xPolygonHermez · GoodDaisy · Jan 19, 2024 · Jan 19, 2024
diff --git a/src/goldilocks_base_field_avx.hpp b/src/goldilocks_base_field_avx.hpp
@@ -13,8 +13,8 @@
 // _8 variable can be expressed in 8 bits (<256)
 
 // OBSERVATIONS:
-// 1.  a + b overflows iff (a + b) < a (AVX does not suport carry, this is the way to check)
-// 2.  a - b underflows iff (a - b) > a (AVX does not suport carry, this is the way to check)
+// 1.  a + b overflows iff (a + b) < a (AVX does not support carry, this is the way to check)
+// 2.  a - b underflows iff (a - b) > a (AVX does not support carry, this is the way to check)
 // 3. (unsigned) a < (unsigned) b iff (signed) a_s < (singed) b_s (AVX2 does not support unsingend 64-bit comparisons)
 // 4. a_s + b = (a+b)_s. Dem: a+(1<<63)+b = a+b+(1<<63)
 
@@ -64,7 +64,7 @@ inline void Goldilocks::toCanonical_avx(__m256i &a_c, const __m256i &a)
     shift_avx(a_c, a_sc);
 }
 
-// Obtain cannonical representative of a_s,
+// Obtain canonical representative of a_s,
 // We assume a <= a_c+P
 // a_sc a shifted canonical
 // a_s  a shifted
@@ -84,7 +84,7 @@ inline void Goldilocks::add_avx(__m256i &c, const __m256i &a, const __m256i &b)
     add_avx_a_sc(c, a_sc, b);
 }
 
-// we assume a given in shifted cannonical form (a_sc)
+// we assume a given in shifted canonical form (a_sc)
 inline void Goldilocks::add_avx_a_sc(__m256i &c, const __m256i &a_sc, const __m256i &b)
 {
     // addition (if only one of the arguments is shifted the sumation is shifted)
@@ -105,7 +105,7 @@ inline void Goldilocks::add_avx_s_b_small(__m256i &c_s, const __m256i &a_s, cons
     const __m256i c0_s = _mm256_add_epi64(a_s, b_small);
     // We can use 32-bit comparison that is faster, lets see:
     // 1) a_s > c0_s => a_sh >= c0_sh
-    // 2) If a_sh = c0_sh => there is no overlow (demonstration bellow)
+    // 2) If a_sh = c0_sh => there is no overflow (demonstration below)
     // 3) Therefore: overflow iff a_sh > c0_sh
     // Dem item 2:
     //     c0_sh=a_sh+b_h+carry=a_sh
@@ -125,7 +125,7 @@ inline void Goldilocks::add_avx_b_small(__m256i &c, const __m256i &a, const __m2
     const __m256i c0_s = _mm256_add_epi64(a_s, b_small);
     // We can use 32-bit comparison that is faster, lets see:
     // 1) a_s > c0_s => a_sh >= c0_sh
-    // 2) If a_sh = c0_sh => there is no overlow (demonstration bellow)
+    // 2) If a_sh = c0_sh => there is no overflow (demonstration below)
     // 3) Therefore: overflow iff a_sh > c0_sh
     // Dem item 2:
     //     c0_sh=a_sh+b_h+carry=a_sh
@@ -180,7 +180,7 @@ inline void Goldilocks::mult_avx(__m256i &c, const __m256i &a, const __m256i &b)
     reduce_avx_128_64(c, c_h, c_l);
 }
 
-// We assume coeficients of b_8 can be expressed with 8 bits (<256)
+// We assume coefficients of b_8 can be expressed with 8 bits (<256)
 inline void Goldilocks::mult_avx_8(__m256i &c, const __m256i &a, const __m256i &b_8)
 {
     __m256i c_h, c_l;
@@ -422,7 +422,7 @@ inline void Goldilocks::spmv_avx_4x12_a(__m256i &c, const __m256i &a0, const __m
 // Sparse matrix-vector product (4x12 sparce matrix formed of four diagonal blocs 4x5 stored in a0...a3)
 // c[i]=Sum_j(aj[i]*b[j*4+i]) 0<=i<4 0<=j<3
 // We assume b_a aligned on a 32-byte boundary
-// We assume coeficients of b_8 can be expressed with 8 bits (<256)
+// We assume coefficients of b_8 can be expressed with 8 bits (<256)
 inline void Goldilocks::spmv_avx_4x12_8(__m256i &c, const __m256i &a0, const __m256i &a1, const __m256i &a2, const Goldilocks::Element b_8[12])
 {
 
@@ -512,7 +512,7 @@ inline void Goldilocks::mmult_avx_4x12_a(__m256i &b, const __m256i &a0, const __
 }
 
 // Dense matrix-vector product
-// We assume coeficients of M_8 can be expressed with 8 bits (<256)
+// We assume coefficients of M_8 can be expressed with 8 bits (<256)
 inline void Goldilocks::mmult_avx_4x12_8(__m256i &b, const __m256i &a0, const __m256i &a1, const __m256i &a2, const Goldilocks::Element M_8[48])
 {
     // Generate matrix 4x4
@@ -560,7 +560,7 @@ inline void Goldilocks::mmult_avx_a(__m256i &a0, __m256i &a1, __m256i &a2, const
     a1 = b1;
     a2 = b2;
 }
-// We assume coeficients of M_8 can be expressed with 8 bits (<256)
+// We assume coefficients of M_8 can be expressed with 8 bits (<256)
 inline void Goldilocks::mmult_avx_8(__m256i &a0, __m256i &a1, __m256i &a2, const Goldilocks::Element M_8[144])
 {
     __m256i b0, b1, b2;

diff --git a/src/goldilocks_base_field_avx512.hpp b/src/goldilocks_base_field_avx512.hpp
@@ -13,8 +13,8 @@
 // _8 variable can be expressed in 8 bits (<256)
 
 // OBSERVATIONS:
-// 1.  a + b overflows iff (a + b) < a (AVX does not suport carry, this is the way to check)
-// 2.  a - b underflows iff (a - b) > a (AVX does not suport carry, this is the way to check)
+// 1.  a + b overflows iff (a + b) < a (AVX does not support carry, this is the way to check)
+// 2.  a - b underflows iff (a - b) > a (AVX does not support carry, this is the way to check)
 
 const __m512i P8 = _mm512_set_epi64(GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME, GOLDILOCKS_PRIME);
 const __m512i P8_n = _mm512_set_epi64(GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG, GOLDILOCKS_PRIME_NEG);
@@ -40,7 +40,7 @@ inline void Goldilocks::store_avx512_a(Goldilocks::Element *a8_a, const __m512i
     _mm512_store_si512((__m512i *)a8_a, a);
 }
 
-// Obtain cannonical representative of a,
+// Obtain canonical representative of a,
 // We assume a <= a_c+P
 inline void Goldilocks::toCanonical_avx512(__m512i &a_c, const __m512i &a)
 {
@@ -98,7 +98,7 @@ inline void Goldilocks::mult_avx512(__m512i &c, const __m512i &a, const __m512i
     reduce_avx512_128_64(c, c_h, c_l);
 }
 
-// We assume coeficients of b_8 can be expressed with 8 bits (<256)
+// We assume coefficients of b_8 can be expressed with 8 bits (<256)
 inline void Goldilocks::mult_avx512_8(__m512i &c, const __m512i &a, const __m512i &b_8)
 {
     __m512i c_h, c_l;
@@ -269,7 +269,7 @@ inline void Goldilocks::square_avx512_128(__m512i &c_h, __m512i &c_l, const __m5
     c_h = _mm512_add_epi64(c_hh, r0_h);
 }
 
-// Data for two arrays of 12 compoments is interleaved: b1=[b[0..3]|b[8..11]|b[16..18]], b2=[b[4..7]|b[12..15]|b[19..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
+// Data for two arrays of 12 components is interleaved: b1=[b[0..3]|b[8..11]|b[16..19]], b2=[b[4..7]|b[12..15]|b[20..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
 inline void Goldilocks::dot_avx512(Element c[2], const __m512i &a0, const __m512i &a1, const __m512i &a2, const Element b[12])
 {
     __m512i c_;
@@ -282,7 +282,7 @@ inline void Goldilocks::dot_avx512(Element c[2], const __m512i &a0, const __m512
 
 // Sparse matrix-vector product (8x24 sparce matrix formed of three diagonal blocks os size 8x8)
 // c[i]=Sum_j(aj[i]*b[j*4+i]) 0<=i<8 0<=j<3
-// Data for two arrays of 12 compoments is interleaved: b1=[b[0..3]|b[8..11]|b[16..18]], b2=[b[4..7]|b[12..15]|b[19..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
+// Data for two arrays of 12 components is interleaved: b1=[b[0..3]|b[8..11]|b[16..19]], b2=[b[4..7]|b[12..15]|b[20..23]], first half of a0,a1,a2 is operated with b1, second half with b2.
 inline void Goldilocks::spmv_avx512_4x12(__m512i &c, const __m512i &a0, const __m512i &a1, const __m512i &a2, const Goldilocks::Element b[12])
 {
 
@@ -400,7 +400,7 @@ inline void Goldilocks::mmult_avx512(__m512i &a0, __m512i &a1, __m512i &a2, cons
     a2 = b2;
 }
 
-// We assume coeficients of M_8 can be expressed with 8 bits (<256)
+// We assume coefficients of M_8 can be expressed with 8 bits (<256)
 inline void Goldilocks::mmult_avx512_8(__m512i &a0, __m512i &a1, __m512i &a2, const Goldilocks::Element M_8[144])
 {
     __m512i b0, b1, b2;