diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim
index ee300fdd..2d9bd8e6 100644
--- a/benchmarks/bench_elliptic_template.nim
+++ b/benchmarks/bench_elliptic_template.nim
@@ -52,9 +52,9 @@ proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int6
   let ns = inNanoseconds((stop-start) div iters)
   let throughput = 1e9 / float64(ns)
   when SupportsGetTicks:
-    echo &"{op:<68} {elliptic:<32} {throughput:>15.3f} ops/s {ns:>16} ns/op {(stopClk - startClk) div iters:>12} CPU cycles (approx)"
+    echo &"{op:<68} {elliptic:<36} {throughput:>15.3f} ops/s {ns:>16} ns/op {(stopClk - startClk) div iters:>12} CPU cycles (approx)"
   else:
-    echo &"{op:<68} {elliptic:<32} {throughput:>15.3f} ops/s {ns:>16} ns/op"
+    echo &"{op:<68} {elliptic:<36} {throughput:>15.3f} ops/s {ns:>16} ns/op"
 
 template bench*(op: string, EC: typedesc, iters: int, body: untyped): untyped =
   measure(iters, startTime, stopTime, startClk, stopClk, body)
@@ -92,8 +92,12 @@ proc mixedAddBench*(EC: typedesc, iters: int) =
     bench("EC Mixed Addition vartime " & $EC.G, EC, iters):
       r.madd_vartime(P, Qaff)
   else:
-    bench("EC Mixed Addition " & $EC.G, EC, iters):
-      r.madd(P, Qaff)
+    block:
+      bench("EC Mixed Addition " & $EC.G, EC, iters):
+        r.madd(P, Qaff)
+    block:
+      bench("EC Mixed Addition vartime " & $EC.G, EC, iters):
+        r.madd_vartime(P, Qaff)
 
 proc doublingBench*(EC: typedesc, iters: int) =
   var r {.noInit.}: EC
diff --git a/constantine/math/elliptic/ec_scalar_mul_vartime.nim b/constantine/math/elliptic/ec_scalar_mul_vartime.nim
index 7b5cdbe3..9a470d58 100644
--- a/constantine/math/elliptic/ec_scalar_mul_vartime.nim
+++ b/constantine/math/elliptic/ec_scalar_mul_vartime.nim
@@ -30,6 +30,10 @@ iterator unpackBE(scalarByte: byte): bool =
 # Variable-time scalar multiplication
 # ------------------------------------------------------------------------------
 
+template `+=`[F; G: static Subgroup](P: var ECP_ShortW[F, G], Q: ECP_ShortW_Aff[F, G]) =
+  P.madd_vartime(P, Q)
+template `-=`[F; G: static Subgroup](P: var ECP_ShortW[F, G], Q: ECP_ShortW_Aff[F, G]) =
+  P.msub_vartime(P, Q)
 
 func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
   ## **Variable-time** Elliptic Curve Scalar Multiplication
@@ -60,37 +64,93 @@ func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime
       else:
         P += Paff
 
-func scalarMul_doubleAdd_smallscalar_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
+func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
   ## **Variable-time** Elliptic Curve Scalar Multiplication
-  ## This is optimized for small scalars < 16-bits
-  ## for which affine transformation cannot be amortized over
-  ## 16 doublings + 8 additions
-  ##
-  ## P <- [k] P
-  ##
-  ## This uses the double-and-add algorithm
-  ## This MUST NOT be used with secret data.
-  ##
-  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
-  var scalarCanonical: array[scalar.bits.ceilDiv_vartime(8), byte]
-  scalarCanonical.marshal(scalar, bigEndian)
-
-  var Porig {.noinit.}: EC
-  Porig = P
-
-  P.setInf()
-  var isInf = true
+  ## P <- [k]P through hardcoded addition chains; only valid for scalars k < 2⁴ = 16
+  let s = uint scalar.limbs[0]
 
-  for scalarByte in scalarCanonical:
-    for bit in unpackBE(scalarByte):
-      if not isInf:
-        P.double()
-      if bit:
-        if isInf:
-          P = Porig
-          isInf = false
-        else:
-          P += Porig
+  case s
+  of 0:
+    P.setInf()
+  of 1:
+    return
+  of 2:
+    P.double()
+  of 3:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    P.sum_vartime(P, t)    # [3]P = P + [2]P
+  of 4:
+    P.double()
+    P.double()
+  of 5:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    P.sum_vartime(P, t)    # [5]P = P + [4]P
+  of 6:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    P.sum_vartime(P, t)    # [3]P
+    P.double()             # [6]P
+  of 7:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    P.diff_vartime(t, P)   # [7]P = [8]P - P
+  of 8:
+    P.double()
+    P.double()
+    P.double()
+  of 9:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    P.sum_vartime(P, t)    # [9]P = P + [8]P
+  of 10:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    P.sum_vartime(P, t)    # [5]P
+    P.double()             # [10]P
+  of 11:
+    var t1 {.noInit.}, t2 {.noInit.}: EC
+    t1.double(P)           # [2]P
+    t2.double(t1)          # [4]P
+    t2.double()            # [8]P
+    t1.sum_vartime(t1, t2) # [10]P
+    P.sum_vartime(P, t1)   # [11]P
+  of 12:
+    var t1 {.noInit.}, t2 {.noInit.}: EC
+    t1.double(P)           # [2]P
+    t1.double()            # [4]P
+    t2.double(t1)          # [8]P
+    P.sum_vartime(t1, t2)  # [12]P
+  of 13:
+    var t1 {.noInit.}, t2 {.noInit.}: EC
+    t1.double(P)           # [2]P
+    t1.double()            # [4]P
+    t2.double(t1)          # [8]P
+    t1.sum_vartime(t1, t2) # [12]P
+    P.sum_vartime(P, t1)   # [13]P
+  of 14:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    t.diff_vartime(t, P)   # [7]P
+    P.double(t)            # [14]P
+  of 15:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    t.double()             # [16]P
+    P.diff_vartime(t, P)   # [15]P = [16]P - P
+  else:
+    unreachable()
 
 func scalarMul_minHammingWeight_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
   ## **Variable-time** Elliptic Curve Scalar Multiplication
@@ -164,7 +224,7 @@ func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt,
   tabEC[0] = P
   P2.double(P)
   for i in 1 ..< tabEC.len:
-    tabEC[i].sum(tabEC[i-1], P2)
+    tabEC[i].sum_vartime(tabEC[i-1], P2)
 
   var tab {.noinit.}: array[precompSize, affine(EC)]
   tab.batchAffine(tabEC)
@@ -242,7 +302,7 @@ func scalarMulEndo_minHammingWeight_windowed_vartime*[scalBits: static int; EC](
       tabEC[m][0] = endomorphisms[m-1]
       P2.double(endomorphisms[m-1])
     for i in 1 ..< tabEC[m].len:
-      tabEC[m][i].sum(tabEC[m][i-1], P2)
+      tabEC[m][i].sum_vartime(tabEC[m][i-1], P2)
 
   var tab {.noinit.}: array[M, array[precompSize, affine(EC)]]
   tab.batchAffine(tabEC)
@@ -314,12 +374,10 @@ func scalarMul_vartime*[scalBits; EC](
   if 64 < usedBits:
     # With a window of 5, we precompute 2^3 = 8 points
     P.scalarMul_minHammingWeight_windowed_vartime(scalar, window = 5)
-  elif 16 < usedBits and usedBits <= 64:
+  elif 16 < usedBits:
     # With a window of 3, we precompute 2^1 = 2 points
     P.scalarMul_minHammingWeight_windowed_vartime(scalar, window = 3)
-  elif usedBits == 1:
-    discard
-  elif usedBits == 0:
-    P.setInf()
+  elif 4 < usedBits:
+    P.scalarMul_doubleAdd_vartime(scalar)
   else:
-    P.scalarMul_doubleAdd_smallscalar_vartime(scalar)
\ No newline at end of file
+    P.scalarMul_addchain_4bit_vartime(scalar)
\ No newline at end of file
diff --git a/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim b/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim
index 4ae363a1..91af94d2 100644
--- a/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim
+++ b/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim
@@ -799,6 +799,106 @@ func sum_vartime*[F; G: static Subgroup](
     r.z.prod(p.z, q.z, skipFinalSub = true)
     r.z *= H
 
+func madd_vartime*[F; G: static Subgroup](
+       r: var ECP_ShortW_Jac[F, G],
+       p: ECP_ShortW_Jac[F, G],
+       q: ECP_ShortW_Aff[F, G])
+       {.tags:[VarTime], meter.} =
+  ## **Variable-time** Jacobian mixed addition
+  ##
+  ## This MUST NOT be used with secret data.
+  ##
+  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
+
+  if p.isInf().bool:
+    r.fromAffine(q)
+    return
+  if q.isInf().bool:
+    r = p
+    return
+
+  # Accelerate mixed additions
+  let isPz1 = p.z.isOne().bool
+
+  # Addition, Cohen et al, 1998
+  #
+  # Mixed-addition: 8M + 3S + 6add + 1*2
+  # Affine+Affine->Jacobian: 4M + 2S + 6add + 1*2
+
+  # | Addition, Cohen et al, 1998   |
+  # | 12M + 4S + 6add + 1*2         |
+  # | ----------------------------- |
+  # | Z₁Z₁ = Z₁²                    |
+  # | Z₂Z₂ = Z₂²                    |
+  # |                               |
+  # | U₁ = X₁*Z₂Z₂                  |
+  # | U₂ = X₂*Z₁Z₁                  |
+  # | S₁ = Y₁*Z₂*Z₂Z₂               |
+  # | S₂ = Y₂*Z₁*Z₁Z₁               |
+  # | H  = U₂-U₁ # P=-Q, P=Inf, P=Q |
+  # | R  = S₂-S₁ # Q=Inf            |
+  # |                               |
+  # | HH  = H²                      |
+  # | V   = U₁*HH                   |
+  # | HHH = H*HH                    |
+  # |                               |
+  # | X₃ = R²-HHH-2*V               |
+  # | Y₃ = R*(V-X₃)-S₁*HHH          |
+  # | Z₃ = Z₁*Z₂*H                  |
+
+  var U {.noInit.}, S{.noInit.}, H{.noInit.}, R{.noInit.}: F
+
+  if not isPz1:                          # case Z₁ != 1
+    R.square(p.z, skipFinalSub = true)   #   Z₁Z₁ = Z₁²
+
+  U = p.x                                # U₁ = X₁*Z₂Z₂
+  if isPz1:                              # case Z₁ = Z₂ = 1
+    H = q.x
+  else:
+    H.prod(q.x, R)
+  H -= U                                 # H  = U₂-U₁
+  S = p.y                                # S₁ = Y₁*Z₂*Z₂Z₂
+
+  if isPz1:
+    R = q.y
+  else:
+    R.prod(R, p.z, skipFinalSub = true)
+    R *= q.y                             # S₂ = Y₂*Z₁*Z₁Z₁
+  R -= S                                 # R  = S₂-S₁
+
+  if H.isZero().bool:                    # Same x coordinate
+    if R.isZero().bool:                  # case P = Q
+      r.double(p)
+      return
+    else:                                # case P = -Q
+      r.setInf()
+      return
+
+  var HHH{.noInit.}: F
+  template V: untyped = U
+
+  HHH.square(H, skipFinalSub = true)
+  V *= HHH                               # V   = U₁*HH
+  HHH *= H                               # HHH = H*HH
+
+  # X₃ = R²-HHH-2*V, we use the y coordinate as temporary (should we? cache misses?)
+  r.y.square(R)
+  r.y -= V
+  r.y -= V
+  r.x.diff(r.y, HHH)
+
+  # Y₃ = R*(V-X₃)-S₁*HHH
+  V -= r.x
+  V *= R
+  HHH *= S
+  r.y.diff(V, HHH)
+
+  # Z₃ = Z₁*Z₂*H
+  if isPz1:
+    r.z = H
+  else:
+    r.z.prod(H, p.z)
+
 func diff_vartime*(r: var ECP_ShortW_Jac, P, Q: ECP_ShortW_Jac) {.inline.} =
   ## r = P - Q
   ##
@@ -808,3 +908,13 @@ func diff_vartime*(r: var ECP_ShortW_Jac, P, Q: ECP_ShortW_Jac) {.inline.} =
   var nQ {.noInit.}: typeof(Q)
   nQ.neg(Q)
   r.sum_vartime(P, nQ)
+
+func msub_vartime*(r: var ECP_ShortW_Jac, P: ECP_ShortW_Jac, Q: ECP_ShortW_Aff) {.inline.} =
+  ## r = P - Q
+  ##
+  ## This MUST NOT be used with secret data.
+  ##
+  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
+  var nQ {.noInit.}: typeof(Q)
+  nQ.neg(Q)
+  r.madd_vartime(P, nQ)
\ No newline at end of file
diff --git a/constantine/math/elliptic/ec_shortweierstrass_projective.nim b/constantine/math/elliptic/ec_shortweierstrass_projective.nim
index 421d0654..2eda589b 100644
--- a/constantine/math/elliptic/ec_shortweierstrass_projective.nim
+++ b/constantine/math/elliptic/ec_shortweierstrass_projective.nim
@@ -573,6 +573,95 @@ func sum_vartime*[F; G: static Subgroup](
   r.y *= R
   r.y -= Y1Z2
 
+func madd_vartime*[F; G: static Subgroup](
+       r: var ECP_ShortW_Prj[F, G],
+       p: ECP_ShortW_Prj[F, G],
+       q: ECP_ShortW_Aff[F, G])
+       {.tags:[VarTime], meter.} =
+  ## **Variable-time** homogeneous projective mixed addition
+  ##
+  ## This MUST NOT be used with secret data.
+  ##
+  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
+
+  if p.isInf().bool:
+    r.fromAffine(q)
+    return
+  if q.isInf().bool:
+    r = p
+    return
+
+  # Accelerate mixed additions
+  let isPz1 = p.z.isOne().bool
+
+  # Addition, Cohen et al, 1998
+  # General case: 12M + 4S + 6add + 1*2
+  # https://hyperelliptic.org/EFD/g1p/auto-shortw-projective.html#addition-add-1998-cmo-2
+  #
+  # Y₁Z₂ = Y₁*Z₂
+  # X₁Z₂ = X₁*Z₂
+  # Z₁Z₂ = Z₁*Z₂
+  # u    = Y₂*Z₁-Y₁Z₂
+  # uu   = u²
+  # v    = X₂*Z₁-X₁Z₂
+  # vv   = v²
+  # vvv  = v*vv
+  # R    = vv*X₁Z₂
+  # A    = uu*Z₁Z₂-vvv-2*R
+  # X₃   = v*A
+  # Y₃   = u*(R-A)-vvv*Y₁Z₂
+  # Z₃   = vvv*Z₁Z₂
+
+  var Y1Z2 {.noInit.}, R {.noInit.}: F
+  var U {.noInit.}, V {.noInit.}: F
+
+  R = p.x
+  Y1Z2 = p.y
+
+  if isPz1:
+    U = q.y
+    V = q.x
+  else:
+    U.prod(q.y, p.z)
+    V.prod(q.x, p.z)
+  V -= R
+
+  if V.isZero().bool:       # Same x coordinate
+    if bool(U == Y1Z2):     # case P = Q
+      r.double(p)
+      return
+    else:
+      r.setInf()            # case P = -Q
+      return
+
+  var VVV{.noInit.}: F
+
+  VVV.square(V, skipFinalSub = true)
+  R *= VVV
+  VVV *= V
+
+  r.y.diff(U, Y1Z2)         # u = Y₂*Z₁-Y₁Z₂
+  U.square(r.y)             # uu = u²
+
+  # A and Z₃ depend on Z₁Z₂
+  template A:untyped = U
+  if isPz1:
+    r.z = VVV
+  else:
+    A.prod(U, p.z)
+    r.z.prod(VVV, p.z)
+
+  A -= VVV
+  A -= R
+  A -= R                    # A = uu*Z₁Z₂-vvv-2*R
+
+  r.x.prod(V, A)
+
+  R -= A
+  Y1Z2 *= VVV
+  r.y *= R
+  r.y -= Y1Z2
+
 func diff_vartime*(r: var ECP_ShortW_Prj, P, Q: ECP_ShortW_Prj) {.inline.} =
   ## r = P - Q
   ##
@@ -581,4 +670,14 @@ func diff_vartime*(r: var ECP_ShortW_Prj, P, Q: ECP_ShortW_Prj) {.inline.} =
   ## This is highly VULNERABLE to timing attacks and power analysis attacks.
   var nQ {.noInit.}: typeof(Q)
   nQ.neg(Q)
-  r.sum_vartime(P, nQ)
\ No newline at end of file
+  r.sum_vartime(P, nQ)
+
+func msub_vartime*(r: var ECP_ShortW_Prj, P: ECP_ShortW_Prj, Q: ECP_ShortW_Aff) {.inline.} =
+  ## r = P - Q
+  ##
+  ## This MUST NOT be used with secret data.
+  ##
+  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
+  var nQ {.noInit.}: typeof(Q)
+  nQ.neg(Q)
+  r.madd_vartime(P, nQ)
\ No newline at end of file
diff --git a/constantine/math/polynomials/fft.nim b/constantine/math/polynomials/fft.nim
index dd1c6a87..632ea8ab 100644
--- a/constantine/math/polynomials/fft.nim
+++ b/constantine/math/polynomials/fft.nim
@@ -64,18 +64,18 @@ func simpleFT[EC; bits: static int](
   # FFT is a recursive algorithm
   # This is the base-case using a O(n²) algorithm
 
-  # TODO: endomorphism acceleration for windowed-NAF
-
   let L = output.len
   var last {.noInit.}, v {.noInit.}: EC
 
+  var v0w0 {.noInit.} = vals[0]
+  v0w0.scalarMul_vartime(rootsOfUnity[0])
+
   for i in 0 ..< L:
-    last = vals[0]
-    last.scalarMul_vartime(rootsOfUnity[0])
+    last = v0w0
     for j in 1 ..< L:
       v = vals[j]
       v.scalarMul_vartime(rootsOfUnity[(i*j) mod L])
-      last += v
+      last.sum_vartime(last, v)
     output[i] = last
 
 func fft_internal[EC; bits: static int](
@@ -101,8 +101,8 @@ func fft_internal[EC; bits: static int](
     # FFT Butterfly
     y_times_root = output[i+half]
     y_times_root .scalarMul_vartime(rootsOfUnity[i])
-    output[i+half] .diff(output[i], y_times_root)
-    output[i] += y_times_root
+    output[i+half] .diff_vartime(output[i], y_times_root)
+    output[i] .sum_vartime(output[i], y_times_root)
 
 func fft*[EC](
   desc: ECFFT_Descriptor[EC],
diff --git a/tests/math_elliptic_curves/t_ec_template.nim b/tests/math_elliptic_curves/t_ec_template.nim
index 73f1edcb..75516079 100644
--- a/tests/math_elliptic_curves/t_ec_template.nim
+++ b/tests/math_elliptic_curves/t_ec_template.nim
@@ -698,17 +698,19 @@ proc run_EC_mixed_add_impl*(
           bAff.affine(b)
           bz1.fromAffine(bAff) # internals special-case Z=1
 
-          var r_generic, r_mixed, r_vartime, r_vartime2: EC
+          var r_generic, r_mixed, r_vartime, r_vartime2, r_vartime3: EC
 
           r_generic.sum(a, b)
           r_mixed.madd(a, bAff)
           r_vartime.sum_vartime(a, bz1)
           r_vartime2.sum_vartime(a, b)
+          r_vartime3.madd_vartime(a, bAff)
 
           check:
             bool(r_generic == r_mixed)
             bool(r_generic == r_vartime)
             bool(r_generic == r_vartime2)
+            bool(r_generic == r_vartime3)
 
       test(ec, randZ = false, gen = Uniform)
       test(ec, randZ = true, gen = Uniform)
@@ -726,16 +728,18 @@ proc run_EC_mixed_add_impl*(
           aAff.affine(a)
           az1.fromAffine(aAff)
 
-          var r_generic, r_mixed, r_vartime, r_vartime2: EC
+          var r_generic, r_mixed, r_vartime, r_vartime2, r_vartime3: EC
 
           r_generic.double(a)
           r_mixed.madd(a, aAff)
           r_vartime.sum_vartime(a, a)
           r_vartime2.sum_vartime(a, az1)
+          r_vartime3.madd_vartime(a, aAff)
           check:
             bool(r_generic == r_mixed)
             bool(r_generic == r_vartime)
             bool(r_generic == r_vartime2)
+            bool(r_generic == r_vartime3)
 
           # Aliasing test
           r_mixed = a
@@ -744,10 +748,13 @@ proc run_EC_mixed_add_impl*(
           r_vartime.sum_vartime(r_vartime, a)
           r_vartime2 = az1
           r_vartime2.sum_vartime(r_vartime2, az1)
+          r_vartime3 = a
+          r_vartime3.madd_vartime(r_vartime3, aAff)
           check:
             bool(r_generic == r_mixed)
             bool(r_generic == r_vartime)
             bool(r_generic == r_vartime2)
+            bool(r_generic == r_vartime3)
 
       test(ec, randZ = false, gen = Uniform)
       test(ec, randZ = true, gen = Uniform)
@@ -777,20 +784,27 @@ proc run_EC_mixed_add_impl*(
             bool(a == r_mixed)
 
           # vartime - internals special-case Z=1
-          var r_vartime: EC
+          var r_vartime, r_vartime2: EC
           var b: EC
           b.fromAffine(bAff)
           a.setInf()
 
           r_vartime.sum_vartime(a, b)
+          r_vartime2.madd_vartime(a, bAff)
 
-          check: bool(r_vartime == r_mixed)
+          check:
+            bool(r_vartime == r_mixed)
+            bool(r_vartime2 == r_mixed)
 
           # Aliasing
           r_vartime.setInf()
           r_vartime.sum_vartime(r_vartime, b)
+          r_vartime2.setInf()
+          r_vartime2.madd_vartime(r_vartime2, bAff)
 
-          check: bool(r_vartime == r_mixed)
+          check:
+            bool(r_vartime == r_mixed)
+            bool(r_vartime2 == r_mixed)
 
       test(ec, randZ = false, gen = Uniform)
       test(ec, randZ = false, gen = HighHammingWeight)
@@ -813,19 +827,26 @@ proc run_EC_mixed_add_impl*(
           check: bool(r == a)
 
           # vartime
-          var r_vartime: EC
+          var r_vartime, r_vartime2: EC
           var b: EC
           b.fromAffine(bAff)
 
           r_vartime.sum_vartime(a, b)
+          r_vartime2.madd_vartime(a, bAff)
 
-          check: bool(r_vartime == r)
+          check:
+            bool(r_vartime == r)
+            bool(r_vartime2 == r)
 
           # Aliasing
           r_vartime = a
           r_vartime.sum_vartime(r_vartime, b)
+          r_vartime2 = a
+          r_vartime2.madd_vartime(r_vartime2, bAff)
 
-          check: bool(r_vartime == r)
+          check:
+            bool(r_vartime == r)
+            bool(r_vartime2 == r)
 
       test(ec, randZ = false, gen = Uniform)
       test(ec, randZ = true, gen = Uniform)
@@ -853,19 +874,26 @@ proc run_EC_mixed_add_impl*(
           check: r.isInf().bool
 
           # vartime
-          var r_vartime: EC
+          var r_vartime, r_vartime2: EC
           var na: EC
           na.fromAffine(naAff)
 
           r_vartime.sum_vartime(a, na)
+          r_vartime2.madd_vartime(a, naAff)
 
-          check: bool(r_vartime == r)
+          check:
+            bool(r_vartime == r)
+            bool(r_vartime2 == r)
 
           # Aliasing
           r_vartime = a
           r_vartime.sum_vartime(r_vartime, na)
+          r_vartime2 = a
+          r_vartime2.madd_vartime(r_vartime2, naAff)
 
-          check: bool(r_vartime == r)
+          check:
+            bool(r_vartime == r)
+            bool(r_vartime2 == r)
 
       test(ec, randZ = false, gen = Uniform)
       test(ec, randZ = true, gen = Uniform)