implement vartime EC add

mratsim · Sep 2, 2023 · 4661e82 · 4661e82
1 parent 35988f5
commit 4661e82
Show file tree

Hide file tree

Showing 6 changed files with 359 additions and 60 deletions.
diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim
@@ -52,9 +52,9 @@ proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int6
   let ns = inNanoseconds((stop-start) div iters)
   let throughput = 1e9 / float64(ns)
   when SupportsGetTicks:
-    echo &"{op:<68} {elliptic:<32} {throughput:>15.3f} ops/s {ns:>16} ns/op {(stopClk - startClk) div iters:>12} CPU cycles (approx)"
+    echo &"{op:<68} {elliptic:<36} {throughput:>15.3f} ops/s {ns:>16} ns/op {(stopClk - startClk) div iters:>12} CPU cycles (approx)"
   else:
-    echo &"{op:<68} {elliptic:<32} {throughput:>15.3f} ops/s {ns:>16} ns/op"
+    echo &"{op:<68} {elliptic:<36} {throughput:>15.3f} ops/s {ns:>16} ns/op"
 
 template bench*(op: string, EC: typedesc, iters: int, body: untyped): untyped =
   measure(iters, startTime, stopTime, startClk, stopClk, body)
@@ -92,8 +92,12 @@ proc mixedAddBench*(EC: typedesc, iters: int) =
     bench("EC Mixed Addition vartime " & $EC.G, EC, iters):
       r.madd_vartime(P, Qaff)
   else:
-    bench("EC Mixed Addition " & $EC.G, EC, iters):
-      r.madd(P, Qaff)
+    block:
+      bench("EC Mixed Addition " & $EC.G, EC, iters):
+        r.madd(P, Qaff)
+    block:
+      bench("EC Mixed Addition vartime " & $EC.G, EC, iters):
+        r.madd_vartime(P, Qaff)
 
 proc doublingBench*(EC: typedesc, iters: int) =
   var r {.noInit.}: EC

diff --git a/constantine/math/elliptic/ec_scalar_mul_vartime.nim b/constantine/math/elliptic/ec_scalar_mul_vartime.nim
@@ -30,6 +30,10 @@ iterator unpackBE(scalarByte: byte): bool =
 
 # Variable-time scalar multiplication
 # ------------------------------------------------------------------------------
+template `+=`[F; G: static Subgroup](P: var ECP_ShortW[F, G], Q: ECP_ShortW_Aff[F, G]) =
+  P.madd_vartime(P, Q)  # P <- P + Q via the variable-time mixed addition; P is both output and first operand
+template `-=`[F; G: static Subgroup](P: var ECP_ShortW[F, G], Q: ECP_ShortW_Aff[F, G]) =
+  P.msub_vartime(P, Q)  # P <- P - Q via the variable-time mixed subtraction; P is both output and first operand
 
 func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
   ## **Variable-time** Elliptic Curve Scalar Multiplication
@@ -60,37 +64,93 @@ func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime
         else:
           P += Paff
 
-func scalarMul_doubleAdd_smallscalar_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
+func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
   ## **Variable-time** Elliptic Curve Scalar Multiplication
-  ## This is optimized for small scalars < 16-bits
-  ## for which affine transformation cannot be amortized over
-  ## 16 doublings + 8 additions
-  ##
-  ##   P <- [k] P
-  ##
-  ## This uses the double-and-add algorithm
-  ## This MUST NOT be used with secret data.
-  ##
-  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
-  var scalarCanonical: array[scalar.bits.ceilDiv_vartime(8), byte]
-  scalarCanonical.marshal(scalar, bigEndian)
-
-  var Porig {.noinit.}: EC
-  Porig = P
-
-  P.setInf()
-  var isInf = true
+  ##
+  ##   P <- [k] P
+  ##
+  ## This only handles small scalars 0 <= k < 2⁴ = 16.
+  ## Only the lowest limb of `scalar` is read; a value of 16 or more
+  ## reaches `unreachable()`.
+  ##
+  ## Each value of k is served by a hard-coded chain of doublings,
+  ## variable-time additions and variable-time subtractions.
+  ##
+  ## This branches on the scalar value: it MUST NOT be used with secret data.
+  let s = uint scalar.limbs[0]
 
-  for scalarByte in scalarCanonical:
-    for bit in unpackBE(scalarByte):
-      if not isInf:
-        P.double()
-      if bit:
-        if isInf:
-          P = Porig
-          isInf = false
-        else:
-          P += Porig
+  case s
+  of 0:
+    P.setInf()             # [0]P is the point at infinity
+  of 1:
+    return                 # [1]P = P, nothing to do
+  of 2:
+    P.double()             # [2]P
+  of 3:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    P.sum_vartime(P, t)    # [1]P + [2]P = [3]P
+  of 4:
+    P.double()             # [2]P
+    P.double()             # [4]P
+  of 5:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P, t must be doubled in place (doubling P again would leave t = [2]P)
+    P.sum_vartime(P, t)    # [1]P + [4]P = [5]P
+  of 6:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    P.sum_vartime(P, t)    # [3]P
+    P.double()             # [6]P
+  of 7:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    P.diff_vartime(t, P)   # [8]P - [1]P = [7]P
+  of 8:
+    P.double()             # [2]P
+    P.double()             # [4]P
+    P.double()             # [8]P
+  of 9:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    P.sum_vartime(P, t)    # [1]P + [8]P = [9]P
+  of 10:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    P.sum_vartime(P, t)    # [5]P
+    P.double()             # [10]P
+  of 11:
+    var t1 {.noInit.}, t2 {.noInit.}: EC
+    t1.double(P)           # [2]P
+    t2.double(t1)          # [4]P
+    t2.double()            # [8]P
+    t1.sum_vartime(t1, t2) # [2]P + [8]P = [10]P
+    P.sum_vartime(P, t1)   # [1]P + [10]P = [11]P
+  of 12:
+    var t1 {.noInit.}, t2 {.noInit.}: EC
+    t1.double(P)           # [2]P
+    t1.double()            # [4]P
+    t2.double(t1)          # [8]P
+    P.sum_vartime(t1, t2)  # [4]P + [8]P = [12]P
+  of 13:
+    var t1 {.noInit.}, t2 {.noInit.}: EC
+    t1.double(P)           # [2]P
+    t1.double()            # [4]P
+    t2.double(t1)          # [8]P
+    t1.sum_vartime(t1, t2) # [4]P + [8]P = [12]P
+    P.sum_vartime(P, t1)   # [1]P + [12]P = [13]P
+  of 14:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    t.diff_vartime(t, P)   # [8]P - [1]P = [7]P
+    P.double(t)            # [14]P
+  of 15:
+    var t {.noInit.}: EC
+    t.double(P)            # [2]P
+    t.double()             # [4]P
+    t.double()             # [8]P
+    t.double()             # [16]P
+    P.diff_vartime(t, P)   # [16]P - [1]P = [15]P
+  else:
+    unreachable()          # callers are expected to pass scalars below 16 only
 
 func scalarMul_minHammingWeight_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].}  =
   ## **Variable-time** Elliptic Curve Scalar Multiplication
@@ -164,7 +224,7 @@ func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt,
   tabEC[0] = P
   P2.double(P)
   for i in 1 ..< tabEC.len:
-    tabEC[i].sum(tabEC[i-1], P2)
+    tabEC[i].sum_vartime(tabEC[i-1], P2)
 
   var tab {.noinit.}: array[precompSize, affine(EC)]
   tab.batchAffine(tabEC)
@@ -242,7 +302,7 @@ func scalarMulEndo_minHammingWeight_windowed_vartime*[scalBits: static int; EC](
       tabEC[m][0] = endomorphisms[m-1]
       P2.double(endomorphisms[m-1])
     for i in 1 ..< tabEC[m].len:
-      tabEC[m][i].sum(tabEC[m][i-1], P2)
+      tabEC[m][i].sum_vartime(tabEC[m][i-1], P2)
 
   var tab {.noinit.}: array[M, array[precompSize, affine(EC)]]
   tab.batchAffine(tabEC)
@@ -314,12 +374,10 @@ func scalarMul_vartime*[scalBits; EC](
   if 64 < usedBits:
     # With a window of 5, we precompute 2^3 = 8 points
     P.scalarMul_minHammingWeight_windowed_vartime(scalar, window = 5)
-  elif 16 < usedBits and usedBits <= 64:
+  elif 16 < usedBits:
     # With a window of 3, we precompute 2^1 = 2 points
     P.scalarMul_minHammingWeight_windowed_vartime(scalar, window = 3)
-  elif usedBits == 1:
-    discard
-  elif usedBits == 0:
-    P.setInf()
+  elif 4 < usedBits:
+    P.scalarMul_doubleAdd_vartime(scalar)
   else:
-    P.scalarMul_doubleAdd_smallscalar_vartime(scalar)
+    P.scalarMul_addchain_4bit_vartime(scalar)
diff --git a/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim b/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim
@@ -799,6 +799,106 @@ func sum_vartime*[F; G: static Subgroup](
       r.z.prod(p.z, q.z, skipFinalSub = true)
       r.z *= H
 
+func madd_vartime*[F; G: static Subgroup](
+       r: var ECP_ShortW_Jac[F, G],
+       p: ECP_ShortW_Jac[F, G],
+       q: ECP_ShortW_Aff[F, G])
+       {.tags:[VarTime], meter.} =
+  ## **Variable-time** Jacobian mixed addition: r = p + q,
+  ## with `p` in Jacobian coordinates and `q` in affine coordinates.
+  ## This MUST NOT be used with secret data.
+  ##
+  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
+
+  if p.isInf().bool:                       # p is the point at infinity: r = q
+    r.fromAffine(q)
+    return
+  if q.isInf().bool:                       # q is the point at infinity: r = p
+    r = p
+    return
+
+  # Fast-path flag: when Z₁ = 1 the Z₁ powers below are skipped entirely
+  let isPz1 = p.z.isOne().bool
+
+  # Addition, Cohen et al, 1998
+  #
+  # Mixed-addition:          8M + 3S + 6add + 1*2
+  # Affine+Affine->Jacobian: 4M + 2S + 6add + 1*2
+
+  # |  Addition, Cohen et al, 1998  |
+  # |  12M + 4S + 6add + 1*2        |
+  # | ----------------------------- |
+  # | Z₁Z₁ = Z₁²                    |
+  # | Z₂Z₂ = Z₂²                    |
+  # |                               |
+  # | U₁ = X₁*Z₂Z₂                  |
+  # | U₂ = X₂*Z₁Z₁                  |
+  # | S₁ = Y₁*Z₂*Z₂Z₂               |
+  # | S₂ = Y₂*Z₁*Z₁Z₁               |
+  # | H  = U₂-U₁ # P=-Q, P=Inf, P=Q |
+  # | R  = S₂-S₁ # Q=Inf            |
+  # |                               |
+  # | HH  = H²                      |
+  # | V   = U₁*HH                   |
+  # | HHH = H*HH                    |
+  # |                               |
+  # | X₃ = R²-HHH-2*V               |
+  # | Y₃ = R*(V-X₃)-S₁*HHH          |
+  # | Z₃ = Z₁*Z₂*H                  |
+
+  var U {.noInit.}, S{.noInit.}, H{.noInit.}, R{.noInit.}: F
+
+  if not isPz1:                            # case Z₁ != 1
+    R.square(p.z, skipFinalSub = true)     #   Z₁Z₁ = Z₁², R is used as scratch space here
+
+  U = p.x                                  #   U₁ = X₁*Z₂Z₂ = X₁, as q is affine (Z₂ = 1)
+  if isPz1:                                #   case Z₁ = Z₂ = 1
+    H = q.x                                #   U₂ = X₂
+  else:
+    H.prod(q.x, R)                         #   U₂ = X₂*Z₁Z₁
+  H -= U                                   #   H  = U₂-U₁
+  S = p.y                                  #   S₁ = Y₁*Z₂*Z₂Z₂ = Y₁, as q is affine (Z₂ = 1)
+
+  if isPz1:
+    R = q.y                                #   S₂ = Y₂
+  else:
+    R.prod(R, p.z, skipFinalSub = true)    #   Z₁*Z₁Z₁
+    R *= q.y                               #   S₂ = Y₂*Z₁*Z₁Z₁
+  R -= S                                   # R  = S₂-S₁
+
+  if H.isZero().bool:                      # Same x coordinate
+    if R.isZero().bool:                    # case P = Q
+      r.double(p)
+      return
+    else:                                  # case P = -Q
+      r.setInf()
+      return
+
+  var HHH{.noInit.}: F
+  template V: untyped = U                  # V is an alias of U: V = U₁*HH is computed in place
+
+  HHH.square(H, skipFinalSub = true)      # HH  = H², temporarily stored in HHH
+  V *= HHH                                # V   = U₁*HH
+  HHH *= H                                # HHH = H*HH
+
+  # X₃ = R²-HHH-2*V, we use the y coordinate as temporary (should we? cache misses?)
+  r.y.square(R)                           # R²
+  r.y -= V                                # R²-V
+  r.y -= V                                # R²-2*V
+  r.x.diff(r.y, HHH)                      # X₃ = R²-2*V-HHH
+
+  # Y₃ = R*(V-X₃)-S₁*HHH
+  V -= r.x                                # V-X₃
+  V *= R                                  # R*(V-X₃)
+  HHH *= S                                # S₁*HHH
+  r.y.diff(V, HHH)                        # Y₃
+
+  # Z₃ = Z₁*Z₂*H, with Z₂ = 1
+  if isPz1:
+    r.z = H                               # Z₁ = 1: Z₃ = H
+  else:
+    r.z.prod(H, p.z)                      # Z₃ = Z₁*H
+
 func diff_vartime*(r: var ECP_ShortW_Jac, P, Q: ECP_ShortW_Jac) {.inline.} =
   ## r = P - Q
   ##
@@ -808,3 +908,13 @@ func diff_vartime*(r: var ECP_ShortW_Jac, P, Q: ECP_ShortW_Jac) {.inline.} =
   var nQ {.noInit.}: typeof(Q)
   nQ.neg(Q)
   r.sum_vartime(P, nQ)
+
+func msub_vartime*(r: var ECP_ShortW_Jac, P: ECP_ShortW_Jac, Q: ECP_ShortW_Aff) {.inline.} =
+  ## **Variable-time** mixed subtraction: r = P - Q,
+  ## with P in Jacobian coordinates and Q in affine coordinates.
+  ##
+  ## Computed as r = P + (-Q) through `madd_vartime`.
+  ##
+  ## This MUST NOT be used with secret data.
+  ##
+  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
+  var negQ {.noInit.}: typeof(Q)
+  neg(negQ, Q)               # negQ = -Q
+  madd_vartime(r, P, negQ)   # r = P + (-Q)