Skip to content

Commit

Permalink
Improve CRL shard leasing (#7030)
Browse files Browse the repository at this point in the history
Simplify the index-picking logic in the SA's leaseOldestCrlShard method.
Specifically, more clearly separate it into "missing" and "non-missing"
cases, which require entirely different logic: picking a random missing
shard, or picking the oldest unleased shard, respectively.

Also change the UpdateCRLShard method to "unlease" shards when they're
updated. This allows the crl-updater to run as quickly as it likes,
while still ensuring that multiple instances do not step on each other's
toes.

The config change for shardWidth and lookbackPeriod instead of
certificateLifetime has been deployed in prod since IN-8445. The config
change changing the shardWidth is just so that the tests neither produce
a bazillion shards, nor have to do a bazillion SA queries for each chunk
within a shard, improving the readability of test logs.

Part of #7023
  • Loading branch information
aarongable authored Aug 9, 2023
1 parent 38fc840 commit 6a450a2
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 42 deletions.
74 changes: 40 additions & 34 deletions sa/sa.go
Original file line number Diff line number Diff line change
Expand Up @@ -948,47 +948,51 @@ func (ssa *SQLStorageAuthority) leaseOldestCRLShard(ctx context.Context, req *sa
&shards,
`SELECT id, issuerID, idx, thisUpdate, nextUpdate, leasedUntil
FROM crlShards
WHERE issuerID = ?`,
req.IssuerNameID,
WHERE issuerID = ?
AND idx BETWEEN ? AND ?`,
req.IssuerNameID, req.MinShardIdx, req.MaxShardIdx,
)
if err != nil {
return -1, fmt.Errorf("selecting candidate shards: %w", err)
}

// Convert the slice to a map to detect shards that we expect to exist, but
// don't. At the same time, determine the oldest known shard.
shardMap := make(map[int]*crlShardModel, len(shards))
var oldest *crlShardModel
for _, shard := range shards {
if shard.Idx < int(req.MinShardIdx) || shard.Idx > int(req.MaxShardIdx) {
continue
// Determine which shard index we want to lease.
var shardIdx int
var needToInsert bool
if len(shards) < (int(req.MaxShardIdx + 1 - req.MinShardIdx)) {
// Some expected shards are missing (i.e. never-before-produced), so we
// pick one at random.
missing := make(map[int]struct{}, req.MaxShardIdx+1-req.MinShardIdx)
for i := req.MinShardIdx; i <= req.MaxShardIdx; i++ {
missing[int(i)] = struct{}{}
}
shardMap[shard.Idx] = shard
if shard.LeasedUntil.After(ssa.clk.Now()) {
continue
for _, shard := range shards {
delete(missing, shard.Idx)
}
if oldest == nil ||
(shard.ThisUpdate == nil && oldest.ThisUpdate != nil) ||
shard.ThisUpdate.Before(*oldest.ThisUpdate) {
oldest = shard
}
}

if oldest == nil {
return -1, fmt.Errorf("issuer %d has no unleased shards in range %d - %d", req.IssuerNameID, req.MinShardIdx, req.MaxShardIdx)
}

// Determine which shard index we want to lease: by default the oldest, but
// an arbitrary missing one if any are missing.
shardIdx := oldest.Idx
needToInsert := false
for i := req.MaxShardIdx; i >= req.MinShardIdx; i-- {
_, ok := shardMap[int(i)]
if !ok {
shardIdx = int(i)
needToInsert = true
for idx := range missing {
// Go map iteration is guaranteed to be in randomized key order.
shardIdx = idx
break
}
needToInsert = true
} else {
// We got all the shards we expect, so we pick the oldest unleased shard.
var oldest *crlShardModel
for _, shard := range shards {
if shard.LeasedUntil.After(ssa.clk.Now()) {
continue
}
if oldest == nil ||
(oldest.ThisUpdate != nil && shard.ThisUpdate == nil) ||
(oldest.ThisUpdate != nil && shard.ThisUpdate.Before(*oldest.ThisUpdate)) {
oldest = shard
}
}
if oldest == nil {
return -1, fmt.Errorf("issuer %d has no unleased shards in range %d-%d", req.IssuerNameID, req.MinShardIdx, req.MaxShardIdx)
}
shardIdx = oldest.Idx
needToInsert = false
}

if needToInsert {
Expand Down Expand Up @@ -1105,7 +1109,8 @@ func (ssa *SQLStorageAuthority) leaseSpecificCRLShard(ctx context.Context, req *
// be the same as the crl-updater's context expiration), it's not inherently a
// sign of an update that should be skipped. It does reject the update if the
// identified CRL shard does not exist in the database (it should exist, as
// rows are created if necessary when leased).
// rows are created if necessary when leased). It also sets the leasedUntil time
// to be equal to thisUpdate, to indicate that the shard is no longer leased.
func (ssa *SQLStorageAuthority) UpdateCRLShard(ctx context.Context, req *sapb.UpdateCRLShardRequest) (*emptypb.Empty, error) {
if core.IsAnyNilOrZero(req.IssuerNameID, req.ThisUpdate, req.NextUpdate) {
return nil, errIncompleteRequest
Expand All @@ -1114,13 +1119,14 @@ func (ssa *SQLStorageAuthority) UpdateCRLShard(ctx context.Context, req *sapb.Up
_, err := db.WithTransaction(ctx, ssa.dbMap, func(tx db.Executor) (interface{}, error) {
res, err := tx.ExecContext(ctx,
`UPDATE crlShards
SET thisUpdate = ?, nextUpdate = ?
SET thisUpdate = ?, nextUpdate = ?, leasedUntil = ?
WHERE issuerID = ?
AND idx = ?
AND thisUpdate < ?
LIMIT 1`,
req.ThisUpdate.AsTime(),
req.NextUpdate.AsTime(),
req.ThisUpdate.AsTime(),
req.IssuerNameID,
req.ShardIdx,
req.ThisUpdate.AsTime(),
Expand Down
7 changes: 5 additions & 2 deletions sa/saro.go
Original file line number Diff line number Diff line change
Expand Up @@ -1360,7 +1360,7 @@ func (ssa *SQLStorageAuthority) GetRevokedCerts(req *sapb.GetRevokedCertsRequest
// have to look.
func (ssa *SQLStorageAuthorityRO) GetMaxExpiration(ctx context.Context, req *emptypb.Empty) (*timestamppb.Timestamp, error) {
var model struct {
MaxNotAfter time.Time `db:"maxNotAfter"`
MaxNotAfter *time.Time `db:"maxNotAfter"`
}
err := ssa.dbReadOnlyMap.SelectOne(
ctx,
Expand All @@ -1370,7 +1370,10 @@ func (ssa *SQLStorageAuthorityRO) GetMaxExpiration(ctx context.Context, req *emp
if err != nil {
return nil, fmt.Errorf("selecting max notAfter: %w", err)
}
return timestamppb.New(model.MaxNotAfter), err
if model.MaxNotAfter == nil {
return nil, errors.New("certificateStatus table notAfter column is empty")
}
return timestamppb.New(*model.MaxNotAfter), err
}

func (ssa *SQLStorageAuthority) GetMaxExpiration(ctx context.Context, req *emptypb.Empty) (*timestamppb.Timestamp, error) {
Expand Down
2 changes: 1 addition & 1 deletion test/config-next/crl-updater.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"/hierarchy/intermediate-cert-ecdsa-a.pem"
],
"numShards": 10,
"shardWidth": "18h",
"shardWidth": "240h",
"lookbackPeriod": "24h",
"updatePeriod": "6h",
"updateOffset": "9120s",
Expand Down
3 changes: 2 additions & 1 deletion test/config/crl-updater.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"/hierarchy/intermediate-cert-ecdsa-a.pem"
],
"numShards": 10,
"certificateLifetime": "2160h",
"shardWidth": "240h",
"lookbackPeriod": "24h",
"updatePeriod": "6h",
"updateOffset": "9120s",
"maxParallelism": 10,
Expand Down
12 changes: 8 additions & 4 deletions test/integration/crl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ package integration

import (
"database/sql"
"fmt"
"io"
"net/http"
"os"
Expand Down Expand Up @@ -35,7 +34,7 @@ func runUpdater(t *testing.T, configFile string) {
// Print the updater's stdout for debugging, but only if the test fails.
t.Log(line)
}
test.AssertNotError(t, err, fmt.Sprintf("crl-updater failed: %s", out))
test.AssertNotError(t, err, "crl-updater failed")
}

// TestCRLPipeline runs an end-to-end test of the crl issuance process, ensuring
Expand All @@ -48,6 +47,13 @@ func TestCRLPipeline(t *testing.T) {
test.Assert(t, ok, "failed to look up test config directory")
configFile := path.Join(configDir, "crl-updater.json")

// Reset the "leasedUntil" column so that this test isn't dependent on state
// like priors runs of this test.
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
test.AssertNotError(t, err, "opening database connection")
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
test.AssertNotError(t, err, "resetting leasedUntil column")

// Issue a test certificate and save its serial number.
client, err := makeClient()
test.AssertNotError(t, err, "creating acme client")
Expand All @@ -73,8 +79,6 @@ func TestCRLPipeline(t *testing.T) {
test.AssertEquals(t, resp.StatusCode, 200)

// Reset the "leasedUntil" column to prepare for another round of CRLs.
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
test.AssertNotError(t, err, "opening database connection")
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
test.AssertNotError(t, err, "resetting leasedUntil column")

Expand Down

0 comments on commit 6a450a2

Please sign in to comment.