Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 0 additions & 24 deletions certmagic.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,27 +499,3 @@ var (

// stackTraceBufferSize is the maximum size for the stack trace when recovering from panics.
// The value is 1024 * 128 (131072); presumably a byte count — confirm at the use site.
const stackTraceBufferSize = 1024 * 128

const (
// StorageModeEnv is the name of the environment variable that controls the storage mode.
//
// The storage mode controls the format in which certificates are stored in `Storage`.
//
// Formats:
// - legacy: Store cert, privkey and meta as three separate storage items (.cert, .key, .json).
// - bundle: Store cert, privkey and meta as a single, bundled storage item (.bundle).
//
// Modes:
// - legacy: Store and load certificates in legacy format.
// - transition: Store in legacy and bundle format, load as bundle with fallback to legacy format.
// - bundle: Store and load certificates in bundle format.
//
// In the transition mode, failures around reads and writes of the bundle are soft.
// They should only log errors and try to work with the legacy format as fallback.
// Operations on the legacy format are hard failures, implying that errors should be propagated up.
//
// The storage mode is controlled via the CERTMAGIC_STORAGE_MODE environment variable.
StorageModeEnv = "CERTMAGIC_STORAGE_MODE"

// StorageModeLegacy selects the legacy mode: store and load certificates in legacy format.
StorageModeLegacy = "legacy"
// StorageModeTransition selects the transition mode: store in legacy and bundle format,
// load as bundle with fallback to legacy format.
StorageModeTransition = "transition"
// StorageModeBundle selects the bundle mode: store and load certificates in bundle format.
StorageModeBundle = "bundle"
)
15 changes: 12 additions & 3 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import (
"net"
"net/http"
"net/url"
"os"
"strings"
"time"

Expand Down Expand Up @@ -1272,7 +1271,12 @@ func (cfg *Config) checkStorage(ctx context.Context) error {
// resources related to the certificate for domain.
// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env.
func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, domain string) bool {
switch os.Getenv(StorageModeEnv) {
storageMode := StorageModeForDomain(domain)
cfg.Logger.Debug("checking if storage has cert resources",
zap.String("domain", domain),
zap.String("storage_mode", storageMode),
zap.Int("rollout_bucket", RolloutBucketForDomain(domain)))
switch storageMode {
case StorageModeTransition:
if cfg.storageHasCertResourcesBundle(ctx, issuer, domain) {
return true
Expand Down Expand Up @@ -1313,7 +1317,12 @@ func (cfg *Config) storageHasCertResourcesBundle(ctx context.Context, issuer Iss
// issuer with the given issuer key.
// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env.
func (cfg *Config) deleteSiteAssets(ctx context.Context, issuerKey, domain string) error {
switch os.Getenv(StorageModeEnv) {
storageMode := StorageModeForDomain(domain)
cfg.Logger.Debug("deleting site assets",
zap.String("domain", domain),
zap.String("storage_mode", storageMode),
zap.Int("rollout_bucket", RolloutBucketForDomain(domain)))
switch storageMode {
case StorageModeTransition:
if err := cfg.deleteSiteAssetsBundle(ctx, issuerKey, domain); err != nil {
cfg.Logger.Warn("unable to delete certificate resource bundle",
Expand Down
6 changes: 3 additions & 3 deletions config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func mustJSON(val any) []byte {
// testStorageModeSetup creates a test config with the specified storage mode
func testStorageModeSetup(t *testing.T, mode, storagePath string) (*Config, *ACMEIssuer) {
t.Helper()
t.Setenv(StorageModeEnv, mode)
ConfigureStorageMode(mode, 100)

am := &ACMEIssuer{CA: "https://example.com/acme/directory"}
cfg := &Config{
Expand Down Expand Up @@ -291,7 +291,7 @@ func TestStorageModeTransitionFallback(t *testing.T) {
cert := makeCertResource(am, domain, true)

// Save in legacy mode to simulate existing data
os.Setenv(StorageModeEnv, StorageModeLegacy)
ConfigureStorageMode(StorageModeLegacy, 0)
if err := cfg.saveCertResource(ctx, am, cert); err != nil {
t.Fatalf("Failed to save cert in legacy mode: %v", err)
}
Expand All @@ -301,7 +301,7 @@ func TestStorageModeTransitionFallback(t *testing.T) {
assertFileNotExists(t, ctx, cfg.Storage, StorageKeys.SiteBundle(issuerKey, domain))

// Switch to transition mode and verify fallback to legacy works
os.Setenv(StorageModeEnv, StorageModeTransition)
ConfigureStorageMode(StorageModeTransition, 100)
loaded, err := cfg.loadCertResource(ctx, am, domain)
if err != nil {
t.Fatalf("Failed to load cert in transition mode with fallback: %v", err)
Expand Down
17 changes: 13 additions & 4 deletions crypto.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
"fmt"
"hash/fnv"
"io/fs"
"os"
"sort"
"strings"

Expand Down Expand Up @@ -144,12 +143,17 @@ func fastHash(input []byte) string {
// saveCertResource saves the certificate resource to disk.
// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env.
func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert CertificateResource) error {
switch os.Getenv(StorageModeEnv) {
storageMode := StorageModeForDomain(cert.SANs[0])
cfg.Logger.Debug("saving certificate resource",
zap.String("domain", cert.SANs[0]),
zap.String("storage_mode", storageMode),
zap.Int("rollout_bucket", RolloutBucketForDomain(cert.SANs[0])))
switch storageMode {
case StorageModeTransition:
if err := cfg.saveCertResourceBundle(ctx, issuer, cert); err != nil {
cfg.Logger.Warn("unable to store certificate resource bundle",
zap.String("issuer", issuer.IssuerKey()),
zap.String("domain", cert.NamesKey()),
zap.String("domain", cert.SANs[0]),
zap.Error(err))
}
return cfg.saveCertResourceLegacy(ctx, issuer, cert)
Expand Down Expand Up @@ -274,7 +278,12 @@ func (cfg *Config) loadCertResourceAnyIssuer(ctx context.Context, certNamesKey s
// loadCertResource loads a certificate resource from the given issuer's storage location.
// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env.
func (cfg *Config) loadCertResource(ctx context.Context, issuer Issuer, certNamesKey string) (CertificateResource, error) {
switch os.Getenv(StorageModeEnv) {
storageMode := StorageModeForDomain(certNamesKey)
cfg.Logger.Debug("loading certificate resource",
zap.String("domain", certNamesKey),
zap.String("storage_mode", storageMode),
zap.Int("rollout_bucket", RolloutBucketForDomain(certNamesKey)))
switch storageMode {
case StorageModeTransition:
certRes, err := cfg.loadCertResourceBundle(ctx, issuer, certNamesKey)
if err == nil {
Expand Down
25 changes: 20 additions & 5 deletions maintain.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"errors"
"fmt"
"io/fs"
"os"
"path"
"runtime"
"strings"
Expand Down Expand Up @@ -431,7 +430,12 @@ func (cfg *Config) storageHasNewerARI(ctx context.Context, cert Certificate) (bo
// loadStoredACMECertificateMetadata loads the stored ACME certificate data.
// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env.
func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert Certificate) (acme.Certificate, error) {
switch os.Getenv(StorageModeEnv) {
storageMode := StorageModeForDomain(cert.Names[0])
cfg.Logger.Debug("loading stored ACME certificate metadata",
zap.String("domain", cert.Names[0]),
zap.String("storage_mode", storageMode),
zap.Int("rollout_bucket", RolloutBucketForDomain(cert.Names[0])))
switch storageMode {
case StorageModeTransition:
acmecert, err := cfg.loadStoredACMECertificateMetadataBundle(ctx, cert)
if err == nil {
Expand Down Expand Up @@ -496,7 +500,12 @@ func (cfg *Config) loadStoredACMECertificateMetadataBundle(ctx context.Context,
// NeedsRefresh() on the RenewalInfo first, and only call this if that returns true.
// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env.
func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) {
switch os.Getenv(StorageModeEnv) {
storageMode := StorageModeForDomain(cert.Names[0])
cfg.Logger.Debug("updating ARI",
zap.String("domain", cert.Names[0]),
zap.String("storage_mode", storageMode),
zap.Int("rollout_bucket", RolloutBucketForDomain(cert.Names[0])))
switch storageMode {
case StorageModeTransition:
updatedCert, changed, err = cfg.updateARILegacy(ctx, cert, logger)
if err == nil {
Expand Down Expand Up @@ -1046,7 +1055,8 @@ func deleteOldOCSPStaples(ctx context.Context, storage Storage, logger *zap.Logg
}

func deleteExpiredCerts(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error {
switch os.Getenv(StorageModeEnv) {
logger.Debug("deleting expired certs", zap.String("storage_mode", StorageMode))
switch StorageMode {
case StorageModeTransition:
if err := deleteExpiredCertsBundle(ctx, storage, logger, gracePeriod); err != nil {
logger.Warn("unable to delete expired certs from bundle",
Expand Down Expand Up @@ -1321,7 +1331,12 @@ func (cfg *Config) moveCompromisedPrivateKey(ctx context.Context, cert Certifica
// Delete the storage containing the compromised key based on storage mode.
// We intentionally ignore delete errors since the file might not exist,
// and we avoid calling .Exists() before .Delete() to minimize storage roundtrips.
switch os.Getenv(StorageModeEnv) {
storageMode := StorageModeForDomain(cert.Names[0])
logger.Debug("deleting compromised private key",
zap.String("domain", cert.Names[0]),
zap.String("storage_mode", storageMode),
zap.Int("rollout_bucket", RolloutBucketForDomain(cert.Names[0])))
switch storageMode {
case StorageModeTransition:
cfg.Storage.Delete(ctx, bundleKey)
cfg.Storage.Delete(ctx, privKeyStorageKey)
Expand Down
87 changes: 87 additions & 0 deletions storagemode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package certmagic

import (
"hash/fnv"
"os"
"strconv"
)

const (
// StorageModeEnv is the name of the environment variable that controls the storage mode.
//
// The storage mode controls the format in which certificates are stored in `Storage`.
//
// Formats:
// - legacy: Store cert, privkey and meta as three separate storage items (.cert, .key, .json).
// - bundle: Store cert, privkey and meta as a single, bundled storage item (.bundle).
//
// Modes:
// - legacy: Store and load certificates in legacy format.
// - transition: Store in legacy and bundle format, load as bundle with fallback to legacy format.
// - bundle: Store and load certificates in bundle format.
//
// In the transition mode, failures around reads and writes of the bundle are soft.
// They should only log errors and try to work with the legacy format as fallback.
// Operations on the legacy format are hard failures, implying that errors should be propagated up.
//
// The rollout percentage enables a phased migration by controlling which domains
// enter the transition phase. If a domain's deterministic bucket (0-99) is below
// the rollout percentage, it uses 'transition' mode (dual-write, bundle-read).
// Otherwise, it remains in 'legacy' mode.
//
// The logic for selection is:
//
//	if mode == StorageModeTransition:
//	    useTransition = hash(domain)%100 < rollout
//	    return useTransition ? StorageModeTransition : StorageModeLegacy
//
// The storage mode is controlled via the CERTMAGIC_STORAGE_MODE environment variable.
StorageModeEnv = "CERTMAGIC_STORAGE_MODE"

// StorageModeLegacy selects the legacy mode: store and load certificates in legacy format.
StorageModeLegacy = "legacy"
// StorageModeTransition selects the transition mode: store in legacy and bundle format,
// load as bundle with fallback to legacy format. Which domains actually use it is
// further gated by the rollout percentage.
StorageModeTransition = "transition"
// StorageModeBundle selects the bundle mode: store and load certificates in bundle format.
StorageModeBundle = "bundle"

// StorageModeRolloutPercentEnv is the name of the environment variable that controls
// the percentage of domains that use the transition mode (and therefore the bundle
// format) when the storage mode is set to "transition".
// An empty rollout percent is equal to 0%.
StorageModeRolloutPercentEnv = "CERTMAGIC_STORAGE_MODE_ROLLOUT_PERCENT"
)

// Package-level storage mode configuration. Both variables are written by
// ConfigureStorageMode and read by StorageModeForDomain; no lock protects them.
var (
// StorageMode is the configured storage mode. StorageModeForDomain treats any
// value other than StorageModeBundle or StorageModeTransition as legacy.
StorageMode string
// StorageModeRolloutPercent is the rollout threshold used in transition mode:
// a domain uses the transition mode only if its rollout bucket (0-99) is
// strictly below this value.
StorageModeRolloutPercent int
)

// ConfigureStorageMode sets the package-wide storage mode and the rollout
// percentage that StorageModeForDomain consults.
//
// The two package variables are assigned without any synchronization, so this
// function must not run concurrently with code that reads them. It is meant to
// be called during package initialization (and from tests), before any
// storage operation takes place.
func ConfigureStorageMode(mode string, rolloutPercent int) {
	StorageMode, StorageModeRolloutPercent = mode, rolloutPercent
}

func init() {
mode := os.Getenv(StorageModeEnv)

// rolloutPercent becomes zero if env is unset or malformed
rolloutPercent, _ := strconv.Atoi(os.Getenv(StorageModeRolloutPercentEnv))

ConfigureStorageMode(mode, rolloutPercent)
}

// StorageModeForDomain returns the effective storage mode for domain.
//
// In bundle mode every domain uses StorageModeBundle. In transition mode a
// domain uses StorageModeTransition only when its rollout bucket is strictly
// below StorageModeRolloutPercent. In every other case, including an unset or
// unrecognized StorageMode, the result is StorageModeLegacy.
func StorageModeForDomain(domain string) string {
	switch StorageMode {
	case StorageModeBundle:
		return StorageModeBundle
	case StorageModeTransition:
		if RolloutBucketForDomain(domain) < StorageModeRolloutPercent {
			return StorageModeTransition
		}
	}
	return StorageModeLegacy
}

// RolloutBucketForDomain maps domain to a deterministic rollout bucket in the
// range 0-99, computed as the 32-bit FNV-1a hash of the domain modulo 100.
// The domain is hashed exactly as given.
func RolloutBucketForDomain(domain string) int {
	const bucketCount = 100

	hasher := fnv.New32a()
	// hash.Hash documents that Write never returns an error.
	_, _ = hasher.Write([]byte(domain))

	return int(hasher.Sum32() % bucketCount)
}
84 changes: 84 additions & 0 deletions storagemode_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package certmagic

import "testing"

func TestStorageModeRolloutPercentLegacy(t *testing.T) {
// In legacy mode, storage mode for all domains must be "legacy", no matter the rollout percent.
for _, rolloutPercent := range []int{0, 50, 100} {
ConfigureStorageMode(StorageModeLegacy, rolloutPercent)

for _, domain := range []string{"cyufsv.com", "lgxeeu.com", "msngsw.com"} {
if got := StorageModeForDomain(domain); got != StorageModeLegacy {
t.Errorf("rollout %d%%, StorageModeForDomain(%q) = %q, want %q",
rolloutPercent, domain, got, StorageModeLegacy)
}
}
}
}

func TestStorageModeRolloutPercentBundle(t *testing.T) {
// In bundle mode, storage mode for all domains must be "bundle", no matter the rollout percent.
for _, rolloutPercent := range []int{0, 50, 100} {
ConfigureStorageMode(StorageModeBundle, rolloutPercent)

for _, domain := range []string{"cyufsv.com", "lgxeeu.com", "msngsw.com"} {
if got := StorageModeForDomain(domain); got != StorageModeBundle {
t.Errorf("rollout %d%%, StorageModeForDomain(%q) = %q, want %q",
rolloutPercent, domain, got, StorageModeBundle)
}
}
}
}

func TestStorageModeRolloutPercentTransition(t *testing.T) {
// In transition mode, storage mode for domains can either be "transition" or "legacy", depending on rollout percent.
// Domains are assigned to buckets 0-99 based on their hash. A domain enters transition mode
// if its bucket is below the rollout percent.
//
// Test domains and their buckets:
// "cyufsv.com" -> bucket 0
// "wrgmsg.com" -> bucket 1
// "cdbbdh.com" -> bucket 49
// "lgxeeu.com" -> bucket 50
// "hwqhre.com" -> bucket 51
// "ckycee.com" -> bucket 98
// "msngsw.com" -> bucket 99
tests := []struct {
name string
rolloutPercent int
domain string
domainBucket int
want string
}{
// 0% rollout: no domains should transition
{"0% rollout, bucket 0", 0, "cyufsv.com", 0, StorageModeLegacy},
{"0% rollout, bucket 50", 0, "lgxeeu.com", 50, StorageModeLegacy},
{"0% rollout, bucket 99", 0, "msngsw.com", 99, StorageModeLegacy},

// 100% rollout: all domains should transition
{"100% rollout, bucket 0", 100, "cyufsv.com", 0, StorageModeTransition},
{"100% rollout, bucket 50", 100, "lgxeeu.com", 50, StorageModeTransition},
{"100% rollout, bucket 99", 100, "msngsw.com", 99, StorageModeTransition},

// Edge cases: bucket exactly at rollout boundary
{"50% rollout, bucket 49 (just below)", 50, "cdbbdh.com", 49, StorageModeTransition},
{"50% rollout, bucket 50 (exactly at)", 50, "lgxeeu.com", 50, StorageModeLegacy},
{"50% rollout, bucket 51 (just above)", 50, "hwqhre.com", 51, StorageModeLegacy},

// Edge cases at boundaries
{"1% rollout, bucket 0", 1, "cyufsv.com", 0, StorageModeTransition},
{"1% rollout, bucket 1", 1, "wrgmsg.com", 1, StorageModeLegacy},
{"99% rollout, bucket 98", 99, "ckycee.com", 98, StorageModeTransition},
{"99% rollout, bucket 99", 99, "msngsw.com", 99, StorageModeLegacy},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ConfigureStorageMode(StorageModeTransition, tt.rolloutPercent)
if got := StorageModeForDomain(tt.domain); got != tt.want {
t.Errorf("StorageModeForDomain(%q) = %q, want %q (bucket %d, rollout %d%%)",
tt.domain, got, tt.want, tt.domainBucket, tt.rolloutPercent)
}
})
}
}
Loading