From 9dee912bea270bb7f9811ae0550f569f703d97a0 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Tue, 6 Jan 2026 10:13:15 +0100 Subject: [PATCH 1/5] Add capability for gradual rollout of transition mode --- certmagic.go | 24 ------ config.go | 5 +- config_test.go | 6 +- crypto.go | 5 +- maintain.go | 9 +-- storagemode.go | 85 ++++++++++++++++++++ storagemode_test.go | 192 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 288 insertions(+), 38 deletions(-) create mode 100644 storagemode.go create mode 100644 storagemode_test.go diff --git a/certmagic.go b/certmagic.go index 2ff53235..322a0f1b 100644 --- a/certmagic.go +++ b/certmagic.go @@ -499,27 +499,3 @@ var ( // Maximum size for the stack trace when recovering from panics. const stackTraceBufferSize = 1024 * 128 - -const ( - // Storage mode controls the format in which certificates are stored in `Storage`. - // - // Formats: - // - legacy: Store cert, privkey and meta as three separate storage items (.cert, .key, .json). - // - bundle: Store cert, privkey and meta as a single, bundled storage item (.bundle). - // - // Modes: - // - legacy: Store and load certificates in legacy format. - // - transition: Store in legacy and bundle format, load as bundle with fallback to legacy format. - // - bundle: Store and load certificates in bundle format. - // - // In the transition mode, failures around reads and writes of the bundle are soft. - // They should only log errors and try to work with the legacy format as fallback. - // Operations on the legacy format are hard-failures, implying that errors should be propagated up. - // - // The storage mode is controlled via the CERTMAGIC_STORAGE_MODE environment variable - StorageModeEnv = "CERTMAGIC_STORAGE_MODE" - - StorageModeLegacy = "legacy" - StorageModeTransition = "transition" - StorageModeBundle = "bundle" -) diff --git a/config.go b/config.go index 8d9c5d65..82044398 100644 --- a/config.go +++ b/config.go @@ -32,7 +32,6 @@ import ( "net" "net/http" "net/url" - "os" "strings" "time" @@ -1272,7 +1271,7 @@ func (cfg *Config) checkStorage(ctx context.Context) error { // resources related to the certificate for domain. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, domain string) bool { - switch os.Getenv(StorageModeEnv) { + switch StorageModeForDomain(domain) { case StorageModeTransition: if cfg.storageHasCertResourcesBundle(ctx, issuer, domain) { return true @@ -1313,7 +1312,7 @@ func (cfg *Config) storageHasCertResourcesBundle(ctx context.Context, issuer Iss // issuer with the given issuer key. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) deleteSiteAssets(ctx context.Context, issuerKey, domain string) error { - switch os.Getenv(StorageModeEnv) { + switch StorageModeForDomain(domain) { case StorageModeTransition: if err := cfg.deleteSiteAssetsBundle(ctx, issuerKey, domain); err != nil { cfg.Logger.Warn("unable to delete certificate resource bundle", diff --git a/config_test.go b/config_test.go index 0db73428..a3599010 100644 --- a/config_test.go +++ b/config_test.go @@ -158,7 +158,7 @@ func mustJSON(val any) []byte { // testStorageModeSetup creates a test config with the specified storage mode func testStorageModeSetup(t *testing.T, mode, storagePath string) (*Config, *ACMEIssuer) { t.Helper() - t.Setenv(StorageModeEnv, mode) + ConfigureStorageMode(mode, 100) am := &ACMEIssuer{CA: "https://example.com/acme/directory"} cfg := &Config{ @@ -291,7 +291,7 @@ func TestStorageModeTransitionFallback(t *testing.T) { cert := makeCertResource(am, domain, true) // Save in legacy mode to simulate existing data - os.Setenv(StorageModeEnv, StorageModeLegacy) + ConfigureStorageMode(StorageModeLegacy, 0) if err := cfg.saveCertResource(ctx, am, cert); err != nil { t.Fatalf("Failed to save cert in legacy mode: %v", err) } @@ -301,7 +301,7 @@ func TestStorageModeTransitionFallback(t *testing.T) { assertFileNotExists(t, ctx, cfg.Storage, StorageKeys.SiteBundle(issuerKey, domain)) // Switch to transition mode and verify fallback to legacy works - os.Setenv(StorageModeEnv, StorageModeTransition) + ConfigureStorageMode(StorageModeTransition, 100) loaded, err := cfg.loadCertResource(ctx, am, domain) if err != nil { t.Fatalf("Failed to load cert in transition mode with fallback: %v", err) diff --git a/crypto.go b/crypto.go index 73cbc632..d533fcba 100644 --- a/crypto.go +++ b/crypto.go @@ -30,7 +30,6 @@ import ( "fmt" "hash/fnv" "io/fs" - "os" "sort" "strings" @@ -144,7 +143,7 @@ func fastHash(input []byte) string { // saveCertResource saves the certificate resource to disk. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert CertificateResource) error { - switch os.Getenv(StorageModeEnv) { + switch StorageModeForDomain(cert.NamesKey()) { case StorageModeTransition: if err := cfg.saveCertResourceBundle(ctx, issuer, cert); err != nil { cfg.Logger.Warn("unable to store certificate resource bundle", @@ -274,7 +273,7 @@ func (cfg *Config) loadCertResourceAnyIssuer(ctx context.Context, certNamesKey s // loadCertResource loads a certificate resource from the given issuer's storage location. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) loadCertResource(ctx context.Context, issuer Issuer, certNamesKey string) (CertificateResource, error) { - switch os.Getenv(StorageModeEnv) { + switch StorageModeForDomain(certNamesKey) { case StorageModeTransition: certRes, err := cfg.loadCertResourceBundle(ctx, issuer, certNamesKey) if err == nil { diff --git a/maintain.go b/maintain.go index 0bd4c6f1..47846479 100644 --- a/maintain.go +++ b/maintain.go @@ -22,7 +22,6 @@ import ( "errors" "fmt" "io/fs" - "os" "path" "runtime" "strings" @@ -431,7 +430,7 @@ func (cfg *Config) storageHasNewerARI(ctx context.Context, cert Certificate) (bo // loadStoredACMECertificateMetadata loads the stored ACME certificate data. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert Certificate) (acme.Certificate, error) { - switch os.Getenv(StorageModeEnv) { + switch StorageModeForDomain(cert.Names[0]) { case StorageModeTransition: acmecert, err := cfg.loadStoredACMECertificateMetadataBundle(ctx, cert) if err == nil { @@ -496,7 +495,7 @@ func (cfg *Config) loadStoredACMECertificateMetadataBundle(ctx context.Context, // NeedsRefresh() on the RenewalInfo first, and only call this if that returns true. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) { - switch os.Getenv(StorageModeEnv) { + switch StorageModeForDomain(cert.Names[0]) { case StorageModeTransition: updatedCert, changed, err = cfg.updateARILegacy(ctx, cert, logger) if err == nil { @@ -1046,7 +1045,7 @@ func deleteOldOCSPStaples(ctx context.Context, storage Storage, logger *zap.Logg } func deleteExpiredCerts(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error { - switch os.Getenv(StorageModeEnv) { + switch StorageMode { case StorageModeTransition: if err := deleteExpiredCertsBundle(ctx, storage, logger, gracePeriod); err != nil { logger.Warn("unable to delete expired certs from bundle", @@ -1321,7 +1320,7 @@ func (cfg *Config) moveCompromisedPrivateKey(ctx context.Context, cert Certifica // Delete the storage containing the compromised key based on storage mode. // We intentionally ignore delete errors since the file might not exist, // and we avoid calling .Exists() before .Delete() to minimize storage roundtrips. - switch os.Getenv(StorageModeEnv) { + switch StorageModeForDomain(cert.Names[0]) { case StorageModeTransition: cfg.Storage.Delete(ctx, bundleKey) cfg.Storage.Delete(ctx, privKeyStorageKey) diff --git a/storagemode.go b/storagemode.go new file mode 100644 index 00000000..71c7612f --- /dev/null +++ b/storagemode.go @@ -0,0 +1,85 @@ +package certmagic + +import ( + "hash/fnv" + "os" + "strconv" +) + +const ( + // Storage mode controls the format in which certificates are stored in `Storage`. + // + // Formats: + // - legacy: Store cert, privkey and meta as three separate storage items (.cert, .key, .json). + // - bundle: Store cert, privkey and meta as a single, bundled storage item (.bundle). + // + // Modes: + // - legacy: Store and load certificates in legacy format. + // - transition: Store in legacy and bundle format, load as bundle with fallback to legacy format. + // - bundle: Store and load certificates in bundle format. + // + // In the transition mode, failures around reads and writes of the bundle are soft. + // They should only log errors and try to work with the legacy format as fallback. + // Operations on the legacy format are hard-failures, implying that errors should be propagated up. + // + // The rollout percentage enables a phased migration by controlling which domains + // enter the transition phase. If a domain's deterministic bucket (0-99) is below + // the rollout percentage, it uses 'transition' mode (dual-write, bundle-read). + // Otherwise, it remains in 'legacy' mode. + // + // The logic for selection is: + // if mode == StorageModeTransition: + // useTransition = hash(domain)%100 < rollout + // return useTransition ? StorageModeTransition : StorageModeLegacy + // + // The storage mode is controlled via the CERTMAGIC_STORAGE_MODE environment variable + StorageModeEnv = "CERTMAGIC_STORAGE_MODE" + + StorageModeLegacy = "legacy" + StorageModeTransition = "transition" + StorageModeBundle = "bundle" + + // StorageModeRolloutPercentEnv controls the percentage of domains that will use + // the bundle format when the storage mode is set to "transition". + // An empty rollout precent is equal to 0%. + StorageModeRolloutPercentEnv = "CERTMAGIC_STORAGE_MODE_ROLLOUT_PERCENT" +) + +var ( + StorageMode string + StorageModeRolloutPercent int +) + +func ConfigureStorageMode(mode string, rolloutPercent int) { + StorageMode = mode + StorageModeRolloutPercent = rolloutPercent +} + +func init() { + mode := os.Getenv(StorageModeEnv) + + // rolloutPercent becomes zero if env is unset or malformed + rolloutPercent, _ := strconv.Atoi(os.Getenv(StorageModeRolloutPercentEnv)) + + ConfigureStorageMode(mode, rolloutPercent) +} + +func StorageModeForDomain(domain string) string { + if StorageMode == StorageModeBundle { + return StorageModeBundle + } + if StorageMode != StorageModeTransition { + return StorageModeLegacy + } + if RolloutBucketForDomain(domain) < StorageModeRolloutPercent { + return StorageModeTransition + } else { + return StorageModeLegacy + } +} + +func RolloutBucketForDomain(domain string) int { + h := fnv.New32a() + h.Write([]byte(domain)) + return int(h.Sum32() % 100) +} diff --git a/storagemode_test.go b/storagemode_test.go new file mode 100644 index 00000000..8e7a3c44 --- /dev/null +++ b/storagemode_test.go @@ -0,0 +1,192 @@ +package certmagic + +import ( + "crypto/rand" + "fmt" + "testing" +) + +func TestStorageModeRolloutPercentLegacy(t *testing.T) { + // In legacy mode, storage mode for all domains must be "legacy", no matter the rollout percent. + // This test brute-forces all possible test cases, as they are cheap to assert. + for rolloutPercent := range 100 { + ConfigureStorageMode(StorageModeLegacy, rolloutPercent) + + for _, domain := range testStorageModeDomains { + want, got := StorageModeLegacy, StorageModeForDomain(domain) + if want != got { + t.Errorf("expected storage mode %q for domain %q, got: %q", want, domain, got) + } + } + } +} + +func TestStorageModeRolloutPercentBundle(t *testing.T) { + // In legacy mode, storage mode for all domains must be "legacy", no matter the rollout percent. + // This test brute-forces all possible test cases, as they are cheap to assert. + for rolloutPercent := range 100 { + ConfigureStorageMode(StorageModeBundle, rolloutPercent) + + for _, domain := range testStorageModeDomains { + want, got := StorageModeBundle, StorageModeForDomain(domain) + if want != got { + t.Errorf("expected storage mode %q for domain %q, got: %q", want, domain, got) + } + } + } +} + +func TestStorageModeRolloutPercentTransition(t *testing.T) { + // In transition mode, storage mode for domains can either be "transition" or "legacy", depending on rollout percent. + // This test brute-forces all possible test cases, as they are cheap to assert. + for rolloutPercent := range 100 { + ConfigureStorageMode(StorageModeTransition, rolloutPercent) + + for domainBucket, domain := range testStorageModeDomains { + got := StorageModeForDomain(domain) + + var want string + if domainBucket < rolloutPercent { + want = StorageModeTransition + } else { + want = StorageModeLegacy + } + + if want != got { + t.Errorf("expected storage mode %q for domain %q, got: %q", want, domain, got) + } + } + } +} + +func GenerateRandomDomainsForRolloutBuckets(t *testing.T) { + for desiredBucket := range 100 { + for { + domain := RandomDomain() + bucket := RolloutBucketForDomain(domain) + if desiredBucket == bucket { + fmt.Println(domain, bucket) + break + } + } + } +} + +func RandomDomain() string { + alphabet := "abcdefghijklmnopqrstuvwxyz" + src := make([]byte, 6) + rand.Read(src) + for i := range src { + src[i] = alphabet[src[i]%26] + } + return string(src) + ".com" +} + +// testStorageModeDomains are domains whose hashes are deterministic. +// Example: +// - Domain at index 0 hashes to bucket 0 +// - Domain at index 1 hashes to bucket 1 +// - Domain at index 2 hashes to bucket 2 +// - ... +var testStorageModeDomains = []string{ + "cyufsv.com", + "wrgmsg.com", + "brgdjo.com", + "ydwcck.com", + "mflmhz.com", + "haegjj.com", + "zmhovf.com", + "obufpu.com", + "feslvv.com", + "sebycw.com", + "eilifq.com", + "hqbrqi.com", + "msdfdl.com", + "zzyzeg.com", + "omkufr.com", + "wxknzs.com", + "sbrjrs.com", + "oirmum.com", + "ahkfmk.com", + "pasrgp.com", + "wkxoax.com", + "hrften.com", + "awvybq.com", + "sdnroo.com", + "oihglq.com", + "ilomtn.com", + "jsslsa.com", + "xfqsqj.com", + "seccht.com", + "kdggrx.com", + "htueua.com", + "rwnblj.com", + "muuiye.com", + "dmgdwl.com", + "ehcpua.com", + "hheskv.com", + "xapqrp.com", + "rtqlga.com", + "zwejrb.com", + "caijym.com", + "qqobjq.com", + "ylhtvl.com", + "leotig.com", + "xzzdkn.com", + "gtbrls.com", + "ffdfon.com", + "yndvoz.com", + "pcdete.com", + "mqqawg.com", + "cdbbdh.com", + "lgxeeu.com", + "hwqhre.com", + "glzlpq.com", + "wmogra.com", + "cdrpnm.com", + "idrfwa.com", + "ktrubn.com", + "xohmsv.com", + "mmddcs.com", + "mlmgvj.com", + "myxcwb.com", + "rrlbbu.com", + "abifcu.com", + "uarnen.com", + "utvepr.com", + "hvriwm.com", + "ktoobi.com", + "pkucoi.com", + "enszeo.com", + "boerwx.com", + "oftmjp.com", + "conpid.com", + "xsixnx.com", + "acbdut.com", + "oipdfz.com", + "cceope.com", + "shyuyj.com", + "flddpw.com", + "hmdtxy.com", + "lsfqfe.com", + "ynmpsm.com", + "kbkkbn.com", + "xrerap.com", + "dhhhkr.com", + "zdbnpt.com", + "ttxvat.com", + "rkrnjg.com", + "xkanxi.com", + "nbcmqi.com", + "mmhner.com", + "elunlf.com", + "bjupxh.com", + "loosax.com", + "fliwby.com", + "kjelwq.com", + "nlcgov.com", + "jjxavu.com", + "gpalyx.com", + "ckycee.com", + "msngsw.com", +} From e600a41e2ae4e5f2352d3b0ae19410d43f6e4ac2 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Tue, 6 Jan 2026 11:01:04 +0100 Subject: [PATCH 2/5] Add note about storage mode race conditions --- storagemode.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/storagemode.go b/storagemode.go index 71c7612f..d93c5960 100644 --- a/storagemode.go +++ b/storagemode.go @@ -51,6 +51,8 @@ var ( ) func ConfigureStorageMode(mode string, rolloutPercent int) { + // Note: We have no lock protecting these variables. This is a potential race condition, yes. + // But this function is only called once during init(), before anything else happens. StorageMode = mode StorageModeRolloutPercent = rolloutPercent } From 54f062724359aa78f29757e87291cd54d98764b7 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Tue, 6 Jan 2026 16:19:25 +0100 Subject: [PATCH 3/5] Use first SAN of CertificateResource for storage mode check --- crypto.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto.go b/crypto.go index d533fcba..4b3e6871 100644 --- a/crypto.go +++ b/crypto.go @@ -143,12 +143,12 @@ func fastHash(input []byte) string { // saveCertResource saves the certificate resource to disk. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert CertificateResource) error { - switch StorageModeForDomain(cert.NamesKey()) { + switch StorageModeForDomain(cert.SANs[0]) { case StorageModeTransition: if err := cfg.saveCertResourceBundle(ctx, issuer, cert); err != nil { cfg.Logger.Warn("unable to store certificate resource bundle", zap.String("issuer", issuer.IssuerKey()), - zap.String("domain", cert.NamesKey()), + zap.String("domain", cert.SANs[0]), zap.Error(err)) } return cfg.saveCertResourceLegacy(ctx, issuer, cert) From 7737506c974313d2f5c3b4d93b4ddc19daf94e06 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Wed, 7 Jan 2026 11:39:57 +0100 Subject: [PATCH 4/5] Add debug logging for storage mode operations Log domain, storage_mode, and rollout_bucket at each storage mode decision point to help observe the gradual rollout behavior. --- config.go | 14 ++++++++++++-- crypto.go | 14 ++++++++++++-- maintain.go | 22 +++++++++++++++++++--- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/config.go b/config.go index 82044398..b82d9c3a 100644 --- a/config.go +++ b/config.go @@ -1271,7 +1271,12 @@ func (cfg *Config) checkStorage(ctx context.Context) error { // resources related to the certificate for domain. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, domain string) bool { - switch StorageModeForDomain(domain) { + storageMode := StorageModeForDomain(domain) + cfg.Logger.Debug("checking if storage has cert resources", + zap.String("domain", domain), + zap.String("storage_mode", storageMode), + zap.Int("rollout_bucket", RolloutBucketForDomain(domain))) + switch storageMode { case StorageModeTransition: if cfg.storageHasCertResourcesBundle(ctx, issuer, domain) { return true @@ -1312,7 +1317,12 @@ func (cfg *Config) storageHasCertResourcesBundle(ctx context.Context, issuer Iss // issuer with the given issuer key. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) deleteSiteAssets(ctx context.Context, issuerKey, domain string) error { - switch StorageModeForDomain(domain) { + storageMode := StorageModeForDomain(domain) + cfg.Logger.Debug("deleting site assets", + zap.String("domain", domain), + zap.String("storage_mode", storageMode), + zap.Int("rollout_bucket", RolloutBucketForDomain(domain))) + switch storageMode { case StorageModeTransition: if err := cfg.deleteSiteAssetsBundle(ctx, issuerKey, domain); err != nil { cfg.Logger.Warn("unable to delete certificate resource bundle", diff --git a/crypto.go b/crypto.go index 4b3e6871..ede3adaa 100644 --- a/crypto.go +++ b/crypto.go @@ -143,7 +143,12 @@ func fastHash(input []byte) string { // saveCertResource saves the certificate resource to disk. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert CertificateResource) error { - switch StorageModeForDomain(cert.SANs[0]) { + storageMode := StorageModeForDomain(cert.SANs[0]) + cfg.Logger.Debug("saving certificate resource", + zap.String("domain", cert.SANs[0]), + zap.String("storage_mode", storageMode), + zap.Int("rollout_bucket", RolloutBucketForDomain(cert.SANs[0]))) + switch storageMode { case StorageModeTransition: if err := cfg.saveCertResourceBundle(ctx, issuer, cert); err != nil { cfg.Logger.Warn("unable to store certificate resource bundle", @@ -273,7 +278,12 @@ func (cfg *Config) loadCertResourceAnyIssuer(ctx context.Context, certNamesKey s // loadCertResource loads a certificate resource from the given issuer's storage location. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) loadCertResource(ctx context.Context, issuer Issuer, certNamesKey string) (CertificateResource, error) { - switch StorageModeForDomain(certNamesKey) { + storageMode := StorageModeForDomain(certNamesKey) + cfg.Logger.Debug("loading certificate resource", + zap.String("domain", certNamesKey), + zap.String("storage_mode", storageMode), + zap.Int("rollout_bucket", RolloutBucketForDomain(certNamesKey))) + switch storageMode { case StorageModeTransition: certRes, err := cfg.loadCertResourceBundle(ctx, issuer, certNamesKey) if err == nil { diff --git a/maintain.go b/maintain.go index 47846479..96b04973 100644 --- a/maintain.go +++ b/maintain.go @@ -430,7 +430,12 @@ func (cfg *Config) storageHasNewerARI(ctx context.Context, cert Certificate) (bo // loadStoredACMECertificateMetadata loads the stored ACME certificate data. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert Certificate) (acme.Certificate, error) { - switch StorageModeForDomain(cert.Names[0]) { + storageMode := StorageModeForDomain(cert.Names[0]) + cfg.Logger.Debug("loading stored ACME certificate metadata", + zap.String("domain", cert.Names[0]), + zap.String("storage_mode", storageMode), + zap.Int("rollout_bucket", RolloutBucketForDomain(cert.Names[0]))) + switch storageMode { case StorageModeTransition: acmecert, err := cfg.loadStoredACMECertificateMetadataBundle(ctx, cert) if err == nil { @@ -495,7 +500,12 @@ func (cfg *Config) loadStoredACMECertificateMetadataBundle(ctx context.Context, // NeedsRefresh() on the RenewalInfo first, and only call this if that returns true. // It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) { - switch StorageModeForDomain(cert.Names[0]) { + storageMode := StorageModeForDomain(cert.Names[0]) + cfg.Logger.Debug("updating ARI", + zap.String("domain", cert.Names[0]), + zap.String("storage_mode", storageMode), + zap.Int("rollout_bucket", RolloutBucketForDomain(cert.Names[0]))) + switch storageMode { case StorageModeTransition: updatedCert, changed, err = cfg.updateARILegacy(ctx, cert, logger) if err == nil { @@ -1045,6 +1055,7 @@ func deleteOldOCSPStaples(ctx context.Context, storage Storage, logger *zap.Logg } func deleteExpiredCerts(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error { + logger.Debug("deleting expired certs", zap.String("storage_mode", StorageMode)) switch StorageMode { case StorageModeTransition: if err := deleteExpiredCertsBundle(ctx, storage, logger, gracePeriod); err != nil { @@ -1320,7 +1331,12 @@ func (cfg *Config) moveCompromisedPrivateKey(ctx context.Context, cert Certifica // Delete the storage containing the compromised key based on storage mode. // We intentionally ignore delete errors since the file might not exist, // and we avoid calling .Exists() before .Delete() to minimize storage roundtrips. - switch StorageModeForDomain(cert.Names[0]) { + storageMode := StorageModeForDomain(cert.Names[0]) + logger.Debug("deleting compromised private key", + zap.String("domain", cert.Names[0]), + zap.String("storage_mode", storageMode), + zap.Int("rollout_bucket", RolloutBucketForDomain(cert.Names[0]))) + switch storageMode { case StorageModeTransition: cfg.Storage.Delete(ctx, bundleKey) cfg.Storage.Delete(ctx, privKeyStorageKey) From cfe29c33158cf302096a0c01b5e4814ea45735a2 Mon Sep 17 00:00:00 2001 From: Philipp Tanlak Date: Thu, 8 Jan 2026 14:50:38 +0100 Subject: [PATCH 5/5] Simplify storage mode rollout tests with curated edge cases Replace brute-force test approach with targeted test cases that verify behavior at rollout boundaries (0%, 1%, 50%, 99%, 100%). Remove helper functions and large domain list that were only used for test generation. --- storagemode_test.go | 216 +++++++++++--------------------------------- 1 file changed, 54 insertions(+), 162 deletions(-) diff --git a/storagemode_test.go b/storagemode_test.go index 8e7a3c44..9ff8d695 100644 --- a/storagemode_test.go +++ b/storagemode_test.go @@ -1,36 +1,30 @@ package certmagic -import ( - "crypto/rand" - "fmt" - "testing" -) +import "testing" func TestStorageModeRolloutPercentLegacy(t *testing.T) { // In legacy mode, storage mode for all domains must be "legacy", no matter the rollout percent. - // This test brute-forces all possible test cases, as they are cheap to assert. - for rolloutPercent := range 100 { + for _, rolloutPercent := range []int{0, 50, 100} { ConfigureStorageMode(StorageModeLegacy, rolloutPercent) - for _, domain := range testStorageModeDomains { - want, got := StorageModeLegacy, StorageModeForDomain(domain) - if want != got { - t.Errorf("expected storage mode %q for domain %q, got: %q", want, domain, got) + for _, domain := range []string{"cyufsv.com", "lgxeeu.com", "msngsw.com"} { + if got := StorageModeForDomain(domain); got != StorageModeLegacy { + t.Errorf("rollout %d%%, StorageModeForDomain(%q) = %q, want %q", + rolloutPercent, domain, got, StorageModeLegacy) } } } } func TestStorageModeRolloutPercentBundle(t *testing.T) { - // In legacy mode, storage mode for all domains must be "legacy", no matter the rollout percent. - // This test brute-forces all possible test cases, as they are cheap to assert. - for rolloutPercent := range 100 { + // In bundle mode, storage mode for all domains must be "bundle", no matter the rollout percent. + for _, rolloutPercent := range []int{0, 50, 100} { ConfigureStorageMode(StorageModeBundle, rolloutPercent) - for _, domain := range testStorageModeDomains { - want, got := StorageModeBundle, StorageModeForDomain(domain) - if want != got { - t.Errorf("expected storage mode %q for domain %q, got: %q", want, domain, got) + for _, domain := range []string{"cyufsv.com", "lgxeeu.com", "msngsw.com"} { + if got := StorageModeForDomain(domain); got != StorageModeBundle { + t.Errorf("rollout %d%%, StorageModeForDomain(%q) = %q, want %q", + rolloutPercent, domain, got, StorageModeBundle) } } } @@ -38,155 +32,53 @@ func TestStorageModeRolloutPercentBundle(t *testing.T) { func TestStorageModeRolloutPercentTransition(t *testing.T) { // In transition mode, storage mode for domains can either be "transition" or "legacy", depending on rollout percent. - // This test brute-forces all possible test cases, as they are cheap to assert. - for rolloutPercent := range 100 { - ConfigureStorageMode(StorageModeTransition, rolloutPercent) + // Domains are assigned to buckets 0-99 based on their hash. A domain enters transition mode + // if its bucket is below the rollout percent. + // + // Test domains and their buckets: + // "cyufsv.com" -> bucket 0 + // "wrgmsg.com" -> bucket 1 + // "cdbbdh.com" -> bucket 49 + // "lgxeeu.com" -> bucket 50 + // "hwqhre.com" -> bucket 51 + // "ckycee.com" -> bucket 98 + // "msngsw.com" -> bucket 99 + tests := []struct { + name string + rolloutPercent int + domain string + domainBucket int + want string + }{ + // 0% rollout: no domains should transition + {"0% rollout, bucket 0", 0, "cyufsv.com", 0, StorageModeLegacy}, + {"0% rollout, bucket 50", 0, "lgxeeu.com", 50, StorageModeLegacy}, + {"0% rollout, bucket 99", 0, "msngsw.com", 99, StorageModeLegacy}, - for domainBucket, domain := range testStorageModeDomains { - got := StorageModeForDomain(domain) + // 100% rollout: all domains should transition + {"100% rollout, bucket 0", 100, "cyufsv.com", 0, StorageModeTransition}, + {"100% rollout, bucket 50", 100, "lgxeeu.com", 50, StorageModeTransition}, + {"100% rollout, bucket 99", 100, "msngsw.com", 99, StorageModeTransition}, - var want string - if domainBucket < rolloutPercent { - want = StorageModeTransition - } else { - want = StorageModeLegacy - } + // Edge cases: bucket exactly at rollout boundary + {"50% rollout, bucket 49 (just below)", 50, "cdbbdh.com", 49, StorageModeTransition}, + {"50% rollout, bucket 50 (exactly at)", 50, "lgxeeu.com", 50, StorageModeLegacy}, + {"50% rollout, bucket 51 (just above)", 50, "hwqhre.com", 51, StorageModeLegacy}, - if want != got { - t.Errorf("expected storage mode %q for domain %q, got: %q", want, domain, got) - } - } + // Edge cases at boundaries + {"1% rollout, bucket 0", 1, "cyufsv.com", 0, StorageModeTransition}, + {"1% rollout, bucket 1", 1, "wrgmsg.com", 1, StorageModeLegacy}, + {"99% rollout, bucket 98", 99, "ckycee.com", 98, StorageModeTransition}, + {"99% rollout, bucket 99", 99, "msngsw.com", 99, StorageModeLegacy}, } -} -func GenerateRandomDomainsForRolloutBuckets(t *testing.T) { - for desiredBucket := range 100 { - for { - domain := RandomDomain() - bucket := RolloutBucketForDomain(domain) - if desiredBucket == bucket { - fmt.Println(domain, bucket) - break + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ConfigureStorageMode(StorageModeTransition, tt.rolloutPercent) + if got := StorageModeForDomain(tt.domain); got != tt.want { + t.Errorf("StorageModeForDomain(%q) = %q, want %q (bucket %d, rollout %d%%)", + tt.domain, got, tt.want, tt.domainBucket, tt.rolloutPercent) } - } + }) } } - -func RandomDomain() string { - alphabet := "abcdefghijklmnopqrstuvwxyz" - src := make([]byte, 6) - rand.Read(src) - for i := range src { - src[i] = alphabet[src[i]%26] - } - return string(src) + ".com" -} - -// testStorageModeDomains are domains whose hashes are deterministic. -// Example: -// - Domain at index 0 hashes to bucket 0 -// - Domain at index 1 hashes to bucket 1 -// - Domain at index 2 hashes to bucket 2 -// - ... -var testStorageModeDomains = []string{ - "cyufsv.com", - "wrgmsg.com", - "brgdjo.com", - "ydwcck.com", - "mflmhz.com", - "haegjj.com", - "zmhovf.com", - "obufpu.com", - "feslvv.com", - "sebycw.com", - "eilifq.com", - "hqbrqi.com", - "msdfdl.com", - "zzyzeg.com", - "omkufr.com", - "wxknzs.com", - "sbrjrs.com", - "oirmum.com", - "ahkfmk.com", - "pasrgp.com", - "wkxoax.com", - "hrften.com", - "awvybq.com", - "sdnroo.com", - "oihglq.com", - "ilomtn.com", - "jsslsa.com", - "xfqsqj.com", - "seccht.com", - "kdggrx.com", - "htueua.com", - "rwnblj.com", - "muuiye.com", - "dmgdwl.com", - "ehcpua.com", - "hheskv.com", - "xapqrp.com", - "rtqlga.com", - "zwejrb.com", - "caijym.com", - "qqobjq.com", - "ylhtvl.com", - "leotig.com", - "xzzdkn.com", - "gtbrls.com", - "ffdfon.com", - "yndvoz.com", - "pcdete.com", - "mqqawg.com", - "cdbbdh.com", - "lgxeeu.com", - "hwqhre.com", - "glzlpq.com", - "wmogra.com", - "cdrpnm.com", - "idrfwa.com", - "ktrubn.com", - "xohmsv.com", - "mmddcs.com", - "mlmgvj.com", - "myxcwb.com", - "rrlbbu.com", - "abifcu.com", - "uarnen.com", - "utvepr.com", - "hvriwm.com", - "ktoobi.com", - "pkucoi.com", - "enszeo.com", - "boerwx.com", - "oftmjp.com", - "conpid.com", - "xsixnx.com", - "acbdut.com", - "oipdfz.com", - "cceope.com", - "shyuyj.com", - "flddpw.com", - "hmdtxy.com", - "lsfqfe.com", - "ynmpsm.com", - "kbkkbn.com", - "xrerap.com", - "dhhhkr.com", - "zdbnpt.com", - "ttxvat.com", - "rkrnjg.com", - "xkanxi.com", - "nbcmqi.com", - "mmhner.com", - "elunlf.com", - "bjupxh.com", - "loosax.com", - "fliwby.com", - "kjelwq.com", - "nlcgov.com", - "jjxavu.com", - "gpalyx.com", - "ckycee.com", - "msngsw.com", -}