-
Notifications
You must be signed in to change notification settings - Fork 1.8k
feat(NODE-7122): exponential backoff between retries in convenient transaction API #4765
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
baileympearson
wants to merge
3
commits into
main
Choose a base branch
from
NODE-7122
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+187
−44
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| import { setTimeout } from 'timers/promises'; | ||
|
|
||
| import { Binary, type Document, Long, type Timestamp } from './bson'; | ||
| import type { CommandOptions, Connection } from './cmap/connection'; | ||
| import { ConnectionPoolMetrics } from './cmap/metrics'; | ||
|
|
@@ -732,17 +734,61 @@ export class ClientSession | |
| : processTimeMS(); | ||
|
|
||
| let committed = false; | ||
| let result: any; | ||
| let result: T; | ||
|
|
||
| let lastError: Error | null = null; | ||
|
|
||
| try { | ||
| while (!committed) { | ||
| // 2. Invoke startTransaction on the session | ||
| // 3. If `startTransaction` reported an error, propagate that error to the caller of `withTransaction` and return immediately. | ||
| retryTransaction: for ( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TIL "Labeled statements" - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/label |
||
| // 2. Set `transactionAttempt` to `0`. | ||
| let transactionAttempt = 0, isRetry = false; | ||
| !committed; | ||
| ++transactionAttempt, isRetry = transactionAttempt > 0 | ||
| ) { | ||
| // 2. If `transactionAttempt` > 0: | ||
| if (isRetry) { | ||
| // 2.i If elapsed time + `backoffMS` > `TIMEOUT_MS`, then raise the previously encountered error. If the elapsed time of | ||
| // `withTransaction` is less than TIMEOUT_MS, calculate the backoffMS to be | ||
| // `jitter * min(BACKOFF_INITIAL * 1.5 ** (transactionAttempt - 1), BACKOFF_MAX)`. sleep for `backoffMS`. | ||
| // 2.i.i jitter is a random float in the half-open interval [0, 1) | ||
| // 2.i.ii `transactionAttempt` is the variable defined in step 2. | ||
| // 2.i.iii `BACKOFF_INITIAL` is 5ms | ||
| // 2.i.iv `BACKOFF_MAX` is 500ms | ||
| const BACKOFF_INITIAL_MS = 5; | ||
| const BACKOFF_MAX_MS = 500; | ||
| const BACKOFF_GROWTH = 1.5; | ||
| const jitter = Math.random(); | ||
| const backoffMS = | ||
| jitter * | ||
| Math.min( | ||
| BACKOFF_INITIAL_MS * BACKOFF_GROWTH ** (transactionAttempt - 1), | ||
| BACKOFF_MAX_MS | ||
| ); | ||
|
|
||
| const willExceedTransactionDeadline = | ||
| (this.timeoutContext?.csotEnabled() && | ||
| backoffMS > this.timeoutContext.remainingTimeMS) || | ||
| processTimeMS() + backoffMS > startTime + MAX_TIMEOUT; | ||
|
|
||
| if (willExceedTransactionDeadline) { | ||
| throw ( | ||
| lastError ?? | ||
| new MongoRuntimeError( | ||
| `Transaction retry did not record an error: should never occur. Please file a bug.` | ||
PavelSafronov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ) | ||
| ); | ||
| } | ||
|
|
||
| await setTimeout(backoffMS); | ||
| } | ||
|
|
||
| // 3. Invoke startTransaction on the session | ||
| // 4. If `startTransaction` reported an error, propagate that error to the caller of `withTransaction` and return immediately. | ||
| this.startTransaction(options); // may throw on error | ||
|
|
||
| try { | ||
| // 4. Invoke the callback. | ||
| // 5. Control returns to withTransaction. (continued below) | ||
| // 5. Invoke the callback. | ||
| // 6. Control returns to withTransaction. (continued below) | ||
| const promise = fn(this); | ||
| if (!isPromiseLike(promise)) { | ||
| throw new MongoInvalidArgumentError( | ||
|
|
@@ -752,18 +798,18 @@ export class ClientSession | |
|
|
||
| result = await promise; | ||
|
|
||
| // 5. (cont.) Determine the current state of the ClientSession (continued below) | ||
| // 6. (cont.) Determine the current state of the ClientSession (continued below) | ||
| if ( | ||
| this.transaction.state === TxnState.NO_TRANSACTION || | ||
| this.transaction.state === TxnState.TRANSACTION_COMMITTED || | ||
| this.transaction.state === TxnState.TRANSACTION_ABORTED | ||
| ) { | ||
| // 7. If the ClientSession is in the "no transaction", "transaction aborted", or "transaction committed" state, | ||
| // 8. If the ClientSession is in the "no transaction", "transaction aborted", or "transaction committed" state, | ||
| // assume the callback intentionally aborted or committed the transaction and return immediately. | ||
| return result; | ||
| } | ||
| // 5. (cont.) and whether the callback reported an error | ||
| // 6. If the callback reported an error: | ||
| // 7. If the callback reported an error: | ||
| } catch (fnError) { | ||
| if (!(fnError instanceof MongoError) || fnError instanceof MongoInvalidArgumentError) { | ||
| // This first preemptive abort regardless of TxnState isn't spec, | ||
|
|
@@ -776,70 +822,80 @@ export class ClientSession | |
| this.transaction.state === TxnState.STARTING_TRANSACTION || | ||
| this.transaction.state === TxnState.TRANSACTION_IN_PROGRESS | ||
| ) { | ||
| // 6.i If the ClientSession is in the "starting transaction" or "transaction in progress" state, | ||
| // 7.i If the ClientSession is in the "starting transaction" or "transaction in progress" state, | ||
| // invoke abortTransaction on the session | ||
| await this.abortTransaction(); | ||
| } | ||
|
|
||
| if ( | ||
| fnError.hasErrorLabel(MongoErrorLabel.TransientTransactionError) && | ||
| (this.timeoutContext != null || processTimeMS() - startTime < MAX_TIMEOUT) | ||
| (this.timeoutContext?.csotEnabled() || processTimeMS() - startTime < MAX_TIMEOUT) | ||
| ) { | ||
| // 6.ii If the callback's error includes a "TransientTransactionError" label and the elapsed time of `withTransaction` | ||
| // 7.ii If the callback's error includes a "TransientTransactionError" label and the elapsed time of `withTransaction` | ||
| // is less than 120 seconds, jump back to step two. | ||
| continue; | ||
| lastError = fnError; | ||
| continue retryTransaction; | ||
| } | ||
|
|
||
| // 6.iii If the callback's error includes a "UnknownTransactionCommitResult" label, the callback must have manually committed a transaction, | ||
| // 7.iii If the callback's error includes a "UnknownTransactionCommitResult" label, the callback must have manually committed a transaction, | ||
| // propagate the callback's error to the caller of withTransaction and return immediately. | ||
| // The 6.iii check is redundant with 6.iv, so we don't write code for it | ||
| // 6.iv Otherwise, propagate the callback's error to the caller of withTransaction and return immediately. | ||
| // The 7.iii check is redundant with 7.iv, so we don't write code for it | ||
| // 7.iv Otherwise, propagate the callback's error to the caller of withTransaction and return immediately. | ||
| throw fnError; | ||
| } | ||
|
|
||
| while (!committed) { | ||
| retryCommit: while (!committed) { | ||
| try { | ||
| /* | ||
| * We will rely on ClientSession.commitTransaction() to | ||
| * apply a majority write concern if commitTransaction is | ||
| * being retried (see: DRIVERS-601) | ||
| */ | ||
| // 8. Invoke commitTransaction on the session. | ||
| // 9. Invoke commitTransaction on the session. | ||
| await this.commitTransaction(); | ||
| committed = true; | ||
| // 9. If commitTransaction reported an error: | ||
| // 10. If commitTransaction reported an error: | ||
| } catch (commitError) { | ||
| /* | ||
| * Note: a maxTimeMS error will have the MaxTimeMSExpired | ||
| * code (50) and can be reported as a top-level error or | ||
| * inside writeConcernError, ex. | ||
| * { ok:0, code: 50, codeName: 'MaxTimeMSExpired' } | ||
| * { ok:1, writeConcernError: { code: 50, codeName: 'MaxTimeMSExpired' } } | ||
| */ | ||
| if ( | ||
| !isMaxTimeMSExpiredError(commitError) && | ||
| commitError.hasErrorLabel(MongoErrorLabel.UnknownTransactionCommitResult) && | ||
| (this.timeoutContext != null || processTimeMS() - startTime < MAX_TIMEOUT) | ||
| ) { | ||
| // 9.i If the `commitTransaction` error includes a "UnknownTransactionCommitResult" label and the error is not | ||
| // MaxTimeMSExpired and the elapsed time of `withTransaction` is less than 120 seconds, jump back to step eight. | ||
| continue; | ||
| // If CSOT is enabled, we repeatedly retry until timeoutMS expires. This is enforced by providing a | ||
| // timeoutContext to each async API, which know how to cancel themselves (i.e., the next retry will | ||
| // abort the withTransaction call). | ||
| // If CSOT is not enabled, do we still have time remaining or have we timed out? | ||
| const hasTimedOut = | ||
| !this.timeoutContext?.csotEnabled() && processTimeMS() - startTime >= MAX_TIMEOUT; | ||
|
|
||
| if (!hasTimedOut) { | ||
| /* | ||
| * Note: a maxTimeMS error will have the MaxTimeMSExpired | ||
| * code (50) and can be reported as a top-level error or | ||
| * inside writeConcernError, ex. | ||
| * { ok:0, code: 50, codeName: 'MaxTimeMSExpired' } | ||
| * { ok:1, writeConcernError: { code: 50, codeName: 'MaxTimeMSExpired' } } | ||
| */ | ||
| if ( | ||
| !isMaxTimeMSExpiredError(commitError) && | ||
| commitError.hasErrorLabel(MongoErrorLabel.UnknownTransactionCommitResult) | ||
| ) { | ||
| // 10.i If the `commitTransaction` error includes a "UnknownTransactionCommitResult" label and the error is not | ||
| // MaxTimeMSExpired and the elapsed time of `withTransaction` is less than 120 seconds, jump back to step nine. | ||
| continue retryCommit; | ||
| } | ||
|
|
||
| if (commitError.hasErrorLabel(MongoErrorLabel.TransientTransactionError)) { | ||
| // 10.ii If the commitTransaction error includes a "TransientTransactionError" label | ||
| // and the elapsed time of withTransaction is less than 120 seconds, jump back to step two. | ||
| lastError = commitError; | ||
|
|
||
| continue retryTransaction; | ||
| } | ||
| } | ||
|
|
||
| if ( | ||
| commitError.hasErrorLabel(MongoErrorLabel.TransientTransactionError) && | ||
| (this.timeoutContext != null || processTimeMS() - startTime < MAX_TIMEOUT) | ||
| ) { | ||
| // 9.ii If the commitTransaction error includes a "TransientTransactionError" label | ||
| // and the elapsed time of withTransaction is less than 120 seconds, jump back to step two. | ||
| break; | ||
| } | ||
|
|
||
| // 9.iii Otherwise, propagate the commitTransaction error to the caller of withTransaction and return immediately. | ||
| // 10.iii Otherwise, propagate the commitTransaction error to the caller of withTransaction and return immediately. | ||
| throw commitError; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // @ts-expect-error Result is always defined if we reach here, the for-loop above convinces TS it is not. | ||
| return result; | ||
| } finally { | ||
| this.timeoutContext = null; | ||
|
|
||
87 changes: 87 additions & 0 deletions
87
test/integration/transactions-convenient-api/transactions-convenient-api.prose.test.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| import { expect } from 'chai'; | ||
| import { test } from 'mocha'; | ||
| import * as sinon from 'sinon'; | ||
|
|
||
| import { type ClientSession, type Collection, type MongoClient } from '../../../src'; | ||
| import { configureFailPoint, type FailCommandFailPoint, measureDuration } from '../../tools/utils'; | ||
|
|
||
| const failCommand: FailCommandFailPoint = { | ||
| configureFailPoint: 'failCommand', | ||
| mode: { | ||
| times: 13 | ||
| }, | ||
| data: { | ||
| failCommands: ['commitTransaction'], | ||
| errorCode: 251 // no such transaction | ||
| } | ||
| }; | ||
|
|
||
| describe('Retry Backoff is Enforced', function () { | ||
| // 1. let client be a MongoClient | ||
| let client: MongoClient; | ||
|
|
||
| // 2. let coll be a collection | ||
| let collection: Collection; | ||
|
|
||
| beforeEach(async function () { | ||
| client = this.configuration.newClient(); | ||
| collection = client.db('foo').collection('bar'); | ||
| }); | ||
|
|
||
| afterEach(async function () { | ||
| sinon.restore(); | ||
| await client?.close(); | ||
| }); | ||
|
|
||
| test( | ||
| 'works', | ||
| { | ||
| requires: { | ||
| mongodb: '>=4.4', // failCommand | ||
| topology: '!single' // transactions can't run on standalone servers | ||
| } | ||
| }, | ||
| async function () { | ||
| const randomStub = sinon.stub(Math, 'random'); | ||
|
|
||
| // 3.i Configure the random number generator used for jitter to always return 0 | ||
| randomStub.returns(0); | ||
dariakp marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // 3.ii Configure a fail point that forces 13 retries | ||
| await configureFailPoint(this.configuration, failCommand); | ||
|
|
||
| // 3.iii | ||
| const callback = async (s: ClientSession) => { | ||
| await collection.insertOne({}, { session: s }); | ||
| }; | ||
|
|
||
| // 3.iv Let no_backoff_time be the duration of the withTransaction API call | ||
| const { duration: noBackoffTime } = await measureDuration(() => { | ||
| return client.withSession(async s => { | ||
| await s.withTransaction(callback); | ||
| }); | ||
| }); | ||
|
|
||
| // 4.i Configure the random number generator used for jitter to always return 1. | ||
| randomStub.returns(1); | ||
|
|
||
| // 4.ii Configure a fail point that forces 13 retries, as in step 3.ii. | ||
| await configureFailPoint(this.configuration, failCommand); | ||
|
|
||
| // 4.iii Use the same callback defined in 3.iii. | ||
| // 4.iv Let with_backoff_time be the duration of the withTransaction API call | ||
| const { duration: fullBackoffDuration } = await measureDuration(() => { | ||
| return client.withSession(async s => { | ||
| await s.withTransaction(callback); | ||
| }); | ||
| }); | ||
|
|
||
| // 5. Compare the durations of the two runs. | ||
baileympearson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
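| // The sum of 13 backoffs is expected to be roughly 2.2 seconds. There is a 1-second window to account for potential variance between the two runs. NOTE(review): with a 5ms initial backoff, 1.5x growth and a 500ms cap, 13 full-jitter backoffs sum to ~1.8 seconds - confirm the expected value. | ||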
| expect(fullBackoffDuration).to.be.within( | ||
| noBackoffTime + 2200 - 1000, | ||
PavelSafronov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| noBackoffTime + 2200 + 1000 | ||
| ); | ||
| } | ||
| ); | ||
| }); | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.