diff --git a/driver-core/src/main/com/mongodb/internal/ExponentialBackoff.java b/driver-core/src/main/com/mongodb/internal/ExponentialBackoff.java
new file mode 100644
index 0000000000..518286319a
--- /dev/null
+++ b/driver-core/src/main/com/mongodb/internal/ExponentialBackoff.java
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2008-present MongoDB, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.mongodb.internal;
+
+import com.mongodb.annotations.NotThreadSafe;
+
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Implements exponential backoff with jitter for retry scenarios.
+ * Formula: {@code delayMs = jitter * min(maxBackoffMs, baseBackoffMs * growthFactor^retryCount)},
+ * where {@code jitter} is a random value in {@code [0, 1)}.
+ *
+ * <p>This class provides factory methods for common use cases:</p>
+ * <ul>
+ *   <li>{@link #forTransactionRetry()}: 5ms base, 500ms max, 1.5 growth factor</li>
+ *   <li>{@link #forCommandRetry()}: 100ms base, 10000ms max, 2.0 growth factor</li>
+ * </ul>
+ *
+ * <p>Each instance tracks its own retry count, so use one instance per sequence of retries.</p>
+ */
+@NotThreadSafe
+public final class ExponentialBackoff {
+    // Transaction retry constants (per spec)
+    private static final double TRANSACTION_BASE_BACKOFF_MS = 5.0;
+    private static final double TRANSACTION_MAX_BACKOFF_MS = 500.0;
+    private static final double TRANSACTION_BACKOFF_GROWTH = 1.5;
+
+    // Command retry constants (per spec)
+    private static final double COMMAND_BASE_BACKOFF_MS = 100.0;
+    private static final double COMMAND_MAX_BACKOFF_MS = 10000.0;
+    private static final double COMMAND_BACKOFF_GROWTH = 2.0;
+
+    private final double baseBackoffMs;
+    private final double maxBackoffMs;
+    private final double growthFactor;
+    private int retryCount = 0;
+
+    /**
+     * Creates an exponential backoff instance with specified parameters.
+     *
+     * @param baseBackoffMs Initial backoff in milliseconds, must not be negative
+     * @param maxBackoffMs Maximum backoff cap in milliseconds, must not be negative
+     * @param growthFactor Exponential growth factor (e.g., 1.5 or 2.0), must be at least 1
+     * @throws IllegalArgumentException if any argument is NaN or outside its documented range
+     */
+    public ExponentialBackoff(final double baseBackoffMs, final double maxBackoffMs, final double growthFactor) {
+        // Negated comparisons so that NaN is rejected together with out-of-range values
+        if (!(baseBackoffMs >= 0)) {
+            throw new IllegalArgumentException("baseBackoffMs must be >= 0, but was " + baseBackoffMs);
+        }
+        if (!(maxBackoffMs >= 0)) {
+            throw new IllegalArgumentException("maxBackoffMs must be >= 0, but was " + maxBackoffMs);
+        }
+        if (!(growthFactor >= 1)) {
+            throw new IllegalArgumentException("growthFactor must be >= 1, but was " + growthFactor);
+        }
+        this.baseBackoffMs = baseBackoffMs;
+        this.maxBackoffMs = maxBackoffMs;
+        this.growthFactor = growthFactor;
+    }
+
+    /**
+     * Creates a backoff instance configured for withTransaction retries.
+     * Uses: 5ms base, 500ms max, 1.5 growth factor.
+     *
+     * @return ExponentialBackoff configured for transaction retries
+     */
+    public static ExponentialBackoff forTransactionRetry() {
+        return new ExponentialBackoff(
+                TRANSACTION_BASE_BACKOFF_MS,
+                TRANSACTION_MAX_BACKOFF_MS,
+                TRANSACTION_BACKOFF_GROWTH
+        );
+    }
+
+    /**
+     * Creates a backoff instance configured for command retries during overload.
+     * Uses: 100ms base, 10000ms max, 2.0 growth factor.
+     *
+     * @return ExponentialBackoff configured for command retries
+     */
+    public static ExponentialBackoff forCommandRetry() {
+        return new ExponentialBackoff(
+                COMMAND_BASE_BACKOFF_MS,
+                COMMAND_MAX_BACKOFF_MS,
+                COMMAND_BACKOFF_GROWTH
+        );
+    }
+
+    /**
+     * Calculates the next backoff delay with jitter and advances the retry count.
+     *
+     * @return delay in milliseconds, between 0 and the rounded capped backoff for the current retry count
+     */
+    public long calculateDelayMs() {
+        double jitter = ThreadLocalRandom.current().nextDouble();
+        // Math.pow overflows to +Infinity for very large retry counts; the cap below still applies
+        double exponentialBackoff = baseBackoffMs * Math.pow(growthFactor, retryCount);
+        double cappedBackoff = Math.min(exponentialBackoff, maxBackoffMs);
+        // Saturate so the exponent cannot wrap around to a negative value
+        if (retryCount < Integer.MAX_VALUE) {
+            retryCount++;
+        }
+        return Math.round(jitter * cappedBackoff);
+    }
+
+    /**
+     * Applies the next backoff delay by sleeping the current thread. A zero delay returns without sleeping.
+     *
+     * @throws InterruptedException if thread is interrupted during sleep
+     */
+    public void applyBackoff() throws InterruptedException {
+        long delayMs = calculateDelayMs();
+        if (delayMs > 0) {
+            Thread.sleep(delayMs);
+        }
+    }
+
+    /**
+     * Reset retry counter for new sequence of retries.
+     */
+    public void reset() {
+        retryCount = 0;
+    }
+
+    /**
+     * Get current retry count for testing.
+     *
+     * @return current retry count
+     */
+    public int getRetryCount() {
+        return retryCount;
+    }
+
+    /**
+     * Get the base backoff in milliseconds.
+     *
+     * @return base backoff
+     */
+    public double getBaseBackoffMs() {
+        return baseBackoffMs;
+    }
+
+    /**
+     * Get the maximum backoff in milliseconds.
+     *
+     * @return maximum backoff
+     */
+    public double getMaxBackoffMs() {
+        return maxBackoffMs;
+    }
+
+    /**
+     * Get the growth factor.
+     *
+     * @return growth factor
+     */
+    public double getGrowthFactor() {
+        return growthFactor;
+    }
+}
diff --git a/driver-core/src/test/unit/com/mongodb/internal/ExponentialBackoffTest.java b/driver-core/src/test/unit/com/mongodb/internal/ExponentialBackoffTest.java
new file mode 100644
index 0000000000..bfee96e67f
--- /dev/null
+++ b/driver-core/src/test/unit/com/mongodb/internal/ExponentialBackoffTest.java
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2008-present MongoDB, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.mongodb.internal;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class ExponentialBackoffTest {
+
+    @Test
+    void testTransactionRetryBackoff() {
+        ExponentialBackoff backoff = ExponentialBackoff.forTransactionRetry();
+
+        // Verify configuration
+        assertEquals(5.0, backoff.getBaseBackoffMs());
+        assertEquals(500.0, backoff.getMaxBackoffMs());
+        assertEquals(1.5, backoff.getGrowthFactor());
+
+        // Retry i is bounded by min(5 * 1.5^i, 500); jitter in [0, 1) can only lower the delay
+        assertDelaysWithin(backoff, 5.0, 7.5, 11.25);
+
+        // Verify the retry count is incrementing properly
+        assertEquals(3, backoff.getRetryCount());
+    }
+
+    @Test
+    void testTransactionRetryBackoffRespectsMaximum() {
+        ExponentialBackoff backoff = ExponentialBackoff.forTransactionRetry();
+
+        // Advance to a high retry count where backoff would exceed 500ms without capping
+        for (int i = 0; i < 20; i++) {
+            backoff.calculateDelayMs();
+        }
+
+        // Even at high retry counts, delay should never exceed 500ms
+        assertDelaysWithin(backoff, 500.0, 500.0, 500.0, 500.0, 500.0);
+    }
+
+    @Test
+    void testTransactionRetryBackoffSequenceWithExpectedValues() {
+        // Expected sequence (without jitter): 5 * 1.5^i, capped at 500
+        assertDelaysWithin(ExponentialBackoff.forTransactionRetry(),
+                5.0, 7.5, 11.25, 16.875, 25.3125, 37.96875, 56.953125, 85.4296875,
+                128.14453125, 192.21679688, 288.32519531, 432.48779297, 500.0);
+    }
+
+    @Test
+    void testCommandRetryBackoff() {
+        ExponentialBackoff backoff = ExponentialBackoff.forCommandRetry();
+
+        // Verify configuration
+        assertEquals(100.0, backoff.getBaseBackoffMs());
+        assertEquals(10000.0, backoff.getMaxBackoffMs());
+        assertEquals(2.0, backoff.getGrowthFactor());
+
+        // Test sequence with growth factor 2.0
+        assertDelaysWithin(backoff, 100.0, 200.0, 400.0, 800.0, 1600.0);
+    }
+
+    @Test
+    void testCommandRetryBackoffRespectsMaximum() {
+        ExponentialBackoff backoff = ExponentialBackoff.forCommandRetry();
+
+        // Advance to where exponential would exceed 10000ms
+        for (int i = 0; i < 10; i++) {
+            backoff.calculateDelayMs();
+        }
+
+        // Even at high retry counts, delay should never exceed 10000ms
+        assertDelaysWithin(backoff, 10000.0, 10000.0, 10000.0, 10000.0, 10000.0);
+    }
+
+    @Test
+    void testCommandRetrySequenceMatchesSpec() {
+        // Test that command retry follows spec: 100ms * 2^i capped at 10000ms
+        assertDelaysWithin(ExponentialBackoff.forCommandRetry(),
+                100.0, 200.0, 400.0, 800.0, 1600.0, 3200.0, 6400.0, 10000.0, 10000.0);
+    }
+
+    @Test
+    void testCustomBackoff() {
+        // Test with custom parameters
+        ExponentialBackoff backoff = new ExponentialBackoff(50.0, 2000.0, 1.8);
+
+        assertEquals(50.0, backoff.getBaseBackoffMs());
+        assertEquals(2000.0, backoff.getMaxBackoffMs());
+        assertEquals(1.8, backoff.getGrowthFactor());
+
+        // First delay: 0-50ms, second delay: 0-90ms (50 * 1.8)
+        assertDelaysWithin(backoff, 50.0, 90.0);
+    }
+
+    @Test
+    void testConstructorRejectsInvalidArguments() {
+        assertThrows(IllegalArgumentException.class, () -> new ExponentialBackoff(-1.0, 500.0, 1.5));
+        assertThrows(IllegalArgumentException.class, () -> new ExponentialBackoff(5.0, -1.0, 1.5));
+        assertThrows(IllegalArgumentException.class, () -> new ExponentialBackoff(5.0, 500.0, 0.5));
+        assertThrows(IllegalArgumentException.class, () -> new ExponentialBackoff(Double.NaN, 500.0, 1.5));
+    }
+
+    @Test
+    void testReset() {
+        ExponentialBackoff backoff = ExponentialBackoff.forTransactionRetry();
+
+        // Perform some retries
+        backoff.calculateDelayMs();
+        backoff.calculateDelayMs();
+        assertEquals(2, backoff.getRetryCount());
+
+        // Reset and verify counter is back to 0
+        backoff.reset();
+        assertEquals(0, backoff.getRetryCount());
+
+        // First delay after reset should be in the initial range again
+        assertDelaysWithin(backoff, 5.0);
+    }
+
+    /**
+     * Draws one delay per expected value and asserts that it lies between 0 and that value rounded to the nearest ms.
+     */
+    private static void assertDelaysWithin(final ExponentialBackoff backoff, final double... expectedMaxValues) {
+        for (int i = 0; i < expectedMaxValues.length; i++) {
+            long upperBound = Math.round(expectedMaxValues[i]);
+            long delay = backoff.calculateDelayMs();
+            assertTrue(delay >= 0 && delay <= upperBound,
+                    String.format("Retry %d: delay should be 0-%d ms, got: %d", i, upperBound, delay));
+        }
+    }
+}
diff --git a/driver-sync/src/main/com/mongodb/client/internal/ClientSessionImpl.java b/driver-sync/src/main/com/mongodb/client/internal/ClientSessionImpl.java
index aa1414dce5..a65d334088 100644
--- a/driver-sync/src/main/com/mongodb/client/internal/ClientSessionImpl.java
+++ b/driver-sync/src/main/com/mongodb/client/internal/ClientSessionImpl.java
@@ -22,12 +22,14 @@
 import com.mongodb.MongoExecutionTimeoutException;
 import com.mongodb.MongoInternalException;
 import com.mongodb.MongoOperationTimeoutException;
+import com.mongodb.MongoTimeoutException;
 import com.mongodb.ReadConcern;
 import com.mongodb.TransactionOptions;
 import com.mongodb.WriteConcern;
 import com.mongodb.client.ClientSession;
 import com.mongodb.client.TransactionBody;
 import
com.mongodb.internal.TimeoutContext; +import com.mongodb.internal.ExponentialBackoff; import com.mongodb.internal.operation.AbortTransactionOperation; import com.mongodb.internal.operation.CommitTransactionOperation; import com.mongodb.internal.operation.OperationHelper; @@ -251,10 +253,34 @@ public T withTransaction(final TransactionBody transactionBody, final Tra notNull("transactionBody", transactionBody); long startTime = ClientSessionClock.INSTANCE.now(); TimeoutContext withTransactionTimeoutContext = createTimeoutContext(options); + // Use CSOT timeout if set, otherwise default to MAX_RETRY_TIME_LIMIT_MS + Long timeoutMS = withTransactionTimeoutContext.getTimeoutSettings().getTimeoutMS(); + long maxRetryTimeMS = timeoutMS != null ? timeoutMS : MAX_RETRY_TIME_LIMIT_MS; + ExponentialBackoff transactionBackoff = null; + boolean isRetry = false; try { outer: while (true) { + // Apply exponential backoff before retrying transaction + if (isRetry) { + // Check if we've exceeded the retry time limit BEFORE applying backoff + if (ClientSessionClock.INSTANCE.now() - startTime >= maxRetryTimeMS) { + throw withTransactionTimeoutContext.hasTimeoutMS() + ? 
new MongoOperationTimeoutException("Transaction retry time limit of " + maxRetryTimeMS + "ms exceeded") + : new MongoTimeoutException("Transaction retry time limit of " + maxRetryTimeMS + "ms exceeded"); + } + if (transactionBackoff == null) { + transactionBackoff = ExponentialBackoff.forTransactionRetry(); + } + try { + transactionBackoff.applyBackoff(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new MongoClientException("Transaction retry interrupted", e); + } + } + isRetry = true; T retVal; try { startTransaction(options, withTransactionTimeoutContext.copyTimeoutContext()); @@ -269,7 +295,7 @@ public T withTransaction(final TransactionBody transactionBody, final Tra if (e instanceof MongoException && !(e instanceof MongoOperationTimeoutException)) { MongoException exceptionToHandle = OperationHelper.unwrap((MongoException) e); if (exceptionToHandle.hasErrorLabel(TRANSIENT_TRANSACTION_ERROR_LABEL) - && ClientSessionClock.INSTANCE.now() - startTime < MAX_RETRY_TIME_LIMIT_MS) { + && ClientSessionClock.INSTANCE.now() - startTime < maxRetryTimeMS) { if (transactionSpan != null) { transactionSpan.spanFinalizing(false); } @@ -286,7 +312,7 @@ public T withTransaction(final TransactionBody transactionBody, final Tra } catch (MongoException e) { clearTransactionContextOnError(e); if (!(e instanceof MongoOperationTimeoutException) - && ClientSessionClock.INSTANCE.now() - startTime < MAX_RETRY_TIME_LIMIT_MS) { + && ClientSessionClock.INSTANCE.now() - startTime < maxRetryTimeMS) { applyMajorityWriteConcernToTransactionOptions(); if (!(e instanceof MongoExecutionTimeoutException) diff --git a/driver-sync/src/test/functional/com/mongodb/client/WithTransactionProseTest.java b/driver-sync/src/test/functional/com/mongodb/client/WithTransactionProseTest.java index 1afbf61565..e06f3c8290 100644 --- a/driver-sync/src/test/functional/com/mongodb/client/WithTransactionProseTest.java +++ 
b/driver-sync/src/test/functional/com/mongodb/client/WithTransactionProseTest.java
@@ -27,6 +27,7 @@
 import org.junit.jupiter.api.Test;
 
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import static com.mongodb.ClusterFixture.TIMEOUT;
 import static com.mongodb.ClusterFixture.isDiscoverableReplicaSet;
@@ -118,12 +119,12 @@ public void testRetryTimeoutEnforcedTransientTransactionError() {
     public void testRetryTimeoutEnforcedUnknownTransactionCommit() {
         MongoDatabase failPointAdminDb = client.getDatabase("admin");
         failPointAdminDb.runCommand(
-                Document.parse("{'configureFailPoint': 'failCommand', 'mode': {'times': 2}, "
+                Document.parse("{'configureFailPoint': 'failCommand', 'mode': {'times': 1}, "
                         + "'data': {'failCommands': ['commitTransaction'], 'errorCode': 91, 'closeConnection': false}}"));
 
         try (ClientSession session = client.startSession()) {
             ClientSessionClock.INSTANCE.setTime(START_TIME_MS);
-            session.withTransaction((TransactionBody) () -> {
+            session.withTransaction(() -> {
                 ClientSessionClock.INSTANCE.setTime(ERROR_GENERATING_INTERVAL);
                 collection.insertOne(session, new Document("_id", 2));
                 return null;
@@ -146,13 +147,13 @@ public void testRetryTimeoutEnforcedUnknownTransactionCommit() {
     public void testRetryTimeoutEnforcedTransientTransactionErrorOnCommit() {
         MongoDatabase failPointAdminDb = client.getDatabase("admin");
         failPointAdminDb.runCommand(
-                Document.parse("{'configureFailPoint': 'failCommand', 'mode': {'times': 2}, "
+                Document.parse("{'configureFailPoint': 'failCommand', 'mode': {'times': 1}, "
                         + "'data': {'failCommands': ['commitTransaction'], 'errorCode': 251, 'codeName': 'NoSuchTransaction', "
                         + "'errmsg': 'Transaction 0 has been aborted', 'closeConnection': false}}"));
 
         try (ClientSession session = client.startSession()) {
             ClientSessionClock.INSTANCE.setTime(START_TIME_MS);
-            session.withTransaction((TransactionBody) () -> {
+            session.withTransaction(() -> {
                 ClientSessionClock.INSTANCE.setTime(ERROR_GENERATING_INTERVAL);
                 collection.insertOne(session, Document.parse("{ _id : 1 }"));
                 return null;
@@ -203,6 +204,35 @@ public void testTimeoutMSAndLegacySettings() {
         }
     }
 
+    //
+    // Test that withTransaction keeps retrying (with exponential backoff between attempts) on transient errors.
+    // Backoff delays are jittered and may be 0ms, so only the number of attempts is asserted.
+    //
+    @Test
+    public void testExponentialBackoffOnTransientError() {
+        // Configure failpoint to simulate transient errors
+        MongoDatabase failPointAdminDb = client.getDatabase("admin");
+        failPointAdminDb.runCommand(
+                Document.parse("{'configureFailPoint': 'failCommand', 'mode': {'times': 3}, "
+                        + "'data': {'failCommands': ['insert'], 'errorCode': 112, "
+                        + "'errorLabels': ['TransientTransactionError']}}"));
+
+        try (ClientSession session = client.startSession()) {
+            // Counts every execution of the transaction body, including the initial attempt
+            AtomicInteger attemptCount = new AtomicInteger(0);
+
+            session.withTransaction(() -> {
+                attemptCount.incrementAndGet(); // Count the attempt before the operation that might fail
+                collection.insertOne(session, Document.parse("{ _id : 'backoff-test' }"));
+                return null;
+            });
+
+            assertEquals(4, attemptCount.get(), "Expected 1 initial attempt + 3 retries");
+        } finally {
+            failPointAdminDb.runCommand(Document.parse("{'configureFailPoint': 'failCommand', 'mode': 'off'}"));
+        }
+    }
+
     private boolean canRunTests() {
         return isSharded() || isDiscoverableReplicaSet();
     }