Skip to content

Commit 3ed3bc4

Browse files
committed
Use linear back-off, document back-off parameters, add details to timeout exception message
1 parent 04dd6b2 commit 3ed3bc4

File tree

6 files changed

+145
-13
lines changed

6 files changed

+145
-13
lines changed

docs/reference/snapshot-restore/repository-s3.asciidoc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,20 @@ include::repository-shared-settings.asciidoc[]
329329
`1000` which is the maximum number supported by the https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html[AWS
330330
ListMultipartUploads API]. If set to `0`, {es} will not attempt to clean up dangling multipart uploads.
331331

332+
`throttled_delete_retry.delay_increment`::
333+
334+
(integer) This value is used as the delay before the first retry and the amount the delay is incremented by on each subsequent retry. Default is 50ms, minimum is 0ms.
335+
336+
`throttled_delete_retry.maximum_delay`::
337+
338+
(integer) This is the upper bound on how long the delays between retries will grow to. Default is 500ms, minimum is 0ms.
339+
340+
`throttled_delete_retry.maximum_number_of_retries`::
341+
342+
(integer) Sets the number times to retry a throttled snapshot deletion. Defaults to `10`, minimum value is `0` which
343+
will disable retries altogether. Note that if retries are enabled in the Azure client, each of these retries will
344+
comprise of that many attempts.
345+
332346
NOTE: The option of defining client settings in the repository settings as
333347
documented below is considered deprecated, and will be removed in a future
334348
version.

modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryMetricsTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ public void testRetrySnapshotDeleteMetricsOnEventualSuccess() throws IOException
267267
Settings.builder()
268268
.put(repositorySettings(repositoryName))
269269
.put(S3ClientSettings.MAX_RETRIES_SETTING.getConcreteSettingForNamespace("placeholder").getKey(), 0)
270-
.put(S3Repository.RETRY_THROTTLED_DELETE_INITIAL_DELAY.getKey(), TimeValue.timeValueMillis(10))
270+
.put(S3Repository.RETRY_THROTTLED_DELETE_DELAY_INCREMENT.getKey(), TimeValue.timeValueMillis(10))
271271
.put(S3Repository.RETRY_THROTTLED_DELETE_MAX_NUMBER_OF_RETRIES.getKey(), maxRetries)
272272
.build(),
273273
false
@@ -299,7 +299,7 @@ public void testRetrySnapshotDeleteMetricsWhenRetriesExhausted() {
299299
Settings.builder()
300300
.put(repositorySettings(repositoryName))
301301
.put(S3ClientSettings.MAX_RETRIES_SETTING.getConcreteSettingForNamespace("placeholder").getKey(), 0)
302-
.put(S3Repository.RETRY_THROTTLED_DELETE_INITIAL_DELAY.getKey(), TimeValue.timeValueMillis(10))
302+
.put(S3Repository.RETRY_THROTTLED_DELETE_DELAY_INCREMENT.getKey(), TimeValue.timeValueMillis(10))
303303
.put(S3Repository.RETRY_THROTTLED_DELETE_MAX_NUMBER_OF_RETRIES.getKey(), maxRetries)
304304
.build(),
305305
false

modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobStore.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,11 @@ private boolean maybeDelayAndRetryDelete(Iterator<TimeValue> retries) {
424424
logger.warn("Aborting tenacious snapshot delete retries due to interrupt");
425425
}
426426
} else {
427-
logger.warn("Exceeded maximum tenacious snapshot delete retries, aborting");
427+
logger.warn(
428+
"Exceeded maximum tenacious snapshot delete retries, aborting. Using back-off policy "
429+
+ retryThrottledDeleteBackoffPolicy
430+
+ ", see the throttled_delete_retry.* S3 repository properties to configure the back-off parameters"
431+
);
428432
}
429433
return false;
430434
}

modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -204,17 +204,22 @@ class S3Repository extends MeteredBlobStoreRepository {
204204
);
205205

206206
/**
207-
* We will retry deletes that fail due to throttling. We use an {@link BackoffPolicy#exponentialBackoff(TimeValue, int)}
208-
* with the following <code>initialDelay</code> and <code>maxNumberOfRetries</code>
207+
* We will retry deletes that fail due to throttling. We use an {@link BackoffPolicy#linearBackoff(TimeValue, int, TimeValue)}
208+
* with the following parameters
209209
*/
210-
static final Setting<TimeValue> RETRY_THROTTLED_DELETE_INITIAL_DELAY = Setting.timeSetting(
211-
"throttled_delete_retry.initial_delay",
210+
static final Setting<TimeValue> RETRY_THROTTLED_DELETE_DELAY_INCREMENT = Setting.timeSetting(
211+
"throttled_delete_retry.delay_increment",
212212
new TimeValue(50, TimeUnit.MILLISECONDS),
213-
new TimeValue(10, TimeUnit.MILLISECONDS)
213+
new TimeValue(0, TimeUnit.MILLISECONDS)
214+
);
215+
static final Setting<TimeValue> RETRY_THROTTLED_DELETE_MAXIMUM_DELAY = Setting.timeSetting(
216+
"throttled_delete_retry.maximum_delay",
217+
new TimeValue(500, TimeUnit.MILLISECONDS),
218+
new TimeValue(0, TimeUnit.MILLISECONDS)
214219
);
215220
static final Setting<Integer> RETRY_THROTTLED_DELETE_MAX_NUMBER_OF_RETRIES = Setting.intSetting(
216-
"throttled_delete_retry.max_number_of_retries",
217-
8,
221+
"throttled_delete_retry.maximum_number_of_retries",
222+
10,
218223
0
219224
);
220225

@@ -441,9 +446,10 @@ protected S3BlobStore createBlobStore() {
441446
bigArrays,
442447
threadPool,
443448
s3RepositoriesMetrics,
444-
BackoffPolicy.exponentialBackoff(
445-
RETRY_THROTTLED_DELETE_INITIAL_DELAY.get(metadata.settings()),
446-
RETRY_THROTTLED_DELETE_MAX_NUMBER_OF_RETRIES.get(metadata.settings())
449+
BackoffPolicy.linearBackoff(
450+
RETRY_THROTTLED_DELETE_DELAY_INCREMENT.get(metadata.settings()),
451+
RETRY_THROTTLED_DELETE_MAX_NUMBER_OF_RETRIES.get(metadata.settings()),
452+
RETRY_THROTTLED_DELETE_MAXIMUM_DELAY.get(metadata.settings())
447453
)
448454
);
449455
}

server/src/main/java/org/elasticsearch/common/BackoffPolicy.java

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*/
99
package org.elasticsearch.common;
1010

11+
import org.elasticsearch.core.Nullable;
1112
import org.elasticsearch.core.TimeValue;
1213

1314
import java.util.Collections;
@@ -81,6 +82,18 @@ public static BackoffPolicy exponentialBackoff(TimeValue initialDelay, int maxNu
8182
return new ExponentialBackoff((int) checkDelay(initialDelay).millis(), maxNumberOfRetries);
8283
}
8384

85+
/**
86+
* Creates a new linear backoff policy with the provided configuration
87+
*
88+
* @param delayIncrement The amount by which to increment the delay on each retry
89+
* @param maxNumberOfRetries The maximum number of retries
90+
* @param maximumDelay The maximum delay
91+
* @return A backoff policy with linear increase in wait time for retries.
92+
*/
93+
public static BackoffPolicy linearBackoff(TimeValue delayIncrement, int maxNumberOfRetries, TimeValue maximumDelay) {
94+
return new LinearBackoff(delayIncrement, maxNumberOfRetries, maximumDelay);
95+
}
96+
8497
/**
8598
* Wraps the backoff policy in one that calls a method every time a new backoff is taken from the policy.
8699
*/
@@ -100,6 +113,11 @@ private static class NoBackoff extends BackoffPolicy {
100113
public Iterator<TimeValue> iterator() {
101114
return Collections.emptyIterator();
102115
}
116+
117+
@Override
118+
public String toString() {
119+
return "NoBackoff";
120+
}
103121
}
104122

105123
private static class ExponentialBackoff extends BackoffPolicy {
@@ -118,6 +136,11 @@ private ExponentialBackoff(int start, int numberOfElements) {
118136
public Iterator<TimeValue> iterator() {
119137
return new ExponentialBackoffIterator(start, numberOfElements);
120138
}
139+
140+
@Override
141+
public String toString() {
142+
return "ExponentialBackoff{start=" + start + ", numberOfElements=" + numberOfElements + '}';
143+
}
121144
}
122145

123146
private static class ExponentialBackoffIterator implements Iterator<TimeValue> {
@@ -163,6 +186,11 @@ private static final class ConstantBackoff extends BackoffPolicy {
163186
public Iterator<TimeValue> iterator() {
164187
return new ConstantBackoffIterator(delay, numberOfElements);
165188
}
189+
190+
@Override
191+
public String toString() {
192+
return "ConstantBackoff{delay=" + delay + ", numberOfElements=" + numberOfElements + '}';
193+
}
166194
}
167195

168196
private static final class ConstantBackoffIterator implements Iterator<TimeValue> {
@@ -203,6 +231,11 @@ private static final class WrappedBackoffPolicy extends BackoffPolicy {
203231
public Iterator<TimeValue> iterator() {
204232
return new WrappedBackoffIterator(delegate.iterator(), onBackoff);
205233
}
234+
235+
@Override
236+
public String toString() {
237+
return "WrappedBackoffPolicy{delegate=" + delegate + ", onBackoff=" + onBackoff + '}';
238+
}
206239
}
207240

208241
private static final class WrappedBackoffIterator implements Iterator<TimeValue> {
@@ -228,4 +261,60 @@ public TimeValue next() {
228261
return delegate.next();
229262
}
230263
}
264+
265+
private static final class LinearBackoff extends BackoffPolicy {
266+
267+
private final TimeValue delayIncrement;
268+
private final int maxNumberOfRetries;
269+
private final TimeValue maximumDelay;
270+
271+
private LinearBackoff(TimeValue delayIncrement, int maxNumberOfRetries, @Nullable TimeValue maximumDelay) {
272+
this.delayIncrement = delayIncrement;
273+
this.maxNumberOfRetries = maxNumberOfRetries;
274+
this.maximumDelay = maximumDelay;
275+
}
276+
277+
@Override
278+
public Iterator<TimeValue> iterator() {
279+
return new LinearBackoffIterator(delayIncrement, maxNumberOfRetries, maximumDelay);
280+
}
281+
282+
@Override
283+
public String toString() {
284+
return "LinearBackoff{"
285+
+ "delayIncrement="
286+
+ delayIncrement
287+
+ ", maxNumberOfRetries="
288+
+ maxNumberOfRetries
289+
+ ", maximumDelay="
290+
+ maximumDelay
291+
+ '}';
292+
}
293+
}
294+
295+
private static final class LinearBackoffIterator implements Iterator<TimeValue> {
296+
297+
private final TimeValue delayIncrement;
298+
private final int maxNumberOfRetries;
299+
private final TimeValue maximumDelay;
300+
private int curr;
301+
302+
private LinearBackoffIterator(TimeValue delayIncrement, int maxNumberOfRetries, @Nullable TimeValue maximumDelay) {
303+
this.delayIncrement = delayIncrement;
304+
this.maxNumberOfRetries = maxNumberOfRetries;
305+
this.maximumDelay = maximumDelay;
306+
}
307+
308+
@Override
309+
public boolean hasNext() {
310+
return curr < maxNumberOfRetries;
311+
}
312+
313+
@Override
314+
public TimeValue next() {
315+
curr++;
316+
TimeValue timeValue = TimeValue.timeValueMillis(curr * delayIncrement.millis());
317+
return maximumDelay == null ? timeValue : timeValue.compareTo(maximumDelay) < 0 ? timeValue : maximumDelay;
318+
}
319+
}
231320
}

server/src/test/java/org/elasticsearch/common/BackoffPolicyTests.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,25 @@ public void testExponentialBackOff() {
7777
}
7878
}
7979

80+
public void testLinearBackoffWithLimit() {
81+
int incrementMillis = randomIntBetween(10, 500);
82+
int limitMillis = randomIntBetween(1000, 5000);
83+
int maxNumberOfRetries = randomIntBetween(0, 30);
84+
BackoffPolicy timeValues = BackoffPolicy.linearBackoff(
85+
timeValueMillis(incrementMillis),
86+
maxNumberOfRetries,
87+
timeValueMillis(limitMillis)
88+
);
89+
int counter = 0;
90+
for (TimeValue timeValue : timeValues) {
91+
counter++;
92+
int unlimitedValue = counter * incrementMillis;
93+
int expectedValue = Math.min(unlimitedValue, limitMillis);
94+
assertEquals(timeValueMillis(expectedValue), timeValue);
95+
}
96+
assertEquals(counter, maxNumberOfRetries);
97+
}
98+
8099
public void testNoBackoff() {
81100
BackoffPolicy noBackoff = BackoffPolicy.noBackoff();
82101
int numberOfBackoffsToPerform = randomIntBetween(1, 3);

0 commit comments

Comments
 (0)