Commit 0b689b7

Fix SnapshotBasedIndexRecoveryIT#testNodeDisconnectsDoNotOverAccountRecoveredBytes (elastic#90849)
The clock used to check for timeouts does not provide enough granularity, so sometimes the RESTORE_FILE_FROM_SNAPSHOT request is retried when the test expects it to fail right away. This commit configures the nodes with a retry timeout of 0, effectively disabling retries of recovery requests.

Closes elastic#90665
1 parent 9e83084 commit 0b689b7
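
To make the granularity problem concrete, here is a minimal, self-contained Java sketch. It is hypothetical and not Elasticsearch's actual retry logic (CachedClock and the `elapsed < timeout` check are stand-ins): when the timeout check reads a cached clock that is only refreshed periodically, a 1ms timeout can still look unexpired and the action gets retried, while a timeout of 0 can never look unexpired.

public class RetryTimeoutGranularity {

    // Stand-in for a cached clock that is only refreshed on an interval,
    // not on every read (hypothetical, for illustration only).
    static final class CachedClock {
        private volatile long cachedMillis = System.currentTimeMillis();
        long relativeTimeInMillis() { return cachedMillis; }
        void refresh() { cachedMillis = System.currentTimeMillis(); }
    }

    public static void main(String[] args) throws InterruptedException {
        CachedClock clock = new CachedClock();
        long start = clock.relativeTimeInMillis();

        Thread.sleep(5); // more than 1ms of wall-clock time passes...

        // ...but the cached clock was never refreshed, so elapsed still reads 0.
        long elapsed = clock.relativeTimeInMillis() - start;

        // With the old 1ms timeout, the action is (unexpectedly) retried:
        System.out.println("retry with 1ms timeout: " + (elapsed < 1)); // true

        // With a timeout of 0, the check can never pass, so retries are
        // disabled regardless of clock granularity:
        System.out.println("retry with 0ms timeout: " + (elapsed < 0)); // false
    }
}

This is why the commit switches the setting to TimeValue.ZERO rather than another small value: a zero timeout makes the no-retry behavior independent of how often the clock ticks.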

File tree

1 file changed (+17, -9 lines)

x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java

Lines changed: 17 additions & 9 deletions
@@ -100,6 +100,7 @@
 import static org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY;
 import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_INTERNAL_ACTION_RETRY_TIMEOUT_SETTING;
 import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING;
+import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_FILE_CHUNKS_SETTING;
 import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING;
 import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS;
 import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS_PER_NODE;
@@ -1255,7 +1256,6 @@ public void testRecoveryRetryKeepsTheGrantedSnapshotFileDownloadPermit() throws
         );
     }

-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/90665")
     public void testNodeDisconnectsDoNotOverAccountRecoveredBytes() throws Exception {
         // This test reproduces a rare (but possible) scenario where a shard is recovering using
         // snapshots, using logically equivalent index files, but half-way the connection between
@@ -1266,14 +1266,7 @@ public void testNodeDisconnectsDoNotOverAccountRecoveredBytes() throws Exception
         // - The target updates the recovered bytes for the file it has been downloading, after the recovery state was cleared.
         //   This could end up over-accounting the number of recovered bytes

-        Settings nodeSettings = Settings.builder()
-            // We use a really low timeout to avoid retrying the first RESTORE_FILE_FROM_SNAPSHOT after the connection is dropped
-            .put(INDICES_RECOVERY_INTERNAL_ACTION_RETRY_TIMEOUT_SETTING.getKey(), TimeValue.timeValueMillis(1))
-            .put(INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS.getKey(), 1)
-            .put(INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(), 1)
-            .build();
-
-        List<String> dataNodes = internalCluster().startDataOnlyNodes(3, nodeSettings);
+        List<String> dataNodes = internalCluster().startDataOnlyNodes(3);
         String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);
         createIndex(
             indexName,
@@ -1287,6 +1280,21 @@ public void testNodeDisconnectsDoNotOverAccountRecoveredBytes() throws Exception
         );
         ensureGreen(indexName);

+        assertAcked(
+            client().admin()
+                .cluster()
+                .prepareUpdateSettings()
+                .setPersistentSettings(
+                    Settings.builder()
+                        // Do not retry the first RESTORE_FILE_FROM_SNAPSHOT after the connection is closed
+                        .put(INDICES_RECOVERY_INTERNAL_ACTION_RETRY_TIMEOUT_SETTING.getKey(), TimeValue.ZERO)
+                        .put(INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS.getKey(), 1)
+                        .put(INDICES_RECOVERY_MAX_CONCURRENT_FILE_CHUNKS_SETTING.getKey(), 1)
+                        .put(INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(), 1)
+                        .build()
+                )
+        );
+
         int numDocs = randomIntBetween(300, 1000);
         indexDocs(indexName, 0, numDocs);
         // Flush to ensure that index_commit_seq_nos(replica) == index_commit_seq_nos(primary),
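
The over-accounting race the test guards against can also be pictured with a small, self-contained sketch. It is hypothetical (FileRecoveryState and its methods are made up, not the actual RecoveryState code): a stale download task that outlived a cancelled recovery reports its bytes on top of freshly reset counters, so the recovered total ends up exceeding the file size.

import java.util.concurrent.atomic.AtomicLong;

public class OverAccountingRace {

    // Hypothetical per-file recovery counters, reset when a recovery restarts.
    static final class FileRecoveryState {
        final long totalBytes;
        final AtomicLong recoveredBytes = new AtomicLong();
        FileRecoveryState(long totalBytes) { this.totalBytes = totalBytes; }
        void addRecoveredBytes(long bytes) { recoveredBytes.addAndGet(bytes); }
        void reset() { recoveredBytes.set(0); }
    }

    public static void main(String[] args) {
        FileRecoveryState state = new FileRecoveryState(100);

        state.addRecoveredBytes(80); // download in flight: 80/100 recovered
        state.reset();               // connection drops; recovery restarts, counters cleared
        state.addRecoveredBytes(40); // restarted download reports progress
        state.addRecoveredBytes(80); // stale task from before the reset also reports its bytes

        // recovered (120) now exceeds the file size (100): over-accounting
        System.out.println(state.recoveredBytes.get() + "/" + state.totalBytes);
    }
}

Disabling retries with a zero timeout keeps the first RESTORE_FILE_FROM_SNAPSHOT request from being replayed after the disconnect, which is what lets the test assert on the non-over-accounted byte counts.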
