Skip to content

Commit d4140e5

Browse files
committed
only rely on ClusterChangedEvent
1 parent f215455 commit d4140e5

File tree

2 files changed

+78
-35
lines changed

2 files changed

+78
-35
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
import java.util.ArrayList;
5454
import java.util.Collections;
5555
import java.util.Comparator;
56-
import java.util.HashSet;
5756
import java.util.Iterator;
5857
import java.util.List;
5958
import java.util.Map;
@@ -83,8 +82,6 @@ public class AllocationService {
8382
private final ClusterInfoService clusterInfoService;
8483
private final SnapshotsInfoService snapshotsInfoService;
8584
private final ShardRoutingRoleStrategy shardRoutingRoleStrategy;
86-
// Tracks node IDs whose shutdown metadata has already been considered for resetting allocation/relocation failures
87-
private final Set<String> processedNodeShutdowns = new HashSet<>();
8885

8986
// only for tests that use the GatewayAllocator as the unique ExistingShardsAllocator
9087
@SuppressWarnings("this-escape")
@@ -595,24 +592,49 @@ public void addAllocFailuresResetListenerTo(ClusterService clusterService) {
595592
*/
596593
private boolean shouldResetAllocationFailures(ClusterChangedEvent changeEvent) {
597594
final var clusterState = changeEvent.state();
598-
boolean hasAllocationFailures = clusterState.getRoutingNodes().hasAllocationFailures();
599-
boolean hasRelocationFailures = clusterState.getRoutingNodes().hasRelocationFailures();
600-
var shutdownEventAffectsAllocation = false;
601-
final var nodes = clusterState.nodes();
602-
final var nodeShutdowns = clusterState.metadata().nodeShutdowns();
603-
// If we remove a shutdown marker from a node, but it is still in the cluster, we could re-attempt failed relocations/allocations.
604-
shutdownEventAffectsAllocation = processedNodeShutdowns.stream()
605-
.anyMatch(nodeId -> nodeShutdowns.contains(nodeId) == false && nodes.get(nodeId) != null);
606-
// Clean up processed shutdowns that are removed from the cluster metadata
607-
processedNodeShutdowns.removeIf(nodeId -> nodeShutdowns.contains(nodeId) == false);
608-
for (var shutdown : nodeShutdowns.getAll().entrySet()) {
595+
596+
if (clusterState.getRoutingNodes().hasAllocationFailures() == false
597+
&& clusterState.getRoutingNodes().hasRelocationFailures() == false) {
598+
return false;
599+
}
600+
if (changeEvent.nodesAdded()) {
601+
return true;
602+
}
603+
604+
final var currentNodeShutdowns = clusterState.metadata().nodeShutdowns();
605+
final var previousNodeShutdowns = changeEvent.previousState().metadata().nodeShutdowns();
606+
607+
if (currentNodeShutdowns.equals(previousNodeShutdowns)) {
608+
return false;
609+
}
610+
611+
for (var currentShutdown : currentNodeShutdowns.getAll().entrySet()) {
612+
var previousNodeShutdown = previousNodeShutdowns.get(currentShutdown.getKey());
613+
if (currentShutdown.equals(previousNodeShutdown)) {
614+
continue;
615+
}
609616
// A RESTART doesn't necessarily move around shards, so no need to consider it for a reset.
610617
// Furthermore, once the node rejoins after restarting, there will be a reset if necessary.
611-
if (shutdown.getValue().getType() != SingleNodeShutdownMetadata.Type.RESTART) {
612-
shutdownEventAffectsAllocation |= processedNodeShutdowns.add(shutdown.getKey());
618+
if (currentShutdown.getValue().getType() == SingleNodeShutdownMetadata.Type.RESTART) {
619+
continue;
620+
}
621+
// A node with no shutdown marker or a RESTART marker receives a non-RESTART shutdown marker
622+
if (previousNodeShutdown == null || previousNodeShutdown.getType() == Type.RESTART) {
623+
return true;
613624
}
614625
}
615-
return (changeEvent.nodesAdded() || shutdownEventAffectsAllocation) && (hasAllocationFailures || hasRelocationFailures);
626+
627+
for (var previousShutdown : previousNodeShutdowns.getAll().entrySet()) {
628+
var nodeId = previousShutdown.getKey();
629+
// A non-RESTART marker is removed but the node is still in the cluster. We could re-attempt failed relocations/allocations.
630+
if (currentNodeShutdowns.get(nodeId) == null
631+
&& previousShutdown.getValue().getType() != SingleNodeShutdownMetadata.Type.RESTART
632+
&& clusterState.nodes().get(nodeId) != null) {
633+
return true;
634+
}
635+
}
636+
637+
return false;
616638
}
617639

618640
private ClusterState rerouteWithResetFailedCounter(ClusterState clusterState) {

x-pack/plugin/shutdown/src/internalClusterTest/java/org/elasticsearch/xpack/shutdown/AllocationFailuresResetOnShutdownIT.java

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -66,24 +66,34 @@ public void beforeIndexShardCreated(ShardRouting routing, Settings indexSettings
6666
assertThat(shardAfterFailures.state(), equalTo(ShardRoutingState.STARTED));
6767
assertThat(stateAfterFailures.nodes().get(shardAfterFailures.currentNodeId()).getName(), equalTo(node1));
6868
failRelocation.set(false);
69-
// A non-RESTART node shutdown should reset the counter and allow more relocation retries
70-
final var request = createRequest(randomFrom(SingleNodeShutdownMetadata.Type.values()), node1, node2);
71-
client().execute(PutShutdownNodeAction.INSTANCE, request).get();
72-
if (request.getType() != SingleNodeShutdownMetadata.Type.RESTART) {
73-
assertBusy(() -> {
74-
var stateAfterNodeJoin = internalCluster().clusterService().state();
75-
var relocatedShard = stateAfterNodeJoin.routingTable().index("index1").shard(0).primaryShard();
76-
assertThat(relocatedShard.relocationFailureInfo().failedRelocations(), Matchers.lessThan(maxAttempts));
77-
assertThat(relocatedShard, notNullValue());
78-
assertThat(stateAfterNodeJoin.nodes().get(relocatedShard.currentNodeId()).getName(), not(equalTo(node1)));
79-
});
80-
} else {
69+
if (randomBoolean()) {
70+
// A RESTART marker shouldn't cause a reset of failures
71+
final var request = createRequest(SingleNodeShutdownMetadata.Type.RESTART, node1, null);
72+
client().execute(PutShutdownNodeAction.INSTANCE, request).get();
8173
var state = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState();
8274
var shard = state.routingTable().index("index1").shard(0).primaryShard();
8375
assertThat(shard, notNullValue());
8476
assertThat(shard.relocationFailureInfo().failedRelocations(), equalTo(maxAttempts));
8577
assertThat(state.nodes().get(shard.currentNodeId()).getName(), equalTo(node1));
8678
}
79+
// A non-RESTART node shutdown should reset the counter and allow more relocation retries
80+
final var request = createRequest(
81+
randomFrom(
82+
SingleNodeShutdownMetadata.Type.REPLACE,
83+
SingleNodeShutdownMetadata.Type.SIGTERM,
84+
SingleNodeShutdownMetadata.Type.REMOVE
85+
),
86+
node1,
87+
node2
88+
);
89+
client().execute(PutShutdownNodeAction.INSTANCE, request).get();
90+
assertBusy(() -> {
91+
var stateAfterNodeJoin = internalCluster().clusterService().state();
92+
var relocatedShard = stateAfterNodeJoin.routingTable().index("index1").shard(0).primaryShard();
93+
assertThat(relocatedShard.relocationFailureInfo().failedRelocations(), Matchers.lessThan(maxAttempts));
94+
assertThat(relocatedShard, notNullValue());
95+
assertThat(stateAfterNodeJoin.nodes().get(relocatedShard.currentNodeId()).getName(), not(equalTo(node1)));
96+
});
8797
}
8898

8999
public void testResetRelocationFailuresOnNodeShutdownRemovalOfExistingNode() throws Exception {
@@ -178,19 +188,30 @@ public void beforeIndexShardCreated(ShardRouting routing, Settings indexSettings
178188

179189
failAllocation.set(false);
180190

181-
// A non-RESTART node shutdown should reset the counter and allow more relocation retries
182-
final var request = createRequest(randomFrom(SingleNodeShutdownMetadata.Type.values()), node1, node2);
183-
client().execute(PutShutdownNodeAction.INSTANCE, request).get();
184-
if (request.getType() != SingleNodeShutdownMetadata.Type.RESTART) {
185-
ensureGreen("index1");
186-
} else {
191+
if (randomBoolean()) {
192+
// A RESTART marker shouldn't cause a reset of failures
193+
final var request = createRequest(randomFrom(SingleNodeShutdownMetadata.Type.RESTART), node1, null);
194+
client().execute(PutShutdownNodeAction.INSTANCE, request).get();
187195
ensureRed("index1");
188196
var state = clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT).get().getState();
189197
var shard = state.routingTable().index("index1").shard(0).primaryShard();
190198
assertThat(shard, notNullValue());
191199
assertNotNull(shard.unassignedInfo());
192200
assertThat(shard.unassignedInfo().failedAllocations(), equalTo(maxAttempts));
193201
}
202+
203+
// A non-RESTART node shutdown should reset the counter and allow more relocation retries
204+
final var request = createRequest(
205+
randomFrom(
206+
SingleNodeShutdownMetadata.Type.REPLACE,
207+
SingleNodeShutdownMetadata.Type.SIGTERM,
208+
SingleNodeShutdownMetadata.Type.REMOVE
209+
),
210+
node1,
211+
node2
212+
);
213+
client().execute(PutShutdownNodeAction.INSTANCE, request).get();
214+
ensureGreen("index1");
194215
}
195216

196217
public void testResetAllocationFailuresOnNodeShutdownRemovalOfExistingNode() throws Exception {

0 commit comments

Comments
 (0)