Skip to content

Commit 2bfc700

Browse files
DesiredBalanceReconciler always returns AllocationStats (#122458)
Ensures that the DesiredBalanceReconciler always returns a non-empty AllocationStats object, eliminating edge cases where the stats available to DesiredBalanceMetrics may not be updated because of throttling or because the balancer has been disabled via cluster settings. Adds documentation around AllocationDecider#canRebalance(RoutingAllocation). Closes ES-10581.
1 parent 388c4a1 commit 2bfc700

File tree

9 files changed

+109
-22
lines changed

9 files changed

+109
-22
lines changed

docs/changelog/122458.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 122458
2+
summary: '`DesiredBalanceReconciler` always returns `AllocationStats`'
3+
area: Allocation
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/routing/RoutingNode.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ public String nodeId() {
112112
return this.nodeId;
113113
}
114114

115+
/**
116+
* Number of shards assigned to this node. Includes relocating shards. Use {@link #numberOfOwningShards()} to exclude relocating shards.
117+
*/
115118
public int size() {
116119
return shards.size();
117120
}

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -515,11 +515,6 @@ private void moveShards() {
515515
}
516516

517517
private DesiredBalanceMetrics.AllocationStats balance() {
518-
// Check if rebalancing is disabled.
519-
if (allocation.deciders().canRebalance(allocation).type() != Decision.Type.YES) {
520-
return DesiredBalanceMetrics.EMPTY_ALLOCATION_STATS;
521-
}
522-
523518
int unassignedShards = routingNodes.unassigned().size() + routingNodes.unassigned().ignored().size();
524519
int totalAllocations = 0;
525520
int undesiredAllocationsExcludingShuttingDownNodes = 0;
@@ -549,9 +544,15 @@ private DesiredBalanceMetrics.AllocationStats balance() {
549544
}
550545

551546
if (allocation.metadata().nodeShutdowns().contains(shardRouting.currentNodeId()) == false) {
547+
// shard is not on a shutting down node, nor is it on a desired node per the previous check.
552548
undesiredAllocationsExcludingShuttingDownNodes++;
553549
}
554550

551+
if (allocation.deciders().canRebalance(allocation).type() != Decision.Type.YES) {
552+
// Rebalancing is disabled, we're just here to collect the AllocationStats to return.
553+
continue;
554+
}
555+
555556
if (allocation.deciders().canRebalance(shardRouting, allocation).type() != Decision.Type.YES) {
556557
// rebalancing disabled for this shard
557558
continue;

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDecider.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNod
7575
}
7676

7777
/**
78-
* Returns a {@link Decision} whether the cluster can execute
79-
* re-balanced operations at all.
80-
* {@link Decision#ALWAYS}.
78+
* Returns a {@link Decision} on whether the cluster is allowed to rebalance shards to improve relative node shard weights and
79+
* performance.
80+
* @return {@link Decision#ALWAYS} is returned by default if not overridden.
8181
*/
8282
public Decision canRebalance(RoutingAllocation allocation) {
8383
return Decision.ALWAYS;

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
8282
);
8383
}
8484

85+
/**
86+
* Returns whether rebalancing (moving shards to improve relative node weights and performance) is allowed right now.
87+
* Rebalancing can be disabled via cluster settings, or throttled by cluster settings (e.g. max concurrent shard moves).
88+
*/
8589
public Decision canRebalance(RoutingAllocation allocation) {
8690
return withDeciders(
8791
allocation,

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation alloca
150150
+ "]"
151151
);
152152

153+
/**
154+
* Rebalancing may be enabled, disabled, or only allowed after all primaries have started, depending on the cluster setting
155+
* {@link #CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING}.
156+
*/
153157
@SuppressWarnings("fallthrough")
154158
@Override
155159
public Decision canRebalance(RoutingAllocation allocation) {

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation alloca
6161
return canRebalance(allocation);
6262
}
6363

64+
/**
65+
* We allow a limited number of concurrent shard relocations, per the cluster setting
66+
* {@link #CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING}.
67+
* Returns a {@link Decision#THROTTLE} decision if the limit is exceeded, otherwise returns {@link Decision#YES}.
68+
*/
6469
@Override
6570
public Decision canRebalance(RoutingAllocation allocation) {
6671
int relocatingShards = allocation.routingNodes().getRelocatingShardCount();

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocat
146146
};
147147
}
148148

149+
/**
150+
* Rebalancing is limited by the {@link Rebalance} value set on the cluster setting {@link #CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING}.
151+
* We might allow movement only of primary shards, or replica shards, or none, or all.
152+
* This method only concerns itself with whether {@link Rebalance#NONE} is set: rebalancing is allowed for all other setting values.
153+
*/
149154
@Override
150155
public Decision canRebalance(RoutingAllocation allocation) {
151156
if (allocation.ignoreDisable()) {
@@ -243,7 +248,7 @@ public String toString() {
243248
}
244249

245250
/**
246-
* Rebalance values or rather their string representation to be used used with
251+
* Rebalance values or rather their string representation to be used with
247252
* {@link EnableAllocationDecider#CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING} /
248253
* {@link EnableAllocationDecider#INDEX_ROUTING_REBALANCE_ENABLE_SETTING}
249254
* via cluster / index settings.

0 commit comments

Comments
 (0)