Skip to content

Commit ea87aa2

Browse files
authored
YARN-11037. Add configurable logic to split resource request to the least loaded SC. (#5515)
1 parent 655c3df commit ea87aa2

File tree

13 files changed

+623
-21
lines changed

13 files changed

+623
-21
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/EnhancedHeadroom.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,9 @@ public String toString() {
6969
sb.append(">");
7070
return sb.toString();
7171
}
72+
73+
public double getNormalizedPendingCount(long multiplier) {
74+
int totalPendingCount = getTotalPendingCount();
75+
return (double) totalPendingCount * multiplier;
76+
}
7277
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4058,6 +4058,45 @@ public static boolean isAclEnabled(Configuration conf) {
40584058
public static final long DEFAULT_FEDERATION_AMRMPROXY_SUBCLUSTER_TIMEOUT =
40594059
60000; // one minute
40604060

4061+
// Prefix for configs related to selecting SC based on load
4062+
public static final String LOAD_BASED_SC_SELECTOR_PREFIX =
4063+
NM_PREFIX + "least-load-policy-selector.";
4064+
4065+
// Config to enable re-rerouting node requests base on SC load
4066+
public static final String LOAD_BASED_SC_SELECTOR_ENABLED =
4067+
LOAD_BASED_SC_SELECTOR_PREFIX + "enabled";
4068+
public static final boolean DEFAULT_LOAD_BASED_SC_SELECTOR_ENABLED = false;
4069+
4070+
// Pending container threshold for selecting SC
4071+
public static final String LOAD_BASED_SC_SELECTOR_THRESHOLD =
4072+
LOAD_BASED_SC_SELECTOR_PREFIX + "pending-container.threshold";
4073+
public static final int DEFAULT_LOAD_BASED_SC_SELECTOR_THRESHOLD = 10000;
4074+
4075+
// Whether to consider total number of active cores in the subcluster for load
4076+
public static final String LOAD_BASED_SC_SELECTOR_USE_ACTIVE_CORE =
4077+
LOAD_BASED_SC_SELECTOR_PREFIX + "use-active-core";
4078+
public static final boolean DEFAULT_LOAD_BASED_SC_SELECTOR_USE_ACTIVE_CORE = false;
4079+
4080+
// multiplier to normalize pending container to active cores
4081+
public static final String LOAD_BASED_SC_SELECTOR_MULTIPLIER =
4082+
LOAD_BASED_SC_SELECTOR_PREFIX + "multiplier";
4083+
public static final int DEFAULT_LOAD_BASED_SC_SELECTOR_MULTIPLIER = 50000;
4084+
4085+
// max count to maintain for container allocation history
4086+
public static final String FEDERATION_ALLOCATION_HISTORY_MAX_ENTRY =
4087+
FEDERATION_PREFIX + "amrmproxy.allocation.history.max.entry";
4088+
public static final int DEFAULT_FEDERATION_ALLOCATION_HISTORY_MAX_ENTRY = 100;
4089+
4090+
// Whether to fail directly if activeSubCluster is less than 1.
4091+
public static final String LOAD_BASED_SC_SELECTOR_FAIL_ON_ERROR =
4092+
LOAD_BASED_SC_SELECTOR_PREFIX + "fail-on-error";
4093+
public static final boolean DEFAULT_LOAD_BASED_SC_SELECTOR_FAIL_ON_ERROR = true;
4094+
4095+
// Blacklisted subClusters.
4096+
public static final String FEDERATION_BLACKLIST_SUBCLUSTERS =
4097+
LOAD_BASED_SC_SELECTOR_PREFIX + "blacklist-subclusters";
4098+
public static final String DEFAULT_FEDERATION_BLACKLIST_SUBCLUSTERS = "";
4099+
40614100
// AMRMProxy Register UAM Retry-Num
40624101
public static final String FEDERATION_AMRMPROXY_REGISTER_UAM_RETRY_COUNT =
40634102
FEDERATION_PREFIX + "amrmproxy.register.uam.retry-count";

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5558,4 +5558,84 @@
55585558
<value>0.0.0.0:8070</value>
55595559
</property>
55605560

5561+
<property>
5562+
<description>
5563+
This configuration will enable request rerouting according to the load of the subCluster.
5564+
If it is true, it will reroute the request according to the load of the subCluster.
5565+
The default configuration is false.
5566+
</description>
5567+
<name>yarn.nodemanager.least-load-policy-selector.enabled</name>
5568+
<value>false</value>
5569+
</property>
5570+
5571+
<property>
5572+
<description>
5573+
SubCluster pending container threshold. The default value is 10000.
5574+
This configuration will determine the load weight of a subCluster.
5575+
For SC with pending containers count bigger than container threshold / 2,
5576+
use threshold / pending as weight.
5577+
For SC with pending containers count less than threshold / 2, we cap the weight at 2.
5578+
</description>
5579+
<name>yarn.nodemanager.least-load-policy-selector.pending-container.threshold</name>
5580+
<value>10000</value>
5581+
</property>
5582+
5583+
<property>
5584+
<description>
5585+
Whether to consider the configured vcores when calculating the subCluster load.
5586+
The default value is false, we only consider the number of cluster pending containers.
5587+
If this configuration item is set to true, This configuration item needs to be used together
5588+
with yarn.nodemanager.least-load-policy-selector.multiplier. We will use the following formula
5589+
when calculating subCluster pending.
5590+
pendingContainersCountNormalize = (totalPendingContainersCount * multiplier) / totalActiveCores.
5591+
</description>
5592+
<name>yarn.nodemanager.least-load-policy-selector.use-active-core</name>
5593+
<value>false</value>
5594+
</property>
5595+
5596+
<property>
5597+
<description>
5598+
Max count to maintain for container allocation history.
5599+
</description>
5600+
<name>yarn.federation.amrmproxy.allocation.history.max.entry</name>
5601+
<value>100</value>
5602+
</property>
5603+
5604+
<property>
5605+
<description>
5606+
Whether to fail directly if activeSubCluster is less than 1.
5607+
The default is true.
5608+
If We set to false, We will try to re-fetch activeSubCluster list.
5609+
</description>
5610+
<name>yarn.nodemanager.least-load-policy-selector.fail-on-error</name>
5611+
<value>true</value>
5612+
</property>
5613+
5614+
<property>
5615+
<description>
5616+
The subCluster configured in the blacklist will not be forwarded requests.
5617+
The default value is empty.
5618+
</description>
5619+
<name>yarn.nodemanager.least-load-policy-selector.blacklist-subclusters</name>
5620+
<value></value>
5621+
</property>
5622+
5623+
<property>
5624+
<description>
5625+
Max count to maintain for container allocation history.
5626+
</description>
5627+
<name>yarn.federation.amrmproxy.allocation.history.max.entry</name>
5628+
<value>100</value>
5629+
</property>
5630+
5631+
<property>
5632+
<description>
5633+
This configuration will be used
5634+
when yarn.nodemanager.least-load-policy-selector.use-active-core is set to true.
5635+
The purpose of this value is to help normalize the pendingContainersCount.
5636+
</description>
5637+
<name>yarn.nodemanager.least-load-policy-selector.multiplier</name>
5638+
<value>50000</value>
5639+
</property>
5640+
55615641
</configuration>

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.TreeSet;
2929

3030
import org.apache.hadoop.HadoopIllegalArgumentException;
31+
import org.apache.hadoop.conf.Configuration;
3132
import org.apache.hadoop.ipc.RPC;
3233
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
3334
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
@@ -132,16 +133,23 @@ public class AMRMClientRelayer implements ApplicationMasterProtocol {
132133

133134
private AMRMClientRelayerMetrics metrics;
134135

136+
private ContainerAllocationHistory allocationHistory;
137+
135138
public AMRMClientRelayer(ApplicationMasterProtocol rmClient,
136139
ApplicationId appId, String rmId) {
137140
this.resetResponseId = -1;
138141
this.metrics = AMRMClientRelayerMetrics.getInstance();
139-
this.rmId = "";
140142
this.rmClient = rmClient;
141143
this.appId = appId;
142144
this.rmId = rmId;
143145
}
144146

147+
public AMRMClientRelayer(ApplicationMasterProtocol rmClient,
148+
ApplicationId appId, String rmId, Configuration conf) {
149+
this(rmClient, appId, rmId);
150+
this.allocationHistory = new ContainerAllocationHistory(conf);
151+
}
152+
145153
public void setAMRegistrationRequest(
146154
RegisterApplicationMasterRequest registerRequest) {
147155
this.amRegistrationRequest = registerRequest;
@@ -444,6 +452,8 @@ private void updateMetrics(AllocateResponse allocateResponse,
444452
if (this.knownContainers.add(container.getId())) {
445453
this.metrics.addFulfilledQPS(this.rmId, AMRMClientRelayerMetrics
446454
.getRequestType(container.getExecutionType()), 1);
455+
long currentTime = System.currentTimeMillis();
456+
long fulfillLatency = -1;
447457
if (container.getAllocationRequestId() != 0) {
448458
Integer count = this.pendingCountForMetrics
449459
.get(container.getAllocationRequestId());
@@ -453,13 +463,14 @@ private void updateMetrics(AllocateResponse allocateResponse,
453463
this.metrics.decrClientPending(this.rmId,
454464
AMRMClientRelayerMetrics
455465
.getRequestType(container.getExecutionType()), 1);
456-
this.metrics.addFulfillLatency(this.rmId,
457-
AMRMClientRelayerMetrics
458-
.getRequestType(container.getExecutionType()),
459-
System.currentTimeMillis() - this.askTimeStamp
460-
.get(container.getAllocationRequestId()));
466+
fulfillLatency = currentTime - this.askTimeStamp.get(
467+
container.getAllocationRequestId());
468+
AMRMClientRelayerMetrics.RequestType requestType = AMRMClientRelayerMetrics
469+
.getRequestType(container.getExecutionType());
470+
this.metrics.addFulfillLatency(this.rmId, requestType, fulfillLatency);
461471
}
462472
}
473+
addAllocationHistoryEntry(container, currentTime, fulfillLatency);
463474
}
464475
}
465476
}
@@ -576,6 +587,38 @@ private void addResourceRequestToAsk(ResourceRequest remoteRequest) {
576587
this.ask.add(remoteRequest);
577588
}
578589

590+
public ContainerAllocationHistory getAllocationHistory() {
591+
return this.allocationHistory;
592+
}
593+
594+
private void addAllocationHistoryEntry(Container container, long fulfillTimeStamp,
595+
long fulfillLatency) {
596+
ResourceRequestSetKey key = ResourceRequestSetKey.extractMatchingKey(container,
597+
this.remotePendingAsks.keySet());
598+
if (key == null) {
599+
LOG.info("allocation history ignoring {}, no matching request key found.", container);
600+
return;
601+
}
602+
this.allocationHistory.addAllocationEntry(container, this.remotePendingAsks.get(key),
603+
fulfillTimeStamp, fulfillLatency);
604+
}
605+
606+
public void gatherReadOnlyPendingAsksInfo(Map<ResourceRequestSetKey,
607+
ResourceRequestSet> pendingAsks, Map<ResourceRequestSetKey, Long> pendingTime) {
608+
pendingAsks.clear();
609+
pendingTime.clear();
610+
synchronized (this) {
611+
pendingAsks.putAll(this.remotePendingAsks);
612+
for (ResourceRequestSetKey key : pendingAsks.keySet()) {
613+
Long startTime = this.askTimeStamp.get(key.getAllocationRequestId());
614+
if (startTime != null) {
615+
long elapsedMs = System.currentTimeMillis() - startTime;
616+
pendingTime.put(key, elapsedMs);
617+
}
618+
}
619+
}
620+
}
621+
579622
@VisibleForTesting
580623
protected Map<ResourceRequestSetKey, ResourceRequestSet>
581624
getRemotePendingAsks() {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* <p>
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
* <p>
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.yarn.server;
19+
20+
import java.util.AbstractMap;
21+
import java.util.LinkedList;
22+
import java.util.Map.Entry;
23+
import java.util.Queue;
24+
25+
import org.apache.hadoop.conf.Configuration;
26+
import org.apache.hadoop.yarn.api.records.Container;
27+
import org.apache.hadoop.yarn.conf.YarnConfiguration;
28+
import org.apache.hadoop.yarn.server.scheduler.ResourceRequestSet;
29+
import org.slf4j.Logger;
30+
import org.slf4j.LoggerFactory;
31+
32+
/**
33+
* Records the allocation history from YarnRM and provide aggregated insights.
34+
*/
35+
public class ContainerAllocationHistory {
36+
private static final Logger LOG = LoggerFactory.getLogger(AMRMClientRelayer.class);
37+
38+
private int maxEntryCount;
39+
40+
// Allocate timing history <AllocateTimeStamp, AllocateLatency>
41+
private Queue<Entry<Long, Long>> relaxableG = new LinkedList<>();
42+
43+
public ContainerAllocationHistory(Configuration conf) {
44+
this.maxEntryCount = conf.getInt(
45+
YarnConfiguration.FEDERATION_ALLOCATION_HISTORY_MAX_ENTRY,
46+
YarnConfiguration.DEFAULT_FEDERATION_ALLOCATION_HISTORY_MAX_ENTRY);
47+
}
48+
49+
/**
50+
* Record the allocation history for the container.
51+
*
52+
* @param container to add record for
53+
* @param requestSet resource request ask set
54+
* @param fulfillTimeStamp time at which allocation happened
55+
* @param fulfillLatency time elapsed in allocating since asked
56+
*/
57+
public synchronized void addAllocationEntry(Container container,
58+
ResourceRequestSet requestSet, long fulfillTimeStamp, long fulfillLatency){
59+
if (!requestSet.isANYRelaxable()) {
60+
LOG.info("allocation history ignoring {}, relax locality is false", container);
61+
return;
62+
}
63+
this.relaxableG.add(new AbstractMap.SimpleEntry<>(
64+
fulfillTimeStamp, fulfillLatency));
65+
if (this.relaxableG.size() > this.maxEntryCount) {
66+
this.relaxableG.remove();
67+
}
68+
}
69+
}

0 commit comments

Comments
 (0)