Skip to content

Commit 2bfbdfd

Browse files
committed
[SPARK-44153][CORE][UI] Support 'Heap Histogram' column in Executor tab
1 parent cf6e90c commit 2bfbdfd

File tree

9 files changed

+170
-5
lines changed

9 files changed

+170
-5
lines changed

core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ <h4 class="title-table">Executors</h4>
128128
Shuffle Write</span></th>
129129
<th>Logs</th>
130130
<th>Thread Dump</th>
131+
<th>Heap Histogram</th>
131132
<th>Exec Loss Reason</th>
132133
</tr>
133134
</thead>

core/src/main/resources/org/apache/spark/ui/static/executorspage.js

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,25 @@
2020
/* global jQuery, setDataTableDefaults */
2121

2222
var threadDumpEnabled = false;
23+
var heapHistogramEnabled = false;
2324

2425
/* eslint-disable no-unused-vars */
2526
function setThreadDumpEnabled(val) {
2627
threadDumpEnabled = val;
2728
}
29+
function setHeapHistogramEnabled(val) {
30+
heapHistogramEnabled = val;
31+
}
2832
/* eslint-enable no-unused-vars */
2933

3034
function getThreadDumpEnabled() {
3135
return threadDumpEnabled;
3236
}
3337

38+
function getHeapHistogramEnabled() {
39+
return heapHistogramEnabled;
40+
}
41+
3442
function formatLossReason(removeReason) {
3543
if (removeReason) {
3644
return removeReason
@@ -551,6 +559,12 @@ $(document).ready(function () {
551559
return type === 'display' ? ("<a href='threadDump/?executorId=" + data + "'>Thread Dump</a>" ) : data;
552560
}
553561
},
562+
{
563+
name: 'heapHistogramCol',
564+
data: 'id', render: function (data, type) {
565+
return type === 'display' ? ("<a href='heapHistogram/?executorId=" + data + "'>Heap Histogram</a>") : data;
566+
}
567+
},
554568
{
555569
data: 'removeReason',
556570
render: formatLossReason
@@ -566,14 +580,15 @@ $(document).ready(function () {
566580
{"visible": false, "targets": 10},
567581
{"visible": false, "targets": 13},
568582
{"visible": false, "targets": 14},
569-
{"visible": false, "targets": 25}
583+
{"visible": false, "targets": 26}
570584
],
571585
"deferRender": true
572586
};
573587

574588
execDataTable = $(selector).DataTable(conf);
575589
execDataTable.column('executorLogsCol:name').visible(logsExist(response));
576590
execDataTable.column('threadDumpCol:name').visible(getThreadDumpEnabled());
591+
execDataTable.column('heapHistogramCol:name').visible(getHeapHistogramEnabled());
577592
$('#active-executors [data-toggle="tooltip"]').tooltip();
578593

579594
// This section should be visible once API gives the response.
@@ -721,7 +736,7 @@ $(document).ready(function () {
721736
"<div id='direct_mapped_pool_memory' class='direct_mapped_pool_memory-checkbox-div'><input type='checkbox' class='toggle-vis' data-sum-col-idx='' data-exec-col-idx='10'> Peak Pool Memory Direct / Mapped</div>" +
722737
"<div id='extra_resources' class='resources-checkbox-div'><input type='checkbox' class='toggle-vis' data-sum-col-idx='' data-exec-col-idx='13'> Resources</div>" +
723738
"<div id='resource_prof_id' class='resource-prof-id-checkbox-div'><input type='checkbox' class='toggle-vis' data-sum-col-idx='' data-exec-col-idx='14'> Resource Profile Id</div>" +
724-
"<div id='exec_loss_reason' class='exec-loss-reason-checkbox-div'><input type='checkbox' class='toggle-vis' data-sum-col-idx='' data-exec-col-idx='25'> Exec Loss Reason</div>" +
739+
"<div id='exec_loss_reason' class='exec-loss-reason-checkbox-div'><input type='checkbox' class='toggle-vis' data-sum-col-idx='' data-exec-col-idx='26'> Exec Loss Reason</div>" +
725740
"</div>");
726741

727742
reselectCheckboxesBasedOnTaskTableState();

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ import org.apache.spark.shuffle.api.ShuffleDriverComponents
6868
import org.apache.spark.status.{AppStatusSource, AppStatusStore}
6969
import org.apache.spark.status.api.v1.ThreadStackTrace
7070
import org.apache.spark.storage._
71-
import org.apache.spark.storage.BlockManagerMessages.TriggerThreadDump
71+
import org.apache.spark.storage.BlockManagerMessages.{TriggerHeapHistogram, TriggerThreadDump}
7272
import org.apache.spark.ui.{ConsoleProgressBar, SparkUI}
7373
import org.apache.spark.util._
7474
import org.apache.spark.util.logging.DriverLogger
@@ -750,6 +750,30 @@ class SparkContext(config: SparkConf) extends Logging {
750750
}
751751
}
752752

753+
/**
754+
* Called by the web UI to obtain executor heap histogram.
755+
*/
756+
private[spark] def getExecutorHeapHistogram(executorId: String): Option[Array[String]] = {
757+
try {
758+
if (executorId == SparkContext.DRIVER_IDENTIFIER) {
759+
Some(Utils.getHeapHistogram())
760+
} else {
761+
env.blockManager.master.getExecutorEndpointRef(executorId) match {
762+
case Some(endpointRef) =>
763+
Some(endpointRef.askSync[Array[String]](TriggerHeapHistogram))
764+
case None =>
765+
logWarning(s"Executor $executorId might already have stopped and " +
766+
"can not request heap histogram from it.")
767+
None
768+
}
769+
}
770+
} catch {
771+
case e: Exception =>
772+
logError(s"Exception getting heap histogram from executor $executorId", e)
773+
None
774+
}
775+
}
776+
753777
private[spark] def getLocalProperties: Properties = localProperties.get()
754778

755779
private[spark] def setLocalProperties(props: Properties): Unit = {

core/src/main/scala/org/apache/spark/internal/config/UI.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ private[spark] object UI {
9797
.booleanConf
9898
.createWithDefault(true)
9999

100+
val UI_HEAP_HISTOGRAM_ENABLED = ConfigBuilder("spark.ui.heapHistogramEnabled")
101+
.version("3.5.0")
102+
.booleanConf
103+
.createWithDefault(true)
104+
100105
val UI_PROMETHEUS_ENABLED = ConfigBuilder("spark.ui.prometheus.enabled")
101106
.internal()
102107
.doc("Expose executor metrics at /metrics/executors/prometheus. " +

core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ private[spark] object BlockManagerMessages {
5656
*/
5757
case object TriggerThreadDump extends ToBlockManagerMasterStorageEndpoint
5858

59+
/**
60+
* Driver to Executor message to get a heap histogram.
61+
*/
62+
case object TriggerHeapHistogram extends ToBlockManagerMasterStorageEndpoint
63+
5964
//////////////////////////////////////////////////////////////////////////////////
6065
// Messages from storage endpoints to the master.
6166
//////////////////////////////////////////////////////////////////////////////////

core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ class BlockManagerStorageEndpoint(
7878
case TriggerThreadDump =>
7979
context.reply(Utils.getThreadDump())
8080

81+
case TriggerHeapHistogram =>
82+
context.reply(Utils.getHeapHistogram())
83+
8184
case ReplicateBlock(blockId, replicas, maxReplicas) =>
8285
context.reply(blockManager.replicateBlock(blockId, replicas.toSet, maxReplicas))
8386

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.ui.exec
19+
20+
import javax.servlet.http.HttpServletRequest
21+
22+
import scala.xml.{Node, Text}
23+
24+
import org.apache.spark.SparkContext
25+
import org.apache.spark.ui.{SparkUITab, UIUtils, WebUIPage}
26+
27+
private[ui] class ExecutorHeapHistogramPage(
28+
parent: SparkUITab,
29+
sc: Option[SparkContext]) extends WebUIPage("heapHistogram") {
30+
31+
// Match the lines containing object informations
32+
val pattern = """\s*([0-9]+):\s+([0-9]+)\s+([0-9]+)\s+(\S+)(.*)""".r
33+
34+
def render(request: HttpServletRequest): Seq[Node] = {
35+
val executorId = Option(request.getParameter("executorId")).map { executorId =>
36+
UIUtils.decodeURLParameter(executorId)
37+
}.getOrElse {
38+
throw new IllegalArgumentException(s"Missing executorId parameter")
39+
}
40+
val time = System.currentTimeMillis()
41+
val maybeHeapHistogram = sc.get.getExecutorHeapHistogram(executorId)
42+
43+
val content = maybeHeapHistogram.map { heapHistogram =>
44+
val rows = heapHistogram.map { row =>
45+
row match {
46+
case pattern(rank, instances, bytes, name, module) =>
47+
<tr class="accordion-heading">
48+
<td>{rank}</td>
49+
<td>{instances}</td>
50+
<td>{bytes}</td>
51+
<td>{name}</td>
52+
<td>{module}</td>
53+
</tr>
54+
case pattern(rank, instances, bytes, name) =>
55+
<tr class="accordion-heading">
56+
<td>{rank}</td>
57+
<td>{instances}</td>
58+
<td>{bytes}</td>
59+
<td>{name}</td>
60+
<td></td>
61+
</tr>
62+
case _ =>
63+
// Ignore the first two lines and the last line
64+
//
65+
// num #instances #bytes class name (module)
66+
// -------------------------------------------------------
67+
// ...
68+
// Total 1267867 72845688
69+
}
70+
}
71+
<div class="row">
72+
<div class="col-12">
73+
<p>Updated at {UIUtils.formatDate(time)}</p>
74+
<table class={UIUtils.TABLE_CLASS_STRIPED + " accordion-group" + " sortable"}>
75+
<thead>
76+
<th>Rank</th>
77+
<th>Instances</th>
78+
<th>Bytes</th>
79+
<th>Class Name</th>
80+
<th>Module</th>
81+
</thead>
82+
<tbody>{rows}</tbody>
83+
</table>
84+
</div>
85+
</div>
86+
}.getOrElse(Text("Error fetching heap histogram"))
87+
UIUtils.headerSparkPage(request, s"Heap Histogram for Executor $executorId", content, parent)
88+
}
89+
}

core/src/main/scala/org/apache/spark/ui/exec/ExecutorsTab.scala

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,24 @@ private[ui] class ExecutorsTab(parent: SparkUI) extends SparkUITab(parent, "exec
3131
private def init(): Unit = {
3232
val threadDumpEnabled =
3333
parent.sc.isDefined && parent.conf.get(UI_THREAD_DUMPS_ENABLED)
34+
val heapHistogramEnabled =
35+
parent.sc.isDefined && parent.conf.get(UI_HEAP_HISTOGRAM_ENABLED)
3436

35-
attachPage(new ExecutorsPage(this, threadDumpEnabled))
37+
attachPage(new ExecutorsPage(this, threadDumpEnabled, heapHistogramEnabled))
3638
if (threadDumpEnabled) {
3739
attachPage(new ExecutorThreadDumpPage(this, parent.sc))
3840
}
41+
if (heapHistogramEnabled) {
42+
attachPage(new ExecutorHeapHistogramPage(this, parent.sc))
43+
}
3944
}
4045

4146
}
4247

4348
private[ui] class ExecutorsPage(
4449
parent: SparkUITab,
45-
threadDumpEnabled: Boolean)
50+
threadDumpEnabled: Boolean,
51+
heapHistogramEnabled: Boolean)
4652
extends WebUIPage("") {
4753

4854
def render(request: HttpServletRequest): Seq[Node] = {
@@ -52,6 +58,7 @@ private[ui] class ExecutorsPage(
5258
<script src={UIUtils.prependBaseUri(request, "/static/utils.js")}></script> ++
5359
<script src={UIUtils.prependBaseUri(request, "/static/executorspage.js")}></script> ++
5460
<script>setThreadDumpEnabled({threadDumpEnabled})</script>
61+
<script>setHeapHistogramEnabled({heapHistogramEnabled})</script>
5562
}
5663

5764
UIUtils.headerSparkPage(request, "Executors", content, parent, useDataTables = true)

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2287,6 +2287,22 @@ private[spark] object Utils extends Logging with SparkClassUtils {
22872287
}.map(threadInfoToThreadStackTrace)
22882288
}
22892289

2290+
/** Return a heap dump. Used to capture dumps for the web UI */
2291+
def getHeapHistogram(): Array[String] = {
2292+
val pid = String.valueOf(ProcessHandle.current().pid())
2293+
val builder = new ProcessBuilder("jmap", "-histo:live", pid)
2294+
builder.redirectErrorStream(true)
2295+
val p = builder.start()
2296+
val r = new BufferedReader(new InputStreamReader(p.getInputStream()))
2297+
val rows = ArrayBuffer.empty[String]
2298+
var line = ""
2299+
while (line != null) {
2300+
if (line.nonEmpty) rows += line
2301+
line = r.readLine()
2302+
}
2303+
rows.toArray
2304+
}
2305+
22902306
def getThreadDumpForThread(threadId: Long): Option[ThreadStackTrace] = {
22912307
if (threadId <= 0) {
22922308
None

0 commit comments

Comments
 (0)