
Commit e90473e: kafka slaves
1 parent 03fa1f0

7 files changed: +388 -0 lines changed
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

spark.mesos.executor.docker.image: <image built from `../external/docker/spark-mesos/Dockerfile`>
spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro
spark.mesos.executor.home: /opt/spark
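These keys are ordinary Spark configuration properties, so the same values can also be set on a SparkConf instead of through this file. A minimal Scala sketch, where the image tag is a hypothetical placeholder (the template expects one built from `../external/docker/spark-mesos/Dockerfile`):

import org.apache.spark.SparkConf

// "example/spark-mesos:latest" is a placeholder image tag; substitute one
// built from ../external/docker/spark-mesos/Dockerfile as the template suggests.
val conf = new SparkConf()
  .set("spark.mesos.executor.docker.image", "example/spark-mesos:latest")
  .set("spark.mesos.executor.docker.volumes", "/usr/local/lib:/host/usr/local/lib:ro")
  .set("spark.mesos.executor.home", "/opt/spark")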
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
<?xml version="1.0"?>

<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<allocations>
  <pool name="production">
    <schedulingMode>FAIR</schedulingMode>
    <weight>1</weight>
    <minShare>2</minShare>
  </pool>
  <pool name="test">
    <schedulingMode>FIFO</schedulingMode>
    <weight>2</weight>
    <minShare>3</minShare>
  </pool>
</allocations>
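These pools only take effect once an application enables fair scheduling and assigns jobs to a pool. A short Scala sketch using Spark's standard scheduler properties; the allocation-file path here is an assumption, point it at wherever this file lives:

import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setAppName("fair-pools-example")
  .set("spark.scheduler.mode", "FAIR")
  // Assumed path to a copy of the allocation file above.
  .set("spark.scheduler.allocation.file", "/opt/spark/conf/fairscheduler.xml")
val sc = new SparkContext(conf)

// Jobs submitted from this thread now run in the "production" pool.
sc.setLocalProperty("spark.scheduler.pool", "production")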
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Set the default spark-shell log level to WARN. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=WARN

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
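The same levels can also be adjusted per application at runtime, without editing this file. A small Scala sketch against the log4j 1.x API that this configuration targets:

import org.apache.log4j.{Level, Logger}

// Programmatic equivalents of a few of the lines above, e.g. from driver code.
Logger.getRootLogger.setLevel(Level.INFO)
Logger.getLogger("org.spark_project.jetty").setLevel(Level.WARN)
Logger.getLogger("org.apache.parquet").setLevel(Level.ERROR)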
Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# syntax: [instance].sink|source.[name].[options]=[value]

# This file configures Spark's internal metrics system. The metrics system is
# divided into instances which correspond to internal components.
# Each instance can be configured to report its metrics to one or more sinks.
# Accepted values for [instance] are "master", "worker", "executor", "driver",
# and "applications". A wildcard "*" can be used as an instance name, in
# which case all instances will inherit the supplied property.
#
# Within an instance, a "source" specifies a particular set of grouped metrics.
# There are two kinds of sources:
# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
#    collect a Spark component's internal state. Each instance is paired with a
#    Spark source that is added automatically.
# 2. Common sources, like JvmSource, which will collect low level state.
#    These can be added through configuration options and are then loaded
#    using reflection.
#
# A "sink" specifies where metrics are delivered to. Each instance can be
# assigned one or more sinks.
#
# The sink|source field specifies whether the property relates to a sink or
# source.
#
# The [name] field specifies the name of source or sink.
#
# The [options] field is the specific property of this source or sink. The
# source or sink is responsible for parsing this property.
#
# Notes:
# 1. To add a new sink, set the "class" option to a fully qualified class
#    name (see examples below).
# 2. Some sinks involve a polling period. The minimum allowed polling period
#    is 1 second.
# 3. Wildcard properties can be overridden by more specific properties.
#    For example, master.sink.console.period takes precedence over
#    *.sink.console.period.
# 4. A metrics specific configuration
#    "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
#    added to Java properties using -Dspark.metrics.conf=xxx if you want to
#    customize metrics system. You can also put the file in ${SPARK_HOME}/conf
#    and it will be loaded automatically.
# 5. The MetricsServlet sink is added by default as a sink in the master,
#    worker and driver, and you can send HTTP requests to the "/metrics/json"
#    endpoint to get a snapshot of all the registered metrics in JSON format.
#    For master, requests to the "/metrics/master/json" and
#    "/metrics/applications/json" endpoints can be sent separately to get
#    metrics snapshots of the master instance and applications. This
#    MetricsServlet does not have to be configured.

## List of available common sources and their properties.

# org.apache.spark.metrics.source.JvmSource
#   Note: Currently, JvmSource is the only available common source.
#   It can be added to an instance by setting the "class" option to its
#   fully qualified class name (see examples below).

## List of available sinks and their properties.

# org.apache.spark.metrics.sink.ConsoleSink
#   Name:    Default:   Description:
#   period   10         Poll period
#   unit     seconds    Unit of the poll period

# org.apache.spark.metrics.sink.CsvSink
#   Name:      Default:   Description:
#   period     10         Poll period
#   unit       seconds    Unit of the poll period
#   directory  /tmp       Where to store CSV files

# org.apache.spark.metrics.sink.GangliaSink
#   Name:    Default:    Description:
#   host     NONE        Hostname or multicast group of the Ganglia server,
#                        must be set
#   port     NONE        Port of the Ganglia server(s), must be set
#   period   10          Poll period
#   unit     seconds     Unit of the poll period
#   ttl      1           TTL of messages sent by Ganglia
#   dmax     0           Lifetime in seconds of metrics (0 never expired)
#   mode     multicast   Ganglia network mode ('unicast' or 'multicast')

# org.apache.spark.metrics.sink.JmxSink

# org.apache.spark.metrics.sink.MetricsServlet
#   Name:    Default:   Description:
#   path     VARIES*    Path prefix from the web server root
#   sample   false      Whether to show entire set of samples for histograms
#                       ('false' or 'true')
#
#   * Default path is /metrics/json for all instances except the master. The
#     master has two paths:
#       /metrics/applications/json   # App information
#       /metrics/master/json         # Master information

# org.apache.spark.metrics.sink.GraphiteSink
#   Name:      Default:       Description:
#   host       NONE           Hostname of the Graphite server, must be set
#   port       NONE           Port of the Graphite server, must be set
#   period     10             Poll period
#   unit       seconds        Unit of the poll period
#   prefix     EMPTY STRING   Prefix to prepend to every metric's name
#   protocol   tcp            Protocol ("tcp" or "udp") to use

# org.apache.spark.metrics.sink.StatsdSink
#   Name:     Default:       Description:
#   host      127.0.0.1      Hostname or IP of StatsD server
#   port      8125           Port of StatsD server
#   period    10             Poll period
#   unit      seconds        Units of poll period
#   prefix    EMPTY STRING   Prefix to prepend to metric name

## Examples
# Enable JmxSink for all instances by class name
#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink

# Enable ConsoleSink for all instances by class name
#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink

# Enable StatsdSink for all instances by class name
#*.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink
#*.sink.statsd.prefix=spark

# Polling period for the ConsoleSink
#*.sink.console.period=10
# Unit of the polling period for the ConsoleSink
#*.sink.console.unit=seconds

# Polling period for the ConsoleSink specific for the master instance
#master.sink.console.period=15
# Unit of the polling period for the ConsoleSink specific for the master
# instance
#master.sink.console.unit=seconds

# Enable CsvSink for all instances by class name
#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink

# Polling period for the CsvSink
#*.sink.csv.period=1
# Unit of the polling period for the CsvSink
#*.sink.csv.unit=minutes

# Polling directory for CsvSink
#*.sink.csv.directory=/tmp/

# Polling period for the CsvSink specific for the worker instance
#worker.sink.csv.period=10
# Unit of the polling period for the CsvSink specific for the worker instance
#worker.sink.csv.unit=minutes

# Enable Slf4jSink for all instances by class name
#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink

# Polling period for the Slf4jSink
#*.sink.slf4j.period=1
# Unit of the polling period for the Slf4jSink
#*.sink.slf4j.unit=minutes

# Enable JvmSource for instance master, worker, driver and executor
#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
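Note 4 above is the hook most applications use: point spark.metrics.conf at a file like this one. A brief Scala sketch of the programmatic equivalent; the path is an assumption:

import org.apache.spark.SparkConf

// Equivalent to passing -Dspark.metrics.conf=... as a Java system property.
val conf = new SparkConf()
  .set("spark.metrics.conf", "/opt/spark/conf/metrics.properties")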
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# A Spark Worker will be started on each of the machines listed below.
localhost
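On a multi-node standalone cluster this file would instead list one worker host per line, in the same format; the hostnames below are hypothetical:

worker1.example.com
worker2.example.com
worker3.example.com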
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
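Anything set here is only a default: values set explicitly on a SparkConf (or passed via spark-submit --conf) take precedence over this file. A Scala sketch of the programmatic counterparts of the example lines above:

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setMaster("spark://master:7077")  // wins over spark.master from spark-defaults.conf
  .set("spark.eventLog.enabled", "true")
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.driver.memory", "5g")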
Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in YARN client/cluster mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)

# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
# - SPARK_PUBLIC_DNS, to set the public DNS name of the master or workers

# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR       Where log files are stored. (Default: ${SPARK_HOME}/logs)
# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
# You might get better performance by enabling these options if you use native BLAS (see SPARK-21305).
# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS
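Several of the executor and driver knobs above also have spark.* configuration equivalents that can be set without editing this script. A short Scala sketch; the values are examples, and the mappings are approximate rather than exact in every deploy mode:

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.executor.cores", "2")    // roughly SPARK_EXECUTOR_CORES
  .set("spark.executor.memory", "2g")  // roughly SPARK_EXECUTOR_MEMORY
  .set("spark.driver.memory", "2g")    // roughly SPARK_DRIVER_MEMORY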
