Skip to content

Commit 8d5ec52

Browse files
committed
Initial commit for Hadoop package
1 parent a2452c7 commit 8d5ec52

File tree

69 files changed

+5775
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+5775
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
dependencies:
2+
ecs:
3+
reference: git@8.0
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Hadoop
2+
3+
The Hadoop integration collects and parses data from the Hadoop Events APIs and from JMX using the Jolokia Metricbeat module.
4+
5+
## Compatibility
6+
7+
This module has been tested against `Hadoop version 3.3.1`.
8+
9+
## Requirements
10+
11+
In order to ingest data from Hadoop, you must know the full host addresses for the NameNode, DataNode, Cluster Metrics, Node Manager, and the Hadoop Events API.
12+
13+
## Metrics
14+
15+
### Application Metrics
16+
17+
This is the `application_metrics` dataset.
18+
19+
{{event "application_metrics"}}
20+
21+
{{fields "application_metrics"}}
22+
23+
### Expanded Cluster Metrics
24+
25+
This is the `expanded_cluster_metrics` dataset.
26+
27+
{{event "expanded_cluster_metrics"}}
28+
29+
{{fields "expanded_cluster_metrics"}}
30+
31+
### Jolokia Metrics
32+
33+
This is the `jolokia_metrics` dataset.
34+
35+
{{event "jolokia_metrics"}}
36+
37+
{{fields "jolokia_metrics"}}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# syntax=docker/dockerfile:1
# Single-node Hadoop dev/test image with Jolokia JVM agents attached so the
# Jolokia Metricbeat module can scrape JMX metrics.

# Pin the base tag: `centos:latest` is unpinned and drifts between releases.
FROM centos:centos8

# Hadoop version to install; override with --build-arg SERVICE_VERSION=…
ARG SERVICE_VERSION=3.3.1
ARG HADOOP_VERSION=${SERVICE_VERSION}

ARG JAVA_VERSION=8
# "JDK" installs the -devel package; anything else installs the headless JRE.
ARG JAVA_RELEASE=JDK

RUN set -eux && \
    pkg="java-1.$JAVA_VERSION.0-openjdk" && \
    if [ "$JAVA_RELEASE" = "JDK" ]; then \
        pkg="$pkg-devel"; \
    else \
        pkg="$pkg-headless"; \
    fi && \
    yum install -y "$pkg" && \
    yum clean all && \
    rm -rf /var/cache/yum

COPY profile.d/java.sh /etc/profile.d/

ENV JAVA_HOME=/usr

ARG TAR=hadoop-$HADOOP_VERSION.tar.gz

# key=value form; the space-separated ENV syntax is deprecated.
ENV PATH="$PATH:/hadoop/bin"

LABEL Description="Hadoop Dev" \
      "Hadoop Version"="$HADOOP_VERSION"

WORKDIR /

# Hadoop's start scripts drive the daemons over ssh to localhost.
RUN set -eux && \
    yum install -y openssh-server openssh-clients tar which && \
    yum clean all && \
    rm -rf /var/cache/yum

ENV JOLOKIA_VERSION=1.6.0 JOLOKIA_HOST=0.0.0.0 JOLOKIA_PORT=7778
ENV HADOOP_LATEST_VERSION=${SERVICE_VERSION}

# Download the Jolokia agent and the Hadoop tarball (mirror, then archive
# fallback). The mirror/archive pair is grouped in { }: `&&` binds tighter
# than `||`, so without the group a failed Jolokia download would be silently
# swallowed by the `||` branch. HTTPS instead of plain HTTP.
RUN set -eux && \
    yum install -y wget hostname && \
    wget "https://search.maven.org/remotecontent?filepath=org/jolokia/jolokia-jvm/${JOLOKIA_VERSION}/jolokia-jvm-${JOLOKIA_VERSION}-agent.jar" -O "jolokia-jvm-${JOLOKIA_VERSION}-agent.jar" && \
    { wget -t 10 --max-redirect 1 --retry-connrefused -O "$TAR" "https://www.apache.org/dyn/closer.lua?filename=hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-$HADOOP_VERSION.tar.gz&action=download" || \
      wget -t 10 --max-redirect 1 --retry-connrefused -O "$TAR" "https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-$HADOOP_VERSION.tar.gz"; } && \
    tar zxf "$TAR" && \
    test -d "hadoop-$HADOOP_VERSION" && \
    ln -sv "hadoop-$HADOOP_VERSION" hadoop && \
    mkdir /etc/hadoop && \
    ln -s /hadoop/etc/hadoop /etc/hadoop/conf && \
    rm -fv "$TAR" && \
    { rm -rf hadoop/share/doc; : ; } && \
    yum autoremove -y && \
    yum install -y hostname && \
    yum clean all && \
    rm -rf /var/cache/yum
# NOTE: hostname gets autoremoved above and is reinstalled because the Hadoop
# scripts need it.

ENV JMX_REMOTE=yes JOLOKIA_ENABLED=yes

COPY entrypoint.sh /
COPY conf/core-site.xml /hadoop/etc/hadoop/
COPY conf/hdfs-site.xml /hadoop/etc/hadoop/
COPY conf/yarn-site.xml /hadoop/etc/hadoop/
COPY conf/mapred-site.xml /hadoop/etc/hadoop/
COPY profile.d/hadoop.sh /etc/profile.d/
COPY ssh/config /root/.ssh/

# Pre-format the NameNode and create the hdfs/yarn service accounts plus the
# directory layout the daemons expect. SSH key material itself is generated
# at runtime by entrypoint.sh.
RUN set -eux && \
    /hadoop/bin/hdfs namenode -format && \
    groupadd hadoop && \
    useradd -g hadoop hdfs && \
    useradd -g hadoop yarn && \
    mkdir -p /dfs/name /dfs/data /hadoop/logs && \
    chown -R hdfs:hadoop /dfs/name /dfs/data && \
    chgrp -R hadoop /hadoop/logs && \
    chmod -R 0770 /hadoop/logs && \
    mkdir -p /root/.ssh /home/hdfs/.ssh /home/yarn/.ssh && \
    chown hdfs /home/hdfs/.ssh && \
    chown yarn /home/yarn/.ssh && \
    chmod 0700 /root/.ssh /home/hdfs/.ssh /home/yarn/.ssh

# Hadoop 3 refuses to start a daemon unless its run-as user is declared.
ENV HDFS_NAMENODE_USER=hdfs
ENV HDFS_SECONDARYNAMENODE_USER=hdfs
ENV HDFS_DATANODE_USER=hdfs
ENV YARN_RESOURCEMANAGER_USER=yarn
ENV YARN_NODEMANAGER_USER=yarn

# /run/nologin blocks SSH logins in systemd-less containers; remove it.
# NOTE(review): `chmod -R 600 /etc/crypto-policies/` also strips the execute
# bit from the directories, making the system crypto policy unreadable —
# presumably intentional to allow legacy SSH key types; confirm before changing.
RUN rm -rf /run/nologin && \
    chmod -R 600 /etc/crypto-policies/

CMD ["bash", "/entrypoint.sh"]
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Core Hadoop settings for the dev container. entrypoint.sh rewrites
     "localhost" to the container's FQDN at startup (sed on this file). -->
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:8020</value>
  </property>
</configuration>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- HDFS settings for a single-node dev container. -->
<configuration>
  <!-- Only one DataNode exists, so only one replica is possible. -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- Directories created and chowned to hdfs:hadoop by the Dockerfile. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/dfs/data</value>
  </property>
  <!-- "dfs.permissions" is the deprecated property name;
       "dfs.permissions.enabled" is the current (Hadoop 2+/3+) key.
       Disabled so test clients need no HDFS permission setup. -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
</configuration>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- MapReduce settings: run MR jobs on YARN, and point the AM/map/reduce
     JVMs at the Hadoop install via HADOOP_MAPRED_HOME. -->
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
</configuration>
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0"?>
<!-- YARN NodeManager settings for a small single-node container. -->
<configuration>
  <!-- Shuffle aux-service needed for MapReduce jobs. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Tolerate a completely full disk so the NodeManager is not marked
       unhealthy inside space-constrained test environments. -->
  <property>
    <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
    <value>100</value>
  </property>
  <!-- Cap container scheduling to 1 vcore / 3 GiB. -->
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>3072</value>
  </property>
</configuration>
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Compose service for local Hadoop testing. Only the container side of each
# port mapping is given, so the host ports are assigned dynamically.
version: '3'
services:
  hadoop:
    build: .
    ports:
      - "7777"  # Jolokia agent attached to the HDFS NameNode
      - "7779"  # Jolokia agent attached to the HDFS DataNode
      - "8088"  # presumably the YARN ResourceManager web UI — confirm
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/env bash
# Entrypoint for the Hadoop dev container.
#
# With arguments: exec them verbatim (docker run IMAGE <cmd>).
# Without arguments: bootstrap passwordless SSH, attach the Jolokia JVM
# agents, format HDFS, start the HDFS + YARN daemons, run a sample wordcount
# job, then tail the daemon logs until the container is stopped.
set -euo pipefail
[ -n "${DEBUG:-}" ] && set -x

export JAVA_HOME="${JAVA_HOME:-/usr}"
export PATH="$PATH:/hadoop/sbin:/hadoop/bin"

if [ $# -gt 0 ]; then
    exec "$@"
else
    # Hadoop's start scripts ssh to localhost as each service user, so every
    # account needs a key pair with its own key authorized.
    for user in root hdfs yarn; do
        # Resolve the real home directory (/root vs /home/<user>). The
        # original tested the cwd-relative path "<user>/.ssh/id_rsa", which
        # never matched the actual key location, so keys were regenerated on
        # every start.
        home="$(getent passwd "$user" | cut -d: -f6)"
        if ! [ -f "$home/.ssh/id_rsa" ]; then
            su - "$user" <<EOF
[ -n "${DEBUG:-}" ] && set -x
ssh-keygen -t rsa -f ~/.ssh/id_rsa -N ""
EOF
        fi
        if ! [ -f "$home/.ssh/authorized_keys" ]; then
            su - "$user" <<EOF
[ -n "${DEBUG:-}" ] && set -x
cp -v ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
chmod -v 0400 ~/.ssh/authorized_keys
EOF
        fi
    done

    # sshd-keygen was removed in newer versions of CentOS; fall back to
    # generating the host key by hand.
    if ! [ -f /etc/ssh/ssh_host_rsa_key ] && [ -x /usr/sbin/sshd-keygen ]; then
        /usr/sbin/sshd-keygen || :
    fi
    if ! [ -f /etc/ssh/ssh_host_rsa_key ]; then
        ssh-keygen -q -t rsa -f /etc/ssh/ssh_host_rsa_key -C '' -N ''
        chmod 0600 /etc/ssh/ssh_host_rsa_key
        chmod 0644 /etc/ssh/ssh_host_rsa_key.pub
    fi

    if ! pgrep -x sshd &>/dev/null; then
        /usr/sbin/sshd
    fi
    echo
    # Wait (up to 20s) for sshd to answer before the start scripts need it.
    SECONDS=0
    while true; do
        if ssh-keyscan localhost 2>&1 | grep -q OpenSSH; then
            echo "SSH is ready to rock"
            break
        fi
        if [ "$SECONDS" -gt 20 ]; then
            echo "FAILED: SSH failed to come up after 20 secs"
            exit 1
        fi
        echo "waiting for SSH to come up"
        sleep 1
    done
    echo

    # Pre-trust localhost / 0.0.0.0 / our FQDN so the start scripts are not
    # blocked by interactive host-key prompts.
    if ! [ -f /root/.ssh/known_hosts ]; then
        ssh-keyscan localhost || :
        ssh-keyscan 0.0.0.0 || :
    fi | tee -a /root/.ssh/known_hosts
    hostname="$(hostname -f)"
    if ! grep -q "$hostname" /root/.ssh/known_hosts; then
        ssh-keyscan "$hostname" || :
    fi | tee -a /root/.ssh/known_hosts

    mkdir -pv /hadoop/logs

    if [ "$JOLOKIA_ENABLED" = 'yes' ]; then
        hadoop_env="/hadoop-${HADOOP_LATEST_VERSION}/etc/hadoop/hadoop-env.sh"
        agent="/jolokia-jvm-${JOLOKIA_VERSION}-agent.jar"
        # One Jolokia agent per daemon, each on its own port. The original
        # wrote the two YARN entries to /${HADOOP_LATEST_VERSION}/… (missing
        # the "hadoop-" prefix), so the ResourceManager/NodeManager agents
        # were never configured; all four now target the same hadoop-env.sh.
        # The broken nested quoting and the no-op backtick `echo` are gone:
        # each line lands as  export VAR="-javaagent:…"
        {
            echo "export HDFS_NAMENODE_OPTS=\"-javaagent:${agent}=host=${JOLOKIA_HOST},port=7777\""
            echo "export HDFS_DATANODE_OPTS=\"-javaagent:${agent}=host=${JOLOKIA_HOST},port=7779\""
            echo "export YARN_NODEMANAGER_OPTS=\"-javaagent:${agent}=host=${JOLOKIA_HOST},port=7782\""
            echo "export YARN_RESOURCEMANAGER_OPTS=\"-javaagent:${agent}=host=${JOLOKIA_HOST},port=7781\""
        } >> "$hadoop_env"
    fi

    # Point fs.defaultFS at our real hostname, then (re)format the NameNode.
    sed -i "s/localhost/$hostname/" /hadoop/etc/hadoop/core-site.xml
    echo 'Y' | hdfs namenode -format
    start-dfs.sh
    start-yarn.sh

    # Smoke test: run the bundled wordcount example over LICENSE.txt.
    hdfs dfs -mkdir -p /user/root/input
    hdfs dfs -put "/hadoop-${HADOOP_LATEST_VERSION}/LICENSE.txt" /user/root/input/
    hadoop jar "/hadoop-${HADOOP_LATEST_VERSION}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_LATEST_VERSION}.jar" wordcount input output

    # Keep the container alive, streaming daemon logs to stdout.
    tail -f /dev/null /hadoop/logs/*
    stop-yarn.sh
    stop-dfs.sh
fi
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/usr/bin/env bash
# Login-shell profile snippet: point HADOOP_HOME at the /hadoop symlink
# (created by the Dockerfile to the versioned install directory).
export HADOOP_HOME=/hadoop

0 commit comments

Comments
 (0)