# Zookeeper cluster

# Create the hadoop user

```bash
sudo groupadd hadoop
sudo useradd -m -g hadoop -s /bin/bash hadoop
sudo passwd hadoop
id hadoop
su - hadoop
```

# SSH login (localhost)

```bash
ssh-keygen -t rsa
cd ~/.ssh/
cp id_rsa.pub authorized_keys
chmod 700 ~/.ssh/
chmod 600 ~/.ssh/*
ssh localhost
```

# SSH login (between nodes)

```bash
# on slave1, slave2, slave3
cat ~/.ssh/id_rsa.pub | ssh hadoop@master 'cat >> ~/.ssh/authorized_keys'

# on master
scp -r authorized_keys hadoop@slave1:~/.ssh/
scp -r authorized_keys hadoop@slave2:~/.ssh/
scp -r authorized_keys hadoop@slave3:~/.ssh/

# verify passwordless login from any node
ssh master
ssh slave1
ssh slave2
ssh slave3
```

# jdk

```bash
sudo usermod -aG sudo hadoop
su - hadoop
sudo whoami
sudo apt install openjdk-8-jdk
java -version

# add to ~/.bashrc, then reload
vi ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
source ~/.bashrc
```

# zookeeper

```bash
wget https://downloads.apache.org/zookeeper/zookeeper-3.8.4/apache-zookeeper-3.8.4-bin.tar.gz
tar -zxvf apache-zookeeper-3.8.4-bin.tar.gz
sudo mv apache-zookeeper-3.8.4-bin /opt/zookeeper
sudo mkdir -p /var/lib/zookeeper
sudo mkdir -p /var/log/zookeeper
sudo chown -R hadoop:hadoop /var/lib/zookeeper
sudo chown -R hadoop:hadoop /var/log/zookeeper
vi /opt/zookeeper/conf/zoo.cfg
```

/opt/zookeeper/conf/zoo.cfg:

```
tickTime=2000
dataDir=/var/lib/zookeeper
dataLogDir=/var/log/zookeeper
clientPort=2181
initLimit=5
syncLimit=2
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
```

Write the myid on each node:

```bash
# master
echo "1" | sudo tee /var/lib/zookeeper/myid
# slave1
echo "2" | sudo tee /var/lib/zookeeper/myid
# slave2
echo "3" | sudo tee /var/lib/zookeeper/myid
```

Create a systemd unit:

```bash
sudo vi /etc/systemd/system/zookeeper.service
```

/etc/systemd/system/zookeeper.service:

```
[Unit]
Description=Zookeeper
Documentation=https://zookeeper.apache.org
After=network.target

[Service]
Type=simple
User=hadoop
ExecStart=/opt/zookeeper/bin/zkServer.sh start-foreground /opt/zookeeper/conf/zoo.cfg
ExecStop=/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo.cfg
Restart=on-abnormal

[Install]
WantedBy=multi-user.target
```

Manage Zookeeper through systemd:

```bash
sudo systemctl daemon-reload
sudo systemctl enable zookeeper
sudo systemctl start zookeeper
sudo systemctl status zookeeper
sudo systemctl stop zookeeper
```

Or run it directly:

```bash
/opt/zookeeper/bin/zkServer.sh start /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh start-foreground /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh status
/opt/zookeeper/bin/zkCli.sh -server master:2181
/opt/zookeeper/bin/zkCli.sh -server localhost:2181
```

# Error

Cannot open channel to 2 at election address

https://www.cnblogs.com/tocode/p/10693715.html

Fix: in zoo.cfg on each machine, change that machine's own `server.N` entry to use `0.0.0.0` instead of its hostname, e.g. `server.1=0.0.0.0:2888:3888` on master.
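For example, a sketch of what `/opt/zookeeper/conf/zoo.cfg` would look like on master after this change (slave1 and slave2 would instead replace their own `server.2` / `server.3` entries); the rest of the file stays as configured above:

```
tickTime=2000
dataDir=/var/lib/zookeeper
dataLogDir=/var/log/zookeeper
clientPort=2181
initLimit=5
syncLimit=2
# on master, its own entry listens on all interfaces
server.1=0.0.0.0:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
```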

# Startup successful

*(screenshot: Zookeeper startup status)*

# hadoop

# Architecture diagram

```
                    +-----------------+
                    |    Zookeeper    |
                    |    Ensemble     |
                    |                 |
                    |   master:2181   |
                    |   slave1:2181   |
                    |   slave2:2181   |
                    +--------+--------+
                             |
         +-------------------+-------------------+
         |                   |                   |
+--------+--------+ +--------+--------+ +--------+--------+
|     master      | |     slave1      | |     slave2      |
|  192.168.3.201  | |  192.168.3.202  | |  192.168.3.203  |
|                 | |                 | |                 |
| NameNode (nn1)  | | NameNode (nn2)  | | DataNode        |
| ResourceManager | | ResourceManager | | NodeManager     |
| JournalNode     | | JournalNode     | | JournalNode     |
| DataNode        | | DataNode        | |                 |
| NodeManager     | | NodeManager     | |                 |
|                 | |                 | |                 |
| HDFS            | | HDFS            | | HDFS            |
|High Availability| |High Availability| |  Data Storage   |
+-----------------+ +-----------------+ +-----------------+
```

# ChatGPT prompt

Known environment: the three machines used in this lab run Ubuntu 22.04.4 LTS and are planned as follows:

| Hostname | IP Address    | User   |
|----------|---------------|--------|
| master   | 192.168.3.201 | hadoop |
| slave1   | 192.168.3.202 | hadoop |
| slave2   | 192.168.3.203 | hadoop |

All three machines are logged in as the hadoop user and passwordless SSH is already configured. `/etc/hosts` on all three machines contains `192.168.3.201 master`, `192.168.3.202 slave1`, `192.168.3.203 slave2`, so the machines can reach each other by hostname. Java is already installed on all three machines with `JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64`, so that step can be skipped. The Zookeeper cluster is already installed on all three machines under `/opt/zookeeper/`, so that step can be skipped as well.

Requirements: as the hadoop user, install a Hadoop 3.4.0 cluster with `/opt/hadoop` as the installation path, including HDFS HA and YARN HA, with the following cluster roles:

- master: NameNode (nn1), ResourceManager, JournalNode, NodeManager, DataNode
- slave1: NameNode (nn2), ResourceManager, JournalNode, NodeManager, DataNode
- slave2: JournalNode, NodeManager, DataNode
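Before installing Hadoop, it can help to verify these assumptions from master; a minimal sketch using only the hostnames and paths assumed above:

```bash
# run on master as the hadoop user: check passwordless ssh, hostname resolution,
# the Java install and the Zookeeper path on every node
for host in master slave1 slave2; do
  echo "== $host =="
  ssh "$host" 'hostname && java -version 2>&1 | head -n 1 && ls -d /opt/zookeeper'
done
```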

# Install Hadoop on all nodes

```bash
wget https://downloads.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz
tar -xzvf hadoop-3.4.0.tar.gz
sudo mv hadoop-3.4.0 /opt/hadoop
sudo chown -R hadoop:hadoop /opt/hadoop

# add to ~/.bashrc, then reload
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
source ~/.bashrc
```

# Configure core-site.xml

Add the following to `/opt/hadoop/etc/hadoop/core-site.xml` on all nodes:

```xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop/tmp</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
</configuration>
```

# Configure hdfs-site.xml

Add the following to `/opt/hadoop/etc/hadoop/hdfs-site.xml` on all nodes:

```xml
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>master:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>slave1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>master:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>slave1:9870</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/bin/true)</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/hadoop/journal</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/hadoop/hdfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
</configuration>
```

# Configure workers

Add the following to `/opt/hadoop/etc/hadoop/workers` on all nodes:

```
master
slave1
slave2
```
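Rather than editing every node by hand, the configuration files can be edited once on master and copied to the other nodes; a sketch assuming the passwordless SSH set up earlier:

```bash
# run on master after editing the files under /opt/hadoop/etc/hadoop
# (repeat later for mapred-site.xml and yarn-site.xml)
for host in slave1 slave2; do
  scp /opt/hadoop/etc/hadoop/{core-site.xml,hdfs-site.xml,workers} hadoop@"$host":/opt/hadoop/etc/hadoop/
done
```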

# Start journalnode on all nodes

```bash
hdfs --daemon start journalnode
```
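To confirm the JournalNode process is up, a quick check with `jps` on each node:

```bash
jps | grep JournalNode   # one JournalNode process should show up on every node
```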

# Set up the namenode on the master node

```bash
hdfs namenode -format   # format the HDFS namespace
hdfs zkfc -formatZK     # create the HA znode in Zookeeper
hdfs namenode           # run nn1 in the foreground; keep it running while bootstrapping slave1
```

# Set up the namenode on the slave1 node

```bash
hdfs namenode -bootstrapStandby
```

*(screenshot: namenode bootstrapStandby output)*

Error analysis:

```
ha.BootstrapStandby: Unable to fetch namespace information from any remote NN.
Possible NameNodes: [RemoteNameNodeInfo [nnId=nn1, ipcAddress=master/192.168.3.201:8020, httpAddress=http://master:9870]]
```

Fix: remove the loopback aliases from `/etc/hosts` — delete `127.0.0.1 master`, `127.0.0.1 slave1`, and `127.0.0.1 slave2` on the corresponding machines.

https://cloud.tencent.com/developer/article/1913706
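After removing those loopback aliases, `/etc/hosts` on each node would look roughly like this (a sketch based on the machine plan above; the plain `127.0.0.1 localhost` line stays):

```
127.0.0.1     localhost
192.168.3.201 master
192.168.3.202 slave1
192.168.3.203 slave2
```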

# Stop journalnode on all nodes

```bash
hdfs --daemon stop journalnode
```

# Configure hadoop-env.sh on all nodes

In `/opt/hadoop/etc/hadoop/hadoop-env.sh`, set:

```bash
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/opt/hadoop
```

# Start the cluster from the master node

```bash
start-dfs.sh
hdfs dfsadmin -report
```
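To confirm which NameNode is active and which is standby, `hdfs haadmin` can be used (nn1/nn2 are the IDs configured in hdfs-site.xml above):

```bash
hdfs haadmin -getServiceState nn1   # expected: active
hdfs haadmin -getServiceState nn2   # expected: standby
```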

# Startup successful

*(screenshot: start-dfs.sh output)*

# Web access

http://192.168.3.201:9870 (active)

http://192.168.3.202:9870 (standby)

# Upload a file

```bash
vi myfile.txt
hdfs dfs -mkdir /test
hdfs dfs -put myfile.txt /test
hdfs dfs -ls /test
```
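To double-check the upload, the file can be read back from HDFS:

```bash
hdfs dfs -cat /test/myfile.txt
```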

# Configure mapred-site.xml

Add the following to `/opt/hadoop/etc/hadoop/mapred-site.xml` on all nodes:

```xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
  </property>
</configuration>
```

# Configure yarn-site.xml

Add the following to `/opt/hadoop/etc/hadoop/yarn-site.xml` on all nodes:

```xml
<configuration>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-cluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>master</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>slave1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>master:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>slave1:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>master:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>slave1:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>master:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>slave1:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>master:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>slave1:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>master:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>slave1:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-state-store.parent-path</name>
    <value>/rmstore</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
```

# Start YARN

On the master node:

```bash
start-yarn.sh

# restart the local ResourceManager individually if needed
yarn --daemon stop resourcemanager
yarn --daemon start resourcemanager
```

Startup log:

```
hadoop@master:~$ start-yarn.sh
Starting resourcemanagers on [ master slave1]
Starting nodemanagers
```

*(screenshots: jps, jpsall, and rmadmin output)*
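The ResourceManager HA state can also be checked from the command line with `yarn rmadmin` (rm1/rm2 are the IDs from yarn-site.xml above):

```bash
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
```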

# Web access

http://192.168.3.201:8088 (rm1)

http://192.168.3.202:8088 (rm2)

Note: in a YARN HA setup, only the Web UI of the active ResourceManager is normally fully accessible; the standby ResourceManager may restrict access to its Web UI.
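One way to see this from the command line is to hit both Web UIs with curl; a sketch (the exact response of the standby ResourceManager can vary by version, it commonly redirects to the active one):

```bash
curl -sI http://192.168.3.201:8088/cluster | head -n 1   # active RM answers directly
curl -sI http://192.168.3.202:8088/cluster | head -n 1   # standby RM typically returns a redirect
```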

# WordCount example

```bash
vi wordcount.txt
```

wordcount.txt contents:

```
hadoop crudapi1
hadoop crudapi2
hadoop crudapi3
hadoop crudapi4
yarn zookeeper hdfs
yarn zookeeper hdfs
```

```bash
hdfs dfs -mkdir /mpdata
hdfs dfs -mkdir /mpdata/input
hdfs dfs -put wordcount.txt /mpdata/input
hdfs dfs -ls /mpdata/input
hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar wordcount /mpdata/input/wordcount.txt /mpdata/output
```

*(screenshot: wordcount job output)*

Check the job: *(screenshot: wordcount application)*

Check the output; the result is correct:
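The result below can be read back with, for example, the reducer output file:

```bash
hdfs dfs -cat /mpdata/output/part-r-00000
```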

```
crudapi1    1
crudapi2    1
crudapi3    1
crudapi4    1
hadoop      4
hdfs        2
yarn        2
zookeeper   2
```