# ZooKeeper Cluster
# Create the hadoop user
```bash
sudo groupadd hadoop
sudo useradd -m -g hadoop -s /bin/bash hadoop
sudo passwd hadoop
id hadoop
su - hadoop
```
# SSH passwordless login (localhost)
```bash
ssh-keygen -t rsa
cd ~/.ssh/
cp id_rsa.pub authorized_keys
chmod 700 ~/.ssh/
chmod 600 ~/.ssh/*
ssh localhost
```
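A quick check that key-based login really works without a password prompt (BatchMode makes ssh fail instead of asking):

```bash
ssh -o BatchMode=yes localhost 'echo "key login to $(hostname) OK"'
```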
# SSH passwordless login (between nodes)
```bash
# On slave1, slave2, slave3: append each node's public key to master's authorized_keys
cat ~/.ssh/id_rsa.pub | ssh hadoop@master 'cat >> ~/.ssh/authorized_keys'

# On master (run from ~/.ssh): distribute the combined authorized_keys back to the slaves
scp -r authorized_keys hadoop@slave1:~/.ssh/
scp -r authorized_keys hadoop@slave2:~/.ssh/
scp -r authorized_keys hadoop@slave3:~/.ssh/

# Verify
ssh master
ssh slave1
ssh slave2
ssh slave3
```
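A quick loop from any node verifies passwordless access to every host in one pass (adjust the host list to the machines actually in use; BatchMode makes ssh fail instead of prompting):

```bash
for h in master slave1 slave2 slave3; do
  ssh -o BatchMode=yes "hadoop@$h" 'echo "login to $(hostname) OK"'
done
```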
# JDK
```bash
# Give the hadoop user sudo rights, then log back in
sudo usermod -aG sudo hadoop
su - hadoop
sudo whoami

# Install OpenJDK 8
sudo apt install openjdk-8-jdk
java -version

# Add to ~/.bashrc
vi ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH

source ~/.bashrc
```
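A small sanity check that the shell really picks up the intended JDK after re-sourcing ~/.bashrc:

```bash
echo "$JAVA_HOME"             # /usr/lib/jvm/java-8-openjdk-amd64
"$JAVA_HOME/bin/java" -version
readlink -f "$(which java)"   # should resolve under the same JDK directory
```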
# ZooKeeper
```bash
# Download and install ZooKeeper
wget https://downloads.apache.org/zookeeper/zookeeper-3.8.4/apache-zookeeper-3.8.4-bin.tar.gz
tar -zxvf apache-zookeeper-3.8.4-bin.tar.gz
sudo mv apache-zookeeper-3.8.4-bin /opt/zookeeper

# Data and log directories
sudo mkdir -p /var/lib/zookeeper
sudo mkdir -p /var/log/zookeeper
sudo chown -R hadoop:hadoop /var/lib/zookeeper
sudo chown -R hadoop:hadoop /var/log/zookeeper

# Edit the configuration
vi /opt/zookeeper/conf/zoo.cfg
```

/opt/zookeeper/conf/zoo.cfg:

```
tickTime=2000
dataDir=/var/lib/zookeeper
dataLogDir=/var/log/zookeeper
clientPort=2181
initLimit=5
syncLimit=2
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
```

Set the myid on each node:

```bash
# master
echo "1" | sudo tee /var/lib/zookeeper/myid
# slave1
echo "2" | sudo tee /var/lib/zookeeper/myid
# slave2
echo "3" | sudo tee /var/lib/zookeeper/myid
```

Create a systemd service:

```bash
sudo vi /etc/systemd/system/zookeeper.service
```

```ini
[Unit]
Description=Zookeeper
Documentation=https://zookeeper.apache.org
After=network.target

[Service]
Type=simple
User=hadoop
ExecStart=/opt/zookeeper/bin/zkServer.sh start-foreground /opt/zookeeper/conf/zoo.cfg
ExecStop=/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo.cfg
Restart=on-abnormal

[Install]
WantedBy=multi-user.target
```

Manage the service with systemd:

```bash
sudo systemctl daemon-reload
sudo systemctl enable zookeeper
sudo systemctl start zookeeper
sudo systemctl status zookeeper
sudo systemctl stop zookeeper
```

Or run it manually:

```bash
/opt/zookeeper/bin/zkServer.sh start /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh stop /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh start-foreground /opt/zookeeper/conf/zoo.cfg
/opt/zookeeper/bin/zkServer.sh status
/opt/zookeeper/bin/zkCli.sh -server master:2181
/opt/zookeeper/bin/zkCli.sh -server localhost:2181
```
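Once the service is running on all three nodes, the quorum state can be checked from any machine. The sketch below assumes the same /opt/zookeeper layout everywhere and that nc (netcat) is installed; srvr is in ZooKeeper's default four-letter-word whitelist:

```bash
# One node should report "Mode: leader", the other two "Mode: follower"
for h in master slave1 slave2; do
  echo "== $h =="
  ssh "$h" '/opt/zookeeper/bin/zkServer.sh status /opt/zookeeper/conf/zoo.cfg'
done

# Alternative check via the srvr four-letter command
echo srvr | nc master 2181 | grep Mode
```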
# Error
`Cannot open channel to 2 at election address`
https://www.cnblogs.com/tocode/p/10693715.html
Fix: in zoo.cfg on each node, change that node's own `server.N` address to `0.0.0.0`, e.g. `server.1=0.0.0.0:2888:3888` on the machine whose myid is 1.
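For example, on master (whose myid is 1) the server list in zoo.cfg would end up as follows; slave1 and slave2 change their own `server.2` / `server.3` lines in the same way:

```
# /opt/zookeeper/conf/zoo.cfg on master (myid = 1)
server.1=0.0.0.0:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
```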
# Startup successful
# Hadoop
# Architecture diagram
```
                       +-------------------+
                       |     Zookeeper     |
                       |     Ensemble      |
                       |                   |
                       |    master:2181    |
                       |    slave1:2181    |
                       |    slave2:2181    |
                       +---------+---------+
                                 |
          +----------------------+----------------------+
          |                      |                      |
+---------+---------+  +---------+---------+  +---------+---------+
|      master       |  |      slave1       |  |      slave2       |
|   192.168.3.201   |  |   192.168.3.202   |  |   192.168.3.203   |
|                   |  |                   |  |                   |
| NameNode (nn1)    |  | NameNode (nn2)    |  | DataNode          |
| ResourceManager   |  | ResourceManager   |  | NodeManager       |
| JournalNode       |  | JournalNode       |  | JournalNode       |
| DataNode          |  | DataNode          |  |                   |
| NodeManager       |  | NodeManager       |  |                   |
|                   |  |                   |  |                   |
| HDFS              |  | HDFS              |  | HDFS              |
| High Availability |  | High Availability |  | Data Storage      |
+-------------------+  +-------------------+  +-------------------+
```
# ChatGPT prompt
Known environment: the 3 machines used in this lab run Ubuntu 22.04.4 LTS and are laid out as follows:

| Hostname | IP Address    | User   |
|----------|---------------|--------|
| master   | 192.168.3.201 | hadoop |
| slave1   | 192.168.3.202 | hadoop |
| slave2   | 192.168.3.203 | hadoop |

All three machines are logged in as the hadoop user, passwordless SSH is already configured, and /etc/hosts on each of them contains:

```
192.168.3.201 master
192.168.3.202 slave1
192.168.3.203 slave2
```

so the machines can reach each other by hostname. Java is already installed on all three (JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64), so this step can be skipped. The ZooKeeper cluster is already installed on all three machines under /opt/zookeeper/, so this can be skipped as well.

Requirements: as the hadoop user, install a Hadoop 3.4.0 cluster with /opt/hadoop as the installation path, including HDFS HA and YARN HA, with the following cluster roles:

- master: NameNode (nn1), ResourceManager, JournalNode, NodeManager, DataNode
- slave1: NameNode (nn2), ResourceManager, JournalNode, NodeManager, DataNode
- slave2: JournalNode, NodeManager, DataNode
# Install Hadoop on all nodes
```bash
wget https://downloads.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz
tar -xzvf hadoop-3.4.0.tar.gz
sudo mv hadoop-3.4.0 /opt/hadoop
sudo chown -R hadoop:hadoop /opt/hadoop

# ~/.bashrc
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

source ~/.bashrc
```
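A quick check that the binaries and environment variables are visible in the new shell (nothing cluster-specific yet):

```bash
hadoop version            # should print Hadoop 3.4.0
which hdfs yarn           # should resolve under /opt/hadoop/bin
echo "$HADOOP_CONF_DIR"   # /opt/hadoop/etc/hadoop
```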
# Configure core-site.xml
Add the following to /opt/hadoop/etc/hadoop/core-site.xml on all nodes:
```xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop/tmp</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
</configuration>
```
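Before starting anything, `hdfs getconf` can confirm that the file is picked up; it only reads the local configuration, so it works before the cluster is formatted:

```bash
hdfs getconf -confKey fs.defaultFS          # hdfs://mycluster
hdfs getconf -confKey ha.zookeeper.quorum   # master:2181,slave1:2181,slave2:2181
```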
# Configure hdfs-site.xml
Add the following to /opt/hadoop/etc/hadoop/hdfs-site.xml on all nodes:
```xml
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>master:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>slave1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>master:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>slave1:9870</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/bin/true)</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/hadoop/journal</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/hadoop/hdfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
</configuration>
```
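The HA-related keys can be sanity-checked the same way on each node:

```bash
hdfs getconf -namenodes                            # master slave1
hdfs getconf -confKey dfs.ha.namenodes.mycluster   # nn1,nn2
hdfs getconf -confKey dfs.namenode.shared.edits.dir
```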
# Configure workers
Add the following to /opt/hadoop/etc/hadoop/workers on all nodes:
```
master
slave1
slave2
```
# Start the JournalNode on all nodes
```bash
hdfs --daemon start journalnode
```
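Running the command on each node by hand works; the same thing can be done from master with a small loop over SSH (a sketch: the absolute path is used because /opt/hadoop/bin is typically not on the PATH of non-interactive shells, and jps from the JDK is assumed to be available):

```bash
for h in master slave1 slave2; do
  ssh "$h" '/opt/hadoop/bin/hdfs --daemon start journalnode'
  ssh "$h" 'jps | grep JournalNode'   # confirm the daemon is running on each node
done
```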
# Set up the NameNode on the master node
```bash
hdfs namenode -format
hdfs zkfc -formatZK
# Run nn1 in the foreground so slave1 can bootstrap from it (use another terminal for the next steps)
hdfs namenode
```
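After `hdfs zkfc -formatZK`, the failover controller's znode should exist in ZooKeeper. This can be confirmed with zkCli (assuming the default parent znode /hadoop-ha):

```bash
/opt/zookeeper/bin/zkCli.sh -server master:2181 ls /hadoop-ha
# expected output ends with something like: [mycluster]
```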
# Set up the NameNode on the slave1 node
```bash
hdfs namenode -bootstrapStandby
```
Error analysis:
```
ha.BootstrapStandby: Unable to fetch namespace information from any remote NN. Possible NameNodes: [RemoteNameNodeInfo [nnId=nn1, ipcAddress=master/192.168.3.201:8020, httpAddress=http://master:9870]]
```
Fix: remove the lines `127.0.0.1 master`, `127.0.0.1 slave1`, and `127.0.0.1 slave2` from /etc/hosts.
https://cloud.tencent.com/developer/article/1913706
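After the fix, /etc/hosts on each node would look roughly like this (an illustration, not the file verbatim; the loopback-to-hostname line, whether written as 127.0.0.1 or Ubuntu's default 127.0.1.1, is what gets removed):

```
127.0.0.1     localhost

192.168.3.201 master
192.168.3.202 slave1
192.168.3.203 slave2
```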
# Stop the JournalNode on all nodes
```bash
hdfs --daemon stop journalnode
```
# Configure hadoop-env.sh on all nodes
Add the following to /opt/hadoop/etc/hadoop/hadoop-env.sh:

```bash
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/opt/hadoop
```
# Start the cluster on the master node
```bash
start-dfs.sh
hdfs dfsadmin -report
```
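With automatic failover enabled, one NameNode should come up active and the other standby; `hdfs haadmin` shows this directly (which one is active may vary):

```bash
hdfs haadmin -getServiceState nn1   # e.g. active
hdfs haadmin -getServiceState nn2   # e.g. standby
```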
# Startup successful
# Web access
http://192.168.3.201:9870
http://192.168.3.202:9870
# Upload a file
```bash
vi myfile.txt
hdfs dfs -mkdir /test
hdfs dfs -put myfile.txt /test
hdfs dfs -ls /test
```
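Reading the file back through the `hdfs://mycluster` nameservice is a quick way to confirm that client-side HA resolution works (both commands should print the same content):

```bash
hdfs dfs -cat /test/myfile.txt
hdfs dfs -cat hdfs://mycluster/test/myfile.txt   # explicit nameservice URI
```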
# Configure mapred-site.xml
Add the following to /opt/hadoop/etc/hadoop/mapred-site.xml on all nodes:
```xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
  </property>
</configuration>
```
# Configure yarn-site.xml
Add the following to /opt/hadoop/etc/hadoop/yarn-site.xml on all nodes:
```xml
<configuration>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-cluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>master</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>slave1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>master:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>slave1:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>master:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>slave1:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>master:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>slave1:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>master:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>slave1:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>master:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>slave1:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-state-store.parent-path</name>
    <value>/rmstore</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
```
# Start YARN
On the master node:
```bash
start-yarn.sh

# Restart a single ResourceManager if needed
yarn --daemon stop resourcemanager
yarn --daemon start resourcemanager
```
Startup log:
```
hadoop@master:~$ start-yarn.sh
Starting resourcemanagers on [ master slave1]
Starting nodemanagers
```
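With both ResourceManagers up, their HA state can be checked from the command line; rm1 and rm2 are the ids configured in yarn-site.xml (which one is active may vary):

```bash
yarn rmadmin -getServiceState rm1   # e.g. active
yarn rmadmin -getServiceState rm2   # e.g. standby
```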
# Web access
http://192.168.3.201:8088
http://192.168.3.202:8088
Note: in a YARN HA setup, only the Web UI of the ResourceManager in the active state is normally fully accessible; the standby ResourceManager may restrict access to its Web UI.
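One way to observe this from the shell (an illustration; the standby ResourceManager typically answers with a redirect to the active one, so the exact output depends on which RM is currently active):

```bash
curl -sIL http://192.168.3.202:8088/cluster | grep -iE '^(HTTP|Location)'
```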
# WordCount example
```bash
vi wordcount.txt
```

```
hadoop crudapi1
hadoop crudapi2
hadoop crudapi3
hadoop crudapi4
yarn zookeeper hdfs
yarn zookeeper hdfs
```
```bash
hdfs dfs -mkdir /mpdata
hdfs dfs -mkdir /mpdata/input
hdfs dfs -put wordcount.txt /mpdata/input
hdfs dfs -ls /mpdata/input
hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.0.jar wordcount /mpdata/input/wordcount.txt /mpdata/output
```
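Once the job finishes, it can also be checked from the command line (a small sketch; with the default single reducer the result lands in part-r-00000):

```bash
yarn application -list -appStates FINISHED
hdfs dfs -ls /mpdata/output
hdfs dfs -cat /mpdata/output/part-r-00000
```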
Check the job.
Check the output; the result is correct:
```
crudapi1    1
crudapi2    1
crudapi3    1
crudapi4    1
hadoop      4
hdfs        2
yarn        2
zookeeper   2
```