VMware上安装Hadoop3.x
安装及配置虚拟机
虚拟机安装Java
删除旧jdk
rpm -qa|grep jdk
rpm -e --nodeps 刚查出来的jdk
rpm -e --nodeps 刚查出来的jdk
[root@c23 ~]# rpm -qa|grep jdk
copy-jdk-configs-3.3-10.el7_5.noarch
java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
java-1.8.0-openjdk-headless-1.8.0.342.b07-1.el7_9.x86_64
java-1.8.0-openjdk-devel-1.8.0.342.b07-1.el7_9.x86_64
[root@c23 ~]# rpm -e --nodeps java-1.8.0-openjdk-devel-1.8.0.342.b07-1.el7_9.x86_64
rpm -e --nodeps java-1.8.0-openjdk-headless-1.8.0.342.b07-1.el7_9.x86_64
rpm -e --nodeps java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
rpm -e --nodeps copy-jdk-configs-3.3-10.el7_5.noarch
[root@c23 ~]# rpm -e --nodeps java-1.8.0-openjdk-headless-1.8.0.342.b07-1.el7_9.x86_64
[root@c23 ~]# rpm -e --nodeps java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
[root@c23 ~]# rpm -e --nodeps copy-jdk-configs-3.3-10.el7_5.noarch
[root@c23 ~]# rpm -qa|grep jdk
[root@c23 ~]#
copy-jdk-configs-3.3-10.el7_5.noarch
java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
java-1.8.0-openjdk-headless-1.8.0.342.b07-1.el7_9.x86_64
java-1.8.0-openjdk-devel-1.8.0.342.b07-1.el7_9.x86_64
[root@c23 ~]# rpm -e --nodeps java-1.8.0-openjdk-devel-1.8.0.342.b07-1.el7_9.x86_64
rpm -e --nodeps java-1.8.0-openjdk-headless-1.8.0.342.b07-1.el7_9.x86_64
rpm -e --nodeps java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
rpm -e --nodeps copy-jdk-configs-3.3-10.el7_5.noarch
[root@c23 ~]# rpm -e --nodeps java-1.8.0-openjdk-headless-1.8.0.342.b07-1.el7_9.x86_64
[root@c23 ~]# rpm -e --nodeps java-1.8.0-openjdk-1.8.0.342.b07-1.el7_9.x86_64
[root@c23 ~]# rpm -e --nodeps copy-jdk-configs-3.3-10.el7_5.noarch
[root@c23 ~]# rpm -qa|grep jdk
[root@c23 ~]#
虚拟机安装Hadoop集群
配置主机master
配置主机hadoop3.3.1
在/usr/local/hadoop-3.3.1/etc/hadoop目录下修改配置
vi ./core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9864</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop-3.3.1/tmp</value>
</property>
<property>
<name>hadoop.native.lib</name>
<value>false</value>
<description>Should native hadoop libraries, if present, be used.
</description>
</property>
</configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9864</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop-3.3.1/tmp</value>
</property>
<property>
<name>hadoop.native.lib</name>
<value>false</value>
<description>Should native hadoop libraries, if present, be used.
</description>
</property>
</configuration>
vi ./hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9868</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9868</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
vi ./mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- jobhistory properties -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
</configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- jobhistory properties -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
</configuration>
vi ./yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>${yarn.resourcemanager.hostname}:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>${yarn.resourcemanager.hostname}:8030</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>${yarn.resourcemanager.hostname}:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address</name>
<value>${yarn.resourcemanager.hostname}:8090</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>${yarn.resourcemanager.hostname}:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>${yarn.resourcemanager.hostname}:8033</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data/hadoop/yarn/local</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/data/tmp/logs</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs/</value>
<description>URL for job history server</description>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
</configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>${yarn.resourcemanager.hostname}:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>${yarn.resourcemanager.hostname}:8030</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>${yarn.resourcemanager.hostname}:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address</name>
<value>${yarn.resourcemanager.hostname}:8090</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>${yarn.resourcemanager.hostname}:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>${yarn.resourcemanager.hostname}:8033</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data/hadoop/yarn/local</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/data/tmp/logs</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs/</value>
<description>URL for job history server</description>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
</configuration>
验证hadoop相关配置是否生效
hadoop version
说明:如果显示如下内容说明hadoop配置生效
[root@c31 ~]# hadoop version
Hadoop 3.3.1
Source code repository https://github.com/apache/hadoop.git -r a3b9c37a397ad4188041dd80621bdeefc46885f2
Compiled by ubuntu on 2021-06-15T05:13Z
Compiled with protoc 3.7.1
From source with checksum 88a4ddb2299aca054416d6b7f81ca55
This command was run using /usr/local/hadoop-3.3.1/share/hadoop/common/hadoop-common-3.3.1.jar
[root@c31 ~]# hadoop version
Hadoop 3.3.1
Source code repository https://github.com/apache/hadoop.git -r a3b9c37a397ad4188041dd80621bdeefc46885f2
Compiled by ubuntu on 2021-06-15T05:13Z
Compiled with protoc 3.7.1
From source with checksum 88a4ddb2299aca054416d6b7f81ca55
This command was run using /usr/local/hadoop-3.3.1/share/hadoop/common/hadoop-common-3.3.1.jar
复制文件到从机
scp -r /usr/lib/jvm slave1:/usr/lib/
scp -r /usr/local/hadoop-3.3.1/ slave1:/usr/local
scp /etc/profile slave1:/etc/profile
scp -r /usr/local/hadoop-3.3.1/ slave1:/usr/local
scp /etc/profile slave1:/etc/profile
scp -r /usr/lib/jvm slave2:/usr/lib/
scp -r /usr/local/hadoop-3.3.1/ slave2:/usr/local
scp /etc/profile slave2:/etc/profile
scp -r /usr/local/hadoop-3.3.1/ slave2:/usr/local
scp /etc/profile slave2:/etc/profile
测试集群
测试写速度
cd /usr/local/hadoop-3.3.1/share/hadoop/mapreduce/
hadoop jar ./hadoop-mapreduce-client-jobclient-3.3.1-tests.jar TestDFSIO -write -nrFiles 10 -fileSize 10MB
hadoop jar ./hadoop-mapreduce-client-jobclient-3.3.1-tests.jar TestDFSIO -write -nrFiles 10 -fileSize 10MB
结果
查看运行结果
2023-07-21 07:05:27,971 INFO fs.TestDFSIO: ----- TestDFSIO ----- : write
2023-07-21 07:05:27,972 INFO fs.TestDFSIO: Date & time: Fri Jul 21 07:05:27 GMT 2023
2023-07-21 07:05:27,973 INFO fs.TestDFSIO: Number of files: 10
2023-07-21 07:05:27,974 INFO fs.TestDFSIO: Total MBytes processed: 100
2023-07-21 07:05:27,975 INFO fs.TestDFSIO: Throughput mb/sec: 1.67
2023-07-21 07:05:27,976 INFO fs.TestDFSIO: Average IO rate mb/sec: 2.08
2023-07-21 07:05:27,977 INFO fs.TestDFSIO: IO rate std deviation: 0.94
2023-07-21 07:05:27,978 INFO fs.TestDFSIO: Test exec time sec: 202.62
2023-07-21 07:05:27,972 INFO fs.TestDFSIO: Date & time: Fri Jul 21 07:05:27 GMT 2023
2023-07-21 07:05:27,973 INFO fs.TestDFSIO: Number of files: 10
2023-07-21 07:05:27,974 INFO fs.TestDFSIO: Total MBytes processed: 100
2023-07-21 07:05:27,975 INFO fs.TestDFSIO: Throughput mb/sec: 1.67
2023-07-21 07:05:27,976 INFO fs.TestDFSIO: Average IO rate mb/sec: 2.08
2023-07-21 07:05:27,977 INFO fs.TestDFSIO: IO rate std deviation: 0.94
2023-07-21 07:05:27,978 INFO fs.TestDFSIO: Test exec time sec: 202.62
测试读速度
cd /usr/local/hadoop-3.3.1/share/hadoop/mapreduce/
rm -rf TestDFSIO_results.log
hadoop jar ./hadoop-mapreduce-client-jobclient-3.3.1-tests.jar TestDFSIO -read -nrFiles 10 -fileSize 10MB
rm -rf TestDFSIO_results.log
hadoop jar ./hadoop-mapreduce-client-jobclient-3.3.1-tests.jar TestDFSIO -read -nrFiles 10 -fileSize 10MB
查看运行结果
2023-07-21 07:17:52,772 INFO fs.TestDFSIO: ----- TestDFSIO ----- : read
2023-07-21 07:17:52,773 INFO fs.TestDFSIO: Date & time: Fri Jul 21 07:17:52 GMT 2023
2023-07-21 07:17:52,774 INFO fs.TestDFSIO: Number of files: 10
2023-07-21 07:17:52,774 INFO fs.TestDFSIO: Total MBytes processed: 100
2023-07-21 07:17:52,775 INFO fs.TestDFSIO: Throughput mb/sec: 8.33
2023-07-21 07:17:52,775 INFO fs.TestDFSIO: Average IO rate mb/sec: 9.16
2023-07-21 07:17:52,776 INFO fs.TestDFSIO: IO rate std deviation: 2.9
2023-07-21 07:17:52,779 INFO fs.TestDFSIO: Test exec time sec: 156.42
2023-07-21 07:17:52,773 INFO fs.TestDFSIO: Date & time: Fri Jul 21 07:17:52 GMT 2023
2023-07-21 07:17:52,774 INFO fs.TestDFSIO: Number of files: 10
2023-07-21 07:17:52,774 INFO fs.TestDFSIO: Total MBytes processed: 100
2023-07-21 07:17:52,775 INFO fs.TestDFSIO: Throughput mb/sec: 8.33
2023-07-21 07:17:52,775 INFO fs.TestDFSIO: Average IO rate mb/sec: 9.16
2023-07-21 07:17:52,776 INFO fs.TestDFSIO: IO rate std deviation: 2.9
2023-07-21 07:17:52,779 INFO fs.TestDFSIO: Test exec time sec: 156.42
常见问题
其它参考
Byte、KB、MB、GB、
TB、PB、EB、ZB、
YB、BB、NB、DB、CB、
XB
TB、PB、EB、ZB、
YB、BB、NB、DB、CB、
XB
1B (Byte字节);
1KB(Kilobyte) = 2^10 B = 1024 B;
1MB(Megabyte) = 2^10 KB = 1024 KB = 2^20 B;
1GB(Gigabyte) = 2^10 MB = 1024 MB = 2^30 B;
1TB(Terabyte) = 2^10 GB = 1024 GB = 2^40 B;
1PB(Petabyte) = 2^10 TB = 1024 TB = 2^50 B;
1EB(Exabyte) = 2^10 PB = 1024 PB = 2^60 B;
1ZB(Zettabyte) = 2^10 EB = 1024 EB = 2^70 B;
1YB(YottaByte) = 2^10 ZB = 1024 ZB = 2^80 B;
1BB(Brontobyte) = 2^10 YB = 1024 YB = 2^90 B;
1NB(NonaByte) = 2^10 BB = 1024 BB = 2^100 B;
1DB(DoggaByte) = 2^10 NB = 1024 NB = 2^110 B;
1CB (Corydonbyte) = 2^10 DB = 1024 DB = 2^120 B;
1XB (Xerobyte) = 2^10 CB = 1024 CB = 2^130 B;
1KB(Kilobyte) = 2^10 B = 1024 B;
1MB(Megabyte) = 2^10 KB = 1024 KB = 2^20 B;
1GB(Gigabyte) = 2^10 MB = 1024 MB = 2^30 B;
1TB(Terabyte) = 2^10 GB = 1024 GB = 2^40 B;
1PB(Petabyte) = 2^10 TB = 1024 TB = 2^50 B;
1EB(Exabyte) = 2^10 PB = 1024 PB = 2^60 B;
1ZB(Zettabyte) = 2^10 EB = 1024 EB = 2^70 B;
1YB(YottaByte) = 2^10 ZB = 1024 ZB = 2^80 B;
1BB(Brontobyte) = 2^10 YB = 1024 YB = 2^90 B;
1NB(NonaByte) = 2^10 BB = 1024 BB = 2^100 B;
1DB(DoggaByte) = 2^10 NB = 1024 NB = 2^110 B;
1CB (Corydonbyte) = 2^10 DB = 1024 DB = 2^120 B;
1XB (Xerobyte) = 2^10 CB = 1024 CB = 2^130 B;