Package location
https://pan.baidu.com/s/1pLFVWkr
Documentation
http://www.mashibing.com/hadoop_install.html
http://www.mashibing.com/hadoop_test.html
http://www.mashibing.com/hdfs_java.html
http://www.mashibing.com/yarn_test.html
http://www.mashibing.com/map_reduce.html
IP address assignments
master: 192.168.56.101
slave1: 192.168.56.102
slave2: 192.168.56.103
slave3: 192.168.56.104
Upload the installation files (the JDK rpm and the Hadoop tarball) to the machine.
Install the JDK
#rpm -ivh jdk-8u91-linux-x64.rpm
#cd /usr
#ls
#java -version
#cd
Unpack Hadoop
#tar -xvf hadoop-2.7.3.tar.gz
#mv hadoop-2.7.3 hadoop
#mv hadoop /usr/local
Configuration
#vi /usr/local/hadoop/etc/hadoop/hadoop-env.sh
Change export JAVA_HOME=${JAVA_HOME} to export JAVA_HOME=/usr/java/default, then save and quit with :wq.
#vi /etc/profile
//add at the bottom:
export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
#source /etc/profile
Shut down and clone the machine three times (slave1, slave2, slave3).
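Each clone comes up with master's hostname and IP, so give every clone its own identity before continuing. A minimal sketch, assuming a systemd distribution (the guide already uses systemctl) and that the host-only NIC config file is named ifcfg-enp0s8 (a hypothetical name; adjust to your interface):
#hostnamectl set-hostname slave1
#vi /etc/sysconfig/network-scripts/ifcfg-enp0s8 //set IPADDR=192.168.56.102
Repeat on slave2 and slave3 with their own names and addresses.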
Disable the firewall (on every machine).
#systemctl stop firewalld
#systemctl disable firewalld
Bring up HDFS
#vi /usr/local/hadoop/etc/hadoop/core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
#vi /etc/hosts //on all four machines
192.168.56.101 master
192.168.56.102 slave1
192.168.56.103 slave2
192.168.56.104 slave3
#hdfs namenode -format //on master
master:
#hadoop-daemon.sh start namenode
#jps
slaves:
#hadoop-daemon.sh start datanode
#jps
Check the cluster status
#hdfs dfsadmin -report | more
Browse the NameNode web UI:
192.168.56.101:50070
Centralized cluster management.
Stop the cluster.
master:
#hadoop-daemon.sh stop namenode
#jps
slaves:
#hadoop-daemon.sh stop datanode
#jps
master:
#vi /usr/local/hadoop/etc/hadoop/slaves
slave1
slave2
slave3
#start-dfs.sh
slaves:
#jps
Set up passwordless SSH login (on master)
#ssh-keygen -t rsa
#cd .ssh
#ssh-copy-id slave1
#ssh-copy-id slave2
#ssh-copy-id slave3
#ssh-copy-id master
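To confirm the keys took effect, an ssh from master to any node should now log in without a password prompt:
#ssh slave1
#exit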
#stop-dfs.sh
#start-dfs.sh
Basic file operations (create, delete, update, query)
#hadoop fs -ls /
#cd /usr/local
#ll
#hadoop fs -put ./hadoop-2.7.3.tar.gz /
#hadoop fs -ls /
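For reference, a few more everyday HDFS shell operations that round out the create/delete/query theme (the paths are just examples):
#hadoop fs -get /hadoop-2.7.3.tar.gz /tmp/ //download a file
#hadoop fs -rm /hadoop-2.7.3.tar.gz //delete it (skip this if you still want the file around)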
Change the default replication factor
#cd hadoop/etc/hadoop
#vi hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
#cd /usr/local
#hadoop fs -put ./jdk-8u91-linux-x64.rpm /
#hadoop fs -ls /
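Note that dfs.replication only affects files written after the change; anything already in HDFS keeps its old replication factor unless you change it explicitly:
#hadoop fs -setrep 2 /hadoop-2.7.3.tar.gz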
Change the heartbeat recheck interval (value is in milliseconds)
#cd hadoop/etc/hadoop
#vi hdfs-site.xml
<property>
<name>dfs.namenode.heartbeat.recheck-interval</name>
<value>10000</value>
</property>
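For context: the NameNode declares a DataNode dead after 2 × dfs.namenode.heartbeat.recheck-interval + 10 × dfs.heartbeat.interval. With the default 3 s heartbeat, a 10000 ms recheck interval cuts the timeout from the default 10 min 30 s down to 2 × 10 s + 10 × 3 s = 50 s, so a killed DataNode shows up as dead quickly enough to demo.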
#stop-dfs.sh
#start-dfs.sh
Change hadoop.tmp.dir (edit the configuration on all machines)
/usr/local/hadoop/etc/hadoop
#vi /usr/local/hadoop/etc/hadoop/core-site.xml
<property>
<name>hadoop.tmp.dir</name>
<value>/var/hadoop</value>
</property>
#hdfs namenode -format //on master; required after relocating hadoop.tmp.dir, wipes existing HDFS data
#cd
#vi hello.txt
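Any few lines of text will do as content, for example:
hello world
hello hdfs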
#hadoop fs -put ./hello.txt /
#hadoop fs -ls /
Open Eclipse: File, New, Project, Java Project; name it HelloHDFS.
Pass -DHADOOP_USER_NAME=root as a VM argument (Run Configurations, Arguments) so the client acts as root on HDFS.
Add the jars the code needs to the build path: all the jars under Hadoop's lib directories.
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HelloHDFS {
public static void main(String[] args) throws Exception {
// Plain java.net.URL reads http:// out of the box:
URL url = new URL("http://www.baidu.com");
InputStream in = url.openStream();
IOUtils.copyBytes(in, System.out, 4096, true);
// For hdfs:// URLs, Hadoop's stream handler factory must be registered
// first (it can only be set once per JVM); without it, new URL(...)
// rejects the hdfs scheme:
URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
url = new URL("hdfs://192.168.56.101:9000/hello.txt");
in = url.openStream();
IOUtils.copyBytes(in, System.out, 4096, true);
// The standard client API is FileSystem:
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://192.168.56.101:9000");
FileSystem fileSystem = FileSystem.get(conf);
Create a directory
boolean success = fileSystem.mkdirs(new Path("/xuehuai"));
System.out.println(success);
master:
#hadoop fs -ls /
Check whether a file exists
success = fileSystem.exists(new Path("/hello.txt"));
System.out.println(success);
Delete a path (the second argument enables recursive deletion)
success = fileSystem.delete(new Path("/msb"),true);
System.out.println(success);
To let the remote client write regardless of its user, turn off HDFS permission checking (acceptable on a test cluster). master:
#cd /usr/local/hadoop/etc/hadoop
#vi hdfs-site.xml
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
#hadoop-daemon.sh stop namenode
#hadoop-daemon.sh start namenode
// Copy a local file into HDFS through an output stream (true = overwrite):
FSDataOutputStream out = fileSystem.create(new Path("/test.data"), true);
FileInputStream fis = new FileInputStream("d:/test1/hive-env.sh.template");
IOUtils.copyBytes(fis, out, 4096, true);
master:
#hadoop fs -ls /
#hadoop fs -text /test.data
#hadoop fs -rm /test.data
// The same upload done by hand with a byte buffer (fresh variable
// names so the snippet can coexist with the one above in main):
FSDataOutputStream out2 = fileSystem.create(new Path("/test.data"), true);
FileInputStream in2 = new FileInputStream("d:/test1/hive-env.sh.template");
byte[] buf = new byte[4096];
int len = in2.read(buf);
while (len != -1) {
out2.write(buf, 0, len);
len = in2.read(buf);
}
in2.close();
out2.close();
}
}
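To read a file back through the same API instead of java.net.URL, FileSystem.open returns a stream that IOUtils can copy from; a minimal sketch (these lines belong inside main, using the fileSystem object created above):
// Print /test.data to the console; FSDataInputStream extends InputStream.
InputStream back = fileSystem.open(new Path("/test.data"));
IOUtils.copyBytes(back, System.out, 4096, true);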
Configure YARN (the resource scheduler) and MapReduce (the compute engine)
Configure YARN (all machines)
#vi /usr/local/hadoop/etc/hadoop/yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
Configure MapReduce (all machines)
#cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml
#vi /usr/local/hadoop/etc/hadoop/mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
#start-yarn.sh //master
Browser: 192.168.56.101:8088 (ResourceManager web UI)
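If YARN came up, jps on master should now also show ResourceManager, and each slave a NodeManager:
#jps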
Upload a test file and run WordCount
#vi input.txt
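Any whitespace-separated text works as WordCount input, for example:
hello hadoop
hello yarn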
#hadoop fs -mkdir /input
#hadoop fs -put input.txt /input
#hadoop fs -ls /input
#find /usr/local/hadoop -name '*example*.jar'
#hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /input/input.txt /output
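When the job finishes, the result is written under /output; with the sample input above, hello would be counted twice:
#hadoop fs -ls /output
#hadoop fs -cat /output/part-r-00000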