1. Change the hostname and hosts file
Install vim:
sudo apt-get install vim
sudo vim /etc/hostname
#change the contents of hostname to master
sudo vim /etc/hosts
#comment out the localhost entry and add an entry mapping master to its IP
#127.0.0.1 localhost
192.168.1.21 master
A reboot is required for the change to take effect.
Note: if you are running in a VM, change the VM's network setting to Bridged mode.
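After rebooting, a quick check that the new name and mapping took effect (a sketch; adjust the IP to your machine):
hostname          #should print master
ping -c 1 master  #should resolve to 192.168.1.21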

2. Install Java
First remove the OpenJDK that ships with Ubuntu:
sudo apt-get remove openjdk*
Download jdk-8u211-linux-x64.tar.gz.
I copied the file from Windows 10 into the VM; it sits in the home directory /home/<username>.
Extract it and move it to /usr/local/java:
tar -zxvf jdk-8u211-linux-x64.tar.gz
sudo mv jdk1.8.0_211 /usr/local/java  #the tarball extracts to a directory named jdk1.8.0_211
Configure the Java environment variables:
sudo vim /etc/profile
#add the following lines
export JAVA_HOME=/usr/local/java
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=$JAVA_HOME/bin:$PATH
#apply the environment variables
source /etc/profile
Verify Java:
java -version
java
javac
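A quick sanity check (a sketch; exact output depends on your install, but the version should read 1.8.0_211):
echo $JAVA_HOME    #expected: /usr/local/java
java -version      #should report java version "1.8.0_211"
which javac        #expected: /usr/local/java/bin/javac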
3. Enable SSH login
sudo apt-get install openssh-server #install the SSH server
ssh localhost #log in via SSH; type yes at the first login
exit #log out of the ssh localhost session
cd ~/.ssh/ #if you cannot enter this directory, run ssh localhost once
ssh-keygen -t rsa #generate a key pair
After running ssh-keygen -t rsa, press Enter three times to accept the defaults.
Append the public key to the authorized_keys file. On the Hadoop server, go into the ~/.ssh directory and merge it:
cd .ssh/
cat id_rsa.pub>> authorized_keys
#if authorized_keys does not exist, create it first: touch ~/.ssh/authorized_keys
#test passwordless login with the following commands
ssh localhost
ssh master
ssh 192.168.1.21
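If ssh still prompts for a password after this, the usual cause is the permissions on ~/.ssh; a hedged fix, run as the login user:
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys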
4. Install Hadoop 2.8.5
Extract it and move it to /usr/local/hadoop:
tar -zxvf hadoop-2.8.5.tar.gz
#hadoop-2.8.5.tar.gz is in /home/<username>, and it is extracted there as well
sudo mv hadoop-2.8.5 /usr/local/hadoop
Create the following directories under /usr/local/hadoop:
mkdir tmp
mkdir hdfs
mkdir hdfs/data
mkdir hdfs/name
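Equivalently, the directories can be created in one go (a sketch; assumes /usr/local/hadoop is writable by your user, otherwise prefix with sudo):
mkdir -p /usr/local/hadoop/{tmp,hdfs/name,hdfs/data}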
5. Configure the files under /usr/local/hadoop/etc/hadoop
1). Configure core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.1.21:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/hadoop/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
</configuration>
2). Configure hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>192.168.1.21:9001</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
3). Copy etc/hadoop/mapred-site.xml.template to etc/hadoop/mapred-site.xml, then edit it:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>192.168.1.21:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>192.168.1.21:19888</value>
</property>
</configuration>
4). Configure etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
4096 and 2048 (in MB) are the memory sizes allotted for MapReduce jobs.
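Before settling on these values, check how much RAM the node actually has; yarn.nodemanager.resource.memory-mb must not exceed it:
free -m   #shows total and available memory in MB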
5). Configure hadoop-env.sh under /usr/local/hadoop/etc/hadoop/
Set JAVA_HOME to the absolute path:
export JAVA_HOME=/usr/local/java
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_HOME}/lib/native
In yarn-env.sh, likewise set JAVA_HOME to the absolute path:
export JAVA_HOME=/usr/local/java
Note: be sure to set the contents of the slaves file to the IP or to master (the value in hosts); otherwise MapReduce jobs invoked by Hive queries will hang.
#command
vim etc/hadoop/slaves
#enter
master
6). Configure the Hadoop environment variables (the block below also includes environment variables for several other components)
sudo vim /etc/profile
export JAVA_HOME=/usr/local/java
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export HCAT_HOME=$HIVE_HOME/hcatalog
export HIVE_DEPENDENCY=/usr/local/hive/conf:/usr/local/hive/lib/*:/usr/local/hive/hcatalog/share/hcatalog/hive-hcatalog-pig-adapter-2.3.5.jar:/usr/local/hive/hcatalog/share/hcatalog/hive-hcatalog-core-2.3.5.jar:/usr/local/hive/hcatalog/share/hcatalog/hive-hcatalog-server-extensions-2.3.5.jar:/usr/local/hive/hcatalog/share/hcatalog/hive-hcatalog-streaming-2.3.5.jar:/usr/local/hive/lib/hive-exec-2.3.5.jar
export ZOOKEEPER_HOME=/usr/local/zookeeper/
export KAFKA_HOME=/usr/local/kafka
export HBASE_HOME=/usr/local/hbase
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$HCAT_HOME/bin:$HBASE_HOME/bin:$ZOOKEEPER_HOME:$KAFKA_HOME
export SCALA_HOME=/usr/local/scala
export PATH=.:${JAVA_HOME}/bin:${SCALA_HOME}/bin:$PATH
export SPARK_HOME=/usr/local/spark
export PATH=.:${JAVA_HOME}/bin:${SCALA_HOME}/bin:${SPARK_HOME}/bin:$PATH
export KYLIN_HOME=/usr/local/kylin
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$HCAT_HOME/bin:$HBASE_HOME/bin:$ZOOKEEPER_HOME:$KAFKA_HOME:$KYLIN_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:${HIVE_HOME}/lib:$HBASE_HOME/lib:$KYLIN_HOME/lib
Apply the environment:
source /etc/profile
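A quick check that the Hadoop variables took effect (output is indicative):
echo $HADOOP_HOME   #expected: /usr/local/hadoop
hadoop version      #should report Hadoop 2.8.5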
6. Start Hadoop
cd /usr/local/hadoop
1). Format the NameNode
bin/hdfs namenode -format
2). Start YARN, HDFS, and the JobHistory server
sbin/start-all.sh
# Kylin needs to connect to the JobHistory server
sbin/mr-jobhistory-daemon.sh start historyserver
3). jps #check which daemons started
#output:
8402 SecondaryNameNode
8692 NodeManager
9014 Jps
8569 ResourceManager
8234 DataNode
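NameNode should normally appear in this list as well; if any daemon is missing, check its log under /usr/local/hadoop/logs (a sketch; the exact file name includes your username and hostname):
ls /usr/local/hadoop/logs/
tail -n 50 /usr/local/hadoop/logs/hadoop-*-namenode-*.log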
7. Install MySQL
sudo apt-get install mysql-server
However, installing MySQL on Ubuntu 18 does not prompt you to set a root password.
Command:
sudo vim /etc/mysql/debian.cnf

This file contains MySQL's default username and password.
Most importantly, the default username is not root but debian-sys-maint.
Run mysql -u debian-sys-maint -p; when prompted, enter that generated password to log into MySQL.
In MySQL 5.7 the password column has been removed and replaced by authentication_string. Since my version is 5.7, change the root password like this:
use mysql;
update user set authentication_string=PASSWORD("YourNewPassword") where User='root';
update user set plugin="mysql_native_password";
flush privileges;
exit;
Restart MySQL:
service mysql restart
Enable remote login for the root user:
sudo mysql -u root -p
use mysql;
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'YourPassword';
flush privileges;
select user,authentication_string,Host from user; #check root

Use Navicat to verify whether remote login works.
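If Navicat is not at hand, the same check can be made from another machine with the mysql client (a sketch; adjust the IP and enter the root password when prompted):
mysql -h 192.168.1.21 -u root -p -e "select 1;"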
If root cannot log in remotely:
1. Check whether port 3306 is listening properly
netstat -an | grep 3306
tcp 0 0 127.0.0.1:3306 0.0.0.0:* LISTEN
Note: port 3306 is currently bound to the local address 127.0.0.1.
2. Edit the MySQL configuration file (note the path; it differs from many older guides online)
vim /etc/mysql/mysql.conf.d/mysqld.cnf
Find the line
bind-address = 127.0.0.1
and comment it out by adding a leading #.
3. Restart MySQL
service mysql restart
sudo mysql -u root -p #log into mysql
create database hive; #creating this hive database is probably unnecessary (the metastore database is created later by schematool)
8. Download apache-hive-2.3.5-bin.tar.gz
1). Extract it and move it to /usr/local/hive
Configure Hive to use Hadoop HDFS via hive-site.xml.
Go into $HIVE_HOME/conf and copy hive-default.xml.template to hive-site.xml.
Modify the database settings in it:
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.1.21:3306/metastore?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>Username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>YourDatabasePassword</value>
<description>password to use against metastore database</description>
</property>
#search for hive.metastore.schema.verification and change its value to false:
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
2). hive-site.xml references some HDFS directories, so create them and set their permissions.
#since Hadoop 2.0, hdfs dfs can be used instead of hadoop fs:
hdfs dfs -mkdir -p /user/hive/warehouse
hadoop fs -chmod -R 777 /user
hdfs dfs -mkdir -p /tmp/hive
hadoop fs -chmod -R 777 /tmp
3). Create hive-env.sh under $HIVE_HOME/conf
#just copy the template
cp hive-env.sh.template hive-env.sh
vim hive-env.sh
#add the following
export HADOOP_HOME=/usr/local/hadoop
export HIVE_CONF_DIR=/usr/local/hive/conf
export HIVE_AUX_JARS_PATH=/usr/local/hive/lib
4). Copy the MySQL JDBC driver into Hive's lib directory
Link: https://pan.baidu.com/s/12uF9AoHX6B5I1k0RuwdEQQ
Extraction code: pg31
Download and extract it, then copy mysql-connector-java-5.1.47-bin.jar:
cp mysql-connector-java-5.1.47-bin.jar /usr/local/hive/lib
5). Initialize the MySQL metastore database
cd $HIVE_HOME/bin
schematool -initSchema -dbType mysql
#output like the following appears; then check in MySQL whether the metastore database was created
......................................
.............................................
Initialization script completed
schemaTool completed
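To confirm that the metastore database and its tables really were created (a sketch; enter the root password when prompted):
mysql -u root -p -e "show databases like 'metastore'; use metastore; show tables;"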
9. Start Hive
#start the metastore service
nohup hive --service metastore >> ~/metastore.log 2>&1 & ##hive metastore
#start the HiveServer2 service
nohup hive --service hiveserver2 >> ~/hiveserver2.log 2>&1 & ##hiveserver2; required for JDBC connections
netstat -lnp|grep 9083
tcp 0 0 0.0.0.0:9083 0.0.0.0:* LISTEN 11918/java
netstat -lnp|grep 10000
tcp 0 0 0.0.0.0:10000 0.0.0.0:* LISTEN 12011/java
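With HiveServer2 listening on port 10000, the JDBC path can be verified with beeline, which ships with Hive (a sketch; adjust the host and user name):
$HIVE_HOME/bin/beeline -u jdbc:hive2://192.168.1.21:10000 -n root -e "show databases;"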
#start the Hive interactive shell
cd /usr/local/hive/bin
./hive
which: no hbase in (/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/java/jdk1.8.0_151/bin:/usr/java/jdk1.8.0_151/bin:/hadoop//bin:/hadoop//sbin:/root/bin:/usr/java/jdk1.8.0_151/bin:/usr/java/jdk1.8.0_151/bin:/hadoop//bin:/hadoop//sbin:/hadoop/hive/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/hadoop/hive/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Logging initialized using configuration in jar:file:/hadoop/hive/lib/hive-common-2.3.3.jar!/hive-log4j2.properties Async: true
Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
hive> show functions;
OK
!
!=
$sum0
%
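A minimal smoke test inside the hive shell (a sketch; smoke_test is just a throwaway table name) to confirm the metastore and the HDFS warehouse directory both work:
hive> create table smoke_test(id int);
hive> show tables;
hive> drop table smoke_test;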
10. Hive errors
If an SLF4J binding conflict error occurs, it is because Hadoop's SLF4J binding conflicts with Hive's; removing one of them fixes it.
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/hive/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
rm -rf /usr/local/hive/lib/log4j-slf4j-impl-2.6.2.jar
If the following appears:
Caused by: java.net.URISyntaxException: Relative path in absolute URI: ${system:java.io.tmpdir%7D/$%7Bsystem:user.name%7D
add the following to hive-site.xml:
<property>
<name>system:java.io.tmpdir</name>
<value>/tmp/hive/java</value>
</property>
<property>
<name>system:user.name</name>
<value>${user.name}</value>
</property>
11. Web UIs
192.168.1.21:50070 #HDFS NameNode UI
192.168.1.21:8088 #YARN ResourceManager UI (per yarn.resourcemanager.webapp.address above)
12. If you need to reformat HDFS
1). Delete the directories specified in hdfs-site.xml and core-site.xml:
/usr/local/hadoop/hdfs/name
/usr/local/hadoop/hdfs/data
/usr/local/hadoop/tmp
2). After starting Hadoop, recreate the HDFS directories
#since Hadoop 2.0, hdfs dfs can be used instead of hadoop fs:
hdfs dfs -mkdir -p /user/hive/warehouse
hadoop fs -chmod -R 777 /user/hive/warehouse
hdfs dfs -mkdir -p /tmp/hive
hadoop fs -chmod -R 777 /tmp/hive
3). cd to /usr/local/hadoop again and format the NameNode:
bin/hdfs namenode -format
4). Re-initialize the MySQL metastore database
cd $HIVE_HOME/bin
schematool -initSchema -dbType mysql
#output like the following appears; then check in MySQL whether the metastore database was created
......................................
.............................................
Initialization script completed
schemaTool completed
5). If Hive reports the error
SemanticException org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
start the metastore service:
hive --service metastore
6). Hadoop troubleshooting 1 - WARN util.NativeCodeLoader: Unable to load native-hadoop library for your plat...
Link: https://blog.csdn.net/u010003835/article/details/81127984
This is an environment configuration problem.
Add the following to /etc/profile:
export HADOOP_HOME=/usr/local/hadoop   #adjust to your installation path
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
Finally, remember to apply the configuration: source /etc/profile
Also add the same settings to the end of hadoop-env.sh, as sketched below.
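One way to append those two settings to hadoop-env.sh (a sketch; the lines are appended literally, matching how hadoop-env.sh already uses ${HADOOP_HOME} above):
cat >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh <<'EOF'
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_HOME}/lib/native
export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib:${HADOOP_COMMON_LIB_NATIVE_DIR}"
EOF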