1、下载二进制安装包
wget https://archive.apache.org/dist/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz
tar zxvf spark-2.3.0-bin-hadoop2.7.tgz
ln -s spark-2.3.0-bin-hadoop2.7 spark
wget https://downloads.lightbend.com/scala/2.11.12/scala-2.11.12.tgz
tar zxvf scala-2.11.12.tgz
sudo mv scala-2.11.12 /opt
sudo ln -s /opt/scala-2.11.12 /opt/scala
2、添加环境变量
在 ~/.zshrc 或者 ~/.bashrc 中添加
export SPARK_HOME=$HOME/spark
export SCALA_HOME=/opt/scala
$SPARK_HOME/conf/spark-env.sh 添加
export SCALA_HOME=/opt/scala
export JAVA_HOME=/usr/java/jdk1.8.0_144
export SPARK_MASTER_HOST=172.16.7.87
export SPARK_WORKER_MEMORY=1g
export HADOOP_HOME=$HOME/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HOME/hadoop/etc/hadoop
export SPARK_WORKER_MEMORY=1g
export SPARK_DRIVER_MEMORY=1g
export SPARK_HOME=$HOME/spark
export SPARK_LIBRARY_PATH=$SPARK_HOME/lib
export SPARK_MASTER_WEBUI_PORT=8080
export SPARK_WORKER_DIR=$SPARK_HOME/work
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_PORT=7078
export SPARK_LOG_DIR=$SPARK_HOME/log
export SPARK_PID_DIR="$HOME/spark-without-hive/run"
export SPARK_DIST_CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath)
$SPARK_HOME/conf/spark-defaults.conf 添加
spark.master yarn
spark.home /home/mark/spark
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hbase-01:9000/spark/logs
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.executor.memory 1g
spark.driver.memory 1g
spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
3、添加必要的jar包
cp $HADOOP_HOME/share/hadoop/yarn/lib/jersey-core-1.9.jar $SPARK_HOME/jars
cp $HADOOP_HOME/share/hadoop/yarn/lib/jersey-client-1.9.jar $SPARK_HOME/jars
rm $SPARK_HOME/jars/jersey-client-2.22.2.jar
wget -P $SPARK_HOME/jars https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.40/mysql-connector-java-5.1.40.jar
如果安装了hive,那么ln -s $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/hive-site.xml
建立一个文件的软链接
4、启动spark-sql
敲几行测一下
create table u_t (foo int);
insert into table u_t values (2);
select foo from u_t;