1. To connect to MySQL, copy the MySQL JDBC driver jar into SPARK_HOME/jars:
cp mysql-connector-java-5.1.43.jar /opt/cloudera/parcels/SPARK2/lib/spark2/jars/
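With the driver jar in place, Spark can read MySQL tables over JDBC. Below is a minimal PySpark sketch; the host, database, table, user, and password are placeholders, not values from this setup:

from pyspark.sql import SparkSession

# Minimal check: read a MySQL table over JDBC (connection details are placeholders)
spark = SparkSession.builder.appName("mysql-jdbc-check").getOrCreate()
df = (spark.read.format("jdbc")
      .option("url", "jdbc:mysql://mysql-host:3306/testdb")
      .option("driver", "com.mysql.jdbc.Driver")  # driver class shipped with connector 5.1.x
      .option("dbtable", "some_table")
      .option("user", "spark")
      .option("password", "secret")
      .load())
df.show(5)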
2. In Cloudera Manager, add the following to the Spark 2 Client Advanced Configuration Snippet (Safety Valve) for spark2-conf/spark-env.sh:
# Load the HBase jars
for loop in `ls /opt/cloudera/parcels/CDH/jars/hbase-*.jar`;do
export SPARK_DIST_CLASSPATH=${loop}:${SPARK_DIST_CLASSPATH}
done
# Load the jar that provides the org.apache.spark.examples.pythonconverters... classes
for loop in `ls /opt/cloudera/parcels/CDH/lib/spark/lib/spark-examples-*.jar`;do
export SPARK_DIST_CLASSPATH=${loop}:${SPARK_DIST_CLASSPATH}
done
# Load the Hive-HBase integration jar
for loop in `ls /opt/cloudera/parcels/CDH/lib/hive/lib/hive-hbase-handler-*.jar`;do
export SPARK_DIST_CLASSPATH=${loop}:${SPARK_DIST_CLASSPATH}
done
# Add the HBase configuration to Spark 2's environment
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR}:/etc/hbase/conf/
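With the jars and HBase configuration above on the classpath, PySpark can read an HBase table through TableInputFormat, using the converter classes shipped in the spark-examples jar loaded above. A minimal sketch, assuming an existing HBase table named test_table (a placeholder):

from pyspark import SparkContext

sc = SparkContext(appName="hbase-read-check")

# Table name is a placeholder; the converters come from the spark-examples jar
conf = {"hbase.mapreduce.inputtable": "test_table"}
rdd = sc.newAPIHadoopRDD(
    "org.apache.hadoop.hbase.mapreduce.TableInputFormat",
    "org.apache.hadoop.hbase.io.ImmutableBytesWritable",
    "org.apache.hadoop.hbase.client.Result",
    keyConverter="org.apache.spark.examples.pythonconverters.ImmutableBytesWritableToStringConverter",
    valueConverter="org.apache.spark.examples.pythonconverters.HBaseResultToStringConverter",
    conf=conf)
print(rdd.take(1))  # one (rowkey, row-as-string) pair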
3. Install the Python environment
bash Anaconda3-2019.10-Linux-x86_64.sh
Configure the environment variables so PySpark uses the Anaconda interpreter:
export PYSPARK_PYTHON=/opt/apps/anaconda3/bin/python3
export PYSPARK_DRIVER_PYTHON=/opt/apps/anaconda3/bin/python3
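To confirm the setting took effect, start a pyspark2 shell (where sc is predefined) and check which interpreter the driver and the executors use; both should print the Anaconda path configured above:

import sys

# Driver-side interpreter
print(sys.executable)
# Executor-side interpreter: run a trivial task and report its interpreter
print(sc.parallelize([0], 1).map(lambda _: __import__("sys").executable).first())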