cd /opt/spark-2.1.0-bin-hadoop2.7/conf
cp slaves.template slaves
cp spark-env.sh.template spark-env.sh
vi slaves    # list the worker hostnames, one per line:
spark1
spark2
spark3
vi spark-env.sh
export SPARK_MASTER_IP=spark1      # master setting
export SPARK_MASTER_PORT=7077      # master setting
export SPARK_WORKER_CORES=1        # worker setting
export SPARK_WORKER_INSTANCES=1    # worker setting
export SPARK_WORKER_MEMORY=512m    # maximum JVM memory a worker may use
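A quick sanity check of both files, still from the conf directory:
cat slaves                          # should print spark1, spark2, spark3
grep "^export SPARK_" spark-env.sh  # should show the five settings above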
echo $JAVA_HOME    # confirm where the JDK lives
cd /opt/spark-2.1.0-bin-hadoop2.7/sbin
vi spark-config.sh
export JAVA_HOME=/usr/local/java1.8/jdk1.8.0_161    # hard-code it so the sbin scripts can find Java over ssh
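Before moving on, it is worth confirming that this path really points at a JDK; a minimal check, assuming the path above:
ls /usr/local/java1.8/jdk1.8.0_161/bin/java
/usr/local/java1.8/jdk1.8.0_161/bin/java -version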
# from /opt, push the whole installation to the other nodes:
scp -r spark-2.1.0-bin-hadoop2.7 root@spark2:/opt
scp -r spark-2.1.0-bin-hadoop2.7 root@spark3:/opt
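To confirm the copies landed, a quick check over ssh (assuming passwordless ssh between the nodes is already set up):
ssh root@spark2 "ls /opt | grep spark"
ssh root@spark3 "ls /opt | grep spark"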
chown -R root:root spark-2.1.0-bin-hadoop2.7
sbin/start-all.sh
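One way to verify the daemons came up, assuming jps is on the PATH on every node:
jps                    # on spark1: expect Master (plus a Worker, since spark1 is in slaves)
ssh root@spark2 jps    # expect Worker
ssh root@spark3 jps    # expect Worker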
Visit 192.168.4.31:8080 to see the web UI.
Port 7077 (not 8080) is the one jobs are submitted to.
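If you only have a terminal, a rough check that the UI is up (the address is this cluster's master IP):
curl -s http://192.168.4.31:8080 | grep -o "<title>[^<]*</title>"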
Run an example:
In standalone mode, run:
./bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://spark1:7077 \
  --executor-memory 512m --total-executor-cores 1 \
  /opt/spark-2.1.0-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.1.0.jar 1000
SparkPi estimates π by random sampling; the trailing 1000 is the number of slices (tasks) to run.
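The result line is easy to miss in the log output; one way to filter for it (same command, fewer slices, stderr discarded):
./bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://spark1:7077 --executor-memory 512m --total-executor-cores 1 \
  /opt/spark-2.1.0-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.1.0.jar 100 \
  2>/dev/null | grep "Pi is roughly"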
If you only need standalone mode, the configuration above is enough; for Spark on YARN, the steps below are also required.
Spark on YARN needs HADOOP_CONF_DIR to be set, either in spark-env.sh or in a profile file such as /etc/profile.
vim ~/.bash_profile
export HADOOP_CONF_DIR=/opt/hadoop-2.7.1/etc/hadoop
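A quick way to confirm HADOOP_CONF_DIR points at a real Hadoop config directory:
ls /opt/hadoop-2.7.1/etc/hadoop/core-site.xml /opt/hadoop-2.7.1/etc/hadoop/yarn-site.xml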
scp ~/.bash_profile root@spark2:~/
scp ~/.bash_profile root@spark3:~/
source ~/.bash_profile    # run this on each node
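To check that the variable made it to the other nodes (the explicit source is needed because a non-interactive ssh shell does not read ~/.bash_profile):
ssh root@spark2 'source ~/.bash_profile; echo $HADOOP_CONF_DIR'
ssh root@spark3 'source ~/.bash_profile; echo $HADOOP_CONF_DIR'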
sbin/stop-all.sh     # stop Spark
start-all.sh         # bring up Hadoop (HDFS + YARN) if it is not already running
sbin/start-all.sh    # start Spark again
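With YARN now running, a quick check that the NodeManagers registered (assumes Hadoop's bin directory is on the PATH):
yarn node -list    # expect one entry per NodeManager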
In YARN client mode, run (--master yarn --deploy-mode client replaces the deprecated yarn-client spelling):
./bin/spark-submit --class org.apache.spark.examples.SparkPi \
  --master yarn --deploy-mode client \
  --executor-memory 512M --num-executors 1 \
  /opt/spark-2.1.0-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.1.0.jar 10
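If the submit succeeded, the application also shows up in YARN's history:
yarn application -list -appStates FINISHED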
Note that Spark on YARN depends on Hadoop, and Hadoop here depends on ZooKeeper, so they must be started in that order; see the sketch below.
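For reference, the full startup order, sketched with the stock ZooKeeper/Hadoop/Spark scripts (assuming they are on the PATH on the relevant nodes):
zkServer.sh start                                   # on every ZooKeeper node
start-dfs.sh                                        # on the HDFS master
start-yarn.sh                                       # on the YARN master
/opt/spark-2.1.0-bin-hadoop2.7/sbin/start-all.sh    # finally, Spark on spark1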