环境:CDH6.3.2,Spark-version:2.4.0+cdh6.3.2
配置文件:spark on yarn /etc/spark/conf/*
(core-site.xml,hdfs-site.xml,mapred-site.xml,yarn-site.xml)置于项目resource目录下
with hive 需要将hive-site.xml置于resource目录下
pom.xml
<!-- Component versions: all pinned to the CDH 6.3.2 parcel so the client
     classpath matches what runs on the cluster; keep them in lockstep. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<encoding>UTF-8</encoding>
<scala.version>2.11.12</scala.version>
<hadoop.version>3.0.0-cdh6.3.2</hadoop.version>
<hive.version>2.1.1-cdh6.3.2</hive.version>
<kafka.version>2.2.1-cdh6.3.2</kafka.version>
<spark.version>2.4.0-cdh6.3.2</spark.version>
</properties>
<dependencies>
<!-- NOTE(review): slf4j-simple at compile scope will sit on the classpath
     next to Spark's own SLF4J binding and may trigger multiple-binding
     warnings at runtime — confirm this is intended. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!-- Spark modules: _2.11 suffix must match scala.version (2.11.x). -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Required to submit with a "yarn" master directly from the IDE. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-yarn_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Hive integration; also needs hive-site.xml on the resource path. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
</dependencies>
WordCound
object WordCound {
  /**
   * Entry point: builds a SparkConf for submitting this job to YARN in
   * client mode from outside the cluster (e.g. from an IDE).
   *
   * NOTE(review): `conf` is built but never used to create a SparkContext /
   * SparkSession, so running this main currently does nothing beyond setting
   * a system property — presumably the job-driving code is still to be added.
   * (The object name "WordCound" also looks like a typo for "WordCount";
   * kept as-is so external callers are not broken.)
   */
  def main(args: Array[String]): Unit = {
    // User the job is submitted as on the cluster.
    System.setProperty("HADOOP_USER_NAME", "root")
    val conf = new SparkConf().setAppName("WordCount")
      // Spark 2.x removed the "yarn-client" master URL (SparkContext throws
      // "yarn-client master URL is no longer supported"); the supported form
      // is master "yarn" plus an explicit client deploy mode.
      .setMaster("yarn")
      .set("spark.submit.deployMode", "client")
      // If the driver is not on the same LAN as the cluster, set the driver host:
      // .set("spark.driver.host", "10.40.0.54")
      // Archive form of the dependency upload — same purpose as spark.yarn.jars,
      // just a different file layout; set only one of the two:
      // .set("spark.yarn.archive", "")
      // Network timeout — avoids errors like "This timeout is controlled by
      // spark.executor.heartbeatInterval".
      .set("spark.network.timeout", "600s")
      // Executor-side spark-on-yarn dependencies. May be a node-local path
      // ("local:"), but then the path must be identical on every node.
      // Either upload /opt/cloudera/parcels/CDH/jars/ to HDFS (full set), or
      // upload /opt/cloudera/parcels/CDH/lib/spark/jars/ (when running Hive
      // SQL, additionally upload hive-exec-2.1.1-cdh6.3.2.jar).
      .set("spark.yarn.jars", "hdfs://cdh001:8020/spark-yarn/jars/*.jar")
      // Use Kryo serialization.
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      // Jar containing the driver main class, shipped to the executors.
      .setJars(List("*/*/kafka_test/target/kafka_test-1.0-SNAPSHOT.jar"))
  }
}