Planning
server 192.168.1.10: kudu-master, spark-master
server 192.168.1.25: kudu-tserver (slave), spark-slave (worker)
---------------------
192.168.1.10: install the Kudu master
Upload the Kudu RPM packages (Kudu version 1.10.0).
yum -y install httpd createrepo ntp ntpdate
hostnamectl set-hostname kudumaster
vi /etc/hosts
192.168.1.10 kudumaster
192.168.1.25 kuduslave
mkdir /var/www/html/kudu
mv kudu-* /var/www/html/kudu/
cd /var/www/html/kudu
createrepo .
systemctl start httpd.service
systemctl enable httpd.service
vi /etc/ntp.conf
server 127.127.1.0 iburst
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
systemctl start ntpd.service
systemctl enable ntpd.service
vi /etc/yum.repos.d/kudu.repo
[kudu]
name=kudu
baseurl=http://192.168.1.10/kudu
enabled=1
gpgcheck=0
yum makecache
yum -y install kudu-master.x86_64
vi /etc/kudu/conf/master.gflagfile
--fromenv=rpc_bind_addresses
--fromenv=log_dir
--fs_wal_dir=/var/lib/kudu/master
--fs_data_dirs=/var/lib/kudu/master
--rpc_encryption=disabled
--rpc_authentication=disabled
--default_num_replicas=1
--builtin_ntp_servers=192.168.1.10
service kudu-master start
chkconfig kudu-master on
----------------------------
192.168.1.25: install the Kudu tablet server (tserver)
vi /etc/yum.repos.d/kudu.repo
[kudu]
name=kudu
baseurl=http://192.168.1.10/kudu
enabled=1
gpgcheck=0
yum -y install kudu-tserver.x86_64
hostnamectl set-hostname kuduslave
vi /etc/hosts
192.168.1.10 kudumaster
192.168.1.25 kuduslave
vi /etc/kudu/conf/tserver.gflagfile
--fromenv=rpc_bind_addresses
--fromenv=log_dir
--tserver_master_addrs=192.168.1.10:7051
--fs_wal_dir=/var/lib/kudu/tserver
--fs_data_dirs=/var/lib/kudu/tserver
--builtin_ntp_servers=192.168.1.10
vi /etc/ntp.conf
server 192.168.1.10 prefer
server 127.127.1.0 iburst
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
systemctl start ntpd.service
systemctl enable ntpd.service
service kudu-tserver start
chkconfig kudu-tserver on
----------------------
In a web browser, open 192.168.1.10:8051; if the tablet server is listed there, the installation succeeded.
In a web browser, open 192.168.1.25:8050; if the tablet server page loads, the installation succeeded.
----------------------
192.168.1.10: install Java, Maven, and Spark
tar -zxvf jdk-8u201-linux-x64.tar.gz
mv jdk1.8.0_201/ /opt/jdk
tar -zxvf spark-2.4.3-bin-hadoop2.7.tgz
mv spark-2.4.3-bin-hadoop2.7/ /opt/spark
tar -zxvf apache-maven-3.6.1-bin.tar.gz
mv apache-maven-3.6.1/ /opt/maven
vi ~/.bashrc
export JAVA_HOME=/opt/jdk
export SPARK_HOME=/opt/spark
export M2_HOME=/opt/maven
export PATH=$PATH:$JAVA_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin:$M2_HOME/bin
source ~/.bashrc
vi /opt/maven/conf/settings.xml    (add the mirror below inside the existing <mirrors> element)
<mirror>
  <id>alimaven</id>
  <name>aliyun maven</name>
  <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
  <mirrorOf>central</mirrorOf>
</mirror>
cd /opt/spark/conf/
mv spark-env.sh.template spark-env.sh
mv spark-defaults.conf.template spark-defaults.conf
vi spark-env.sh
SPARK_LOCAL_IP=192.168.1.10
SPARK_MASTER_HOST=192.168.1.10
start-master.sh
----------------------------
192.168.1.25: install Java and Spark
tar -zxvf jdk-8u201-linux-x64.tar.gz
mv jdk1.8.0_201/ /opt/jdk
tar -zxvf spark-2.4.3-bin-hadoop2.7.tgz
mv spark-2.4.3-bin-hadoop2.7/ /opt/spark
vi ~/.bashrc
export JAVA_HOME=/opt/jdk
export SPARK_HOME=/opt/spark
export PATH=$PATH:$JAVA_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
source ~/.bashrc
cd /opt/spark/conf/
mv spark-env.sh.template spark-env.sh
mv spark-defaults.conf.template spark-defaults.conf
vi spark-env.sh
SPARK_LOCAL_IP=192.168.1.25
SPARK_MASTER_HOST=192.168.1.10
start-slave.sh spark://192.168.1.10:7077
------------
In a web browser, open 192.168.1.10:8080; the page should show 1 worker. Installation succeeded.
----------------------------
192.168.1.10: build Kudu's Java code and install it into the local Maven repository
tar -zxvf apache-kudu-1.10.0.tar.gz
mkdir /var/www/html/gradle
Upload gradle-5.4.1-all.zip
mv ~/gradle-5.4.1-all.zip /var/www/html/gradle
vi ~/apache-kudu-1.10.0/java/gradle/wrapper/gradle-wrapper.properties
distributionUrl=http\://192.168.1.10/gradle/gradle-5.4.1-all.zip
vi ~/apache-kudu-1.10.0/java/build.gradle
repositories {
    maven { url 'http://maven.aliyun.com/nexus/content/repositories/central/' }
}
vi ~/apache-kudu-1.10.0/java/buildSrc/build.gradle
repositories {
    maven { url 'http://maven.aliyun.com/nexus/content/groups/public/' }
    maven { url "https://maven.aliyun.com/repository/spring-plugin" }
    maven { url "https://maven.aliyun.com/repository/gradle-plugin" }
    maven { url "https://maven.aliyun.com/repository/jcenter" }
}
vi ~/apache-kudu-1.10.0/java/kudu-jepsen/build.gradle
repositories {
    maven { url "https://maven.aliyun.com/repository/jcenter" }
}
mkdir ~/.gradle/
vi ~/.gradle/init.gradle
allprojects {
    repositories {
        maven { url 'http://maven.aliyun.com/nexus/content/repositories/central/' }
        maven { url 'http://maven.aliyun.com/nexus/content/groups/public/' }
        maven { url "https://maven.aliyun.com/repository/spring-plugin" }
        maven { url "https://maven.aliyun.com/repository/gradle-plugin" }
        maven { url "https://maven.aliyun.com/repository/jcenter" }
    }
}
cd ~/apache-kudu-1.10.0/java/
./gradlew install
---------------------
Create a new project: a program that creates a table in Kudu
mvn archetype:generate -DgroupId=com.packt.samples -DartifactId=createtable -Dversion=1.0.0 -DinteractiveMode=false -DarchetypeCatalog=internal
cd createtable
vi pom.xml
<dependency>
  <groupId>org.apache.kudu</groupId>
  <artifactId>kudu-client</artifactId>
  <version>1.10.0</version>
</dependency>
<build>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <version>3.6.1</version>
      <configuration>
        <source>1.8</source>
        <target>1.8</target>
      </configuration>
    </plugin>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>2.4</version>
      <configuration>
        <descriptorRefs>
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
        <archive>
          <manifest>
            <addClasspath>true</addClasspath>
            <mainClass>com.packt.samples.App</mainClass>
          </manifest>
        </archive>
      </configuration>
      <executions>
        <execution>
          <id>assemble-all</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>
Edit the source file
vi src/main/java/com/packt/samples/App.java
package com.packt.samples;
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Schema;
import org.apache.kudu.Type;
import org.apache.kudu.client.CreateTableOptions;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import java.util.ArrayList;
import java.util.List;
public class App
{
    public static void main( String[] args ) throws KuduException
    {
        // Connect to the Kudu master
        KuduClient client = new KuduClient.KuduClientBuilder("192.168.1.10:7051").build();
        // Two-column schema: an INT32 primary key and a nullable STRING value
        List<ColumnSchema> columns = new ArrayList<>(2);
        columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build());
        columns.add(new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).nullable(true).build());
        Schema schema = new Schema(columns);
        // Hash-partition the table on "key" into 8 buckets
        CreateTableOptions cto = new CreateTableOptions();
        List<String> hashKeys = new ArrayList<>(1);
        hashKeys.add("key");
        int numBuckets = 8;
        cto.addHashPartitions(hashKeys, numBuckets);
        // The table name is taken from the first command-line argument
        client.createTable(args[0], schema, cto);
        client.close();
    }
}
Build
mvn clean package -DskipTests
Run
java -jar target/createtable-1.0.0-jar-with-dependencies.jar linzhongwei
Check in the web UI: the table has been created.
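Besides the web UI, the table can also be verified programmatically. A minimal sketch using the same kudu-client API (the CheckTable class name is only illustrative; drop it into the sample project, rebuild, and pass the table name as the argument):
package com.packt.samples;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
public class CheckTable
{
    public static void main( String[] args ) throws KuduException
    {
        // Connect to the same Kudu master as the create-table program
        KuduClient client = new KuduClient.KuduClientBuilder("192.168.1.10:7051").build();
        // tableExists() and getTablesList() come from the kudu-client API
        System.out.println("exists: " + client.tableExists(args[0]));
        System.out.println("tables: " + client.getTablesList().getTablesList());
        client.close();
    }
}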
---------------------
Create a new project: a program that inserts data into the Kudu table
mvn archetype:generate -DgroupId=com.packt.samples -DartifactId=inserttable -Dversion=1.0.0 -DinteractiveMode=false -DarchetypeCatalog=internal
cd inserttable
vi pom.xml
<dependency>
  <groupId>org.apache.kudu</groupId>
  <artifactId>kudu-client</artifactId>
  <version>1.10.0</version>
</dependency>
<build>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <version>3.6.1</version>
      <configuration>
        <source>1.8</source>
        <target>1.8</target>
      </configuration>
    </plugin>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>2.4</version>
      <configuration>
        <descriptorRefs>
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
        <archive>
          <manifest>
            <addClasspath>true</addClasspath>
            <mainClass>com.packt.samples.App</mainClass>
          </manifest>
        </archive>
      </configuration>
      <executions>
        <execution>
          <id>assemble-all</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>
Edit the source file
vi src/main/java/com/packt/samples/App.java
package com.packt.samples;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.Insert;
import org.apache.kudu.client.PartialRow;
import org.apache.kudu.client.KuduTable;
public class App
{
    public static void main( String[] args ) throws KuduException
    {
        KuduClient client = new KuduClient.KuduClientBuilder("192.168.1.10:7051").build();
        // Open the table created earlier; the name comes from the first argument
        KuduTable table = client.openTable(args[0]);
        KuduSession session = client.newSession();
        // Insert 150 rows; even keys get a NULL value, odd keys get a string
        for (int i = 0; i < 150; i++) {
            Insert insert = table.newInsert();
            PartialRow row = insert.getRow();
            row.addInt("key", i);
            if (i % 2 == 0) {
                row.setNull("value");
            } else {
                row.addString("value", "value " + i);
            }
            session.apply(insert);
        }
        // Close the session to flush any buffered operations, then check for errors
        session.close();
        if (session.countPendingErrors() != 0) {
            System.out.println("errors inserting rows");
            throw new RuntimeException("error inserting rows to Kudu");
        }
        System.out.println("Inserted 150 rows");
        client.close();
    }
}
Build
mvn clean package -DskipTests
Run
java -jar target/inserttable-1.0.0-jar-with-dependencies.jar linzhongwei
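The row count can also be checked without the web UI by scanning the table. A minimal sketch using the same kudu-client API (the CountRows class name is only illustrative; build and run it like the programs above, passing the table name):
package com.packt.samples;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduException;
import org.apache.kudu.client.KuduScanner;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.RowResultIterator;
public class CountRows
{
    public static void main( String[] args ) throws KuduException
    {
        KuduClient client = new KuduClient.KuduClientBuilder("192.168.1.10:7051").build();
        KuduTable table = client.openTable(args[0]);
        // Full-table scan; sum the rows returned in each batch
        KuduScanner scanner = client.newScannerBuilder(table).build();
        long count = 0;
        while (scanner.hasMoreRows()) {
            RowResultIterator results = scanner.nextRows();
            count += results.getNumRows();
        }
        System.out.println("row count: " + count);   // expect 150 after the run above
        client.close();
    }
}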
---------------------------
Write a Spark job that queries the Kudu data with SQL
mvn archetype:generate -DgroupId=com.packt.samples -DartifactId=kudusparksql -Dversion=1.0.0 -DinteractiveMode=false -DarchetypeCatalog=internal
cd kudusparksql
vi pom.xml
<dependency>
  <groupId>org.apache.kudu</groupId>
  <artifactId>kudu-client</artifactId>
  <version>1.10.0</version>
</dependency>
<dependency>
  <groupId>org.apache.kudu</groupId>
  <artifactId>kudu-spark2_2.11</artifactId>
  <version>1.10.0</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_2.11</artifactId>
  <version>2.4.3</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>2.4.3</version>
</dependency>
<build>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <version>3.6.1</version>
      <configuration>
        <source>1.8</source>
        <target>1.8</target>
      </configuration>
    </plugin>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>2.4</version>
      <configuration>
        <descriptorRefs>
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
        <archive>
          <manifest>
            <addClasspath>true</addClasspath>
            <mainClass>com.packt.samples.App</mainClass>
          </manifest>
        </archive>
      </configuration>
      <executions>
        <execution>
          <id>assemble-all</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>
Edit the source file
vi src/main/java/com/packt/samples/App.java
package com.packt.samples;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SQLContext;
/**
 * Reads the Kudu table through the kudu-spark connector and prints its schema and rows.
 */
public class App
{
    public static void main( String[] args )
    {
        SparkSession sparkSession = SparkSession.builder().appName("Java Spark SQL basic example").getOrCreate();
        SQLContext sqlContext = sparkSession.sqlContext();
        // Load the Kudu table "linzhongwei" as a DataFrame via the kudu-spark data source
        Dataset<Row> load = sqlContext.read()
                .format("org.apache.kudu.spark.kudu")
                .option("kudu.master", "192.168.1.10:7051")
                .option("kudu.table", "linzhongwei")
                .load();
        load.printSchema();
        load.show();
        sparkSession.stop();
    }
}
Build
mvn clean package -DskipTests
Submit the job
spark-submit --class com.packt.samples.App --master spark://192.168.1.10:7077 /root/kudusparksql/target/kudusparksql-1.0.0-jar-with-dependencies.jar
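The job above only prints the schema and rows. To run an actual SQL statement, the loaded DataFrame can be registered as a temporary view and queried. A minimal sketch (the SqlQueryApp class name, view name, and query are illustrative; it would be packaged and submitted the same way, with --class com.packt.samples.SqlQueryApp):
package com.packt.samples;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
public class SqlQueryApp
{
    public static void main( String[] args )
    {
        SparkSession spark = SparkSession.builder().appName("Kudu SQL query example").getOrCreate();
        // Load the Kudu table through the kudu-spark data source and expose it to SQL
        Dataset<Row> df = spark.read()
                .format("org.apache.kudu.spark.kudu")
                .option("kudu.master", "192.168.1.10:7051")
                .option("kudu.table", "linzhongwei")
                .load();
        df.createOrReplaceTempView("linzhongwei");
        // Query the temporary view with a real SQL statement
        spark.sql("SELECT key, value FROM linzhongwei WHERE value IS NOT NULL").show();
        spark.stop();
    }
}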