搭建kudu spark 测试环境,测试建表,写入,spark 读取

规划

server 192.168.1.10

kudu-master

spark-master

server 192.168.1.25

kudu-slave

spark-slave

---------------------

192.168.1.10  安装 kudu master

上传 kudu rpm 包,kudu 版本 1.10.0

yum -y install httpd createrepo ntp ntpdate

hostnamectl set-hostname kudumaster

vi /etc/hosts

192.168.1.10 kudumaster

192.168.1.25 kuduslave

mkdir /var/www/html/kudu

mv kudu-* /var/www/html/kudu/

cd /var/www/html/kudu

createrepo .

systemctl start httpd.service

systemctl enable httpd.service

vi /etc/ntp.conf

server 127.127.1.0 iburst

#server 0.centos.pool.ntp.org iburst

#server 1.centos.pool.ntp.org iburst

#server 2.centos.pool.ntp.org iburst

#server 3.centos.pool.ntp.org iburst

systemctl start ntpd.service

systemctl enable ntpd.service

vi /etc/yum.repos.d/kudu.repo

[kudu]

name=kudu

baseurl=http://192.168.1.10/kudu

enabled=1

gpgcheck=0

yum makecache

yum -y install kudu-master.x86_64

vi /etc/kudu/conf/master.gflagfile

--fromenv=rpc_bind_addresses

--fromenv=log_dir

--fs_wal_dir=/var/lib/kudu/master

--fs_data_dirs=/var/lib/kudu/master

--rpc-encryption=disabled

--rpc_authentication=disabled

--default_num_replicas=1

--builtin_ntp_servers=192.168.1.10

service kudu-master start

chkconfig kudu-master on

----------------------------

192.168.1.25  安装 kudu tserver

vi /etc/yum.repos.d/kudu.repo

[kudu]

name=kudu

baseurl=http://192.168.1.10/kudu

enabled=1

gpgcheck=0

yum -y install kudu-tserver.x86_64

hostnamectl set-hostname kuduslave

vi /etc/hosts

192.168.1.10 kudumaster

192.168.1.25 kuduslave

vi /etc/kudu/conf/tserver.gflagfile

--fromenv=rpc_bind_addresses

--fromenv=log_dir

--tserver_master_addrs=192.168.1.10:7051

--fs_wal_dir=/var/lib/kudu/tserver

--fs_data_dirs=/var/lib/kudu/tserver

--builtin_ntp_servers=192.168.1.10

vi /etc/ntp.conf

server 192.168.1.10 prefer

server 127.127.1.0 iburst

#server 0.centos.pool.ntp.org iburst

#server 1.centos.pool.ntp.org iburst

#server 2.centos.pool.ntp.org iburst

#server 3.centos.pool.ntp.org iburst

systemctl start ntpd.service

systemctl enable ntpd.service

service kudu-tserver start

chkconfig kudu-tserver on

----------------------

web 浏览器 访问 192.168.1.10:8051,有 tablet server 说明安装成功

web 浏览器 访问 192.168.1.25:8050,有 tablet server 说明安装成功

----------------------

192.168.1.10  安装 java maven spark

tar -zxvf jdk-8u201-linux-x64.tar.gz

mv jdk1.8.0_201/ /opt/jdk

tar  -zxvf spark-2.4.3-bin-hadoop2.7.tgz

mv spark-2.4.3-bin-hadoop2.7/ /opt/spark

tar -zxvf apache-maven-3.6.1-bin.tar.gz

mv apache-maven-3.6.1/ /opt/maven

vi ~/.bashrc

export JAVA_HOME=/opt/jdk

export SPARK_HOME=/opt/spark

export M2_HOME=/opt/maven

export PATH=$PATH:$JAVA_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin:$M2_HOME/bin

source ~/.bashrc

vi /opt/maven/conf/settings.xml

    <mirror>

      <id>alimaven</id>

      <name>aliyun maven</name>

      <url>http://maven.aliyun.com/nexus/content/groups/public/</url>

      <mirrorOf>central</mirrorOf>       

    </mirror>

cd /opt/spark/conf/

mv spark-env.sh.template spark-env.sh

mv spark-defaults.conf.template  spark-defaults.conf

vi spark-env.sh

SPARK_LOCAL_IP=192.168.1.10

SPARK_MASTER_HOST=192.168.1.10

start-master.sh

192.168.1.25  安装 java spark

tar -zxvf jdk-8u201-linux-x64.tar.gz

mv jdk1.8.0_201/ /opt/jdk

tar  -zxvf spark-2.4.3-bin-hadoop2.7.tgz

mv spark-2.4.3-bin-hadoop2.7/ /opt/spark

vi ~/.bashrc

export JAVA_HOME=/opt/jdk

export SPARK_HOME=/opt/spark

export PATH=$PATH:$JAVA_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin

source ~/.bashrc

cd /opt/spark/conf/

mv spark-env.sh.template spark-env.sh

mv spark-defaults.conf.template  spark-defaults.conf

vi spark-env.sh

SPARK_LOCAL_IP=192.168.1.25

SPARK_MASTER_HOST=192.168.1.10

start-slave.sh spark://192.168.1.10:7077

------------

web 浏览器查看 192.168.1.10:8080 页面,worker 为 1。安装成功

----------------------------

192.168.1.10  安装 kudu 的 java 代码到本地仓库

tar -zxvf apache-kudu-1.10.0.tar.gz

mkdir /var/www/html/gradle

上传 gradle-5.4.1-all.zip

mv ~/gradle-5.4.1-all.zip /var/www/html/gradle

vi ~/apache-kudu-1.10.0/java/gradle/wrapper/gradle-wrapper.properties

distributionUrl=http\://192.168.1.10/gradle/gradle-5.4.1-all.zip

vi ~/apache-kudu-1.10.0/java/build.gradle

  repositories {

    maven { url 'http://maven.aliyun.com/nexus/content/repositories/central/' }

  }

vi ~/apache-kudu-1.10.0/java/buildSrc/build.gradle

repositories {

  maven { url 'http://maven.aliyun.com/nexus/content/groups/public/'}

  maven { url "https://maven.aliyun.com/repository/spring-plugin" }

  maven { url "https://maven.aliyun.com/repository/gradle-plugin" }

  maven { url "https://maven.aliyun.com/repository/jcenter" }

}

vi ~/apache-kudu-1.10.0/java/kudu-jepsen/build.gradle

repositories {

  maven { url "https://maven.aliyun.com/repository/jcenter" }

}

mkdir ~/.gradle/

vi ~/.gradle/init.gradle

allprojects {

    repositories {

        maven { url 'http://maven.aliyun.com/nexus/content/repositories/central/' }

        maven { url 'http://maven.aliyun.com/nexus/content/groups/public/'}

        maven { url "https://maven.aliyun.com/repository/spring-plugin" }

        maven { url "https://maven.aliyun.com/repository/gradle-plugin" }

        maven { url "https://maven.aliyun.com/repository/jcenter" }

    }

}

cd ~/apache-kudu-1.10.0/java/

./gradlew install

---------------------

新建项目,可以在kudu中建表的程序

mvn archetype:generate -DgroupId=com.packt.samples -DartifactId=createtable -Dversion=1.0.0 -DinteractiveMode=false -DarchetypeCatalog=internal

cd createtable

vi pom.xml

    <dependency>

      <groupId>org.apache.kudu</groupId>

      <artifactId>kudu-client</artifactId>

      <version>1.10.0</version>

    </dependency>

<build>

  <plugins>

            <plugin>

                <groupId>org.apache.maven.plugins</groupId>

                <artifactId>maven-compiler-plugin</artifactId>

                <version>3.6.1</version>

                <configuration>

                    <source>1.8</source>

                    <target>1.8</target>

                </configuration>

            </plugin>

    <plugin>

      <groupId>org.apache.maven.plugins</groupId>

      <artifactId>maven-assembly-plugin</artifactId>

      <version>2.4</version>

      <configuration>

        <descriptorRefs>

          <descriptorRef>jar-with-dependencies</descriptorRef>

        </descriptorRefs>

        <archive>

          <manifest>

            <addClasspath>true</addClasspath>

            <mainClass>com.packt.samples.App</mainClass>

          </manifest>

        </archive>

      </configuration>

      <executions>

        <execution>

          <id>assemble-all</id>

          <phase>package</phase>

          <goals>

            <goal>single</goal>

          </goals>

        </execution>

      </executions>

    </plugin>

  </plugins>

</build>

编辑文件

vi src/main/java/com/packt/samples/App.java

package com.packt.samples;

import org.apache.kudu.ColumnSchema;

import org.apache.kudu.Schema;

import org.apache.kudu.Type;

import org.apache.kudu.client.CreateTableOptions;

import org.apache.kudu.client.KuduClient;

import org.apache.kudu.client.KuduException;

import java.util.ArrayList;

import java.util.List;

public class App

{

    public static void main( String[] args ) throws KuduException

    {

        KuduClient client = new KuduClient.KuduClientBuilder("192.168.1.10:7051").build();

        List<ColumnSchema> columns = new ArrayList<>(2);

        columns.add(new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build());

        columns.add(new ColumnSchema.ColumnSchemaBuilder("value", Type.STRING).nullable(true).build());

        Schema schema = new Schema(columns);

        CreateTableOptions cto = new CreateTableOptions();

        List<String> hashKeys = new ArrayList<>(1);

        hashKeys.add("key");

        int numBuckets = 8;

        cto.addHashPartitions(hashKeys, numBuckets);

        client.createTable(args[0], schema, cto);

    }

}

编译

mvn clean package -DskipTests

执行

java -jar target/createtable-1.0.0-jar-with-dependencies.jar linzhongwei

web 界面检查,发现表已经创建

---------------------

新建项目,可以在kudu中插入数据

mvn archetype:generate -DgroupId=com.packt.samples -DartifactId=inserttable -Dversion=1.0.0 -DinteractiveMode=false -DarchetypeCatalog=internal

cd inserttable

vi pom.xml

    <dependency>

      <groupId>org.apache.kudu</groupId>

      <artifactId>kudu-client</artifactId>

      <version>1.10.0</version>

    </dependency>

<build>

  <plugins>

            <plugin>

                <groupId>org.apache.maven.plugins</groupId>

                <artifactId>maven-compiler-plugin</artifactId>

                <version>3.6.1</version>

                <configuration>

                    <source>1.8</source>

                    <target>1.8</target>

                </configuration>

            </plugin>

    <plugin>

      <groupId>org.apache.maven.plugins</groupId>

      <artifactId>maven-assembly-plugin</artifactId>

      <version>2.4</version>

      <configuration>

        <descriptorRefs>

          <descriptorRef>jar-with-dependencies</descriptorRef>

        </descriptorRefs>

        <archive>

          <manifest>

            <addClasspath>true</addClasspath>

            <mainClass>com.packt.samples.App</mainClass>

          </manifest>

        </archive>

      </configuration>

      <executions>

        <execution>

          <id>assemble-all</id>

          <phase>package</phase>

          <goals>

            <goal>single</goal>

          </goals>

        </execution>

      </executions>

    </plugin>

  </plugins>

</build>

编辑文件

vi src/main/java/com/packt/samples/App.java

package com.packt.samples;

import org.apache.kudu.client.KuduClient;

import org.apache.kudu.client.KuduException;

import org.apache.kudu.client.KuduSession;

import org.apache.kudu.client.Insert;

import org.apache.kudu.client.PartialRow;

import org.apache.kudu.client.KuduTable;

import java.util.ArrayList;

import java.util.List;

public class App

{

    public static void main( String[] args ) throws KuduException

    {

        KuduClient client = new KuduClient.KuduClientBuilder("192.168.1.10:7051").build();

        KuduTable table = client.openTable(args[0]);

        KuduSession session = client.newSession();

        for (int i = 0; i < 150; i++) {

            Insert insert = table.newInsert();

            PartialRow row = insert.getRow();

            row.addInt("key", i);

            if (i % 2 == 0) {

              row.setNull("value");

            } else {

              row.addString("value", "value " + i);

            }

            session.apply(insert);

        }

        session.close();

        if (session.countPendingErrors() != 0) {

            System.out.println("errors inserting rows");

            throw new RuntimeException("error inserting rows to Kudu");

        }

        System.out.println("Inserted 150 rows");

    }

}

编译

mvn clean package -DskipTests

执行

java -jar target/inserttable-1.0.0-jar-with-dependencies.jar linzhongwei

---------------------------

编写 spark 任务用 sql 方法查询 kudu 数据

mvn archetype:generate -DgroupId=com.packt.samples -DartifactId=kudusparksql -Dversion=1.0.0 -DinteractiveMode=false -DarchetypeCatalog=internal

cd kudusparksql

vi pom.xml

    <dependency>

      <groupId>org.apache.kudu</groupId>

      <artifactId>kudu-client</artifactId>

      <version>1.10.0</version>

    </dependency>

    <dependency>

      <groupId>org.apache.kudu</groupId>

      <artifactId>kudu-spark2_2.11</artifactId>

      <version>1.10.0</version>

    </dependency>

    <dependency>

      <groupId>org.apache.spark</groupId>

      <artifactId>spark-core_2.11</artifactId>

      <version>2.4.3</version>

    </dependency>

    <dependency>

      <groupId>org.apache.spark</groupId>

      <artifactId>spark-sql_2.11</artifactId>

      <version>2.4.3</version>

    </dependency>

<build>

  <plugins>

            <plugin>

                <groupId>org.apache.maven.plugins</groupId>

                <artifactId>maven-compiler-plugin</artifactId>

                <version>3.6.1</version>

                <configuration>

                    <source>1.8</source>

                    <target>1.8</target>

                </configuration>

            </plugin>

    <plugin>

      <groupId>org.apache.maven.plugins</groupId>

      <artifactId>maven-assembly-plugin</artifactId>

      <version>2.4</version>

      <configuration>

        <descriptorRefs>

          <descriptorRef>jar-with-dependencies</descriptorRef>

        </descriptorRefs>

        <archive>

          <manifest>

            <addClasspath>true</addClasspath>

            <mainClass>com.packt.samples.App</mainClass>

          </manifest>

        </archive>

      </configuration>

      <executions>

        <execution>

          <id>assemble-all</id>

          <phase>package</phase>

          <goals>

            <goal>single</goal>

          </goals>

        </execution>

      </executions>

    </plugin>

  </plugins>

</build>

编辑源文件

vi src/main/java/com/packt/samples/App.java

package com.packt.samples;

import org.apache.spark.sql.Dataset;

import org.apache.spark.sql.Row;

import org.apache.spark.sql.SparkSession;

import org.apache.spark.sql.SQLContext;

/**

* Hello world!

*

*/

public class App

{

    public static void main( String[] args )

    {

        SparkSession sparkSession = SparkSession.builder().appName("Java Spark SQL basic example").getOrCreate();

        SQLContext sqlContext = sparkSession.sqlContext();

        Dataset<Row> load = sqlContext.read().format("org.apache.kudu.spark.kudu").option("kudu.master", "192.168.1.10:7051").option("kudu.table", "linzhongwei").load();

        load.printSchema();

        load.show();

    }

}

编译

mvn clean package -DskipTests

提交任务

spark-submit --class com.packt.samples.App --master spark://192.168.1.10:7077 /root/kudusparksql/target/kudusparksql-1.0.0-jar-with-dependencies.jar

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容