https://spark.apache.org/docs/latest/graphx-programming-guide.html#overview
以下是参照 Spark 官网 Scala 版本例子改写的 Java 代码
构造点(实体)和边(关系)
// Entry point to Spark for the Java API; sparkConf is expected to be defined before this snippet.
JavaSparkContext sc = new JavaSparkContext(sparkConf);

// Vertices (entities): (vertex id, person name).
List<Tuple2<Long, String>> vertices = new ArrayList<>();
vertices.add(new Tuple2<>(1L, "张三"));
vertices.add(new Tuple2<>(2L, "李四"));
vertices.add(new Tuple2<>(3L, "王五"));
vertices.add(new Tuple2<>(4L, "赵六"));
vertices.add(new Tuple2<>(5L, "黄七"));

// Edges (relationships): (source id, destination id, relationship label).
// Use the diamond operator instead of the raw Edge type so the element type is checked.
List<Edge<String>> edges = new ArrayList<>();
edges.add(new Edge<>(1L, 2L, "朋友"));
edges.add(new Edge<>(2L, 3L, "朋友"));
edges.add(new Edge<>(4L, 5L, "母子"));

JavaRDD<Tuple2<Long, String>> verticesJavaRDD = sc.parallelize(vertices);
// The GraphX signatures seen from Java use Object for the vertex id,
// so each Tuple2<Long, String> is re-wrapped as Tuple2<Object, String>.
RDD<Tuple2<Object, String>> verticesRDD = verticesJavaRDD.map(new Function<Tuple2<Long, String>, Tuple2<Object, String>>() {
    @Override
    public Tuple2<Object, String> call(Tuple2<Long, String> vertex) throws Exception {
        return new Tuple2<Object, String>(vertex._1(), vertex._2());
    }
}).rdd();
// Graph.apply takes Scala RDDs, hence the .rdd() unwrap of the JavaRDD.
RDD<Edge<String>> edgesRDD = sc.parallelize(edges).rdd();
生成图
// Graph.apply needs explicit ClassTags when called from Java; both vertex and edge attributes are String.
ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
// Attribute given to vertices that appear in an edge but not in verticesRDD.
String defaultVertexAttr = "";
// The same storage level is used for both the edge and the vertex RDDs.
StorageLevel memoryOnly = StorageLevel.MEMORY_ONLY();
Graph<String, String> graph = Graph.apply(
        verticesRDD, edgesRDD, defaultVertexAttr, memoryOnly, memoryOnly, stringTag, stringTag);
连通图
// Reuses the `graph` built in the graph-construction step; declaring it a second time
// in the same scope would not compile ("variable graph is already defined").

// Unlike the Scala version, Java gets no implicit Graph -> GraphOps conversion,
// so the GraphOps wrapper has to be obtained explicitly.
GraphOps<String, String> ops = Graph.graphToGraphOps(graph, stringTag, stringTag);

// Print every (vertex id, in-degree) pair. foreach runs on the executors, so on a
// cluster the output appears in the executor logs rather than on the driver.
ops.inDegrees().toJavaRDD().foreach(new VoidFunction<Tuple2<Object, Object>>() {
    @Override
    public void call(Tuple2<Object, Object> inDegreeEntry) throws Exception {
        System.out.println("indegress:" + inDegreeEntry);
    }
});

// connectedComponents is defined on GraphOps, which is why the wrapper above is needed.
// Per the GraphX docs, each vertex attribute in the result is the lowest vertex id of its component.
Graph<Object, String> connectedComponentsGraph = ops.connectedComponents();
connectedComponentsGraph.vertices().toJavaRDD().foreach(new VoidFunction<Tuple2<Object, Object>>() {
    @Override
    public void call(Tuple2<Object, Object> componentEntry) throws Exception {
        System.out.println(componentEntry);
    }
});