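'''
When writing a standalone or interactive PySpark script, import the findspark
package and call findspark.init() first. These two lines must come before any
pyspark import so that the Spark installation can be located.
'''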
import findspark
findspark.init()
from pyspark import SparkContext
'''
from py4j.java_gateway import JavaGateway
gateway = JavaGateway() # connect to the JVM
random = gateway.jvm.java.util.Random() # create a java.util.Random instance
number1 = random.nextInt(10) # call the Random.nextInt method
number2 = random.nextInt(10)
print(number1, number2)
'''
# Demo of three basic RDD transformations: flatMap, filter and distinct.
# The master URL 'local' runs Spark in-process on this machine.
sc = SparkContext('local')
try:
    old = sc.parallelize([1, 2, 3, 4, 5])
    # flatMap: every source element x is expanded into the three elements
    # (x, x+1, x*2), and the results are flattened into one RDD.
    newFlatPartitions = old.flatMap(lambda x: (x, x+1, x*2))
    # filter: keep only the elements that are less than 6.
    newFilterPartitions = newFlatPartitions.filter(lambda x: x < 6)
    # distinct: drop duplicate elements.
    newDiscinctPartitions = newFilterPartitions.distinct()
    # collect() is the action that actually triggers each computation and
    # brings the results back to the driver as a Python list.
    print(newFlatPartitions.collect())
    print(newFilterPartitions.collect())
    print(newDiscinctPartitions.collect())
finally:
    # Always release the context: only one SparkContext may be active per
    # process, so leaving it running makes a re-run in the same interpreter
    # fail and keeps the backing JVM resources alive after an error.
    sc.stop()
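'''
Note on master URLs: 'local[*]' runs Spark locally with as many worker threads
as there are logical cores on the machine, whereas the plain 'local' used
above runs with a single worker thread.
'''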