[hadoop@hadoop000 bin]$ ./spark-shell --master local[2] --jars ~/software/mysql-connector-java-5.1.27.jar
scala> case class Student (id:String, name:String, phone:String, email:String)
scala> val students = sc.textFile("file:///home/hadoop/data/student.data").map(_.split("\\|")).map(x=>(Student(x(0),x(1),x(2),x(3)))).toDF()
students: org.apache.spark.sql.DataFrame = [id: string, name: string ... 2 more fields]
//1.显示表中数据(默认只显示前20行),超过20个字符的字段内容会被截断
scala> students.show
+---+--------+--------------+--------------------+
| id| name| phone| email|
+---+--------+--------------+--------------------+
| 1| Burke|1-300-746-8446|ullamcorper.velit...|
| 2| Kamal|1-668-571-5046|pede.Suspendisse@...|
| 3| Olga|1-956-311-1686|Aenean.eget,netus...|
| 4| Belle|1-246-894-6340|vitae.aliquet.nec...|
| 5| Trevor|1-300-527-4967|dapibus.id@acturp...|
| 6| Laurel|1-691-379-9921|adipiscing@consec...|
| 7| Sara|1-608-140-1995|Donec.nibh@enimEt...|
| 8| Kaseem|1-881-586-2689|cursus.et.magna@e...|
| 9| Lev|1-916-367-5608|Vivamus.nisi@ipsu...|
| 10| Maya|1-271-683-2698|accumsan.convalli...|
| 11| Emi|1-467-270-1337| est@nunc.com|
| 12| Caleb|1-683-212-0896|Suspendisse@Quisq...|
| 13|Florence|1-603-575-2444|sit.amet.dapibus@...|
| 14| Anika|1-856-828-7883|euismod@ligulaeli...|
| 15| Tarik|1-398-171-2268|turpis@felisorci.com|
| 16| Amena|1-878-250-3129|lorem.lucrus.ut@s...|
| 17| Blossom|1-154-406-9596|Nunc.commodo.auct...|
| 18| Guy|1-869-521-3230|senectus.et.netus...|
| 19| Malachi|1-608-637-2772|Proin.mi.Aliquam@...|
| 20| Edward|1-711-710-6552|lectus@aliquetlib...|
+---+--------+--------------+--------------------+
only showing top 20 rows
//2.显示前20行数据,且不截断字段内容(show(false)只是关闭截断,并不会显示所有行)
scala> students.show(false)
+---+--------+--------------+-----------------------------------------+
|id |name |phone |email |
+---+--------+--------------+-----------------------------------------+
|1 |Burke |1-300-746-8446|ullamcorper.velit.in@ametnullaDonec.co.uk|
|2 |Kamal |1-668-571-5046|pede.Suspendisse@interdumenim.edu |
|3 |Olga |1-956-311-1686|Aenean.eget,netus@dictumcursusNunc.edu |
|4 |Belle |1-246-894-6340|vitae.aliquet.nec@neque.co.uk |
|5 |Trevor |1-300-527-4967|dapibus.id@acturpisegestas.net |
|6 |Laurel |1-691-379-9921|adipiscing@consectetueripsum.edu |
|7 |Sara |1-608-140-1995|Donec.nibh@enimEtianimperdiet.edu |
|8 |Kaseem |1-881-586-2689|cursus.et.magna@euismod.org |
|9 |Lev |1-916-367-5608|Vivamus.nisi@ipsumdolor.com |
|10 |Maya |1-271-683-2698|accumsan.convallis@ornarelectusjusto.edu |
|11 |Emi |1-467-270-1337|est@nunc.com |
|12 |Caleb |1-683-212-0896|Suspendisse@Quisque.edu |
|13 |Florence|1-603-575-2444|sit.amet.dapibus@lacusAliquamrutrum.ca |
|14 |Anika |1-856-828-7883|euismod@ligulaelit.co.uk |
|15 |Tarik |1-398-171-2268|turpis@felisorci.com |
|16 |Amena |1-878-250-3129|lorem.lucrus.ut@scelerisque.com |
|17 |Blossom |1-154-406-9596|Nunc.commodo.auctor@eratSed.co.uk |
|18 |Guy |1-869-521-3230|senectus.et.netus@lectusrutrum.com |
|19 |Malachi |1-608-637-2772|Proin.mi.Aliquam@estarcu.net |
|20 |Edward |1-711-710-6552|lectus@aliquetlibero.co.uk |
+---+--------+--------------+-----------------------------------------+
only showing top 20 rows
//3.显示前5条数据
scala> students.show(5,false)
+---+------+--------------+-----------------------------------------+
|id |name |phone |email |
+---+------+--------------+-----------------------------------------+
|1 |Burke |1-300-746-8446|ullamcorper.velit.in@ametnullaDonec.co.uk|
|2 |Kamal |1-668-571-5046|pede.Suspendisse@interdumenim.edu |
|3 |Olga |1-956-311-1686|Aenean.eget,netus@dictumcursusNunc.edu |
|4 |Belle |1-246-894-6340|vitae.aliquet.nec@neque.co.uk |
|5 |Trevor|1-300-527-4967|dapibus.id@acturpisegestas.net |
+---+------+--------------+-----------------------------------------+
only showing top 5 rows
//4.显示第一条数据
scala> students.head
res9: org.apache.spark.sql.Row = [1,Burke,1-300-746-8446,ullamcorper.velit.in@ametnullaDonec.co.uk]
//5.取前5条数据,返回Array[Row]
scala> students.head(5)
res10: Array[org.apache.spark.sql.Row] = Array([1,Burke,1-300-746-8446,ullamcorper.velit.in@ametnullaDonec.co.uk], [2,Kamal,1-668-571-5046,pede.Suspendisse@interdumenim.edu], [3,Olga,1-956-311-1686,Aenean.eget,netus@dictumcursusNunc.edu], [4,Belle,1-246-894-6340,vitae.aliquet.nec@neque.co.uk], [5,Trevor,1-300-527-4967,dapibus.id@acturpisegestas.net])
//6.取前5条数据并逐行打印
scala> students.head(5).foreach(println)
[1,Burke,1-300-746-8446,ullamcorper.velit.in@ametnullaDonec.co.uk]
[2,Kamal,1-668-571-5046,pede.Suspendisse@interdumenim.edu]
[3,Olga,1-956-311-1686,Aenean.eget,netus@dictumcursusNunc.edu]
[4,Belle,1-246-894-6340,vitae.aliquet.nec@neque.co.uk]
[5,Trevor,1-300-527-4967,dapibus.id@acturpisegestas.net]
//7.取第一条数据(first与head等价)
scala> students.first
res12: org.apache.spark.sql.Row = [1,Burke,1-300-746-8446,ullamcorper.velit.in@ametnullaDonec.co.uk]
//8.显示“id”和“name”列
scala> students.select("id","name").show
+---+--------+
| id| name|
+---+--------+
| 1| Burke|
| 2| Kamal|
| 3| Olga|
| 4| Belle|
| 5| Trevor|
| 6| Laurel|
| 7| Sara|
| 8| Kaseem|
| 9| Lev|
| 10| Maya|
| 11| Emi|
| 12| Caleb|
| 13|Florence|
| 14| Anika|
| 15| Tarik|
| 16| Amena|
| 17| Blossom|
| 18| Guy|
| 19| Malachi|
| 20| Edward|
+---+--------+
only showing top 20 rows
//9.筛选id<5的数据
scala> students.filter("id<5").show
+---+-----+--------------+--------------------+
| id| name| phone| email|
+---+-----+--------------+--------------------+
| 1|Burke|1-300-746-8446|ullamcorper.velit...|
| 2|Kamal|1-668-571-5046|pede.Suspendisse@...|
| 3| Olga|1-956-311-1686|Aenean.eget,netus...|
| 4|Belle|1-246-894-6340|vitae.aliquet.nec...|
+---+-----+--------------+--------------------+
//10.筛选name为空字符串的数据
scala> students.filter("name=''").show
+---+----+--------------+--------------------+
| id|name| phone| email|
+---+----+--------------+--------------------+
| 21| |1-711-710-6552|lecrus@aliquetlib...|
| 22| |1-711-710-6552|lecrus@aliquetlib...|
+---+----+--------------+--------------------+
//11.筛选name为空字符串或者为字符串'NULL'的数据(这里的'NULL'是字符串字面量,不是SQL的空值;判断空值应使用 name is null)
scala> students.filter("name=''or name='NULL'").show
+---+----+--------------+--------------------+
| id|name| phone| email|
+---+----+--------------+--------------------+
| 21| |1-711-710-6552|lecrus@aliquetlib...|
| 22| |1-711-710-6552|lecrus@aliquetlib...|
| 23|NULL|1-711-710-6552|lecrus@aliquetlib...|
+---+----+--------------+--------------------+
//12.筛选出name第一个字母为“M”的数据
scala> students.filter("name like 'M%'").show
+---+-------+--------------+--------------------+
| id| name| phone| email|
+---+-------+--------------+--------------------+
| 10| Maya|1-271-683-2698|accumsan.convalli...|
| 19|Malachi|1-608-637-2772|Proin.mi.Aliquam@...|
+---+-------+--------------+--------------------+
//13.用substr筛选出name第一个字母为“M”的数据(Spark SQL中substr的起始位置从1开始,传0时效果与1相同)
scala> students.filter("substr(name,0,1)='M'").show
+---+-------+--------------+--------------------+
| id| name| phone| email|
+---+-------+--------------+--------------------+
| 10| Maya|1-271-683-2698|accumsan.convalli...|
| 19|Malachi|1-608-637-2772|Proin.mi.Aliquam@...|
+---+-------+--------------+--------------------+
//14.筛选出name前三个字母为“Mal”的数据
scala> students.filter("substr(name,0,3)='Mal'").show
+---+-------+--------------+--------------------+
| id| name| phone| email|
+---+-------+--------------+--------------------+
| 19|Malachi|1-608-637-2772|Proin.mi.Aliquam@...|
+---+-------+--------------+--------------------+
//15.按name排序,默认是升序
scala> students.sort($"name").show
+---+--------+--------------+--------------------+
| id| name| phone| email|
+---+--------+--------------+--------------------+
| 21| |1-711-710-6552|lecrus@aliquetlib...|
| 22| |1-711-710-6552|lecrus@aliquetlib...|
| 16| Amena|1-878-250-3129|lorem.lucrus.ut@s...|
| 14| Anika|1-856-828-7883|euismod@ligulaeli...|
| 4| Belle|1-246-894-6340|vitae.aliquet.nec...|
| 17| Blossom|1-154-406-9596|Nunc.commodo.auct...|
| 1| Burke|1-300-746-8446|ullamcorper.velit...|
| 12| Caleb|1-683-212-0896|Suspendisse@Quisq...|
| 20| Edward|1-711-710-6552|lectus@aliquetlib...|
| 11| Emi|1-467-270-1337| est@nunc.com|
| 13|Florence|1-603-575-2444|sit.amet.dapibus@...|
| 18| Guy|1-869-521-3230|senectus.et.netus...|
| 2| Kamal|1-668-571-5046|pede.Suspendisse@...|
| 8| Kaseem|1-881-586-2689|cursus.et.magna@e...|
| 6| Laurel|1-691-379-9921|adipiscing@consec...|
| 9| Lev|1-916-367-5608|Vivamus.nisi@ipsu...|
| 19| Malachi|1-608-637-2772|Proin.mi.Aliquam@...|
| 10| Maya|1-271-683-2698|accumsan.convalli...|
| 23| NULL|1-711-710-6552|lecrus@aliquetlib...|
| 3| Olga|1-956-311-1686|Aenean.eget,netus...|
+---+--------+--------------+--------------------+
only showing top 20 rows
//16.按name降序排列
scala> students.sort($"name".desc).show(23,false)
+---+--------+--------------+-----------------------------------------+
|id |name |phone |email |
+---+--------+--------------+-----------------------------------------+
|5 |Trevor |1-300-527-4967|dapibus.id@acturpisegestas.net |
|15 |Tarik |1-398-171-2268|turpis@felisorci.com |
|7 |Sara |1-608-140-1995|Donec.nibh@enimEtianimperdiet.edu |
|3 |Olga |1-956-311-1686|Aenean.eget,netus@dictumcursusNunc.edu |
|23 |NULL |1-711-710-6552|lecrus@aliquetlibero.co.uk |
|10 |Maya |1-271-683-2698|accumsan.convallis@ornarelectusjusto.edu |
|19 |Malachi |1-608-637-2772|Proin.mi.Aliquam@estarcu.net |
|9 |Lev |1-916-367-5608|Vivamus.nisi@ipsumdolor.com |
|6 |Laurel |1-691-379-9921|adipiscing@consectetueripsum.edu |
|8 |Kaseem |1-881-586-2689|cursus.et.magna@euismod.org |
|2 |Kamal |1-668-571-5046|pede.Suspendisse@interdumenim.edu |
|18 |Guy |1-869-521-3230|senectus.et.netus@lectusrutrum.com |
|13 |Florence|1-603-575-2444|sit.amet.dapibus@lacusAliquamrutrum.ca |
|11 |Emi |1-467-270-1337|est@nunc.com |
|20 |Edward |1-711-710-6552|lectus@aliquetlibero.co.uk |
|12 |Caleb |1-683-212-0896|Suspendisse@Quisque.edu |
|1 |Burke |1-300-746-8446|ullamcorper.velit.in@ametnullaDonec.co.uk|
|17 |Blossom |1-154-406-9596|Nunc.commodo.auctor@eratSed.co.uk |
|4 |Belle |1-246-894-6340|vitae.aliquet.nec@neque.co.uk |
|14 |Anika |1-856-828-7883|euismod@ligulaelit.co.uk |
|16 |Amena |1-878-250-3129|lorem.lucrus.ut@scelerisque.com |
|21 | |1-711-710-6552|lecrus@aliquetlibero.co.uk |
|22 | |1-711-710-6552|lecrus@aliquetlibero.co.uk |
+---+--------+--------------+-----------------------------------------+
//17.按name降序排列,name相同的情况下按id降序排列
scala> students.sort($"name".desc,$"id".desc).show(23,false)
+---+--------+--------------+-----------------------------------------+
|id |name |phone |email |
+---+--------+--------------+-----------------------------------------+
|5 |Trevor |1-300-527-4967|dapibus.id@acturpisegestas.net |
|15 |Tarik |1-398-171-2268|turpis@felisorci.com |
|7 |Sara |1-608-140-1995|Donec.nibh@enimEtianimperdiet.edu |
|3 |Olga |1-956-311-1686|Aenean.eget,netus@dictumcursusNunc.edu |
|23 |NULL |1-711-710-6552|lecrus@aliquetlibero.co.uk |
|10 |Maya |1-271-683-2698|accumsan.convallis@ornarelectusjusto.edu |
|19 |Malachi |1-608-637-2772|Proin.mi.Aliquam@estarcu.net |
|9 |Lev |1-916-367-5608|Vivamus.nisi@ipsumdolor.com |
|6 |Laurel |1-691-379-9921|adipiscing@consectetueripsum.edu |
|8 |Kaseem |1-881-586-2689|cursus.et.magna@euismod.org |
|2 |Kamal |1-668-571-5046|pede.Suspendisse@interdumenim.edu |
|18 |Guy |1-869-521-3230|senectus.et.netus@lectusrutrum.com |
|13 |Florence|1-603-575-2444|sit.amet.dapibus@lacusAliquamrutrum.ca |
|11 |Emi |1-467-270-1337|est@nunc.com |
|20 |Edward |1-711-710-6552|lectus@aliquetlibero.co.uk |
|12 |Caleb |1-683-212-0896|Suspendisse@Quisque.edu |
|1 |Burke |1-300-746-8446|ullamcorper.velit.in@ametnullaDonec.co.uk|
|17 |Blossom |1-154-406-9596|Nunc.commodo.auctor@eratSed.co.uk |
|4 |Belle |1-246-894-6340|vitae.aliquet.nec@neque.co.uk |
|14 |Anika |1-856-828-7883|euismod@ligulaelit.co.uk |
|16 |Amena |1-878-250-3129|lorem.lucrus.ut@scelerisque.com |
|22 | |1-711-710-6552|lecrus@aliquetlibero.co.uk |
|21 | |1-711-710-6552|lecrus@aliquetlibero.co.uk |
+---+--------+--------------+-----------------------------------------+
//18.临时修改列名
scala> students.select($"name".as("new_name")).show(5,false)
+--------+
|new_name|
+--------+
|Burke |
|Kamal |
|Olga |
|Belle |
|Trevor |
+--------+
only showing top 5 rows
//19.join的用法
scala> val students1 = spark.sparkContext.textFile("file:///home/hadoop/data/student.data").map(_.split("\\|")).map(x =>(Student(x(0),x(1),x(2),x(3)))).toDF()
students1: org.apache.spark.sql.DataFrame = [id: string, name: string ... 2 more fields]
scala> val students2 = spark.sparkContext.textFile("file:///home/hadoop/data/student1.data").map(_.split("\\|")).map(x =>(Student(x(0),x(1),x(2),x(3)))).toDF()
students2: org.apache.spark.sql.DataFrame = [id: string, name: string ... 2 more fields]
//join的第三个参数是连接类型;如果省略,默认就是inner join
scala> students1.join(students2,students1.col("id")===students2.col("id"),"inner").show()
+---+--------+--------------+--------------------+---+--------+--------------+--------------------+
| id| name| phone| email| id| name| phone| email|
+---+--------+--------------+--------------------+---+--------+--------------+--------------------+
| 15| Tarik|1-398-171-2268|turpis@felisorci.com| 15| Tarik|1-398-171-2268|turpis@felisorci.com|
| 22| |1-711-710-6552|lecrus@aliquetlib...| 22| |1-711-710-6552|lecrus@aliquetlib...|
| 16| Amena|1-878-250-3129|lorem.lucrus.ut@s...| 16| Amena|1-878-250-3129|lorem.lucrus.ut@s...|
| 18| Guy|1-869-521-3230|senectus.et.netus...| 18| Guy|1-869-521-3230|senectus.et.netus...|
| 17| Blossom|1-154-406-9596|Nunc.commodo.auct...| 17| Blossom|1-154-406-9596|Nunc.commodo.auct...|
| 19| Malachi|1-608-637-2772|Proin.mi.Aliquam@...| 19| Malachi|1-608-637-2772|Proin.mi.Aliquam@...|
| 23| NULL|1-711-710-6552|lecrus@aliquetlib...| 23| NULL|1-711-710-6552|lecrus@aliquetlib...|
| 20| Edward|1-711-710-6552|lectus@aliquetlib...| 20| Edward|1-711-710-6552|lectus@aliquetlib...|
| 12| Caleb|1-683-212-0896|Suspendisse@Quisq...| 12| Caleb|1-683-212-0896|Suspendisse@Quisq...|
| 13|Florence|1-603-575-2444|sit.amet.dapibus@...| 13|Florence|1-603-575-2444|sit.amet.dapibus@...|
| 14| Anika|1-856-828-7883|euismod@ligulaeli...| 14| Anika|1-856-828-7883|euismod@ligulaeli...|
| 21| |1-711-710-6552|lecrus@aliquetlib...| 21| |1-711-710-6552|lecrus@aliquetlib...|
+---+--------+--------------+--------------------+---+--------+--------------+--------------------+
(四)DataFrame的常用函数
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 1、三角函数 double sin (double);正弦 double cos (double);余弦 doub...
- 当计算多个数的公约数时,需要知道,前两个的最大公约数,依次和后面的数求公约数,得到的就是所有数字的最大公约数。