实验内容为:MapReduce编程实例(三),数据去重
输入:
2013-11-01 aa
2013-11-02 bb
2013-11-03 cc
2013-11-04 aa
2013-11-05 dd
2013-11-06 dd
2013-11-07 aa
2013-11-09 cc
2013-11-10 ee
2013-11-01 bb
2013-11-02 33
2013-11-03 cc
2013-11-04 bb
2013-11-05 23
2013-11-06 dd
2013-11-07 99
2013-11-09 99
2013-11-10 ee
导入语句(import):
3. import java.io.IOException;
4. import java.util.HashSet;
5. import java.util.StringTokenizer;
6.
7. import org.apache.hadoop.conf.Configuration;
8. import org.apache.hadoop.fs.Path;
9. import org.apache.hadoop.io.Text;
10. import org.apache.hadoop.mapreduce.Job;
11. import org.apache.hadoop.mapreduce.Mapper;
12. import org.apache.hadoop.mapreduce.Reducer;
13. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
14. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15. import org.apache.hadoop.util.GenericOptionsParser;
其他部分:
public class Dedup {
23.
24. public static class MyMapper extends Mapper<Object, Text, Text, Text>{
25.
26. @Override
27. protected void map(Object key, Text value, Context context)
28. throws IOException, InterruptedException {
29. context.write(value, new Text(""));
30. }
31. }
32.
33. public static class MyReducer extends Reducer<Text, Text, Text, Text>{
34.
35. @Override
36. protected void reduce(Text key, Iterable<Text> value,
37. Context context)
38. throws IOException, InterruptedException {
39. context.write(key, new Text(""));
40. }
41. }
42.
43.
44. public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
45. Configuration conf = new Configuration();
46. String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
47.
48. if(otherArgs.length<2){
49. System.out.println("parameter errors!");
50. System.exit(2);
51. }
52.
53. Job job = new org.apache.hadoop.mapreduce.Job(conf, "Dedup");
54. job.setJarByClass(Dedup.class);
55. job.setMapperClass(MyMapper.class);
56. job.setCombinerClass(MyReducer.class);
57. job.setReducerClass(MyReducer.class);
58. job.setOutputKeyClass(Text.class);
59. job.setOutputValueClass(Text.class);
60.
61. FileInputFormat.addInputPath(job,new ath(otherArgs[0]));
62. FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
63.
64. System.exit(job.waitForCompletion(true)?0:1);
65.
66. }
67.
68. }
输出结果(注:以下结果中包含上文输入示例未列出的记录,例如"2013-11-04 98",可能来自未展示的其他输入文件;另外"2013-11-03 cc"出现了两次,可能是两行在不可见的空白字符上存在差异):
2013-11-01 aa
2013-11-01 bb
2013-11-02 33
2013-11-02 bb
2013-11-03 cc
2013-11-03 cc
2013-11-04 98
2013-11-04 aa
2013-11-04 bb
2013-11-05 23
2013-11-05 93
2013-11-05 dd
2013-11-06 99
2013-11-06 dd
2013-11-07 92
2013-11-07 99
2013-11-07 aa
2013-11-09 99
2013-11-09 aa
2013-11-09 cc
2013-11-10 ee