倒排索引案例(多Job串联)

需求


需求分析


第一次处理

OneIndexMapper

public class OneIndexMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    String name;

    @Override

    protected void setup(Context context) throws IOException, InterruptedException {

        // 获取文件名称

        FileSplit inputSplit = (FileSplit) context.getInputSplit();

        name = inputSplit.getPath().getName();

    }

    Text k = new Text();

    IntWritable v = new IntWritable(1);

    @Override

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1 获取一行

        String line = value.toString();

        // 2 切割

        String[] fields = line.split(" ");

        // 3 写出

        for (String word : fields) {

            k.set(word + "--" + name);

            context.write(k, v);

        }

    }

}


OneIndexReducer

public class OneIndexReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override

    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

        // 1 累加求和

        int sum = 0;

        for (IntWritable value : values) {

            sum += value.get();

        }

        IntWritable v = new IntWritable();

        v.set(sum);

        // 2 写出

        context.write(key, v);

    }

}


OneIndexDriver

public class OneIndexDriver {

    public static void main(String[] args) throws Exception {

        args = new String[]{"e:/input/inputoneindex", "e:/output5"};

        Configuration configuration = new Configuration();

        Job job = Job.getInstance(configuration);

        job.setJarByClass(OneIndexDriver.class);

        job.setMapperClass(OneIndexMapper.class);

        job.setReducerClass(OneIndexReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);

    }

}



第二次处理

TwoIndexMapper

public class TwoIndexMapper extends Mapper<LongWritable, Text, Text, Text> {

    Text k = new Text();

    Text v = new Text();

    @Override

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1 获取一行

        String line = value.toString();

        // 2 切割

        String[] fields = line.split("--");

        // 3 封装

        k.set(fields[0]);

        v.set(fields[1]);

        // 4 写出

        context.write(k, v);

    }

}


TwoIndexReducer

public class TwoIndexReducer extends Reducer<Text, Text, Text, Text> {

    Text v = new Text();

    @Override

    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

        // 1 拼接字符串

        StringBuffer sb = new StringBuffer();

        for (Text value : values) {

            sb.append(value.toString().replace("\t", "-->") + "\t");

        }

        v.set(sb.toString());

        // 2 写出

        context.write(key, v);

    }

}


TwoIndexDriver

public class TwoIndexDriver {

    public static void main(String[] args) throws Exception {

        args = new String[]{"e:/input/inputtwoindex", "e:/output6"};

        Configuration configuration = new Configuration();

        Job job = Job.getInstance(configuration);

        job.setJarByClass(TwoIndexDriver.class);

        job.setMapperClass(TwoIndexMapper.class);

        job.setReducerClass(TwoIndexReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);

    }

}

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容