1. Software Environment
- IntelliJ IDEA
- Maven
- A distributed Hadoop environment
- JDK 1.8
2. Environment Setup on Windows 10
2.1 Install the JDK and add it to the environment variables (install on your own)
2.2 Install Maven (install on your own)
2.3 Install IntelliJ IDEA (install on your own)
2.4 Install Hadoop
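On Windows, installing Hadoop generally means unpacking the Hadoop binaries, setting the HADOOP_HOME environment variable, adding %HADOOP_HOME%\bin to PATH, and placing winutils.exe under %HADOOP_HOME%\bin. As a quick sanity check, here is a minimal sketch (HadoopEnvCheck is an illustrative helper, not part of the tutorial) that verifies the two Windows-specific pieces:

import java.io.File;

// Minimal sanity check for a Windows Hadoop install (illustrative sketch):
// verifies that HADOOP_HOME is set and that winutils.exe is in place.
public class HadoopEnvCheck {
    public static void main(String[] args) {
        String hadoopHome = System.getenv("HADOOP_HOME");
        if (hadoopHome == null || hadoopHome.isEmpty()) {
            System.err.println("HADOOP_HOME is not set");
            System.exit(1);
        }
        // winutils.exe is what Hadoop's native shims call into on Windows.
        File winutils = new File(hadoopHome, "bin" + File.separator + "winutils.exe");
        System.out.println("HADOOP_HOME = " + hadoopHome);
        System.out.println("winutils.exe present: " + winutils.exists());
    }
}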
3. Creating the Maven Project
- In IntelliJ IDEA, click File -> New -> Project; in the dialog that appears, select Maven, set the JDK to 1.8, and click Next.
- Check that the Java environment settings are consistent:
  File -> Project Structure
  File -> Settings
- Edit the pom.xml file:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Project coordinates -->
    <groupId>com.guider.hadoop</groupId>
    <artifactId>hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>hadoop</name>
    <url>http://maven.apache.org</url>
    <!-- Compiler environment -->
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>
    <!-- Repository proxy (Aliyun mirror for faster artifact downloads) -->
    <repositories>
        <repository>
            <id>nexus-aliyun</id>
            <name>Nexus aliyun</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public</url>
        </repository>
    </repositories>
    <!-- Dependencies; add the libraries you need -->
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.5.0</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <!-- Build plugin configuration, auto-generated; can be left as-is -->
    <build>
        <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
            <plugins>
                <plugin>
                    <artifactId>maven-clean-plugin</artifactId>
                    <version>3.0.0</version>
                </plugin>
                <!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
                <plugin>
                    <artifactId>maven-resources-plugin</artifactId>
                    <version>3.0.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.7.0</version>
                </plugin>
                <plugin>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <version>2.20.1</version>
                </plugin>
                <plugin>
                    <artifactId>maven-jar-plugin</artifactId>
                    <version>3.0.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-install-plugin</artifactId>
                    <version>2.5.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-deploy-plugin</artifactId>
                    <version>2.8.2</version>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>
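With this pom.xml in place, mvn clean package will pull the dependencies and build the project jar. One caveat: Maven 3.8.1 and later block plain-http repositories by default, so on a recent Maven the Aliyun mirror URL above may need to be switched to https.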
- Copy the Hadoop configuration files (e.g., core-site.xml and hdfs-site.xml from your cluster) into the resources directory.
- Copy DiskChecker into the org.apache.hadoop.util package. (A class placed in that package in your own source tree takes precedence over the one inside the Hadoop jar; this is a common workaround for disk-check errors when running MapReduce jobs on Windows.)
4. Writing the WordCount Code
package com.guider.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class WordCount extends Configured implements Tool {
    // input -> map -> shuffle -> output
    // The mapper turns input data into key-value pairs, one record per input line.
    // 1. Map class
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text mapOutputKey = new Text();
        private IntWritable mapOutputValue = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // 1. Each record arrives as byte offset (key) + line content (value).
            String linevalue = value.toString();
            System.out.println("linevalue----" + linevalue);
            // 2. Split the line into words on " " and process each one.
            String[] strs = linevalue.split(" ");
            for (String str : strs) {
                // key: the word, value: 1
                mapOutputKey.set(str);
                mapOutputValue.set(1);
                // 3. Emit the result.
                context.write(mapOutputKey, mapOutputValue);
                System.out.println("str----" + str);
            }
        }
    }
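    // Example: for an input line "hello world hello", the mapper emits
    // ("hello", 1), ("world", 1), ("hello", 1); the shuffle then groups by key,
    // so the reducer receives ("hello", [1, 1]) and ("world", [1]).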
    // 2. Reduce class
    // The reducer's input is the mapper's output.
    public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable outputValue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the counts for this key.
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }
    // 3. Job class
    public int run(String[] args) throws Exception {
        // Get the configuration.
        Configuration conf = new Configuration();
        //Configuration conf = this.getConf();
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        // Set the input and output paths.
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);
        Path outpath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outpath);
        // Set the mapper class and its output key/value types.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Shuffle tuning (optional):
        // job.setPartitionerClass(cls);
        // job.setSortComparatorClass(cls);
        // job.setCombinerClass(cls);
        // job.setGroupingComparatorClass(cls);
        // Set the reducer and its output key/value types.
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Submit the job to YARN and wait for completion.
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }
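    // Since summing counts is associative and commutative, the reducer class
    // can also be reused as a combiner to cut shuffle traffic, e.g.:
    //   job.setCombinerClass(WordCountReduce.class);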
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hard-coded arguments: HDFS input and output paths.
        args = new String[] {
                "hdfs://bigguider22.com:8020/user/root/mapreduce/input",
                "hdfs://bigguider22.com:8020/user/root/mapreduce/output1"
        };
        // Run the job.
        int status = new WordCount().run(args);
        //int status = ToolRunner.run(conf, new WordCount(), args);
        System.exit(status);
    }
}
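To run the job, launch main() directly from IDEA. Note that MapReduce refuses to start if the output directory already exists (hence output1 above), so delete or rename the output path between runs. As an illustration, for an input file containing the single line hello world hello, the job would write the following to part-r-00000 in the output directory:

hello	2
world	1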