Windows开发环境搭建

Maven

Maven下载

1
https://archive.apache.org

直接解压已经提供的压缩包即可

Maven配置

MAVEN_HOME/conf/settings.xml
1
2
3
4
5
6
7
8
9
10
11
12
<!-- 本地仓库: 根据自己的情况修改保存位置 -->
<localRepository>D:\maven-repo</localRepository>

<!-- 远程仓库: aliyun仓库 -->
<mirrors>
<mirror>
<id>alimaven</id>
<name>aliyun maven</name>
<url>https://maven.aliyun.com/repository/public</url>
<mirrorOf>central</mirrorOf>
</mirror>
</mirrors>

IDEA

1
https://www.jetbrains.com/idea/download/?section=windows

直接使用提供的安装包即可。

IDEA绑定Maven

image-20221029182326941

创建Maven项目

image-20221029182538750

安装Hadoop

替换bin目录

1
2
3
https://github.com/cdarlint/winutils
或者
https://github.com/A-stranger/studybigdata/tree/master/hadoop-2.7.3/bin

直接解压压缩包即可。

配置HADOOP环境变量

HADOOP_HOME

image-20221028150513734

PATH

image-20221030141057578

WordCount

WordCountMapper
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
package cn.studybigdata.hadoop.mapred.awordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Map phase of WordCount: splits each input line on single spaces and emits
 * a (word, 1) pair for every token.
 *
 * Input:  (byte offset of the line within the file, line text)
 * Output: (word, 1)
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused across map() invocations to avoid allocating a new Writable per
    // token; safe because the framework serializes each pair on write().
    private final Text outKey = new Text();
    private final IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] wordArray = value.toString().split(" ");
        for (String word : wordArray) {
            // Consecutive spaces yield empty tokens from split(); skip them
            // so "" is never counted as a word.
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word);
            context.write(outKey, one);
        }
    }
}
WordCountReduce
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
package cn.studybigdata.hadoop.mapred.awordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reduce phase of WordCount: sums the per-occurrence counts emitted by the
 * mapper for a single word and writes (word, total).
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Accumulate the total number of occurrences of this word.
        int total = 0;
        java.util.Iterator<IntWritable> it = values.iterator();
        while (it.hasNext()) {
            total += it.next().get();
        }
        context.write(key, new IntWritable(total));
    }
}
WordCountMain
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
package cn.studybigdata.hadoop.mapred.awordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.io.Text;
import java.io.IOException;

/**
 * Driver for the WordCount job: wires the mapper and reducer together,
 * configures the key/value types, and submits the job.
 *
 * Usage: WordCountMain &lt;input path&gt; &lt;output path&gt;
 * The output path must not already exist, or the job will fail at submit time.
 */
public class WordCountMain {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCountMain <input path> <output path>");
            System.exit(2);
        }

        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, "word count");

        // Lets the framework locate the jar containing these classes on the cluster.
        job.setJarByClass(WordCountMain.class);

        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job success/failure to the process exit code so shell
        // scripts and schedulers can detect a failed run (the original
        // discarded waitForCompletion's boolean and always exited 0).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
pom.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.6</version>
<configuration>
<archive>
<manifest>
<mainClass>cn.studybigdata.hadoop.mapred.awordcount.WordCountMain</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>

Run Configuration

这样我们就可以读取本地文件,进行代码调试。

image-20221030141758287

s