Combiner

Map

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Map phase of the word-count job: splits every input line into words and
 * emits one {@code <word, 1>} pair per word.
 *
 * <p>The type parameters are {@code <k1, v1, k2, v2>} =
 * {@code <LongWritable, Text, Text, IntWritable>}. The input key (k1) is not
 * used by this mapper.
 *
 * <p>Example input values (one {@code map} call per line):
 * <pre>
 *   hello word
 *   hello bigdata
 *   hello spark word
 * </pre>
 *
 * <p>Pairs emitted by the mapper for that input:
 * {@code <hello,1> <word,1> <hello,1> <bigdata,1> <hello,1> <spark,1> <word,1>}.
 * Summing the ones per word downstream yields the final counts:
 * <pre>
 *   hello 3
 *   word 2
 *   spark 1
 *   bigdata 1
 * </pre>
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** v2: the constant count attached to every emitted word. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * k2: reused for every write instead of allocating a new {@code Text} per word.
     * This is the same reuse pattern as the WordCount example in the Hadoop
     * MapReduce tutorial.
     */
    private final Text outKey = new Text();

    /**
     * Tokenizes one line on runs of whitespace and writes {@code <word, 1>} for each token.
     *
     * @param key     input key of the record (unused)
     * @param value   one line of input text
     * @param context sink for the emitted {@code <word, 1>} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // Split on any run of whitespace (spaces, tabs, ...), not on a single space,
        // so "hello   word" and tab-separated text tokenize as expected.
        for (String word : value.toString().split("\\s+")) {
            // A blank line or leading whitespace still produces one empty token;
            // skip it so the empty string is never counted as a word.
            if (word.isEmpty()) {
                continue;
            }
            // Without a combiner every <word,1> pair is shipped to the reducer node
            // (e.g. <a,[1,1,1]>); with a combiner the pairs are pre-summed on the
            // map side first (e.g. <a,[3]>), so less data moves between nodes.
            outKey.set(word);
            context.write(outKey, ONE);
        }
    }
}

Combiner

package cn.studybigdata.hadoop.mapred.ecombine;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Combiner for the word-count job: adds up the counts it receives for one word
 * and emits a single {@code <word, sum>} pair.
 *
 * <p>Input and output types are both {@code <Text, IntWritable>}, so the output
 * has the same shape as the mapper's output.
 */
public class WordCountCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Sums all values grouped under {@code key} and writes the result.
     *
     * <p>Example: the pairs {@code <word,1> <word,1> <word,1>} arrive grouped as
     * {@code <word, [1,1,1]>} and leave as {@code <word, 3>}.
     *
     * @param key     the word
     * @param values  the counts recorded for that word
     * @param context sink for the summed pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int partialSum = 0;
        for (IntWritable partial : values) {
            partialSum += partial.get();
        }

        IntWritable combined = new IntWritable(partialSum);
        context.write(key, combined);
    }
}

Reduce

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reduce phase of the word-count job: totals the counts received for each word
 * and emits {@code <word, total>}.
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Adds up every value grouped under {@code key} and writes the total.
     *
     * <p>Example: {@code <word, [1,1,1]>} (key3, value3) becomes {@code <word, 3>}.
     *
     * @param key     the word
     * @param values  the counts received for that word
     * @param context sink for the {@code <word, total>} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable occurrences : values) {
            total += occurrences.get();
        }

        IntWritable result = new IntWritable(total);
        context.write(key, result);
    }
}

Main

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the word-count job: wires the mapper, combiner and reducer together,
 * sets the input/output paths from the command line and runs the job.
 */
public class WordCountMain {

    /**
     * Configures and submits the job, then exits with a status reflecting the job result.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: WordCountMain <input path> <output path>");
            System.exit(2);
        }

        Configuration configuration = new Configuration();
        // Create the MapReduce job.
        Job job = Job.getInstance(configuration);

        job.setJarByClass(WordCountMain.class);

        // Map side: emits <word, 1>.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Pre-sums the mapper output before it is handed to the reducer.
        job.setCombinerClass(WordCountCombiner.class);

        // Reduce side: emits <word, total>, e.g. <word, 3>.
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input path and output path come from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate the job result to the caller: previously the return value was
        // discarded, so a failed job still ended with exit status 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

Pom


<dependencies>
    <!-- Hadoop client libraries; presumably the source of the org.apache.hadoop.*
         classes imported by the mapper, combiner, reducer and driver. -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.3</version>
    </dependency>
    <!-- Log4j 2 API; kept at the same version as log4j-core below. -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>2.18.0</version>
    </dependency>
    <!-- Log4j 2 implementation matching the log4j-api version above. -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.18.0</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!-- Writes the Main-Class entry into the jar manifest so the jar can be
             launched without naming the driver class on the command line. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <archive>
                    <manifest>
                        <!-- Driver of this combiner example. The previous value
                             (cn.studybigdata.hadoop.mapred.dpartition.DeptMain) named the
                             driver of a different example. NOTE(review): assumes
                             WordCountMain lives in the same package as WordCountCombiner;
                             confirm against the project layout. -->
                        <mainClass>cn.studybigdata.hadoop.mapred.ecombine.WordCountMain</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
    </plugins>
</build>

Hadoop Combiner

Combiner

Map

Combiner

Reduce

Main

Pom