Flink WordCount

输入

```shell
yuan@ThinkPad:~$ nc -lk 9999
a
b b
c c c
```

输出

```text
(a,1)
(b,1)
(b,2)
(c,1)
(c,2)
(c,3)
```

wordcount 代码片段

// 3. Data processing: word count.
// Builds a stream of (word, runningCount) pairs from the lines read from socketSource.
DataStream<Tuple2<String, Integer>> wordCountStream = socketSource
        // Split each input line into words and emit an initial (word, 1) pair per word.
        .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
                // Split on single spaces; consecutive spaces produce empty tokens.
                for (String word : line.split(" ")) {
                    // Drop the empty tokens so they are not counted as words.
                    if (!word.isEmpty()) {
                        out.collect(new Tuple2<>(word, 1));
                    }
                }
            }
        })
        // Group by the word (tuple field f0) with a typed key selector.
        // The index-based keyBy(0) is deprecated in newer Flink releases and gives an untyped Tuple key.
        .keyBy(pair -> pair.f0)
        // Keep a running sum of the count (tuple field at position 1) per word.
        .sum(1);