





#include <iostream>
#include <string>
using namespace std;
// Entry point of the streaming mapper program (built as `mapper` from mapper.cpp).
// NOTE(review): this listing looks truncated — only the declaration of `word`
// and the return statement are visible; there is no input/output loop and the
// closing brace of main is missing. Presumably the original read words from
// stdin and emitted one tab-separated "word<TAB>1" line per word — TODO confirm
// against the original mapper.cpp before relying on this.
int main(){
  // Buffer intended to hold a single token; it is never read or written in the visible code.
  string word;
  // Report success to the caller.
  return 0;


#include <iostream>
#include <string>
#include <map>
using namespace std;
// Entry point of the streaming reducer program (built as `reducer` from reducer.cpp).
// NOTE(review): this listing looks truncated — the read loop, the update of
// `count`, the output step and the closing brace of main are not visible.
// Presumably the original read "key<TAB>num" pairs from stdin, accumulated a
// per-key total in `count`, and printed each key with its total — TODO confirm
// against the original reducer.cpp.
int main(){
  // `key` is the lookup key used below; `num` is declared but unused in the visible code.
  string key,num;
  // Ordered map from a string key to an integer value.
  map<string,int> count;
  // Iterator that receives the result of the lookup below.
  map<string,int>::iterator it;
    // Looks up `key` in `count`. In the visible code both are still empty, so this
    // yields count.end(), and the result is never inspected.
    it = count.find(key);
  // Report success to the caller.
  return 0;


[root@client project]# g++ mapper.cpp -o mapper
[root@client project]# g++ reducer.cpp -o reducer


[root@client project]# hdfs dfs -cat /user/root/wordcount/input/*
hadoop framework include hdfs and mapreduce
mapreduce is a distributed framework
hdfs is a hadoop distributed file system


[root@client project]# hadoop jar /home/software/hadoop-2.7.7/share/hadoop/tools/lib/hadoop-streaming-2.7.7.jar -D mapred.job.name="wordcount" -input /user/root/wordcount/input -output /user/root/wordcount/output --mapper ./mapper --reducer ./reducer -file mapper -file reducer


[root@client project]# hadoop jar /home/software/hadoop-2.7.7/share/hadoop/tools/lib/hadoop-streaming-2.7.7.jar -D mapred.job.name="wordcount" -input /user/root/wordcount/input -output /user/root/wordcount/output --mapper ./mapper --reducer ./reducer -file mapper -file reducer
19/09/21 21:47:03 WARN streaming.StreamJob: -file option is deprecated, please use generic option -files instead.
packageJobJar: [mapper, reducer, /tmp/hadoop-unjar4749452695736673048/] [] /tmp/streamjob4372833674036516407.jar tmpDir=null
19/09/21 21:47:05 INFO client.RMProxy: Connecting to ResourceManager at /
19/09/21 21:47:06 INFO client.RMProxy: Connecting to ResourceManager at /
19/09/21 21:47:07 INFO mapred.FileInputFormat: Total input paths to process : 1
19/09/21 21:47:08 INFO mapreduce.JobSubmitter: number of splits:2
19/09/21 21:47:08 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name
19/09/21 21:47:08 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1569071641619_0002
19/09/21 21:47:08 INFO impl.YarnClientImpl: Submitted application application_1569071641619_0002
19/09/21 21:47:08 INFO mapreduce.Job: The url to track the job: http://client:8088/proxy/application_1569071641619_0002/
19/09/21 21:47:08 INFO mapreduce.Job: Running job: job_1569071641619_0002
19/09/21 21:47:23 INFO mapreduce.Job: Job job_1569071641619_0002 running in uber mode : false
19/09/21 21:47:23 INFO mapreduce.Job:  map 0% reduce 0%
19/09/21 21:48:54 INFO mapreduce.Job:  map 100% reduce 0%
19/09/21 21:49:43 INFO mapreduce.Job:  map 100% reduce 100%
19/09/21 21:49:48 INFO mapreduce.Job: Job job_1569071641619_0002 completed successfully
19/09/21 21:49:50 INFO mapreduce.Job: Counters: 49
	File System Counters
		FILE: Number of bytes read=200
		FILE: Number of bytes written=378513
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=419
		HDFS: Number of bytes written=95
		HDFS: Number of read operations=9
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=2
	Job Counters 
		Launched map tasks=2
		Launched reduce tasks=1
		Data-local map tasks=2
		Total time spent by all maps in occupied slots (ms)=198469
		Total time spent by all reduces in occupied slots (ms)=22521
		Total time spent by all map tasks (ms)=198469
		Total time spent by all reduce tasks (ms)=22521
		Total vcore-milliseconds taken by all map tasks=198469
		Total vcore-milliseconds taken by all reduce tasks=22521
		Total megabyte-milliseconds taken by all map tasks=203232256
		Total megabyte-milliseconds taken by all reduce tasks=23061504
	Map-Reduce Framework
		Map input records=3
		Map output records=18
		Map output bytes=158
		Map output materialized bytes=206
		Input split bytes=236
		Combine input records=0
		Combine output records=0
		Reduce input groups=11
		Reduce shuffle bytes=206
		Reduce input records=18
		Reduce output records=11
		Spilled Records=36
		Shuffled Maps =2
		Failed Shuffles=0
		Merged Map outputs=2
		GC time elapsed (ms)=12487
		CPU time spent (ms)=8250
		Physical memory (bytes) snapshot=412422144
		Virtual memory (bytes) snapshot=6238203904
		Total committed heap usage (bytes)=263348224
	Shuffle Errors
	File Input Format Counters 
		Bytes Read=183
	File Output Format Counters 
		Bytes Written=95
19/09/21 21:49:50 INFO streaming.StreamJob: Output directory: /user/root/wordcount/output


[root@client project]# hdfs dfs -cat /user/root/wordcount/output/*
a	2
and	1
distributed	2
file	1
framework	2
hadoop	2
hdfs	2
include	1
is	2
mapreduce	2
system	1



    Hadoop提供了Java版本的MapReduce编程API，我们需要自定义编写mapper和reducer，分别继承Mapper和Reducer，然后重写map和reduce方法，同时需要在main方法