资源简介
很基础的 MapReduce（MR）TF-IDF 示例程序，供大家学习。
代码片段和文件信息
package eb.cloud.mapreduce.MR.guoruonan;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
public class Tfidf {
public static class Mapper0 extends Mapper {
String filename;
public void map(LongWritable key Text value Context context)
throws IOException InterruptedException {
FileSplit split = (FileSplit) context.getInputSplit();
filename = split.getPath().getName();
String newString = value.toString().toLowerCase();
String results[] = newString.split(“[^a-zA-Z]“);
int flag = 0;
for (String val : results) {
if (val.equals(““))
continue;
context.write(new Text(filename) new Text(val));
}
}
}
public static class Reducer0 extends Reducer {
public void reduce(Text key Iterable values Context context)
throws IOException InterruptedException {
ArrayList array = new ArrayList();
for (Text t : values) {
array.add(t.toString());
}
for (String str : array) {
context.write(new Text(key.toString() + “ “+str) new Text(““
+ array.size()));
//
}
}
}
public static class Mapper1 extends Mapper {
public void map(LongWritable key Text value Context context)
throws IOException InterruptedException {
String line = value.toString();
int index = line.indexOf(“ “);
context.write(new Text(line.substring(0 index))
new Text(line.substring(index + 1)));
}
}
public static class Reducer1 extends Reducer {
public void reduce(Text key Iterable values Context context)
throws IOException InterruptedException {
ArrayList array = new ArrayList();
int ciNum = 1;
for (Text val : values) {
array.add(val.toString());
ciNum =
- 上一篇:校园导游系统Java版
- 下一篇:eclipse格式化文件
评论
共有 条评论