资源简介
在Hadoop集群中,用MapReduce分布式计算TFIDF
代码片段和文件信息
package eb.cloud.mapreduce.MR.guoruonan;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
public class Tfidf {
public static class Mapper0 extends Mapper {
String filename;
public void map(LongWritable key Text value Context context)
throws IOException InterruptedException {
FileSplit split = (FileSplit) context.getInputSplit();
filename = split.getPath().getName();
String newString = value.toString().toLowerCase();
String results[] = newString.split(“[^a-zA-Z]“);
int flag = 0;
for (String val : results) {
if (val.equals(““))
continue;
context.write(new Text(filename) new Text(val));
}
}
}
public static class Reducer0 extends Reducer {
public void reduce(Text key Iterable values Context context)
throws IOException InterruptedException {
ArrayList array = new ArrayList();
for (Text t : values) {
array.add(t.toString());
}
for (String str : array) {
context.write(new Text(key.toString() + “ “+str) new Text(““
+ array.size()));
//
}
}
}
public static class Mapper1 extends Mapper {
public void map(LongWritable key Text value Context context)
throws IOException InterruptedException {
String line = value.toString();
int index = line.indexOf(“ “);
context.write(new Text(line.substring(0 index))
new Text(line.substring(index + 1)));
}
}
public static class Reducer1 extends Reducer {
public void reduce(Text key Iterable values Context context)
throws IOException InterruptedException {
ArrayList array = new ArrayList();
int ciNum = 1;
for (Text val : values) {
array.add(val.toString());
ciNum =
- 上一篇:java汉诺塔动画实现
- 下一篇:jsr173_1.0_api.jar
相关资源
- hadoop实战源代码Java
- TF-IDF计算程序
- hadoop-2.6.0-hadoop.dll-winutils.exe
- 高职组云计算与大数据题库
- Hadoop-2.8.5全面资料
- IT18掌大数据课程包含配套资料
- wordcount.jar
- 完整的java聚类算法,包括界面
- Hadoop-client-2.7.4.jar
- hadoop2.7.3 hadoop.dll
- hadoop2.7.1对应的hadoop.dllwinutils.exe等。
- Spark大数据中文分词统计Java工程源码
- hadoop-auth-2.2.0.jar
- hadoop-common-2.2.0-bin-master
- hive函数大全中文版
- hadoop.dll以及winutils.exe
- hadoop-lzo-0.4.13.jar
- 山东大学大数据实验三:Hadoop实现P
- hadoop-lzo-0.4.20-SNAPSHOT.jar 包
- hadoop-lzo-0.4.20-SNAPSHOT.jar
- 基于mapreduce的pagerank实现DEMO地址
- TFIDF算法mapreduce实现
- hadoop-lzo-0.4.20.jar
- Hive 入门级编程全案例详解
- hadoop-2.7.2-common.jar
- hadoop各版本--hadoop.dll以及winutils.exe,
- 2016年大数据全套视频徐培成
- hadoop-common-2.7.4-bin 包含hadoop.dll、win
- hadoop-core-1.2.1.jar
- 大数据hadoop winutils.exe
评论
共有 条评论