资源简介
北京大学网络大数据管理与应用大作业,使用 PageRank 来分析微博数据。包含 4 个 Spark 实现和 2 个 Hadoop 实现。

代码片段和文件信息
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.*;
import java.util.*;
public class hadoopPageRank {
public static class initGraphMapper extends Mapper {
@Override
protected void map(LongWritable key Text value Context context) throws IOException InterruptedException {
String[] lineSplit = value.toString().split(“\t“);
lineSplit[0] = StringUtils.strip(lineSplit[0] “\““); lineSplit[1] = StringUtils.strip(lineSplit[1] “\““);
if (lineSplit[1].equals(“0“) || lineSplit[0].equals(“0“)) return;
context.write(new Text(lineSplit[0]) new Text(lineSplit[1]));
context.write(new Text(lineSplit[0]) new Text(“node“));
context.write(new Text(lineSplit[1]) new Text(“node“));
}
}
public static class initGraphReducer extends Reducer {
@Override
protected void reduce(Text key Iterable values Context context) throws IOException InterruptedException {
Set set = new HashSet<>();
for (Text text: values) {
String val = text.toString();
if (val.equals(“node“)) continue;
set.add(val);
}
// if a node does not have out edges then add a edge to itself
if (set.size() > 0) context.write(key new Text(“1.0“ + “\t“ + String.join(“\t“ set)));
else context.write(key new Text(“1.0“ + “\t“ + key.toString()));
}
}
public static class pageRankMapper extends Mapper {
@Override
protected void map(LongWritable key Text value Context context) throws IOException InterruptedException {
String[] lineSplit = value.toString().split(“\t“);
Double rank = Double.parseDouble(lineSplit[1]);
int size = lineSplit.length - 2;
for (int i=0; i if (i == 0 || i == 1) continue;
context.write(new Text(lineSplit[i]) new Text(“rank“ + “ “ + rank / size));
context.write(new Text(lineSplit[0]) new Text(lineSplit[i]));
}
}
}
public static class pageRankReducer extends Reducer {
@Override
protected void reduce(Text key Iterable values Context context) throws IOException InterruptedException {
Double res = 0.0;
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2017-12-09 15:39 input_small\
文件 7105030 2017-11-25 15:15 input_small\page_rank_data_small.txt
文件 2560 2017-12-01 14:49 pom.xml
目录 0 2017-11-25 15:11 src\
目录 0 2017-11-25 15:11 src\main\
目录 0 2017-12-09 17:20 src\main\java\
文件 9335 2017-12-02 10:35 src\main\java\hadoopPageRank.java
文件 9207 2017-12-09 16:02 src\main\java\hadoopPageRankAverage.java
文件 4395 2017-12-08 11:04 src\main\java\sparkPageRank.java
文件 4389 2017-12-08 14:41 src\main\java\sparkPageRankAverage.java
文件 8083 2017-12-09 17:20 src\main\java\sparkPageRankAverageV2.java
文件 3136 2017-12-01 19:30 src\main\java\sparkPageRankBasic.java
文件 3110 2017-12-01 23:23 src\main\java\sparkPageRankHashMap.java
文件 6952 2017-12-08 13:46 src\main\java\sparkPageRankV2.java
目录 0 2017-12-02 11:02 src\main\resources\
文件 327 2017-12-02 11:02 src\main\resources\log4j.properties
目录 0 2017-11-25 15:11 src\test\
目录 0 2017-11-25 15:11 src\test\java\
相关资源
- 电信中兴光猫超密获取工具使用方法
- The Secret Path 3D 3D魔方迷宫[源码][scra
- HAP_Advanced_PDF_Password_Recovery 5.05
- Windows Embedded Compact 2013 应用开发调
- Reparatory Effects of Nicotine on NMDA Recepto
- Apolipoprotein E4 Impairs in vivo Hippocampal
- LenovoTinkPad; Marker 5.01
- Micrium.RTOS.1.0.0.pack
- Crystal Impact Match!2.1.3 试用延长
- Xpath生成器,自动生成可用的Xpath。
- 类pascal语言编译器(编译原理实验)
- Packet Tracer 5.2.1
- 官网Apache2.2
- 仿windows记事本
- hidusage.h hidpi.h 等USB开发用头文件
- ipv6网络抓包程序
- HDD repair.rar
- 简易绘图的制作 (一)WPF
- 基于Apache Mina实现的TCP长连接和短连接
- IpHlpApi.h&IpHlpApi.lib
- 可以把delphi的dcu文件转换为pas文件的
- jdbcTemplate分页彻底解决,使用游标滚
- SAMPLE (类pascal) 词法分析程序 C 版
- Delphi做的用于分析Pascal语言的词法分
- xml课件及例题(xml)
- php程序实现数据库的增删改查
- OFDM 系统PAPR减小的三种经典方法
- pano2vr全景图制作教程
- The Impact of ETC System on Safety Performance
- Differential expression patterns of Toll-li
评论
共有 条评论