资源简介
Spark商业实战三部曲源码,包含书中所用到的代码以及用到的数据集
代码片段和文件信息
package com.dt.spark.SparkApps.sql;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Random;
/**
* Spark商业案例书稿第三章:3.4.3.2 电商交互式分析系统应用模拟数据生成代码
* 电商数据自动生成代码,数据格式如下:
* 用户数据 :{"userID": 0, "name": "spark0", "registeredTime": "2016-10-11 18:06:25"}
* 日志数据:{"logID": 00, "userID": 0, "time": "2016-10-04 15:42:45", "typed": 0, "consumed": 0.0}
*/
public class Mock_EB_Users_Data {
public static void main(String[] args) throws ParseException {
/**
* 通过传递进来的参数生成指定大小规模的数据;
*/
long numberItems = 1000;
String dataPath = “data/Mock_EB_Users_Data/“;
if (args.length > 1) {
numberItems = Integer.valueOf(args[0]);
dataPath = args[1];
}
System.out.println(“User log number is : “ + numberItems);
mockUserData(numberItems dataPath);
mockLogData(numberItems dataPath);
}
private static void mockLogData(long numberItems String dataPath) {
//{“logID“: 00“userID“: 0 “time“: “2016-10-04 15:42:45“ “typed“: 0 “consumed“: 0.0}
StringBuffer mock_Log_Buffer = new StringBuffer(““);
Random random = new Random();
for (int i = 0; i < numberItems; i++) { //userID
for (int j = 0; j < numberItems; j++) {
String initData = “2016-10-“;
String randomData = String.format(“%s%02d%s%02d%s%02d%s%02d“ initData random.nextInt(31)
“ “ random.nextInt(24)
“:“ random.nextInt(60)
“:“ random.nextInt(60));
String result = “{\“logID\“: “ + String.format(“%02d“ j) + “ \“userID\“: “ + i + “ \“time\“: \““ + randomData + “\“ \““ +
“typed\“: “ + String.format(“%01d“ random.nextInt(2)) +
“ \“consumed\“:“ + String.format(“%.2f“ random.nextDouble() * 1000)
+ “}“;
mock_Log_Buffer.append(result)
.append(“\n“);
}
}
System.out.println(mock_Log_Buffer);
PrintWriter printWriter = null;
try {
printWriter = new PrintWriter(new OutputStreamWriter(
new FileOutputStream(dataPath + “Mock_EB_Log_Data.json“)));
printWriter.write(mock_Log_Buffer.toString());
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
printWriter.close();
}
}
private static void mockUserData(long numberItems String dataPath) {
StringBuffer mock_User_Buffer = new StringBuffer(““);
Random random = new Random();
for (int i = 0; i < numberItems; i++) {
String initData = “2016-10-“;
String randomData = String.f
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\
文件 1250 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\README.md
目录 0 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\
目录 0 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\
文件 982 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\compiler.xm
文件 479 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\encodings.xm
目录 0 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\
文件 495 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__antlr_antlr_2_7_7.xm
文件 547 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__aopalliance_aopalliance_1_0.xm
文件 459 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__asm_asm_3_1.xm
文件 515 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__asm_asm_commons_3_1.xm
文件 494 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__asm_asm_tree_3_1.xm
文件 522 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_101tec_zkclient_0_3.xm
文件 547 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_alibaba_fastjson_1_1_41.xm
文件 582 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_clearspring_analytics_stream_2_7_0.xm
文件 597 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_esotericsoftware_kryo_shaded_3_0_3.xm
文件 562 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_esotericsoftware_minlog_1_3_0.xm
文件 677 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxm
文件 628 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxm
文件 656 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxm
文件 720 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxm
文件 727 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxm
文件 564 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_github_fommil_netlib_core_1_1_2.xm
文件 573 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_github_rwl_jtransforms_2_4_0.xm
文件 578 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_code_findbugs_jsr305_1_3_9.xm
文件 548 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_code_gson_gson_2_2_4.xm
文件 546 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_guava_guava_11_0_2.xm
文件 629 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_inject_extensions_guice_servlet_3_0.xm
文件 529 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_inject_guice_3_0.xm
文件 607 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_protobuf_protobuf_java_2_5_0.xm
文件 588 2019-03-15 05:14 code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_googlecode_javaewah_JavaEWAH_0_3_2.xm
............此处省略972个文件信息
- 上一篇:lora_pingpong
- 下一篇:哈工大计算机网络习题及模拟考试题
相关资源
- spark+scala学习
- Spark Dataframe详解.zip
- 基于Spark框架的聚类算法研究
- 实时分析-分析和可视化流数据的技术
- Spark快速大数据分析
- Cloudera Custom Training: Hands-On Exercises
- hadoop dll winutils 各种版本
- Spark快速大数据分析高清带标签pdf+全
- Spark高级数据分析-中文完整
- 基于 Flume+ Kafka+ Spark Streaming 实现实时
- 裸奔三部曲全书20110101.rar
- Spark快速大数据分析—中文版
- 使用Dataframe分析出租车的 GPS信息
- Philips飞利浦MP3Spark2固件工具Firmware(
- Spark大数据处理 技术 应用与性能优化
- CDH6离线安装 -
- apache-hadoop-3.1.0-winutils-master本地开发调
- 《Spark大数据处理 技术、应用与性能
- Cloudera Manager及CDH从5.4.8升级到5.12.1全
- 基于用户的SparkALS推荐系统和数据源
- Scala实用指南高清带书签
- 电商数据分析平台的设计与实现-论文
- Flink,Storm,Spark Streaming三种流框架
- Advanced Analytics with Spark 2nd Edition.pdf
- 用户行为大数据分析 PPT
- Spark-streaming 在京东的项目实践
- Apache_Spark_Graph_Processing
- 数据库处理基础设计实现
- Learning.Spark.Lightning-Fast.Big.Data.Analysi
- 《Spark高级数据分析》源代码
评论
共有 条评论