资源简介

Spark商业实战三部曲源码,包含书中所用到的代码以及用到的数据集

资源截图

代码片段和文件信息

package com.dt.spark.SparkApps.sql;

import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.Random;

/**
 * Spark商业案例书稿第三章:3.4.3.2 电商交互式分析系统应用模拟数据生成代码
 * 电商数据自动生成代码,数据格式如下:
 * 用户数据 :{"userID": 0, "name": "spark0", "registeredTime": "2016-10-11 18:06:25"}
 * 日志数据:{"logID": 00, "userID": 0, "time": "2016-10-04 15:42:45", "typed": 0, "consumed": 0.0}
 */
public class Mock_EB_Users_Data {

    public static void main(String[] args) throws ParseException {
        /**
         * 通过传递进来的参数生成指定大小规模的数据;
         */

        long numberItems = 1000;
        String dataPath = “data/Mock_EB_Users_Data/“;

        if (args.length > 1) {
            numberItems = Integer.valueOf(args[0]);
            dataPath = args[1];
        }
        System.out.println(“User log number is : “ + numberItems);
        mockUserData(numberItems dataPath);
        mockLogData(numberItems dataPath);

    }

    private static void mockLogData(long numberItems String dataPath) {
        //{“logID“: 00“userID“: 0 “time“: “2016-10-04 15:42:45“ “typed“: 0 “consumed“: 0.0}
        StringBuffer mock_Log_Buffer = new StringBuffer(““);
        Random random = new Random();
        for (int i = 0; i < numberItems; i++) { //userID
            for (int j = 0; j < numberItems; j++) {
                String initData = “2016-10-“;
                String randomData = String.format(“%s%02d%s%02d%s%02d%s%02d“ initData random.nextInt(31)
                         “ “ random.nextInt(24)
                         “:“ random.nextInt(60)
                         “:“ random.nextInt(60));
                String result = “{\“logID\“: “ + String.format(“%02d“ j) + “ \“userID\“: “ + i + “ \“time\“: \““ + randomData + “\“ \““ +
                        “typed\“: “ + String.format(“%01d“ random.nextInt(2)) +
                        “ \“consumed\“:“ + String.format(“%.2f“ random.nextDouble() * 1000)
                        + “}“;

                mock_Log_Buffer.append(result)
                        .append(“\n“);

            }
        }
        System.out.println(mock_Log_Buffer);
        PrintWriter printWriter = null;
        try {
            printWriter = new PrintWriter(new OutputStreamWriter(
                    new FileOutputStream(dataPath + “Mock_EB_Log_Data.json“)));
            printWriter.write(mock_Log_Buffer.toString());
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {
            printWriter.close();
        }


    }

    private static void mockUserData(long numberItems String dataPath) {
        StringBuffer mock_User_Buffer = new StringBuffer(““);
        Random random = new Random();
        for (int i = 0; i < numberItems; i++) {
            String initData = “2016-10-“;
            String randomData = String.f

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\
     文件        1250  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\README.md
     目录           0  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\
     目录           0  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\
     文件         982  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\compiler.xml
     文件         479  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\encodings.xml
     目录           0  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\
     文件         495  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__antlr_antlr_2_7_7.xml
     文件         547  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__aopalliance_aopalliance_1_0.xml
     文件         459  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__asm_asm_3_1.xml
     文件         515  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__asm_asm_commons_3_1.xml
     文件         494  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__asm_asm_tree_3_1.xml
     文件         522  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_101tec_zkclient_0_3.xml
     文件         547  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_alibaba_fastjson_1_1_41.xml
     文件         582  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_clearspring_analytics_stream_2_7_0.xml
     文件         597  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_esotericsoftware_kryo_shaded_3_0_3.xml
     文件         562  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_esotericsoftware_minlog_1_3_0.xml
     文件         677  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_3.xml
     文件         628  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxml_jackson_core_jackson_core_2_6_3.xml
     文件         656  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxml_jackson_core_jackson_databind_2_6_3.xml
     文件         720  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml
     文件         727  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml
     文件         564  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_github_fommil_netlib_core_1_1_2.xml
     文件         573  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_github_rwl_jtransforms_2_4_0.xml
     文件         578  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_code_findbugs_jsr305_1_3_9.xml
     文件         548  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_code_gson_gson_2_2_4.xml
     文件         546  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_guava_guava_11_0_2.xml
     文件         629  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_inject_extensions_guice_servlet_3_0.xml
     文件         529  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_inject_guice_3_0.xml
     文件         607  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_google_protobuf_protobuf_java_2_5_0.xml
     文件         588  2019-03-15 05:14  code-of-spark-big-data-business-trilogy-master\SparkApps\.idea\libraries\Maven__com_googlecode_javaewah_JavaEWAH_0_3_2.xml
............此处省略972个文件信息

评论

共有 条评论