package cn.edu.fudan.corpus;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.objectOutputStream;
import java.util.HashMap;
import java.util.Properties;
import java.util.Random;
public class LookupTableGeneratorStart {
/**
 * @param args
 */
public static void main(String[] args) {
// TODO Auto-generated method stub
Properties prop = new Properties();
try {
prop.load(new FileInputStream(
} catch (IOException e) {
String inputFile = prop.getProperty(“embeddingTextFile“);
String outputFile = prop.getProperty(“embeddingFile“);
HashMap lookuptable = new HashMap();
int dimension = Integer.parseInt(prop.getProperty(“dimension“));
String tokenFile = prop.getProperty(“tokenFile“);
double[] feature = null;
double divisor = 0.0d;
boolean isDebug = false;
try {
FileInputStream fis = new FileInputStream(inputFile);
InputStreamReader isr = new InputStreamReader(fis “UTF-8“);
BufferedReader br = new BufferedReader(isr);
String line = null;
String[] tokens = null;
int num = 0;
while ((line = br.readLine()) != null) {
line = line.trim();
if (!line.equals(““)) {
tokens = line.split(“\\s+“);
if (tokens.length != (dimension + 1)) {
System.out.println(“Check the embeddings at the line “
+ num + “ (the dimensionality is “
+ (tokens.length - 1) + “): “ + line);
} else {
feature = new double[dimension];
for (int i = 1; i < dimension + 1; i++) {
feature[i - 1] = Double.parseDouble(tokens[i]);
// Normalize
divisor = 0.0d;
for (int d = 0; d < dimension; d++) {
divisor += Math.pow(feature[d] 2);
divisor = Math.sqrt(divisor);
for (int d = 0; d < dimension; d++) {
feature[d] = feature[d] / divisor;
lookuptable.put(tokens[0] feature);
} catch (Exception e) {
// Supplement the special tokens
try {
FileInputStream fis = new FileInputStream(tokenFile);
InputStreamReader isr = new InputStreamReader(fis “UTF-8“);
BufferedReader br = new BufferedReader(isr);
String line = null;
Random randomgen = new Random();
while ((line = br.readLine()) != null) {
line = line.trim();
if (!lookuptable.containsKey(line)) {
System.out.println(“Message: the tokens “ + line + “ is missing in the embeddings.“);
feature = new double[dimension];
for (int i = 0; i < dimension; i++) {
feature[i] = (randomgen.nextDouble() - 0.5d) * 2 / dimension;
// Normalize
divisor = 0.
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2016-03-04 01:14 FudanDNN-NLPv2.0\
文件 377 2016-03-03 18:46 FudanDNN-NLPv2.0\.classpath
文件 8196 2016-03-04 01:14 FudanDNN-NLPv2.0\.DS_Store
目录 0 2016-03-04 10:53 __MACOSX\
目录 0 2016-03-04 10:53 __MACOSX\FudanDNN-NLPv2.0\
文件 120 2016-03-04 01:14 __MACOSX\FudanDNN-NLPv2.0\._.DS_Store
文件 375 2016-03-03 17:06 FudanDNN-NLPv2.0\.project
目录 0 2016-03-03 17:06 FudanDNN-NLPv2.0\.settings\
文件 587 2016-03-03 17:06 FudanDNN-NLPv2.0\.settings\org.eclipse.jdt.core.prefs
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\
文件 5463 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\LookupTableGeneratorStart.class
文件 1392 2016-03-03 18:58 FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\SemanticCorpusPrepareStart.class
文件 1192 2016-03-03 19:00 FudanDNN-NLPv2.0\bin\cn\edu\fudan\corpus\TokenExtractorStart.class
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\crf\
文件 819 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\crf\ConditionalRandomFieldLargeScaleStart.class
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\dnn\
文件 3367 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\dnn\WindowConvolutionNetworkDecoderStart.class
文件 4962 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\dnn\WindowConvolutionNetworkStart.class
目录 0 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\
文件 3358 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\CRFSemanticAnalyzerStart.class
文件 3064 2016-03-03 19:07 FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\LSTMSemanticAnalyzerStart.class
文件 1924 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\NamedIdentityRecognizerStart.class
文件 1916 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\PosTaggerStart.class
文件 1892 2016-03-03 19:08 FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\PrepreocessStart.class
文件 1861 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\flow\WordSegmentorStart.class
目录 0 2016-03-03 18:51 FudanDNN-NLPv2.0\bin\cn\edu\fudan\rnn\
文件 3364 2016-03-03 18:46 FudanDNN-NLPv2.0\bin\cn\edu\fudan\rnn\LSTMDecoderStart.class
- 上一篇:网上资源管理系统ssm-源码
- 下一篇:spring 共享单车管理系统
- 广联达6.0写锁包,2020年11月最新
- 机器学习个人笔记完整版v5.2-A4打印版
- 深度学习卷积神经网络可检测和分类
- GAN对抗式生成网络的应用:从图片上
- [en]深度学习[Deep Learning: Adaptive Compu
- 李宏毅-机器学习(视频2017完整)
- 吴恩达深度学习第一课第四周作业及
- 机器学习深度学习 PPT
- 麻省理工:深度学习介绍PPT-1
- Wikipedia机器学习迷你电子书之四《D
- 深度学习在遥感中的应用综述
- 深度学习数据集标注
- 深度学习算法实践源码-吴岸城
- 李宏毅深度学习ppt
- SSD目标检测算法论文-英文原版
- 台湾李宏毅教授深度学习讲义 pdf
- 基于深度学习实现人脸识别包含模型
- 深度学习与PyTorch-代码和PPT.zip
- 测试工程源码1(一种基于深度学习的
- 深度学习: MNIST的数据集
- 《深度学习》 高清版本中文PDFIan Go
- 今日头条38万条新闻数据标题
- 深度学习算法论文
- TensorFlow Machine Learning Cookbook+无码高清
- Hands-On Machine Learning with Scikit-Learn an
- Neural Networks:Tricks of the Trade+无码高清
- 基于深度学习的图像超分辨率算法论
- 人工智能初步学习总结
- 迁移学习简明手册
- 基于深度学习的软件源码漏洞预测综
共有 条评论