资源简介
NLP 文本语义、语义情感分析工具,能帮助用户快速地实现文本语义分析
代码片段和文件信息
package com.ansj.vec;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.ansj.vec.util.MapCount;
import com.ansj.vec.domain.HiddenNeuron;
import com.ansj.vec.domain.Neuron;
import com.ansj.vec.domain.WordNeuron;
import com.ansj.vec.util.Haffman;
public class Learn {
/**
 * Vocabulary lookup keyed by the word string. Training reads each value back
 * as a {@code Neuron}, so the map carries explicit type arguments; a raw
 * {@code Map} would make those reads fail to compile.
 */
private Map<String, Neuron> wordMap = new HashMap<>();
/**
 * Number of features to train.
 */
private int layerSize = 200;
/**
 * Context window size.
 */
private int window = 5;
/**
 * Subsampling threshold: when greater than zero, training randomly discards
 * frequent words based on their frequency.
 */
private double sample = 1e-3;
/**
 * Current alpha value; training lowers it as more words are processed.
 */
private double alpha = 0.025;
/**
 * Alpha value the decay is scaled from. Initialized from the default
 * {@link #alpha}, so it must be declared after that field.
 */
private double startingAlpha = alpha;
/**
 * Number of entries in {@link #expTable}.
 */
public int EXP_TABLE_SIZE = 1000;
/**
 * Training-mode flag. NOTE(review): presumably selects CBOW over skip-gram;
 * its use is outside this excerpt - confirm.
 */
private Boolean isCbow = false;
/**
 * Lookup table sized by {@link #EXP_TABLE_SIZE}; must be declared after it.
 */
private double[] expTable = new double[EXP_TABLE_SIZE];
/**
 * Total word count used as the denominator for progress and alpha decay.
 */
private int trainWordsCount = 0;
/**
 * NOTE(review): presumably the exponent bound covered by {@link #expTable};
 * its use is outside this excerpt - confirm.
 */
private int MAX_EXP = 6;
/**
 * Creates a trainer, overriding the built-in default for every argument that
 * is non-null. A {@code null} argument leaves the corresponding default as is.
 *
 * @param isCbow    training-mode flag, or {@code null} for the default
 * @param layerSize number of features to train, or {@code null} for the default
 * @param window    context window size, or {@code null} for the default
 * @param alpha     initial alpha value, or {@code null} for the default
 * @param sample    subsampling threshold, or {@code null} for the default
 */
public Learn(Boolean isCbow, Integer layerSize, Integer window, Double alpha,
        Double sample) {
    createExpTable();
    if (isCbow != null) {
        this.isCbow = isCbow;
    }
    if (layerSize != null) {
        this.layerSize = layerSize;
    }
    if (window != null) {
        this.window = window;
    }
    if (alpha != null) {
        this.alpha = alpha;
        // startingAlpha was captured from the default alpha at field
        // initialization, and training recomputes alpha from startingAlpha.
        // Without this assignment a caller-supplied alpha would be discarded
        // as soon as the first decay step runs.
        this.startingAlpha = alpha;
    }
    if (sample != null) {
        this.sample = sample;
    }
}
/**
 * Creates a trainer that keeps every default setting. The only work done
 * here is the call to {@code createExpTable()}, which is defined elsewhere
 * in this class.
 */
public Learn() {
    this.createExpTable();
}
/**
* trainModel
*
* @throws IOException
*/
private void trainModel(File file) throws IOException {
try (BufferedReader br = new BufferedReader(new InputStreamReader(
new FileInputStream(file)))) {
String temp = null;
long nextRandom = 5;
int wordCount = 0;
int lastWordCount = 0;
int wordCountActual = 0;
while ((temp = br.readLine()) != null) {
if (wordCount - lastWordCount > 10000) {
System.out.println(“alpha:“ + alpha + “\tProgress: “
+ (int) (wordCountActual / (double) (trainWordsCount + 1) * 100)
+ “%“);
wordCountActual += wordCount - lastWordCount;
lastWordCount = wordCount;
alpha = startingAlpha
* (1 - wordCountActual / (double) (trainWordsCount + 1));
if (alpha < startingAlpha * 0.0001) {
alpha = startingAlpha * 0.0001;
}
}
String[] strs = temp.split(“ “);
wordCount += strs.length;
List sentence = new ArrayList();
for (int i = 0; i < strs.length; i++) {
Neuron entry = wordMap.get(strs[i]);
if (entry == null) {
continue;
}
// The subsampling randomly discards frequent words while keeping the
// ranking same
if (sample > 0) {
double ran = (Math.sqrt(entry.freq / (sample * trainWords
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2017-11-21 09:59 Word2VEC_java-master\
文件 59 2017-11-21 09:59 Word2VEC_java-master\.gitignore
文件 2778 2017-11-21 09:59 Word2VEC_java-master\README.md
文件 1923 2017-11-21 09:59 Word2VEC_java-master\pom.xml
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\main\
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\main\java\
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\
文件 12556 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\Learn.java
文件 8353 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\Word2VEC.java
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\domain\
文件 215 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\domain\HiddenNeuron.java
文件 507 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\domain\Neuron.java
文件 597 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\domain\WordEntry.java
文件 1557 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\domain\WordNeuron.java
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\util\
文件 864 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\util\Haffman.java
文件 1509 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\util\MapCount.java
文件 4792 2017-11-21 09:59 Word2VEC_java-master\src\main\java\com\ansj\vec\util\WordKmeans.java
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\test\
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\test\java\
目录 0 2017-11-21 09:59 Word2VEC_java-master\src\test\java\test\
文件 587 2017-11-21 09:59 Word2VEC_java-master\src\test\java\test\Test.java
评论
共有 条评论