资源简介
hdp的java代码,非参数主题模型,对文章主题的提取。
代码片段和文件信息
/*
* (C) Copyright 2005-2011, Gregor Heinrich (gregor :: arbylon : net) \
* (This file is part of the knowceans-ilda experimental software package.)
*/
/*
* knowceans-ilda is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*/
/*
* knowceans-ilda is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*/
/*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.knowceans.corpus;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
/**
* CorpusResolver resolves indices into names.
*
* @author gregor
*/
public class CorpusResolver {
public static void main(String[] args) {
CorpusResolver cr = new CorpusResolver(“nips/nips“);
System.out.println(cr.getAuthor(2));
System.out.println(cr.getLabel(20));
System.out.println(cr.getDoc(501));
System.out.println(cr.getTerm(1));
System.out.println(cr.getTermId(cr.getTerm(1)));
}
public final String[] EXTENSIONS = { “docs“ “vocab“
“authors.key“ “labels.key“ “vols.key“ “docnames“ };
HashMap termids;
String[][] data = new String[EXTENSIONS.length][];
String filebase;
private boolean parmode;
/**
 * Creates a resolver for the given file base with paragraph mode
 * switched off.
 *
 * @param filebase path prefix of the corpus files, without extension
 */
public CorpusResolver(String filebase) {
    // arguments must be comma-separated; the scraped copy had lost the comma
    this(filebase, false);
}
/**
 * Creates a resolver and loads every corpus file that exists for the
 * given file base. For each entry of {@code EXTENSIONS} the file
 * {@code filebase + "." + extension} is loaded into the matching slot of
 * {@code data}; missing files are skipped and leave that slot
 * {@code null}.
 * <p>
 * Paragraph mode controls which vocabulary is read (possibly a different
 * one): when it is on, the vocabulary comes from
 * {@code filebase + ".par.vocab"} instead of {@code filebase + ".vocab"}.
 *
 * @param filebase path prefix of the corpus files, without extension
 * @param parmode {@code true} to read the alternative paragraph-mode
 *        vocabulary
 */
public CorpusResolver(String filebase, boolean parmode) {
    this.parmode = parmode;
    this.filebase = filebase;
    for (int i = 0; i < EXTENSIONS.length; i++) {
        String base = filebase;
        // read alternative vocabulary for paragraph mode
        if (parmode && EXTENSIONS[i].equals("vocab")) {
            base += ".par";
        }
        File f = new File(base + "." + EXTENSIONS[i]);
        // files are optional: only load what is present on disk
        if (f.exists()) {
            data[i] = load(f);
        }
    }
}
/**
* Loads the lines of a file, removing all information after a = sign in
* each line.
*
* @param f
* @return array of label strings
*/
private String[] load(File f) {
String[] strings = null;
try {
ArrayList a = new ArrayList();
BufferedReader br = new BufferedReader(
new FileReader(f));
String line = null;
while ((line = br.readLine()) != null) {
line = line.trim();
int ii = line.indexOf(‘=‘);
if (ii > -1) {
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2013-05-07 22:02 IldaGibbs\
文件 295 2013-05-07 16:31 IldaGibbs\.classpath
文件 6148 2013-05-07 22:02 IldaGibbs\.DS_Store
目录 0 2013-05-07 22:02 __MACOSX\
目录 0 2013-05-07 22:02 __MACOSX\IldaGibbs\
文件 82 2013-05-07 22:02 __MACOSX\IldaGibbs\._.DS_Store
文件 368 2013-05-07 16:31 IldaGibbs\.project
目录 0 2013-05-07 16:31 IldaGibbs\.settings\
文件 587 2013-05-07 16:31 IldaGibbs\.settings\org.eclipse.jdt.core.prefs
目录 0 2013-05-07 19:37 IldaGibbs\bin\
文件 6148 2013-05-07 19:36 IldaGibbs\bin\.DS_Store
目录 0 2013-05-07 16:32 IldaGibbs\bin\org\
目录 0 2013-05-07 16:32 IldaGibbs\bin\org\knowceans\
目录 0 2013-05-07 16:33 IldaGibbs\bin\org\knowceans\corpus\
文件 4832 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\CorpusResolver.class
文件 4146 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\DisjointDocTerms.class
文件 5595 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\Document.class
文件 269 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\ICorpus.class
文件 512 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\ILabelCorpus.class
文件 284 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\ISplitCorpus.class
文件 196 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\ITermCorpus.class
文件 6398 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\LabelNumCorpus.class
文件 12362 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\NumCorpus.class
文件 1530 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\corpus\VisCorpus.class
目录 0 2013-05-07 16:32 IldaGibbs\bin\org\knowceans\topics\
目录 0 2013-05-07 16:33 IldaGibbs\bin\org\knowceans\topics\simple\
文件 14261 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\topics\simple\IldaGibbs.class
文件 175 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\topics\simple\ISimpleGibbs.class
文件 149 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\topics\simple\ISimplePpx.class
文件 187 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\topics\simple\ISimpleQueryGibbs.class
文件 8391 2013-05-07 19:37 IldaGibbs\bin\org\knowceans\topics\simple\LdaGibbs.class
............此处省略102个文件信息
- 上一篇:apache httpclient jar包
- 下一篇:智能化停车场管理系统
评论
共有 条评论