资源简介
花了很长时间写的 SVM 多分类程序，具体的使用方法可以在百度上查到。
这个辛苦分应该给的吧
谢谢啦
代码片段和文件信息
/**
*
*/
package com.tassemble.classify.svm;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.objectOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import libsvm.svm_model;
import libsvm.svm_node;
import libsvm.svm_parameter;
import libsvm.svm_problem;
import com.tassemble.analyzer.ArticleProcessor;
import com.tassemble.constants.Constant;
import com.tassemble.feature.Character;
import com.tassemble.feature.FeatureSelector;
import com.tassemble.feature.Selector;
import com.tassemble.tfidf.Category;
import com.tassemble.tfidf.TFIDF;
import com.tassemble.vsm.VSM;
/**
* @author chen-hongqin@163.com 2011-3-23
*
*/
public class App {
static Logger logger = Logger.getLogger(App.class);
/**
 * Placeholder with an empty body; calling it has no effect.
 * NOTE(review): looks like an unimplemented stub (main() uses Predictor for
 * the -predict mode instead) -- confirm whether it can be removed.
 */
void predict() {
}
/**
*
* @param args
* @throws IOException
* return void
*
*/
public static void main(String[] args) throws IOException {
PropertyConfigurator.configure(Constant.ROOT_PATH
+ “configures/classificationLog4j.properties“);
/**
* 1. classifier.jar -train c:\语料库2\
* 2. classifier.jar -predict c:\predictTest\
* 3. classifier.jar -check c:\predictTest\
*/
if (args[0].equals(“-train“)) {
train(args[1]);
} else if (args[0].equals(“-predict“)) {
logger.info(“load model please waiting ...“);
svm_model model = (svm_model) Constant.load(“model“);
logger.info(“load model completely!“);
Map map = (HashMap) Constant.load(“map“);
Predictor p = new Predictor(model map);
p.predict(args[1] ““);
} else if (args[0].equals(“-test“)) {
logger.info(“load model please waiting ...“);
svm_model model = (svm_model) Constant.load(“model“);
logger.info(“load model completely!“);
Map map = (HashMap) Constant.load(“map“);
ArticleProcessor processor = new ArticleProcessor();
ArrayList categories = processor.getCategories(args[1]);
VSM vsm = new VSM();
svm_problem problem = vsm.convertToVSM(categories map);
SVMScale svmScale = new SVMScale();
logger.info(“scale problems“);
problem = svmScale.scale(problem Constant.PREDICT_SCALE_ARGV);
logger.info(“end scale problems“);
Classifier t = new Classifier();
logger.info(“start predict ...“);
testPredict(t problem model);
}
}
/**
*
* return void
*
* @throws IOException
*
*/
private static void test() throws IOException {
// TODO Auto-generated method stub
Classifier t = new Classifier();
ArrayList categories = new TFIDF().process();
Selector selector = new FeatureSelector(categories);
HashMap> map = selector
.select(Constant.DEFAULT_NUMBER_OF_TOTAL_FE
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 7544244 2011-04-01 23:36 Classification\Classification\bin\BigramDict.dct
文件 568 2011-04-01 23:36 Classification\Classification\bin\com\tassemble\analyzer\Article.class
文件 4887 2011-04-01 23:36 Classification\Classification\bin\com\tassemble\analyzer\ArticleProcessor.class
文件 106 2011-04-01 23:36 Classification\Classification\bin\com\tassemble\analyzer\Scrapbook.jpage
文件 403 2011-04-01 23:36 Classification\Classification\bin\com\tassemble\analyzer\Word.class
文件 2114 2011-04-01 23:35 Classification\Classification\bin\com\tassemble\IG\DataReader.class
文件 1622 2011-04-01 23:35 Classification\Classification\bin\com\tassemble\IG\Doc.class
文件 5524 2011-04-01 23:35 Classification\Classification\bin\com\tassemble\IG\IgCalculator.class
文件 1263 2011-04-01 23:36 Classification\Classification\bin\com\xjt\nlp\word\ICTCLAS.class
文件 3135 2011-04-01 23:36 Classification\Classification\bin\com\xjt\nlp\word\Sentence.class
文件 2078 2011-04-01 23:36 Classification\Classification\bin\com\xjt\nlp\word\SplitWord.class
文件 2024 2011-04-01 23:36 Classification\Classification\bin\com\xjt\nlp\word\ThreadTest.class
文件 1170 2011-04-01 23:36 Classification\Classification\bin\com\xjt\nlp\word\Word.class
文件 152 2011-04-01 23:36 Classification\Classification\bin\com\xjt\nlp\word\Word.jbx
文件 1565689 2011-04-01 23:36 Classification\Classification\bin\coreDict.dct
文件 11 2011-04-01 23:35 Classification\Classification\bin\CVS\Entries
文件 11 2011-04-01 23:35 Classification\Classification\bin\CVS\Repository
文件 34 2011-04-01 23:35 Classification\Classification\bin\CVS\Root
文件 188416 2011-04-01 23:36 Classification\Classification\bin\jawin.dll
文件 94550 2011-04-01 23:36 Classification\Classification\bin\jawin.jar
文件 10412 2011-04-01 23:36 Classification\Classification\bin\lexical.ctx
文件 1032 2011-04-01 23:36 Classification\Classification\bin\nr.ctx
文件 113780 2011-04-01 23:36 Classification\Classification\bin\nr.dct
文件 408 2011-04-01 23:36 Classification\Classification\bin\ns.ctx
文件 54278 2011-04-01 23:36 Classification\Classification\bin\ns.dct
文件 408 2011-04-01 23:36 Classification\Classification\bin\tr.ctx
文件 64000 2011-04-01 23:36 Classification\Classification\bin\tr.dct
文件 1686 2011-04-01 23:35 Classification\Classification\bin\军事\0 (1).txt
文件 541 2011-04-01 23:35 Classification\Classification\bin\军事\0 (2).txt
文件 3711 2011-04-01 23:35 Classification\Classification\bin\军事\0 (3).txt
............此处省略310个文件信息
评论
共有 条评论