资源简介
CRF(Conditional Random Field,条件随机场)在 NLP 技术领域中主要用于文本标注,并有多种应用场景,例如:
分词(标注字的词位信息,由字构词)
词性标注(标注分词的词性,例如:名词,动词,助词)
命名实体识别(识别人名,地名,机构名,商品名等具有一定内在规律的实体名词)
代码片段和文件信息
#include <iostream>
#include "crfpp.h"
// c++ -O3 example.cpp -lcrfpp
int main(int argc char **argv) {
// -v 3: access deep information like alphabetaprob
// -nN: enable nbest output. N should be >= 2
CRFPP::Tagger *tagger =
CRFPP::createTagger(“-m model -v 3 -n2“);
if (!tagger) {
std::cerr << CRFPP::getTaggerError() << std::endl;
return -1;
}
// clear internal context
tagger->clear();
// add context
tagger->add(“Confidence NN“);
tagger->add(“in IN“);
tagger->add(“the DT“);
tagger->add(“pound NN“);
tagger->add(“is VBZ“);
tagger->add(“widely RB“);
tagger->add(“expected VBN“);
tagger->add(“to TO“);
tagger->add(“take VB“);
tagger->add(“another DT“);
tagger->add(“sharp JJ“);
tagger->add(“dive NN“);
tagger->add(“if IN“);
tagger->add(“trade NN“);
tagger->add(“figures NNS“);
tagger->add(“for IN“);
tagger->add(“September NNP“);
std::cout << “column size: “ << tagger->xsize() << std::endl;
std::cout << “token size: “ << tagger->size() << std::endl;
std::cout << “tag size: “ << tagger->ysize() << std::endl;
std::cout << “tagset information:“ << std::endl;
for (size_t i = 0; i < tagger->ysize(); ++i) {
std::cout << “tag “ << i << “ “ << tagger->yname(i) << std::endl;
}
// parse and change internal stated as ‘parsed‘
if (! tagger->parse()) return -1;
std::cout << “conditional prob=“ << tagger->prob()
<< “ log(Z)=“ << tagger->Z() << std::endl;
for (size_t i = 0; i < tagger->size(); ++i) {
for (size_t j = 0; j < tagger->xsize(); ++j) {
std::cout << tagger->x(i j) << ‘\t‘;
}
std::cout << tagger->y2(i) << ‘\t‘;
std::cout << std::endl;
std::cout << “Details“;
for (size_t j = 0; j < tagger->ysize(); ++j) {
std::cout << ‘\t‘ << tagger->yname(j) << “/prob=“ << tagger->prob(ij)
<< “/alpha=“ << tagger->alpha(i j)
<< “/beta=“ << tagger->beta(i j);
}
std::cout << std::endl;
}
// when -n20 is specified you can access nbest outputs
std::cout << “nbest outputs:“ << std::endl;
for (size_t n = 0; n < 10; ++n) {
if (! tagger->next()) break;
std::cout << “nbest n=“ << n << “\tconditional prob=“ << tagger->prob() << std::endl;
// you can access any information using tagger->y()...
}
std::cout << “Done“ << std::endl;
delete tagger;
return 0;
}
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 28 2013-02-13 00:40 CRF++-0.58\AUTHORS
文件 1494 2013-02-13 00:40 CRF++-0.58\BSD
文件 164 2013-02-13 00:40 CRF++-0.58\COPYING
文件 50688 2013-02-13 00:40 CRF++-0.58\crf_learn.exe
文件 50688 2013-02-13 00:40 CRF++-0.58\crf_test.exe
文件 3243 2013-02-13 00:40 CRF++-0.58\doc\default.css
文件 2197 2013-02-13 00:40 CRF++-0.58\doc\doxygen\annotated.html
文件 677 2013-02-13 00:40 CRF++-0.58\doc\doxygen\bc_s.png
文件 4317 2013-02-13 00:40 CRF++-0.58\doc\doxygen\classCRFPP_1_1Model-members.html
文件 11478 2013-02-13 00:40 CRF++-0.58\doc\doxygen\classCRFPP_1_1Model.html
文件 15210 2013-02-13 00:40 CRF++-0.58\doc\doxygen\classCRFPP_1_1Tagger-members.html
文件 54951 2013-02-13 00:40 CRF++-0.58\doc\doxygen\classCRFPP_1_1Tagger.html
文件 2932 2013-02-13 00:40 CRF++-0.58\doc\doxygen\classes.html
文件 126 2013-02-13 00:40 CRF++-0.58\doc\doxygen\closed.png
文件 45954 2013-02-13 00:40 CRF++-0.58\doc\doxygen\crfpp_8h-source.html
文件 93236 2013-02-13 00:40 CRF++-0.58\doc\doxygen\crfpp_8h.html
文件 61339 2013-02-13 00:40 CRF++-0.58\doc\doxygen\crfpp_8h_source.html
文件 14965 2013-02-13 00:40 CRF++-0.58\doc\doxygen\doxygen.css
文件 3942 2013-02-13 00:40 CRF++-0.58\doc\doxygen\doxygen.png
文件 2080 2013-02-13 00:40 CRF++-0.58\doc\doxygen\files.html
文件 9878 2013-02-13 00:40 CRF++-0.58\doc\doxygen\functions.html
文件 9790 2013-02-13 00:40 CRF++-0.58\doc\doxygen\functions_func.html
文件 9423 2013-02-13 00:40 CRF++-0.58\doc\doxygen\globals.html
文件 2301 2013-02-13 00:40 CRF++-0.58\doc\doxygen\globals_defs.html
文件 8892 2013-02-13 00:40 CRF++-0.58\doc\doxygen\globals_func.html
文件 2283 2013-02-13 00:40 CRF++-0.58\doc\doxygen\globals_type.html
文件 1580 2013-02-13 00:40 CRF++-0.58\doc\doxygen\index.html
文件 13215 2013-02-13 00:40 CRF++-0.58\doc\doxygen\namespaceCRFPP.html
文件 2675 2013-02-13 00:40 CRF++-0.58\doc\doxygen\namespacemembers.html
文件 2554 2013-02-13 00:40 CRF++-0.58\doc\doxygen\namespacemembers_func.html
............此处省略47个文件信息
- 上一篇:Bz1621.lzh二进制编译器
- 下一篇:Word文档英文翻译助手
评论
共有 条评论