资源简介
正向最大匹配中文分词 C++ 源程序
代码片段和文件信息
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// One entry of the first-character lookup table.
// Search() seeks to `lbeg` in the index file and reads `lend - lbeg` bytes,
// so the pair describes a byte range inside that file.
struct indexl
{
    long lbeg; // start offset of the range; Init() presets it to -1
    long lend; // end offset of the range (one past the last byte read)
};
typedef indexl Id;
// Comparator for Sort(): true when `lhs` is strictly longer (in bytes) than `rhs`.
static bool LongerFirst(const std::string &lhs, const std::string &rhs)
{
    return lhs.length() > rhs.length();
}

// Reorders `vstr` in place so that the strings run from longest to shortest
// (by byte length). Strings of equal length keep their relative input order.
//
// The previous hand-written double loop was O(n^2); std::stable_sort gives the
// same longest-first ordering in O(n log n).
void Sort(std::vector<std::string> &vstr)
{
    std::stable_sort(vstr.begin(), vstr.end(), LongerFirst);
}
void Init(Id * &id)//生成索引文档
{
ifstream in_src(“gbciku.txt“);
ofstream out_des(“gbindex.txt“);
vector vstr;
string line;
id = new Id[6768];
for(int x = 0; x < 6768 ; x++)
id[x].lbeg = -1;
int ivnum = 0;
int ipos = 0;
int ima = 0;
while(getline(in_srcline))
{
//int len = line.length();
int blankpos = line.find(“ “);
line.resize(blankpos);
//cout<<(int)line[0];
//int temp_x = line[0];
//int temp_y = (unsigned char)line[0] - 0xB0;
//int temp_x1 = ((unsigned char)line[0] - 0xB0)*94;
//int temp_y1 = line[1];
//int temp_y3=(unsigned char)line[1];
//int temp_y4=(unsigned char)line[1] - 0xA1;
//int temp_y2=((unsigned char)line[0] - 0xB0)*94+(unsigned char)line[1] - 0xA1;
int icurma = ((unsigned char)line[0] - 0xB0)*94+ (unsigned char)line[1] - 0xA1;
if(icurma > 6768||icurma < 0)
{
cout << endl;
}
if(icurma == ima)
{
line = line.substr(2 line.length() - 2);
//vstr[ivnum] = line;
vstr.insert(vstr.end() line);//首字匹配
}
else// icur_ma != ima
{
Sort(vstr);//vstr从大到小排列
for(vector::iterator i = vstr.begin(); i < vstr.end(); i++)
{
out_des << (*i) << “ “;//将索引文件注入容器中
ipos += (*i).length() + 1;//每一个i都指向了空格后的字符串
}
id[ima].lend = ipos;//ipos的值就确定了结构体的。。。
//ivnum = 0;
vstr.clear();//清除容器中所有元素i
ima =icurma ;
//cout< id[ima].lbeg = ipos - 1;
line = line.substr(2 line.length() - 2);
vstr.insert(vstr.end() line);//循环,首字匹配
}
}
Sort(vstr);
for(vector::iterator i = vstr.begin(); i < vstr.end(); i++)
{
out_des << (*i) << “ “;
ipos += (*i).length() + 1;
}
id[ima].lend = ipos;
//cout << ima << endl;//输入串长度
//
}
int Search(const char *str_in Id *id int cur_ma)//查找索引文件
{
//cout << str_in <<“ “;
ifstream in(“gbindex.txt“);
string line_temp = str_in;//输入串
// cout << line_temp.size();
char* szstr = new char[id[cur_ma].lend - id[cur_ma].lbeg + 1];
//cout << “ readfile “;
in.seekg(id[cur_ma].lbeg ios::beg);
in.read(szstr id[cur_ma].lend - id[cur_ma].lbeg);
// cout << “id[cur_ma].lend“ << id[cur_ma].lend << endl;//不明白其意思szstr
// cout << “id[cur_ma].lbeg“ << id[cur_ma].lbeg << endl;
szstr[id[cur_ma].lend - id[cur_ma].lbeg] = 0;
//cout << str_in <<“ “;
char word[40];
vector vstr;
char* szstr_temp = szstr;
// cout << strlen(szstr) << endl;//循环输出首字打头的字符串
//cout << strlen(szstr) << endl;//输出皆为两遍,why!
//string szstr_temp = szstr;
while(strlen(szstr_temp) > 0)
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
.CA.... 135822 2007-12-22 01:37 proc\aa
.CA.... 10616 2009-05-31 08:37 proc\Debug\BuildLog.htm
.CA.... 628736 2009-05-31 08:37 proc\Debug\maxinum-matching.exe
.CA.... 406 2009-05-31 08:37 proc\Debug\maxinum-matching.exe.em
.CA.... 472 2009-05-31 08:37 proc\Debug\maxinum-matching.exe.em
.CA.... 381 2009-05-31 08:37 proc\Debug\maxinum-matching.exe.intermediate.manifest
.CA.... 1470592 2009-05-31 08:37 proc\Debug\maxinum-matching.ilk
.CA.... 594752 2009-05-31 08:37 proc\Debug\maxinum-matching.obj
.CA.... 2993152 2009-05-31 08:37 proc\Debug\maxinum-matching.pdb
.CA.... 65 2009-05-31 08:37 proc\Debug\mt.dep
.CA.... 183296 2009-05-31 08:37 proc\Debug\vc90.idb
.CA.... 200704 2009-05-31 08:37 proc\Debug\vc90.pdb
.CA.... 2847520 2007-12-01 19:18 proc\gbciku.TXT
.CA.... 241740 2009-05-31 08:37 proc\gbindex.txt
.CA.... 227 2009-05-31 08:22 proc\input.txt
.CA.... 7135 2009-05-31 08:34 proc\maxinum-matching.cpp
.CA.... 6961 2007-12-19 18:04 proc\maxinum-matching.cpp.bak
.CA.... 3523 2007-12-22 13:35 proc\maxinum-matching.dsp
.CA.... 540 2007-12-22 20:17 proc\maxinum-matching.dsw
.CA.... 188416 2007-12-02 18:59 proc\maxinum-matching.exe
.CA.... 2501632 2009-05-31 08:38 proc\maxinum-matching.ncb
.CA.... 48640 2007-12-22 20:17 proc\maxinum-matching.opt
.CA.... 3346 2007-12-22 14:05 proc\maxinum-matching.plg
.CA.... 897 2009-05-31 08:02 proc\maxinum-matching.sln
.CA..H. 11264 2009-05-31 08:38 proc\maxinum-matching.suo
.CA.... 4996 2009-05-31 08:02 proc\maxinum-matching.vcproj
.CA.... 1409 2009-05-31 08:38 proc\maxinum-matching.vcproj.SUN-PC.SUN.user
.CA.... 1892 2007-12-09 16:56 proc\rmm.cpp
.CA.... 204 2007-12-07 22:17 proc\Test.cpp
.C.D... 0 2009-05-31 08:37 proc\Debug
............此处省略4个文件信息
- 上一篇:vc6.0下mfc控件加载jpg格式图片
- 下一篇:C语言写的推箱子游戏
评论
共有 条评论