资源简介
想用时找不到地方下载,找到后发出来与大家共享。欢迎大家一起交流学习。
代码片段和文件信息
//sa614349@mail.ustc.edu.com
//desc: converts a word2vec output file (vectors.bin) between binary and text formats
// NOTE(review): the header names were lost in extraction; these are restored
// from the library calls visible in this file. <math.h> is a guess for the
// fourth include - confirm against the original source.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size, in bytes, of the buffer main allocates to hold one vocabulary entry.
const long long max_w = 50; // max length of vocabulary entries
// Conversion direction: 1 = binary to text, any other value = text to binary.
// GetInput sets it to 0 when "-format text2binary" is given.
int format = 1;//1: binary to text other:text to binary
// NOTE(review): presumably the intended size of the two path buffers below,
// which hard-code 200 instead of using this constant - confirm.
const int file_name_len=200;
// Input file path, copied from the value following "-input" by GetInput.
char read_file[200];
// Output file path, copied from the value following "-output" by GetInput.
char write_file[200];
/*
 * Locate a command-line flag.
 *
 * Scans argv[1] .. argv[argc - 1] for an entry equal to str.
 *
 * Returns the index of the first match, or -1 when the flag is absent.
 * If the match is the last argument (so no value can follow it), prints
 * "Argument missing for <str>" and terminates the process with exit(1).
 */
int ArgPos(char *str, int argc, char **argv) {
  int a;

  for (a = 1; a < argc; a++) {
    if (strcmp(str, argv[a]) != 0) {
      continue;
    }
    /* A flag must be followed by its value, so it cannot be last. */
    if (a == argc - 1) {
      printf("Argument missing for %s\n", str);
      exit(1);
    }
    return a;
  }
  return -1;
}
/*
 * Print the description of the three command-line options
 * (-input, -output, -format) to standard output.
 */
void PrintHelp(void) {
  printf("\t-input vectors.bin file\n");
  printf("\t-output the transformated file\n");
  printf("\t-format binary2text|text2binary to inter-conversion from binary to text file \n");
}
int GetInput(int argc char **argv)
{
int i;
char format_char[50];
if ((i = ArgPos((char *)“-format“ argc argv)) > 0) {strcpy(format_charargv[i + 1]); if(strcmp(format_char“text2binary“) == 0) format=0 ;}
if ((i = ArgPos((char *)“-input“ argc argv)) > 0) strcpy(read_file argv[i + 1]);
if ((i = ArgPos((char *)“-output“ argc argv)) > 0) strcpy(write_file argv[i + 1]);
}
int main(int argc char **argv) {
if (argc < 4 || strcmp(argv[1]“help“)==0 || strcmp(argv[1]“-help“)==0 || strcmp(argv[1]“-h“)==0 ) {
printf(“Usage: ./binary2text -input -output format \n“);
PrintHelp();
return -1;
}
GetInput(argcargv);
FILE *f*fo;
long long words size a b cn;
char ch;
float *M;
char *vocab;
f = fopen(read_file “rb“);
fo = fopen(write_file “wb“);
if (f == NULL) {
printf(“Input file not found\n“);
return -1;
}
if (fo == NULL) {
printf(“Output file error\n“);
return -1;
}
fscanf(f “%lld“ &words);
fscanf(f “%lld“ &size);
vocab = (char *)malloc((long long)max_w * sizeof(char));
M = (float *)malloc((long long)words * (long long)size * sizeof(float));
if (M == NULL || vocab == NULL ) {
printf(“Cannot allocate memory“);
return -1;
}
fprintf(fo “%lld %lld\n“ words size);
//binary to text format
if (format == 1) {
long long times = words;
char ch;
while(times--)
{
if(feof(f)) break;
memset(vocab0sizeof(vocab));
a = 0;
while (1) {
ch = fgetc(f);
if(ch==‘\n‘ && a==0) continue;
vocab[a] = ch;
if (feof(f) || (vocab[a] == ‘ ‘)) break;
if ((a < max_w) && (vocab[max_w + a] != ‘\n‘)) a++;
}
vocab[a] = 0;
for (a = 0; a < size; a++) {
// modify by hanleyzhang 20160705
//在进行word2vec训练时,-binary 1 时,词向量输出到vector.bin为二进制,否则为float.
//fscanf(f“%f“&M[a + b * size]);//vector.bin 为float时,只能通过fscanf读取浮点向量数据
fread(&M[a] sizeof(float) 1 f);//vector.bin 为binary时,只能通过fread读取浮点向量数据。
//fread(&M[a + b * size] sizeof(float) 1 f);
}
fprintf(fo “%s “ vocab);
for (b = 0; b
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2017-08-15 03:35 word2vec-google-master\
文件 11358 2017-08-15 03:35 word2vec-google-master\LICENSE
文件 1209 2017-08-15 03:35 word2vec-google-master\README.txt
文件 4147 2017-08-15 03:35 word2vec-google-master\binary2text.c
文件 5241 2017-08-15 03:35 word2vec-google-master\compute-accuracy.c
文件 631 2017-08-15 03:35 word2vec-google-master\demo-analogy.sh
文件 358 2017-08-15 03:35 word2vec-google-master\demo-classes.sh
文件 885 2017-08-15 03:35 word2vec-google-master\demo-phrase-accuracy.sh
文件 853 2017-08-15 03:35 word2vec-google-master\demo-phrases.sh
文件 5126 2017-08-15 03:35 word2vec-google-master\demo-train-big-model-v1.sh
文件 742 2017-08-15 03:35 word2vec-google-master\demo-vocab.sh
文件 414 2017-08-15 03:35 word2vec-google-master\demo-word-accuracy.sh
文件 275 2017-08-15 03:35 word2vec-google-master\demo-word.sh
文件 8688 2017-08-15 03:35 word2vec-google-master\distance.c
文件 4916 2017-08-15 03:35 word2vec-google-master\distance_old.c
文件 8475 2017-08-15 03:35 word2vec-google-master\kmeans.c
文件 415 2017-08-15 03:35 word2vec-google-master\kmeans.sh
文件 954 2017-08-15 03:35 word2vec-google-master\makefile
文件 537 2017-08-15 03:35 word2vec-google-master\mydemo-analogy.sh
文件 21331 2017-08-15 03:35 word2vec-google-master\vocabanalyse.c
文件 532 2017-08-15 03:35 word2vec-google-master\vocabanalyse.sh
文件 4728 2017-08-15 03:35 word2vec-google-master\word-analogy.c
文件 9386 2017-08-15 03:35 word2vec-google-master\word2phrase.c
文件 27746 2017-08-15 03:35 word2vec-google-master\word2vec.c
文件 6763 2017-08-15 03:35 word2vec-google-master\worddistance.c
文件 715 2017-08-15 03:35 word2vec-google-master\worddistance.h
文件 644 2017-08-15 03:35 word2vec-google-master\worddistance.h2
评论
共有 条评论