-
大小: 40.13MB | 文件类型: .zip | 金币: 1 | 下载: 0 次 | 发布日期: 2023-08-08
- 语言: Python
- 标签:
资源简介
一个非常简单的 BiLSTM-CRF 模型用于中文命名实体识别 (TensorFlow)
代码片段和文件信息
import os
import pickle
import random
import sys

import numpy as np
# Tag-to-index mapping for the BIO tagging scheme.
# "O" is a non-entity character; "B-*" / "I-*" mark the beginning / inside of
# a PER, LOC or ORG entity.
tag2label = {
    "O": 0,
    "B-PER": 1, "I-PER": 2,
    "B-LOC": 3, "I-LOC": 4,
    "B-ORG": 5, "I-ORG": 6,
}
def read_corpus(corpus_path):
    """
    Read a tagged corpus and return the list of samples.

    Every non-blank line must contain exactly two whitespace-separated
    fields, ``char label``. A line consisting of a single newline ends the
    current sentence.

    :param corpus_path: path of the UTF-8 encoded corpus file
    :return: data, a list of ``(chars, labels)`` tuples, one per sentence
    :raises ValueError: if a non-blank line does not split into two fields
    """
    data = []
    sent_, tag_ = [], []
    with open(corpus_path, encoding='utf-8') as fr:
        for line in fr:
            if line != '\n':
                char, label = line.strip().split()
                sent_.append(char)
                tag_.append(label)
            else:
                data.append((sent_, tag_))
                sent_, tag_ = [], []
    # A file that does not end with a blank line would otherwise silently
    # lose its last sentence.
    if sent_:
        data.append((sent_, tag_))
    return data
def vocab_build(vocab_path, corpus_path, min_count):
    """
    Build a character vocabulary from a corpus and pickle it to disk.

    Digit tokens are folded into ``<NUM>`` and tokens in the ASCII letter
    ranges into ``<ENG>``. Tokens seen fewer than ``min_count`` times are
    dropped, except for those two placeholders. Remaining tokens get ids
    1..n in first-seen order, ``<UNK>`` gets n + 1 and ``<PAD>`` gets 0.
    The vocabulary size is printed.

    :param vocab_path: output path of the pickled ``word2id`` dict
    :param corpus_path: corpus file, read through ``read_corpus``
    :param min_count: minimum frequency for a token to be kept
    :return: None
    """
    data = read_corpus(corpus_path)

    # Frequency table; dict insertion order keeps first-seen order.
    word_freq = {}
    for sent_, _tag in data:
        for word in sent_:
            if word.isdigit():
                word = '<NUM>'
            elif ('\u0041' <= word <= '\u005a') or ('\u0061' <= word <= '\u007a'):
                word = '<ENG>'
            word_freq[word] = word_freq.get(word, 0) + 1

    # Ids start at 1 because 0 is reserved for the padding token.
    word2id = {}
    for word, freq in word_freq.items():
        if freq < min_count and word not in ('<NUM>', '<ENG>'):
            continue
        word2id[word] = len(word2id) + 1
    # The unknown and padding tokens need distinct keys; sharing one key
    # would overwrite the unknown-token id with 0.
    word2id['<UNK>'] = len(word2id) + 1
    word2id['<PAD>'] = 0

    print(len(word2id))
    with open(vocab_path, 'wb') as fw:
        pickle.dump(word2id, fw)
def sentence2id(sent, word2id):
    """
    Convert a sequence of tokens into a list of vocabulary ids.

    Digit tokens are looked up as ``<NUM>`` and tokens in the ASCII letter
    ranges as ``<ENG>``; anything not in the vocabulary is looked up as
    ``<UNK>``.

    :param sent: iterable of tokens (e.g. a string or a list of characters)
    :param word2id: dict mapping token to id
    :return: sentence_id, list of ids with one entry per token
    :raises KeyError: if the placeholder needed for a token is itself
        missing from ``word2id``
    """
    sentence_id = []
    for word in sent:
        if word.isdigit():
            word = '<NUM>'
        elif ('\u0041' <= word <= '\u005a') or ('\u0061' <= word <= '\u007a'):
            word = '<ENG>'
        if word not in word2id:
            word = '<UNK>'
        sentence_id.append(word2id[word])
    return sentence_id
def read_dictionary(vocab_path):
    """
    Load a pickled vocabulary from disk and print its size.

    :param vocab_path: path of the pickle file to load
    :return: word2id, the unpickled object
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # vocabulary files from a trusted source.
    with open(vocab_path, 'rb') as fr:
        word2id = pickle.load(fr)
    print('vocab_size:', len(word2id))
    return word2id
def random_embedding(vocab, embedding_dim):
    """
    Create a randomly initialised embedding matrix.

    Values are drawn uniformly between -0.25 and 0.25.

    :param vocab: sized container; its length sets the number of rows
    :param embedding_dim: number of columns
    :return: float32 array of shape ``(len(vocab), embedding_dim)``
    """
    embedding_mat = np.random.uniform(-0.25, 0.25, (len(vocab), embedding_dim))
    return embedding_mat.astype(np.float32)
def pad_sequences(sequences pad_mark=0):
“““
:param sequences:
:param pad_mark:
:return:
“““
max_len = max(map(lambda x : len(x) sequences))
seq_list seq_len_list = [] []
for seq in seque
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2019-03-16 13:58 zh-NER-TF-master\
文件 28 2019-03-16 13:58 zh-NER-TF-master\.gitignore
文件 4103 2019-03-16 13:58 zh-NER-TF-master\README.md
文件 12732 2019-03-16 13:58 zh-NER-TF-master\conlleval_rev.pl
文件 3824 2019-03-16 13:58 zh-NER-TF-master\data.py
目录 0 2019-03-16 13:58 zh-NER-TF-master\data_path\
目录 0 2019-03-16 13:58 zh-NER-TF-master\data_path\original\
文件 49 2019-03-16 13:58 zh-NER-TF-master\data_path\original\li
文件 526458 2019-03-16 13:58 zh-NER-TF-master\data_path\original\test1.txt
文件 577540 2019-03-16 13:58 zh-NER-TF-master\data_path\original\testright1.txt
文件 10480443 2019-03-16 13:58 zh-NER-TF-master\data_path\original\train1.txt
文件 1114268 2019-03-16 13:58 zh-NER-TF-master\data_path\test_data
文件 13904440 2019-03-16 13:58 zh-NER-TF-master\data_path\train_data
文件 61479 2019-03-16 13:58 zh-NER-TF-master\data_path\word2id.pkl
目录 0 2019-03-16 13:58 zh-NER-TF-master\data_path_save\
目录 0 2019-03-16 13:58 zh-NER-TF-master\data_path_save\1521112368\
目录 0 2019-03-16 13:58 zh-NER-TF-master\data_path_save\1521112368\checkpoints\
文件 79 2019-03-16 13:58 zh-NER-TF-master\data_path_save\1521112368\checkpoints\checkpoint
文件 31417884 2019-03-16 13:58 zh-NER-TF-master\data_path_save\1521112368\checkpoints\model-31680.data-00000-of-00001
文件 1215 2019-03-16 13:58 zh-NER-TF-master\data_path_save\1521112368\checkpoints\model-31680.index
文件 5306570 2019-03-16 13:58 zh-NER-TF-master\data_path_save\1521112368\checkpoints\model-31680.meta
文件 778 2019-03-16 13:58 zh-NER-TF-master\eval.py
文件 5605 2019-03-16 13:58 zh-NER-TF-master\main.py
文件 12572 2019-03-16 13:58 zh-NER-TF-master\model.py
目录 0 2019-03-16 13:58 zh-NER-TF-master\pics\
文件 961 2019-03-16 13:58 zh-NER-TF-master\pics\demo.txt
文件 786270 2019-03-16 13:58 zh-NER-TF-master\pics\pic1.png
文件 291153 2019-03-16 13:58 zh-NER-TF-master\pics\pic2.png
文件 2814 2019-03-16 13:58 zh-NER-TF-master\utils.py
相关资源
- Python-Tensorflow仿AlphaGo框架实现的AI围棋
- Python-我是小诗姬全唐诗作为训练数据
- Python-用于物体跟踪的全卷积连体网络
- Python-数学建模竞赛中所使用的相关算
- Python-MonoDepthPyTorchPyTorch无监督单目深
- Python-用Tensorflowjs实现的可回收非可回
- Python-利用TensorFlow中的深度学习进行图
- Python-TensorFlow快速入门与实战课件与参
- Python-FCN完全卷积网络中最简单最容易
- Python-匈牙利算法卡尔曼滤波器多目标
- Python-mathAI一个拍照做题程序输入一张
- Python-Tensorflow实现SpatialAsDeepSpatialCNN
- Python-图像分类目标检测姿态估计分割
- Python-用python3opencv3做的中国车牌识别
- Python-各种对抗神经网络GAN大合集
- Python-Intel开源增强学习框架Coach
- Python-CENet用于2D医学图像分割的上下文
- Python-基于深度神经网络和蒙特卡罗树
- Python-SPNLearningAffinityviaSpatialPropagatio
- Python-效果超赞的图片自动增强GANs非成
- Python-VoiceactivitydetectionVAD语音端点检测
- Python-TensorFlow实现的人脸性别年龄识别
- Python-waifu2x利用卷积神经网络放大图片
- Python-TheElementsofStatisticalLearningESL的中
- Python-基于Tensorflow和Keras实现端到端的
- Python-MuseGAN用于乐曲生成的AI
- Python-简单快速实时可定制的机器学习
- Python-PySceneDetect基于PythonOpenCV实现的视
- Python-输入输出隐马尔可夫模型IOHMM的
- Python-基于OpenCVKerasTensorFlow实现深度换
评论
共有 条评论