-
大小: 6.08MB | 文件类型: .zip | 金币: 2 | 下载: 0 次 | 发布日期: 2023-09-24
- 语言: Python
- 标签:
资源简介
Sequential Event Experiment based on Travel note crawled from XieCheng,基于50W携程出行游记的采集与顺承事件图谱构建
![](http://www.nz998.com/pic/71026.jpg)
代码片段和文件信息
#!/usr/bin/env python3
# coding: utf-8
# File: pattern.py
# Author: lhy
# Date: 18-7-15
import pymongo
import re
import jieba
from sentence_parser import *
class EventGraph:
    """Extract sequential ("A, then B") events from travel-note documents.

    Documents are read from the MongoDB collection ``travel.doc`` and the
    extracted events are written to ``travel.events``.

    The pipeline is: split a document into long sentences, keep the ones that
    contain a sequential connective, split both sides of the connective into
    sub-clauses, and hand each sub-clause to ``vob_exract`` (defined after
    ``extract_phrase`` in the original file) to pull out phrases.
    """

    # Sequential connectives ("secondly", "then", "next", "afterwards",
    # "next up"); group 1 is the text before the connective, group 3 the
    # text after it.
    SEQUENCE_PATTERN = re.compile(r'(.*)(其次|然后|接着|随后|接下来)(.*)')

    def __init__(self):
        """Open the MongoDB collections and create the sentence parser."""
        conn = pymongo.MongoClient()
        self.pattern = self.SEQUENCE_PATTERN
        self.col = conn['travel']['doc']
        self.col_insert = conn['travel']['events']
        # LtpParser is expected to come from the star-import of
        # sentence_parser at the top of the file.
        self.parse_handler = LtpParser()

    def seg_long_sents(self, content):
        """Split ``content`` into long sentences.

        ASCII and ideographic spaces are removed first, then the text is
        split on sentence-level punctuation. Only fragments longer than five
        characters are kept.

        NOTE(review): the excerpt this was restored from had full-width
        punctuation folded to ASCII (each mark appeared twice); both widths
        are listed here -- confirm against the upstream file.
        """
        cleaned = content.replace(' ', '').replace('\u3000', '')
        fragments = re.split(r'[？?！!。；;：:\n\r….·]', cleaned)
        return [fragment for fragment in fragments if len(fragment) > 5]

    def process_subsent(self, content):
        """Split a sentence into sub-clauses.

        Splits on enumeration/clause punctuation and on the single-character
        conjunctions in the pattern below, keeping fragments longer than one
        character.

        NOTE(review): full-width comma and parentheses were restored next to
        their ASCII forms for the same reason as in ``seg_long_sents``.
        """
        fragments = re.split(r'[、，,和与及且跟（）()~▲.]', content)
        return [fragment for fragment in fragments if len(fragment) > 1]

    def process_doc(self):
        """Extract events from every stored document and persist them.

        Documents that yield no events are skipped.
        """
        for item in self.col.find():
            events_all = self.collect_event(item['content'])
            if events_all:
                # Collection.insert() was removed in PyMongo 4; insert_one()
                # is the supported single-document write.
                self.col_insert.insert_one({'events': events_all})

    def collect_event(self, content):
        """Return one event list per long sentence of ``content`` that has events."""
        events_all = []
        for sent in self.seg_long_sents(content):
            events = self.event_extract(sent)
            if events:
                events_all.append(events)
        return events_all

    def event_extract(self, sent):
        """Extract the events around a sequential connective in ``sent``.

        Returns the phrases produced by ``extract_phrase`` for the sub-clauses
        before and after the connective, or ``[]`` when the sentence has no
        connective or either side yields no sub-clause.
        """
        event_seqs = []
        for pre, _connective, post in self.pattern.findall(sent):
            pre_sents = self.process_subsent(pre)
            post_sents = self.process_subsent(post)
            # A sequence needs something on both sides of the connective.
            if pre_sents and post_sents:
                event_seqs += pre_sents
                event_seqs += post_sents
        if not event_seqs:
            return []
        # Turn the raw sub-clauses into structured phrases.
        return self.extract_phrase(event_seqs)

    def extract_phrase(self, event_seqs):
        """Run ``vob_exract`` on each sub-clause and concatenate the results."""
        events = []
        for event in event_seqs:
            vobs = self.vob_exract(event)
            if vobs:
                events += vobs
        return events
‘‘‘提取VOB关系‘‘‘
def vob_exract(self content):
vobs = []
words = list(jieba.cut(content))
if len(words) >= 300:
return []
postags = self.parse_handler.get_postag(words)
tuples child_dict_list = self.parse_handler.parser_main(words postags)
for tuple in tuples:
rel = tuple[-1]
pos_verb= tuple[4][0]
pos_object = tuple[2][0]
if rel == ‘VOB‘ and (pos_verb pos_object) in [(‘v‘ ‘n‘) (‘v‘ ‘i‘)]:
phrase = ‘‘.
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-12-15 05:16 SequentialEventExtration-master\
文件 93 2018-12-15 05:16 SequentialEventExtration-master\.gitattributes
目录 0 2018-12-15 05:16 SequentialEventExtration-master\.idea\
文件 398 2018-12-15 05:16 SequentialEventExtration-master\.idea\SequentialEventGraph.iml
文件 706 2018-12-15 05:16 SequentialEventExtration-master\.idea\misc.xml
文件 292 2018-12-15 05:16 SequentialEventExtration-master\.idea\modules.xml
文件 180 2018-12-15 05:16 SequentialEventExtration-master\.idea\vcs.xml
文件 20948 2018-12-15 05:16 SequentialEventExtration-master\.idea\workspace.xml
文件 7111 2018-12-15 05:16 SequentialEventExtration-master\README.md
目录 0 2018-12-15 05:16 SequentialEventExtration-master\event_graph\
目录 0 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\
目录 0 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\
文件 30798 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.css
文件 1532584 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.js
文件 781766 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.map
文件 22008 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.min.css
文件 582497 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.min.js
文件 3312 2018-12-15 05:16 SequentialEventExtration-master\event_graph\event_extract.py
文件 4001 2018-12-15 05:16 SequentialEventExtration-master\event_graph\event_graph.py
文件 7007 2018-12-15 05:16 SequentialEventExtration-master\event_graph\sentence_parser.py
文件 13643483 2018-12-15 05:16 SequentialEventExtration-master\event_graph\seq_events.txt
文件 55935 2018-12-15 05:16 SequentialEventExtration-master\event_graph\travel_event_graph.html
目录 0 2018-12-15 05:16 SequentialEventExtration-master\image\
文件 297401 2018-12-15 05:16 SequentialEventExtration-master\image\all.png
文件 54270 2018-12-15 05:16 SequentialEventExtration-master\image\book.png
文件 45649 2018-12-15 05:16 SequentialEventExtration-master\image\food.png
文件 213664 2018-12-15 05:16 SequentialEventExtration-master\image\graph.png
文件 86232 2018-12-15 05:16 SequentialEventExtration-master\image\plane.png
文件 99399 2018-12-15 05:16 SequentialEventExtration-master\image\train.png
目录 0 2018-12-15 05:16 SequentialEventExtration-master\news_spider\
目录 0 2018-12-15 05:16 SequentialEventExtration-master\news_spider\.idea\
............此处省略22个文件信息
相关资源
- Python-DeepMoji模型的pyTorch实现
- Python-使用DeepFakes实现YouTube视频自动换
- Python-一系列高品质的动漫人脸数据集
- Python-Insightface人脸检测识别的最小化
- Python-自然场景文本检测PSENet的一个
- Python-在特征金字塔网络FPN的Pytorch实现
- Python-PyTorch实时多人姿态估计项目的实
- Python-用PyTorch10实现FasterRCNN和MaskRCNN比
- Python-心脏核磁共振MRI图像分割
- Python-基于YOLOv3的行人检测
- Python-RLSeq2Seq用于SequencetoSequence模型的
- Python-PyTorch对卷积CRF的参考实现
- Python-高效准确的EAST文本检测器的一个
- Python-pytorch实现的人脸检测和人脸识别
- Python-UNet用于医学图像分割的嵌套UN
- Python-TensorFlow弱监督图像分割
- Python-基于tensorflow实现的用textcnn方法
- Python-Keras实现Inceptionv4InceptionResnetv1和
- Python-pytorch中文手册
- Python-FastSCNN的PyTorch实现快速语义分割
- Python-滑动窗口高分辨率显微镜图像分
- Python-使用MovieLens数据集训练的电影推
- Python-机器学习驱动的Web应用程序防火
- Python-subpixel利用Tensorflow的一个子像素
- Python-汉字的神经风格转移Neuralsty
- Python-神经网络模型能够从音频演讲中
- Python-深度增强学习算法的PyTorch实现策
- Python-基于深度学习的语音增强使用
- Python-基于知识图谱的红楼梦人物关系
- Python-STGAN用于图像合成的空间变换生
评论
共有 条评论