-
大小: 6.08MB | 文件类型: .zip | 金币: 1 | 下载: 0 次 | 发布日期: 2023-09-24
- 语言: Python
- 标签:
资源简介
Sequential Event Experiment based on travel notes crawled from XieCheng (Ctrip),基于50W携程出行游记的采集与顺承事件图谱构建
代码片段和文件信息
#!/usr/bin/env python3
# coding: utf-8
# File: pattern.py
# Author: lhy
# Date: 18-7-15
import pymongo
import re
import jieba
from sentence_parser import *
class EventGraph:
def __init__(self):
    """Create the MongoDB handles, the connective pattern and the parser.

    A ``pymongo.MongoClient`` is opened with its default connection
    settings. Documents are read from ``travel.doc`` and extracted events
    are written to ``travel.events``.
    """
    conn = pymongo.MongoClient()
    # Three capture groups: text before the connective, the connective
    # itself (one of the listed sequential markers), and the text after it.
    self.pattern = re.compile(r'(.*)(其次|然后|接着|随后|接下来)(.*)')
    self.col = conn['travel']['doc']
    self.col_insert = conn['travel']['events']
    # NOTE(review): LtpParser is presumably provided by the star import
    # from sentence_parser -- confirm.
    self.parse_handler = LtpParser()
‘‘‘长句切分‘‘‘
def seg_long_sents(self, content):
    """Split *content* into long sentences.

    ASCII spaces and ideographic spaces (U+3000) are removed first. The
    text is then split on sentence-level punctuation and line breaks, and
    only fragments longer than five characters are returned.
    """
    cleaned = content.replace(' ', '').replace('\u3000', '')
    # NOTE(review): the pattern previously listed '?', '!', ';' and ':'
    # twice, which looks like full-width punctuation flattened to ASCII.
    # Both forms are listed here so Chinese text is split correctly.
    fragments = re.split(r'[？?！!。；;：:\n\r….·]', cleaned)
    return [sentence for sentence in fragments if len(sentence) > 5]
‘‘‘短句切分‘‘‘
def process_subsent(self, content):
    """Split *content* into short clauses.

    The text is split on enumeration commas, commas, a handful of
    single-character conjunctions, parentheses, '~', '▲' and '.'.
    Fragments of one character or less are dropped.
    """
    # NOTE(review): the full-width comma and parentheses are listed next to
    # their ASCII forms; the previous pattern appears to have lost them.
    fragments = re.split(r'[、,，和与及且跟()（）~▲.]', content)
    return [s for s in fragments if len(s) > 1]
‘‘‘处理数据库中的文本‘‘‘
def process_doc(self):
    """Extract events from every stored document and persist them.

    Iterates over all documents in ``self.col``, runs ``collect_event`` on
    each document's ``content`` field and, when any events were found,
    inserts ``{'events': <events>}`` into ``self.col_insert``. Documents
    that yield no events are skipped.
    """
    for item in self.col.find():
        events_all = self.collect_event(item['content'])
        if events_all:
            # insert_one replaces Collection.insert, which was deprecated
            # in PyMongo 3 and removed in PyMongo 4.
            self.col_insert.insert_one({'events': events_all})
‘‘‘统计收集EVENT‘‘‘
def collect_event(self, content):
    """Collect the event sequences found in *content*.

    The text is cut into long sentences with ``seg_long_sents`` and each
    sentence is passed to ``event_extract``. Every non-empty result is
    appended as one entry, so the return value is a list of per-sentence
    event lists (empty when nothing was extracted).
    """
    events_all = []
    for sent in self.seg_long_sents(content):
        events = self.event_extract(sent)
        if events:
            events_all.append(events)
    return events_all
‘‘‘顺承事件抽取‘‘‘
def event_extract(self, sent):
    """Extract sequential events from a single sentence.

    ``self.pattern`` is applied to *sent*. For each match, the text before
    and after the connective is split into clauses with
    ``process_subsent``. A match contributes its clauses only when both
    sides yield at least one clause. The collected clauses are then passed
    to ``extract_phrase``.

    Returns the result of ``extract_phrase``, or an empty list when the
    sentence has no match or no usable clauses.
    """
    matches = self.pattern.findall(sent)
    if not matches:
        return []

    event_seqs = []
    for match in matches:
        # match is (before, connective, after); the connective is not used.
        pre_sents = self.process_subsent(match[0])
        post_sents = self.process_subsent(match[2])
        if pre_sents and post_sents:
            event_seqs += pre_sents
            event_seqs += post_sents

    if not event_seqs:
        return []
    # Turn the raw clauses into structured events.
    return self.extract_phrase(event_seqs)
‘‘‘将一个长句中的句子进行分解,提取出其中的vob短语‘‘‘
def extract_phrase(self, event_seqs):
    """Run ``vob_exract`` on each clause and gather the results.

    Each element of *event_seqs* is passed to ``vob_exract``. Non-empty
    results are concatenated, in input order, into one flat list, which is
    returned (empty when *event_seqs* is empty or nothing was extracted).
    """
    events = []
    for event in event_seqs:
        vobs = self.vob_exract(event)
        if vobs:
            events += vobs
    return events
‘‘‘提取VOB关系‘‘‘
def vob_exract(self content):
vobs = []
words = list(jieba.cut(content))
if len(words) >= 300:
return []
postags = self.parse_handler.get_postag(words)
tuples child_dict_list = self.parse_handler.parser_main(words postags)
for tuple in tuples:
rel = tuple[-1]
pos_verb= tuple[4][0]
pos_object = tuple[2][0]
if rel == ‘VOB‘ and (pos_verb pos_object) in [(‘v‘ ‘n‘) (‘v‘ ‘i‘)]:
phrase = ‘‘.
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-12-15 05:16 SequentialEventExtration-master\
文件 93 2018-12-15 05:16 SequentialEventExtration-master\.gitattributes
目录 0 2018-12-15 05:16 SequentialEventExtration-master\.idea\
文件 398 2018-12-15 05:16 SequentialEventExtration-master\.idea\SequentialEventGraph.iml
文件 706 2018-12-15 05:16 SequentialEventExtration-master\.idea\misc.xml
文件 292 2018-12-15 05:16 SequentialEventExtration-master\.idea\modules.xml
文件 180 2018-12-15 05:16 SequentialEventExtration-master\.idea\vcs.xml
文件 20948 2018-12-15 05:16 SequentialEventExtration-master\.idea\workspace.xml
文件 7111 2018-12-15 05:16 SequentialEventExtration-master\README.md
目录 0 2018-12-15 05:16 SequentialEventExtration-master\event_graph\
目录 0 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\
目录 0 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\
文件 30798 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.css
文件 1532584 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.js
文件 781766 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.map
文件 22008 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.min.css
文件 582497 2018-12-15 05:16 SequentialEventExtration-master\event_graph\VIS\dist\vis.min.js
文件 3312 2018-12-15 05:16 SequentialEventExtration-master\event_graph\event_extract.py
文件 4001 2018-12-15 05:16 SequentialEventExtration-master\event_graph\event_graph.py
文件 7007 2018-12-15 05:16 SequentialEventExtration-master\event_graph\sentence_parser.py
文件 13643483 2018-12-15 05:16 SequentialEventExtration-master\event_graph\seq_events.txt
文件 55935 2018-12-15 05:16 SequentialEventExtration-master\event_graph\travel_event_graph.html
目录 0 2018-12-15 05:16 SequentialEventExtration-master\image\
文件 297401 2018-12-15 05:16 SequentialEventExtration-master\image\all.png
文件 54270 2018-12-15 05:16 SequentialEventExtration-master\image\book.png
文件 45649 2018-12-15 05:16 SequentialEventExtration-master\image\food.png
文件 213664 2018-12-15 05:16 SequentialEventExtration-master\image\graph.png
文件 86232 2018-12-15 05:16 SequentialEventExtration-master\image\plane.png
文件 99399 2018-12-15 05:16 SequentialEventExtration-master\image\train.png
目录 0 2018-12-15 05:16 SequentialEventExtration-master\news_spider\
目录 0 2018-12-15 05:16 SequentialEventExtration-master\news_spider\.idea\
............此处省略22个文件信息
相关资源
- Python-在TensorFlow中实现实现图像卷积网
- Python-60DaysRLChallenge中文版强化学习6
- Python-一个非常简单的BiLSTMCRF模型用于
- Python-Tensorflow仿AlphaGo框架实现的AI围棋
- Python-我是小诗姬全唐诗作为训练数据
- Python-用于物体跟踪的全卷积连体网络
- Python-数学建模竞赛中所使用的相关算
- Python-MonoDepthPyTorchPyTorch无监督单目深
- Python-用Tensorflowjs实现的可回收非可回
- Python-利用TensorFlow中的深度学习进行图
- Python-TensorFlow快速入门与实战课件与参
- Python-FCN完全卷积网络中最简单最容易
- Python-匈牙利算法卡尔曼滤波器多目标
- Python-mathAI一个拍照做题程序输入一张
- Python-Tensorflow实现SpatialAsDeepSpatialCNN
- Python-图像分类目标检测姿态估计分割
- Python-用python3opencv3做的中国车牌识别
- Python-各种对抗神经网络GAN大合集
- Python-Intel开源增强学习框架Coach
- Python-CENet用于2D医学图像分割的上下文
- Python-基于深度神经网络和蒙特卡罗树
- Python-SPNLearningAffinityviaSpatialPropagatio
- Python-效果超赞的图片自动增强GANs非成
- Python-VoiceactivitydetectionVAD语音端点检测
- Python-TensorFlow实现的人脸性别年龄识别
- Python-waifu2x利用卷积神经网络放大图片
- Python-TheElementsofStatisticalLearningESL的中
- Python-基于Tensorflow和Keras实现端到端的
- Python-MuseGAN用于乐曲生成的AI
- Python-简单快速实时可定制的机器学习
评论
共有 条评论