资源简介
利用网络上公开的数据构建一个小型的证券知识图谱/知识库
代码片段和文件信息
import os
import csv
import hashlib
def get_md5(string):
    """Return the hexadecimal MD5 digest of ``string``.

    The text is encoded as UTF-8 before it is hashed.

    Args:
        string: The text to hash.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.
    """
    return hashlib.md5(string.encode("utf-8")).hexdigest()
def build_executive(executive_prep, executive_import):
    """Create an 'executive' CSV file that can be imported into Neo4j.

    Output format -> person_id:ID,name,gender,age:int,:LABEL
    Label         -> Person

    The first row of the input file is skipped, as are rows with fewer
    than three fields. For every remaining row the first three fields are
    copied, prefixed with an MD5-based identifier derived from those same
    three fields, and suffixed with the ``Person`` label.

    Args:
        executive_prep: Path of the prepared input CSV file. Its first three
            columns are presumably name, gender and age (inferred from the
            output header) -- verify against the data source.
        executive_import: Path of the Neo4j import CSV file to write.
    """
    print('Writing to {} file...'.format(executive_import.split('/')[-1]))
    # newline='' is what the csv module requires for files it reads/writes;
    # without it, quoted embedded newlines are misparsed and extra blank
    # lines can appear on platforms that translate line endings.
    with open(executive_prep, 'r', encoding='utf-8', newline='') as file_prep, \
            open(executive_import, 'w', encoding='utf-8', newline='') as file_import:
        file_prep_csv = csv.reader(file_prep, delimiter=',')
        file_import_csv = csv.writer(file_import, delimiter=',')

        headers = ['person_id:ID', 'name', 'gender', 'age:int', ':LABEL']
        file_import_csv.writerow(headers)

        for i, row in enumerate(file_prep_csv):
            # Row 0 is the input header; short rows cannot supply all fields.
            if i == 0 or len(row) < 3:
                continue
            fields = row[:3]
            # Generate the id from the three copied fields.
            # NOTE(review): the separator inside the hashed string was lost
            # in the extracted source; a comma is assumed here -- confirm
            # against IDs generated by the original script.
            info_id = get_md5('{},{},{}'.format(*fields))
            file_import_csv.writerow([info_id, *fields, 'Person'])
    print('- done.')
def build_stock(stock_industry_prep stock_concept_prep stock_import):
“““Create an ‘stock‘ file in csv format that can be imported into Neo4j.
format -> company_id:IDnamecode:LABEL
label -> CompanyST
“““
print(‘Writing to {} file...‘.format(stock_import.split(‘/‘)[-1]))
stock = set() # ‘codename‘
with open(stock_industry_prep ‘r‘ encoding=‘utf-8‘) as file_prep:
file_prep_csv = csv.reader(file_prep delimiter=‘‘)
for i row in enumerate(file_prep_csv):
if i == 0:
continue
code_name = ‘{}{}‘.format(row[0] row[1].replace(‘ ‘ ‘‘))
stock.add(code_name)
with open(stock_concept_prep ‘r‘ encoding=‘utf-8‘) as file_prep:
file_prep_csv = csv.reader(file_prep delimiter=‘‘)
for i row in enumerate(file_prep_csv):
if i == 0:
continue
code_name = ‘{}{}‘.format(row[0] row[1].replace(‘ ‘ ‘‘))
stock.add(code_name)
with open(stock_import ‘w‘ encoding=‘utf-8‘) as file_import:
file_import_csv = csv.writer(file_import delimiter=‘‘)
headers = [‘stock_id:ID‘ ‘name‘ ‘code‘ ‘:LABEL‘]
file_import_csv.writerow(headers)
for s in stock:
split = s.split(‘‘)
ST = False # ST flag
states = [‘*ST‘ ‘ST‘ ‘S*ST‘ ‘SST‘]
info = []
for state in states:
if split[1].startswith(state):
ST = True
split[1] = split[1].replace(state ‘‘)
相关资源
- 一种改进的基于序列到序列框架的知
- 知识图谱导论
- 第一届全国中文知识图谱研讨会演讲
- 我国科学教育研究热点、现状与启示
- 煤矿巷道支护领域知识图谱构建
- 知识库管理
- 新冠开放知识图谱.健康JSON数据
- 知识图谱构建技术综述
- 个人知识管理系统软件
- 搭建上市公司知识图谱及数据
- 大规模知识图谱技术.pdf
- 基于知识图谱的自动问答系统
- 企业级大数据知识图谱产品的构建及
- 大规模知识图谱数据存储
- 知识图谱实现步骤和方法
- 基于知识图谱的问答系统关键技术研
- 中文信息处理发展报告2016+知识图谱发
- 知识图谱在司法领内的尝试与应用
- 领域知识图谱构建 Domain-Specific Knowl
- 百度知识图谱新进展64页ppt百度知心系
- AMIE:在不完整知识库下的关联规则挖
- PAAS平台问题知识库
- Freebase-FB13
- wooyun drops乌云知识库全部文章
- 知识图谱_数据融合 (DataMatching)
- 国外近十年深度学习的研究现状与发
- 知识图谱综述研究进展
- FB15k-237.zip
- 包含背景知识和问答的问答系统训练
- 东南大学 崇志宏:非结构数据存储和
评论
共有 条评论