text-cnn源代码

大小: 20KB

文件类型: .zip

金币: 2

下载: 0 次

发布日期: 2021-05-17
语言: 其他
标签: cnn

高速下载

资源简介

基于卷积神经网络处理中文文本分类

资源截图

小图 / 大图

代码片段和文件信息

import numpy as np
import re
from sklearn.preprocessing import LabelBinarizer
from tensorflow.contrib import learn
import pdb
import collections

def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py

    Every character outside ``A-Za-z0-9(),!?'`` and the backtick is replaced
    by a space, English clitics ('s, 've, n't, 're, 'd, 'll) are split off as
    separate tokens, punctuation is padded with spaces, whitespace runs are
    collapsed, and the result is stripped and lower-cased.

    NOTE(review): the comma rules and the backtick in the character class were
    restored on the assumption that this function mirrors the upstream
    implementation referenced above -- confirm against the project file.

    :param string: raw text of one sentence/document.
    :return: the cleaned, lower-cased, space-separated string.
    """
    # Drop everything that is not a kept character (non-ASCII text included).
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)

    # Split clitics off the preceding word so they become their own tokens.
    string = re.sub(r"\'s", " 's", string)
    string = re.sub(r"\'ve", " 've", string)
    string = re.sub(r"n\'t", " n't", string)
    string = re.sub(r"\'re", " 're", string)
    string = re.sub(r"\'d", " 'd", string)
    string = re.sub(r"\'ll", " 'll", string)

    # Surround punctuation with spaces.
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    # The replacement templates keep their backslash: re.sub leaves unknown
    # non-letter escapes alone, so the emitted tokens are "\(", "\)" and "\?".
    # Raw strings keep that output while avoiding invalid-escape warnings.
    string = re.sub(r"\(", r" \( ", string)
    string = re.sub(r"\)", r" \) ", string)
    string = re.sub(r"\?", r" \? ", string)

    # Collapse any run of two or more whitespace characters to one space.
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()

def load_data_and_labels(positive_data_file, negative_data_file):
    """
    Load two text files of examples (one example per line), clean every line
    with ``clean_str`` and build one-hot labels.

    Lines from ``positive_data_file`` are labelled ``[0, 1]`` and lines from
    ``negative_data_file`` are labelled ``[1, 0]``; positives come first in
    both returned sequences.

    :param positive_data_file: path of the file holding positive examples.
    :param negative_data_file: path of the file holding negative examples.
    :return: ``[x_text, y]`` where ``x_text`` is a list of cleaned strings and
        ``y`` is an integer numpy array of shape ``(len(x_text), 2)``.
    """
    # Context managers guarantee the handles are closed (the previous bare
    # open(...).readlines() calls left that to the garbage collector).
    with open(positive_data_file, "r") as positive_file:
        positive_examples = [line.strip() for line in positive_file]
    with open(negative_data_file, "r") as negative_file:
        negative_examples = [line.strip() for line in negative_file]

    # Clean the raw lines, positives first.
    x_text = [clean_str(sent) for sent in positive_examples + negative_examples]

    # Generate labels in the same order as x_text.
    labels = [[0, 1] for _ in positive_examples]
    labels.extend([1, 0] for _ in negative_examples)
    # reshape(-1, 2) keeps the (N, 2) shape even when a file is empty.
    y = np.array(labels, dtype=int).reshape(-1, 2)
    return [x_text, y]


def load_data_labels(data_file, labels_file, max_document_length=1000):
    """
    Load documents and their labels from two parallel files and turn them
    into model inputs.

    Each line of ``data_file`` (read as latin-1) is cleaned with
    ``clean_str``. Each line of ``labels_file`` is split on ``','`` and the
    second field, stripped, is used as the label.
    NOTE(review): the ``','`` separator was restored from a listing in which
    commas had been stripped -- confirm against the real label file format.

    :param data_file: path of the text file with one document per line.
    :param labels_file: path of the label file with one record per line.
    :param max_document_length: length passed to ``VocabularyProcessor``;
        defaults to 1000, the value that used to be hard-coded.
    :return: tuple ``(x, y, vocab_processor)`` -- ``x`` is the numpy array
        built from ``vocab_processor.fit_transform``, ``y`` is the output of
        ``LabelBinarizer.fit_transform`` on the labels, and
        ``vocab_processor`` is the fitted processor.
    :raises IndexError: if a label line has no second comma-separated field.
    """
    with open(data_file, 'r', encoding='latin-1') as f:
        data = [clean_str(line.strip()) for line in f]

    with open(labels_file, 'r') as f:
        # Keep only the second field of every record as the class label.
        labels = [line.strip().split(',')[1].strip() for line in f]

    lb = LabelBinarizer()
    y = lb.fit_transform(labels)

    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    x = np.array(list(vocab_processor.fit_transform(data)))
    return x, y, vocab_processor


def batch_iter（data batch_size num_epochs shuffle=True）:
    “““
    Generates a batch iterator for a dataset.
    “““
    data = np.array（data）
    data_size = len（data）
    num_batches_per_epoch = int（（len（data） - 1） / batch_size） + 1
    for epoch in range（num_epochs）:
        # Shuffle the data at each epoch

属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2017-05-22 00:05  TextCNN-master\
     文件        1052  2017-05-22 00:05  TextCNN-master\.gitignore
     目录           0  2017-05-22 00:05  TextCNN-master\.idea\
     文件         548  2017-05-22 00:05  TextCNN-master\.idea\TextCNN.iml
     目录           0  2017-05-22 00:05  TextCNN-master\.idea\inspectionProfiles\
     文件         228  2017-05-22 00:05  TextCNN-master\.idea\inspectionProfiles\profiles_settings.xml
     文件         241  2017-05-22 00:05  TextCNN-master\.idea\misc.xml
     文件         266  2017-05-22 00:05  TextCNN-master\.idea\modules.xml
     文件         180  2017-05-22 00:05  TextCNN-master\.idea\vcs.xml
     文件       37209  2017-05-22 00:05  TextCNN-master\.idea\workspace.xml
     文件        7651  2017-05-22 00:05  TextCNN-master\LICENSE
     文件          60  2017-05-22 00:05  TextCNN-master\README.md
     文件        3593  2017-05-22 00:05  TextCNN-master\data_helpers.py
     文件        4031  2017-05-22 00:05  TextCNN-master\eval.py
     文件        3775  2017-05-22 00:05  TextCNN-master\text_cnn.py
     文件        8210  2017-05-22 00:05  TextCNN-master\train.py
     文件        8551  2017-05-22 00:05  TextCNN-master\train_debug.py

上一篇：ArduinoUSBKeyboard库文件
下一篇：大学物理实验思考题答案.txt

共有条评论

text-cnn源代码

资源简介

资源截图

代码片段和文件信息

评论

相关资源