资源简介
Kaggle平台泰坦尼克号数据集+源代码+注释
代码片段和文件信息
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
################################
# Preparing Data
################################
# Read the Titanic training data from the CSV file in the working directory.
data = pd.read_csv('train.csv')
# Replace every missing value (NaN) with 0 so all selected columns are numeric.
data = data.fillna(0)
# Encode 'Sex' numerically: 'male' -> 1, any other value -> 0.
data['Sex'] = (data['Sex'] == 'male').astype(int)
# 'Survived' is the label of one class; add its complement 'Deceased'
# as the other class so the labels form a two-column one-hot encoding.
data['Deceased'] = 1 - data['Survived']
# Select the six feature columns and the two label columns for training.
# NOTE: DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
# same NumPy array and also works on older pandas releases.
dataset_X = data[['Sex', 'Age', 'Pclass', 'SibSp', 'Parch', 'Fare']].values
dataset_Y = data[['Deceased', 'Survived']].values
# Hold out 20% of the rows as a validation set; the fixed seed keeps the
# split reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    dataset_X, dataset_Y, test_size=0.2, random_state=42)
################################
# Constructing Dataflow Graph
################################
# Symbolic inputs: X holds rows of 6 features, y holds rows of 2 one-hot
# labels. The leading None dimension accepts any batch size.
X = tf.placeholder(tf.float32, shape=[None, 6])
y = tf.placeholder(tf.float32, shape=[None, 2])
# Weights and bias are the variables to be trained.
weights = tf.Variable(tf.random_normal([6, 2]), name='weights')
bias = tf.Variable(tf.zeros([2]), name='bias')
# Softmax over a single linear layer gives per-class probabilities.
y_pred = tf.nn.softmax(tf.matmul(X, weights) + bias)
# Minimise cost using cross entropy.
# NOTE: add an epsilon (1e-10) when calculating log(y_pred),
# otherwise log(0) yields -inf.
# `axis` replaces the deprecated `reduction_indices` keyword.
cross_entropy = -tf.reduce_sum(y * tf.log(y_pred + 1e-10), axis=1)
cost = tf.reduce_mean(cross_entropy)
# Use the gradient descent optimizer (learning rate 0.001) to minimize cost.
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(cost)
# Accuracy: fraction of rows whose predicted class (argmax of y_pred)
# matches the labelled class (argmax of y).
correct_pred = tf.equal(tf.argmax(y, 1), tf.argmax(y_pred, 1))
acc_op = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
################################
# Training and Evaluating the model
################################
# Use a session to run the calculation.
with tf.Session() as sess:
    # Variables have to be initialized before anything else is run.
    tf.global_variables_initializer().run()
    # Training loop: 10 passes over the training set, one sample per step.
    for epoch in range(10):
        total_loss = 0.
        for i in range(len(X_train)):
            # Wrap the single sample in a list so it has a batch dimension
            # matching the [None, 6] / [None, 2] placeholders.
            feed_dict = {X: [X_train[i]], y: [y_train[i]]}
            # Run one optimisation step and fetch the loss for this sample.
            _, loss = sess.run([train_op, cost], feed_dict=feed_dict)
            # Debug output: sample index and the prediction after the update.
            print("number:" + str(i))
            print(sess.run(y_pred, feed_dict=feed_dict))
            total_loss += loss
        # Display loss per epoch (left disabled, as in the original script):
        # print('Epoch: %04d total loss=%.9f' % (epoch + 1, total_loss))
    # Accuracy calculated by TensorFlow on the held-out validation set.
    accuracy = sess.run(acc_op, feed_dict={X: X_val, y: y_val})
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 26694 2018-04-27 11:42 Titannic\Taitan_onehot.csv
文件 3914 2018-04-27 12:41 Titannic\example1.py
文件 5864 2018-04-25 22:00 Titannic\example2.py
文件 3258 2018-04-25 16:32 Titannic\gender_submission.csv
文件 28629 2018-04-25 16:32 Titannic\test.csv
文件 61194 2018-04-25 16:32 Titannic\train.csv
目录 0 2018-04-27 13:18 Titannic\
- 上一篇:模拟电磁曲射炮论文.docx
- 下一篇:吾爱破解论坛学习脱壳
相关资源
- leagues_NBA_2014_games_games.csv
- 已经制作好的mnist数据集中的10张图片
- KNN详细算法以及数据集
- international-airline-passengers.zip
- 做SAR图像目标检测的MSTAR数据集总共
- 4360GoogleAVA数据集百度云地址及相关介
- 多目标跟踪MOT_2DMOT2015数据集.txt
- 微软MSCOCO数据集 train2017.zip 百度云分
- UrbanSound8K
- kaggle猫狗大战数据集.txt
- ImageNet_mini数据集链接
- 机器学习 数据集
- 同济大学掌纹掌静脉数据集
- SVM_Iris.rar
- Breast-Cancer数据集.txt
- UCI数据集txt格式.rar
- vot数据集.txt
- Brats2017数据集
- 鸢尾花iris数据集,用于机器学习训练
- LCSTS数据集完整版280万条
- 肤色分割数据集
- imglab 制作数据集标签
- 手写digits.zip
- 适用于libsvm的iris数据集
- IEEE-PHM2009数据集
- MNIST数据集对应的txt文件
- knn算法识别手写体--mnist数据集
- UA-DETRAC车辆检测数据集
- 中国流域数据集
- 所有数据集
评论
共有 条评论