资源简介
泰坦尼克号 Python 数据分析,附带数据集和源代码,强烈推荐。
代码片段和文件信息
import re
import numpy as np
import pandas as pd
import random as rd
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA
# Console display settings so wide arrays and frames print without truncation.
np.set_printoptions(
    precision=4, threshold=10000, linewidth=160, edgeitems=999, suppress=True
)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 160)
pd.set_option('display.expand_frame_repr', False)
# Fully qualified key: the bare 'precision' pattern is ambiguous on pandas
# versions that also define 'styler.format.precision'.
pd.set_option('display.precision', 4)
def processCabin():
    """Add cabin-derived feature columns to the module-level ``df``.

    Missing ``Cabin`` values are replaced with ``'U0'``; then the following
    columns are created:

    * ``CabinLetter`` -- integer code from ``pd.factorize`` over the value
      returned by ``getCabinLetter`` for each cabin;
    * ``CabinLetter_<code>`` -- one dummy column per code, only when the
      global ``keep_binary`` is truthy;
    * ``CabinNumber`` -- the value returned by ``getCabinNumber`` cast to
      int, plus one;
    * ``CabinNumber_scaled`` -- ``CabinNumber`` passed through
      ``StandardScaler``, only when the global ``keep_scaled`` is truthy.

    The global ``df`` is modified in place and, when dummy columns are
    added, rebound to the concatenated frame. Returns ``None``.
    """
    # NOTE(review): the listing this was recovered from had no indentation;
    # the extent of each ``if`` body below is reconstructed -- confirm.
    global df

    # One .loc assignment instead of chained indexing, which may write to a
    # temporary copy and leave ``df`` unchanged.
    df.loc[df['Cabin'].isnull(), 'Cabin'] = 'U0'

    df['CabinLetter'] = df['Cabin'].map(getCabinLetter)
    df['CabinLetter'] = pd.factorize(df['CabinLetter'])[0]

    if keep_binary:
        cletters = pd.get_dummies(df['CabinLetter']).rename(
            columns=lambda x: 'CabinLetter_' + str(x)
        )
        df = pd.concat([df, cletters], axis=1)

    df['CabinNumber'] = df['Cabin'].map(getCabinNumber).astype(int) + 1

    if keep_scaled:
        scaler = preprocessing.StandardScaler()
        # StandardScaler expects 2-D input (n_samples, n_features); select
        # the column as a one-column frame and flatten the result.
        scaled = scaler.fit_transform(df[['CabinNumber']])
        df['CabinNumber_scaled'] = scaled.ravel()
def getCabinLetter(cabin):
    """Return the first run of ASCII letters in ``cabin``.

    Args:
        cabin: Cabin string such as ``'C85'`` or ``'C23 C25 C27'``.

    Returns:
        The first contiguous run of letters found, or ``'U'`` when the
        string contains no letter.
    """
    match = re.search(r"[a-zA-Z]+", cabin)
    if match:
        return match.group()
    return 'U'
def getCabinNumber(cabin):
    """Return the first run of digits in ``cabin``.

    Args:
        cabin: Cabin string such as ``'C85'``.

    Returns:
        The first contiguous run of digits as a ``str``, or the integer
        ``0`` when the string contains no digit. The mixed return type is
        kept as-is; the visible caller casts the result with
        ``astype(int)``.
    """
    match = re.search(r"[0-9]+", cabin)
    if match:
        return match.group()
    return 0
def processTicket():
    """Add ticket-derived feature columns to the module-level ``df``.

    Steps, in order:

    1. ``TicketPrefix`` -- ``getTicketPrefix`` of the upper-cased ticket,
       with ``.``, ``?`` and ``/`` removed and ``STON`` rewritten to
       ``SOTON``.
    2. ``TicketPrefixId`` -- integer code from ``pd.factorize`` over the
       prefix.
    3. ``TicketPrefix_<prefix>`` dummy columns, only when the global
       ``keep_binary`` is truthy.
    4. The string ``TicketPrefix`` column is dropped.
    5. ``TicketNumber`` (from ``getTicketNumber``), ``TicketNumberDigits``
       (its length) and ``TicketNumberStart`` (its first character), all
       cast to int.
    6. ``TicketNumber_scaled`` via ``StandardScaler``, only when the global
       ``keep_scaled`` is truthy.

    The global ``df`` is modified in place and, when dummy columns are
    added, rebound to the concatenated frame. Returns ``None``.
    """
    global df

    df['TicketPrefix'] = df['Ticket'].map(lambda x: getTicketPrefix(x.upper()))
    # Raw string: same character class as before, without invalid string
    # escapes.
    df['TicketPrefix'] = df['TicketPrefix'].map(
        lambda x: re.sub(r'[\.?\/?]', '', x)
    )
    df['TicketPrefix'] = df['TicketPrefix'].map(
        lambda x: re.sub('STON', 'SOTON', x)
    )

    df['TicketPrefixId'] = pd.factorize(df['TicketPrefix'])[0]

    if keep_binary:
        prefixes = pd.get_dummies(df['TicketPrefix']).rename(
            columns=lambda x: 'TicketPrefix_' + str(x)
        )
        df = pd.concat([df, prefixes], axis=1)

    # NOTE(review): the listing this was recovered from had no indentation;
    # the drop is placed at function level (not inside ``if keep_binary``)
    # -- confirm against the upstream file.
    df.drop(['TicketPrefix'], axis=1, inplace=True)

    df['TicketNumber'] = df['Ticket'].map(getTicketNumber)

    # The ``np.int`` alias was removed from NumPy; builtin ``int`` is the
    # type it aliased.
    df['TicketNumberDigits'] = df['TicketNumber'].map(len).astype(int)
    df['TicketNumberStart'] = df['TicketNumber'].map(lambda x: x[0:1]).astype(int)
    df['TicketNumber'] = df['TicketNumber'].astype(int)

    if keep_scaled:
        scaler = preprocessing.StandardScaler()
        # StandardScaler expects 2-D input (n_samples, n_features); select
        # the column as a one-column frame and flatten the result.
        scaled = scaler.fit_transform(df[['TicketNumber']])
        df['TicketNumber_scaled'] = scaled.ravel()
def getTicketPrefix(ticket):
    """Return the first run of letters, dots and slashes in ``ticket``.

    Args:
        ticket: Ticket string such as ``'A/5 21171'`` or ``'113803'``.

    Returns:
        The first contiguous run of ASCII letters, ``.`` and ``/``, or
        ``'U'`` when the string contains none of those characters.
    """
    match = re.search(r"[a-zA-Z\.\/]+", ticket)
    if match:
        return match.group()
    return 'U'
def getTicketNumber(ticket):
    """Return the run of digits at the end of ``ticket``.

    Args:
        ticket: Ticket string such as ``'A/5 21171'``.

    Returns:
        The trailing digits as a ``str``, or ``'0'`` when the string does
        not end in a digit.
    """
    match = re.search(r"\d+$", ticket)
    if match:
        return match.group()
    return '0'
def processFare():
global df
df[‘Fare‘][ np.isnan(df[‘Fare‘]) ] = df[‘Fare‘].median()
df[‘Fare‘][ np.where(df[‘Fare‘]==0)[0] ] = df[‘Fare‘][ df[‘Fare‘].nonzero()[0] ]
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2015-03-23 08:38 kaggle-Titanic-master\
文件 200 2015-03-23 08:38 kaggle-Titanic-master\README.md
文件 14597 2015-03-23 08:38 kaggle-Titanic-master\dataProcess.py
文件 72565 2015-03-23 08:38 kaggle-Titanic-master\figure_1.png
文件 4216 2015-03-23 08:38 kaggle-Titanic-master\randomForest.py
文件 2839 2015-03-23 08:38 kaggle-Titanic-master\result.csv
文件 28210 2015-03-23 08:38 kaggle-Titanic-master\test.csv
文件 60302 2015-03-23 08:38 kaggle-Titanic-master\train.csv
- 上一篇:某招聘网站数据分析案例及数据集.zip
- 下一篇:python分析pcap
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论