资源简介
吴恩达机器学习编程作业python3版本
代码片段和文件信息
#本文是高斯分布用于异常检测
#load data set
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
# Load the data set: X is used below as the training set, while Xval/yval
# are used later to choose the anomaly threshold.
data = sio.loadmat('ex8data1.mat')
X = data['X']
Xval = data['Xval']
yval = data['yval']
print(X.shape, Xval.shape, yval.shape)
print(X[:5])
print(yval[:5])

# Plot the first feature against the second using blue "x" markers.
plt.plot(X[:, 0], X[:, 1], 'bx')
plt.xlim(0, 30)
plt.ylim(0, 30)
plt.title('Visualize the ex8data1')
plt.xlabel('Latency (ms)')
plt.ylabel('Throughput (mb/s)')
plt.show()
# 高斯分布
from scipy import stats # 内置有计算数据点属于正态分布的概率的方法
# 求得均值和方差
def estimateGaussian(x):
    """Estimate per-feature Gaussian parameters from a training set.

    Args:
        x: 2-D array with one example per row and one feature per column.

    Returns:
        Tuple ``(mu, sigma2)``; each is a single-row array holding, per
        column of ``x``, the mean and the variance respectively.
    """
    # reshape(1, -1) forces exactly one row, however many columns there are.
    mu = np.mean(x, axis=0).reshape(1, -1)
    # np.var defaults to ddof=0, i.e. sum((x - mu) ** 2) / m.
    sigma2 = np.var(x, axis=0).reshape(1, -1)
    return mu, sigma2
# 计算高斯概率
def p(x, mu, sigma2):
    """Evaluate the product of per-feature normal densities for each row.

    Args:
        x: 2-D array of examples, one per row.
        mu: Array whose first row holds the per-feature means.
        sigma2: Array whose first row holds the per-feature variances.

    Returns:
        Column array (one row per example, a single column) with the
        product over features of ``stats.norm.pdf(x, mean, std)``.
    """
    # stats.norm.pdf takes a standard deviation, hence the square root.
    # Only the first row of mu / sigma2 is used; it broadcasts across the
    # rows of x, so one call replaces a pdf evaluation per scalar.
    densities = stats.norm.pdf(x, mu[0], np.sqrt(sigma2[0]))
    # Multiply across features, then force a single column.
    return np.prod(densities, axis=1).reshape(-1, 1)
# Fit the per-feature mean and variance on the training set.
mu, sigma2 = estimateGaussian(X)
print('mu.shape:', mu.shape, 'sigma2.shape:', sigma2.shape)
print('mu:', mu, 'sigma2:', sigma2)
# Density of each training example under the fitted parameters (one column).
p_train = p(X, mu, sigma2)
print('p_train[:5]:', p_train[:5])
# Same for the validation examples, using the training-set parameters.
p_val = p(Xval, mu, sigma2)
print('p_val[:5]:', p_val[:5])
# cross validation for select threshold
# 交叉验证用于阈值选定这里用的是F1 score这个评估指标
def selectThreshold(y, pval):
    """Pick the density threshold that gives the best F1 score.

    Candidate thresholds run from ``min(pval)`` towards ``max(pval)`` in
    steps of one thousandth of that range. An example is predicted
    positive when its value in ``pval`` is strictly below the candidate.

    Args:
        y: Labels, 1 for a positive example and 0 otherwise.
        pval: One density value per example, in the same order as ``y``.

    Returns:
        Tuple ``(bestEpsilon, bestF1)``. Both are 0 when no candidate
        reaches an F1 above 0, including when every value in ``pval`` is
        identical.
    """
    # Flatten both inputs so an (m, 1) column paired with an (m,) vector is
    # compared element-wise instead of broadcasting to an (m, m) grid.
    y = np.ravel(y)
    pval = np.ravel(pval)

    bestEpsilon = 0
    bestF1 = 0
    lowest = np.min(pval)
    highest = np.max(pval)
    stepSize = (highest - lowest) / 1000
    if stepSize == 0:
        # All densities are equal: np.arange cannot step by zero, and no
        # threshold could separate the examples anyway.
        return bestEpsilon, bestF1

    for epsilon in np.arange(lowest, highest, stepSize):
        predictions = pval < epsilon
        tp = np.sum(predictions & (y == 1))
        fp = np.sum(predictions & (y == 0))
        fn = np.sum(~predictions & (y == 1))

        # Each ratio falls back to 0 when its denominator would be zero.
        precision = float(tp) / (tp + fp) if tp + fp else 0
        recall = float(tp) / (tp + fn) if tp + fn else 0
        if precision + recall == 0:
            F1 = 0
        else:
            F1 = 2.0 * precision * recall / (precision + recall)

        # Strict comparison keeps the smallest threshold among ties.
        if F1 > bestF1:
            bestF1 = F1
            bestEpsilon = epsilon
    return bestEpsilon, bestF1
# Choose the threshold from the validation labels and validation densities.
epsilon, F1 = selectThreshold(yval, p_val)
print("Best epsilon found using cross-validation: %e" % (epsilon))
print("Best F1 on Cross Validation Set: %f" % (F1))
# Count the training examples whose density falls below the threshold.
print("Outliers found: %d " % (np.sum(p_train < epsilon)))
# Visualization: draw a red circle around those outliers.
outliters = np.where(p_train.ravel() < epsilon)
plt.plot(X[:, 0], X[:, 1], 'bx')
plt.plot(X[outliters
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-02-08 16:05 Coursera_python3\
目录 0 2018-02-08 16:04 Coursera_python3\Gauss Anomaly detection\
目录 0 2018-02-08 16:04 Coursera_python3\Gauss Anomaly detection\.idea\
文件 317 2018-02-07 14:14 Coursera_python3\Gauss Anomaly detection\.idea\Gauss Anomaly detection.iml
文件 185 2018-02-07 14:12 Coursera_python3\Gauss Anomaly detection\.idea\misc.xml
文件 298 2018-02-07 14:12 Coursera_python3\Gauss Anomaly detection\.idea\modules.xml
文件 1048 2018-02-07 14:12 Coursera_python3\Gauss Anomaly detection\.idea\workspace.xml
文件 5214 2018-02-07 15:46 Coursera_python3\Gauss Anomaly detection\Gauss Anomaly detection.py
文件 9501 2017-12-11 15:07 Coursera_python3\Gauss Anomaly detection\ex8data1.mat
文件 93481 2017-12-11 15:07 Coursera_python3\Gauss Anomaly detection\ex8data2.mat
目录 0 2018-02-08 16:04 Coursera_python3\K-means聚类\
目录 0 2018-02-08 16:04 Coursera_python3\K-means聚类\.idea\
文件 317 2018-01-23 15:45 Coursera_python3\K-means聚类\.idea\K-means聚类.iml
文件 185 2018-01-23 15:41 Coursera_python3\K-means聚类\.idea\misc.xml
文件 278 2018-01-23 15:41 Coursera_python3\K-means聚类\.idea\modules.xml
文件 1048 2018-01-23 15:41 Coursera_python3\K-means聚类\.idea\workspace.xml
文件 4485 2018-01-29 16:07 Coursera_python3\K-means聚类\Kmeans(also for 3d).py
文件 45606 2017-09-27 22:01 Coursera_python3\K-means聚类\bird_small.mat
文件 33031 2017-09-27 22:01 Coursera_python3\K-means聚类\bird_small.png
文件 995 2017-12-11 15:07 Coursera_python3\K-means聚类\ex7data1.mat
文件 4784 2017-12-11 15:07 Coursera_python3\K-means聚类\ex7data2.mat
文件 11027767 2017-12-11 15:07 Coursera_python3\K-means聚类\ex7faces.mat
文件 4071 2018-01-24 15:16 Coursera_python3\K-means聚类\kmeans(not suitbale for 3d).py
目录 0 2018-02-08 16:04 Coursera_python3\PCA\
目录 0 2018-02-08 16:04 Coursera_python3\PCA\.idea\
文件 317 2018-01-30 16:35 Coursera_python3\PCA\.idea\PCA.iml
文件 258 2018-01-30 16:27 Coursera_python3\PCA\.idea\modules.xml
文件 1048 2018-01-30 16:27 Coursera_python3\PCA\.idea\workspace.xml
文件 2444 2018-01-30 18:32 Coursera_python3\PCA\PCA.py
文件 995 2017-12-11 15:07 Coursera_python3\PCA\ex7data1.mat
文件 11027767 2017-12-11 15:07 Coursera_python3\PCA\ex7faces.mat
............此处省略2356个文件信息
相关资源
- Michael Nielsen 的《Neural Networks and Deep
- python3实现RSA(非调用RSA库
- python3.6.1 32位
- Python3.7.332bit
- python3.0菜鸟教程.zip
- python3.4 3.5 3.6 twisted适配windows
- Python机器学习算法-mobi文字版-附带m
- Pandas 0.19.2 官方文档 汉化中文版(精
- [书签+文字版]Python Machine Learning 2nd
- matlab和python的神经网络
- wxPython3.0-win32-3.0.2.0-py2732位系统安装包
- 《机器学习实战》pdf及所和数据集文
- (完整版)learn python 3 the Hard Way
- ThePracticeofComputingUsingPython3rdedition.pd
- Python-用于物体跟踪的全卷积连体网络
- 基于Python3 tkinterGUI界面实现读取存储
- python3.7 64位版
- numpy python3.7
- python3.6.4中文文档说明chm&pdf;双格式
- python3+Django微博源代码和开发环境
- Python-数学建模竞赛中所使用的相关算
- Python-pycharmpython36Django20mysql用户登录与
- Python-MonoDepthPyTorchPyTorch无监督单目深
- Python-用Tensorflowjs实现的可回收非可回
- NumPy攻略:Python科学计算与数据分析
- python编程从入门到实践高清pdf +廖雪峰
- Python-3.7.1.tgz
- Python Machine Learning 2nd Edition [Sebastian
- 机器学习——推荐系统python实现
- Python3源代码.rar
评论
共有 条评论