资源简介
异常检测(高斯分布模型)+训练、验证、测试数据
代码片段和文件信息
import numpy as np
import random
import matplotlib.pyplot as plt
def TXTtoNumpy(TXTfilename, lableState=False, Print=False, delim='\t'):
    '''
    Load a delimiter-separated text file into a float NumPy array.

    Every non-blank line is one example. The number of columns is taken
    from the first non-blank line. When ``lableState`` is True the last
    column of that first line is treated as the label column: the columns
    before it are features and the value at that index in every row is
    parsed as an integer label.

    :param TXTfilename: path of the text file to read
    :param lableState: True when the file carries a label column
    :param Print: True to print the number of examples and features
    :param delim: column delimiter, a tab by default
    :return: array of shape (n_examples, n_features); when ``lableState``
        is True, a tuple ``(array, labels)`` where ``labels`` is a list of int
    :raises ValueError: if the file contains no data rows
    '''
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(TXTfilename) as TXTfr:
        # Skip blank lines (e.g. a trailing newline) instead of failing on them.
        stringArr = [line.strip().split(delim) for line in TXTfr if line.strip()]

    if not stringArr:
        raise ValueError("no data rows found in %r" % (TXTfilename,))

    n_examples = len(stringArr)
    n_features = len(stringArr[0]) - 1 if lableState else len(stringArr[0])

    if Print:
        print("n_examples: ", n_examples)
        print("n_features: ", n_features)

    # Only the first n_features columns are features; anything after is ignored.
    floatList = np.array(
        [[float(value) for value in row[:n_features]] for row in stringArr],
        dtype=float,
    )

    if lableState:
        labels = [int(row[n_features]) for row in stringArr]
        return floatList, labels
    return floatList
def GaussianParamEstimation(npArr, GaussianType='Normal'):
    '''
    Estimate Gaussian parameters from a data matrix.

    :param npArr: data of shape (n_examples, n_features)
    :param GaussianType: 'Normal' for per-feature standard deviations,
        'Multi' for the full covariance matrix
    :return: ``(mean, std)`` for 'Normal' or ``(mean, sigma)`` for 'Multi';
        ``mean`` is the per-feature average over the examples
    :raises ValueError: if ``GaussianType`` is neither 'Normal' nor 'Multi'
    '''
    mean = np.average(npArr, axis=0)
    if GaussianType == 'Normal':
        std = np.std(npArr, axis=0)
        return mean, std
    if GaussianType == 'Multi':
        # np.cov centres the data itself, so subtracting the mean first is
        # unnecessary. rowvar=False: columns are variables, rows observations.
        sigma = np.cov(npArr, rowvar=False)
        return mean, sigma
    # Fail loudly instead of implicitly returning None, which would only
    # surface later as an unpacking error in the caller.
    raise ValueError(
        "GaussianType must be 'Normal' or 'Multi', got %r" % (GaussianType,)
    )
def NormalGaussion(X, mean, std):
    '''
    Evaluate the product of independent per-feature Gaussian densities.

    :param X: samples of shape (n_samples, n_features); a single 1-D
        sample of shape (n_features,) is also accepted
    :param mean: per-feature means, shape (n_features,) or (1, n_features)
    :param std: per-feature standard deviations, same shapes as ``mean``
    :return: array of shape (n_samples,) with one density value per row of X
    '''
    samples = np.atleast_2d(np.asarray(X, dtype=float))
    # Flatten so that both (n_features,) and (1, n_features) parameters work.
    mu = np.ravel(mean)
    sd = np.ravel(std)

    coefficient = 1 / (np.sqrt(2 * np.pi) * sd)
    exponent = np.exp(-np.square(samples - mu) / (2 * np.square(sd)))
    # Features are treated as independent: the joint density is the product
    # of the per-feature densities along the feature axis.
    return np.prod(coefficient * exponent, axis=1)
def MultiGaussion(X, mean, sigma):
    '''
    Evaluate the multivariate Gaussian density.

    :param X: sample of shape (1, n_features)
    :param mean: mean vector, shape (n_features,) or (1, n_features)
    :param sigma: covariance matrix of shape (n_features, n_features)
    :return: density as an array of shape (1, 1) for a (1, n_features) X.
        If X holds several rows the result is a square matrix whose
        diagonal carries the per-row densities.
    '''
    # np.cov returns a 0-d array for a single feature; promote it to 2-D.
    sigma = np.atleast_2d(np.asarray(sigma, dtype=float))
    n_features = sigma.shape[0]
    diff = X - mean

    # The normalising constant is (2*pi)^(n/2) * sqrt(det(sigma)); the
    # exponent depends on the dimensionality, not on pi.
    normaliser = 1 / (
        pow(2 * np.pi, n_features / 2.0) * np.sqrt(np.linalg.det(sigma))
    )
    mahalanobis = np.dot(np.dot(diff, np.linalg.inv(sigma)), diff.T)
    # -0.5 rather than (-1/2), which is integer division under Python 2.
    P = normaliser * np.exp(-0.5 * mahalanobis)
    return P
def AnomalyDetection(npArr labels iterations lamda_step=0.001 lamda=0.001):
‘‘‘
:param npArr: shape=(n_examples n_features)
:param labels: shape=(n_examples 1)
:param iterations:
:param lamda_step:
:param lamda:
:return:
‘‘‘
n_examples = npArr.shape[0]
n_features = npArr.shape[1]
# 将labels的列表类型转为numpy类型
labels = np.array(labels).reshape(n_examples 1)
# 找出标记为非0(异常样本)的索引
anomalyIndex = []
for i in range(0 n_examples):
if(labels[i:i+1:] != 0):
anomalyIndex.append(i)
# 根据异常样本索引得
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2019-04-26 09:08 AnomalyDetection\
文件 4527 2019-04-26 08:59 AnomalyDetection\testSet3.txt
文件 7509 2019-04-26 09:08 AnomalyDetection\Anomaly_detection.py
评论
共有 条评论