资源简介
《机器学习实战》pdf及所附python代码和数据集文件
代码片段和文件信息
'''
Created on Sep 16, 2010
kNN: k Nearest Neighbors
Input: inX: vector to compare to existing dataset (1xN)
dataSet: size m data set of known vectors (NxM)
labels: data set labels (1xM vector)
k: number of neighbors to use for comparison (should be an odd number)
Output: the most popular class label
@author: pbharrin
'''
from numpy import *
import operator
from os import listdir
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest neighbors.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: feature vector to classify, with one entry per column of
            ``dataSet``.
        dataSet: 2-D numpy array holding one known sample per row.
        labels: sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: number of nearest neighbors that take part in the vote.

    Returns:
        The label occurring most often among the ``k`` nearest rows.
    """
    dataSetSize = dataSet.shape[0]
    # Repeat inX once per sample so the subtraction is done row by row.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    # Indices of the samples ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
def createDataSet():
    """Build a tiny hard-coded data set for trying out the classifier.

    Returns:
        A tuple ``(group, labels)`` where ``group`` is a 4x2 numpy array of
        sample points and ``labels`` is the list of their class labels, in
        the same order as the rows of ``group``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Each non-blank line must hold tab-separated fields: the first three are
    read as numeric features and the last one as an integer class label.

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        (n x 3) numpy array of features and ``classLabelVector`` is the list
        of n integer labels, one per parsed line.
    """
    # Read the file once and close it deterministically; blank lines
    # (e.g. a trailing newline at end of file) are skipped.
    with open(filename) as fr:
        rows = [line.strip() for line in fr]
    rows = [row for row in rows if row]

    returnMat = zeros((len(rows), 3))  # prepare matrix to return
    classLabelVector = []              # prepare labels to return
    for index, row in enumerate(rows):
        listFromLine = row.split('\t')
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly onto the range [0, 1].

    Args:
        dataSet: 2-D numpy array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``:
        ``normDataSet`` is ``(dataSet - minVals) / ranges`` computed per
        column, ``ranges`` is the per-column ``max - min`` and ``minVals``
        is the per-column minimum. A column whose values are all equal is
        mapped to 0 rather than NaN; its entry in ``ranges`` stays 0.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    m = dataSet.shape[0]
    # Divide by 1 where the range is 0 so constant columns do not produce
    # NaN/inf; the float cast keeps this a true division for integer input.
    safeRanges = where(ranges == 0, 1, ranges).astype(float)
    normDataSet = dataSet - tile(minVals, (m, 1))
    normDataSet = normDataSet / tile(safeRanges, (m, 1))  # element wise divide
    return normDataSet, ranges, minVals
def datingClassTest():
    """Measure the kNN error rate on the dating data set and print it.

    Loads 'datingTestSet2.txt' with file2matrix, normalizes the features
    with autoNorm, then classifies the first ``hoRatio`` fraction of the
    rows with classify0 (k=3), using the remaining rows as the known
    samples. Prints one line per classified row, followed by the overall
    error rate and the raw error count. Returns nothing.
    """
    hoRatio = 0.50  # hold out 50% of the rows as test vectors
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')  # load data set from file
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        # Rows [0, numTestVecs) are tested; rows [numTestVecs, m) are the
        # known samples the classifier votes over.
        classifierResult = classify0(normMat[i, :],
                                     normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m],
                                     3)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("the classifier came back with: %d the real answer is: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
    print(errorCount)
def img2vector(filename):
returnVect = zeros((11024))
fr = open(filename)
for i in range(32):
lineStr = fr.readline()
for j in range(32):
returnVect[032*i+j] = int(li
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-04-02 16:47 machinelearninginaction\
目录 0 2012-03-01 15:44 machinelearninginaction\.git\
文件 31 2012-03-01 15:44 machinelearninginaction\.git\COMMIT_EDITMSG
文件 341 2011-05-04 15:31 machinelearninginaction\.git\config
文件 73 2011-05-04 15:27 machinelearninginaction\.git\description
文件 23 2011-05-04 15:27 machinelearninginaction\.git\HEAD
目录 0 2011-05-04 15:27 machinelearninginaction\.git\hooks\
文件 452 2011-05-04 15:27 machinelearninginaction\.git\hooks\applypatch-msg.sample
文件 896 2011-05-04 15:27 machinelearninginaction\.git\hooks\commit-msg.sample
文件 160 2011-05-04 15:27 machinelearninginaction\.git\hooks\post-commit.sample
文件 552 2011-05-04 15:27 machinelearninginaction\.git\hooks\post-receive.sample
文件 189 2011-05-04 15:27 machinelearninginaction\.git\hooks\post-update.sample
文件 398 2011-05-04 15:27 machinelearninginaction\.git\hooks\pre-applypatch.sample
文件 1578 2011-05-04 15:27 machinelearninginaction\.git\hooks\pre-commit.sample
文件 4951 2011-05-04 15:27 machinelearninginaction\.git\hooks\pre-rebase.sample
文件 1239 2011-05-04 15:27 machinelearninginaction\.git\hooks\prepare-commit-msg.sample
文件 3611 2011-05-04 15:27 machinelearninginaction\.git\hooks\update.sample
文件 9112 2012-03-01 15:44 machinelearninginaction\.git\index
目录 0 2011-05-04 15:27 machinelearninginaction\.git\info\
文件 240 2011-05-04 15:27 machinelearninginaction\.git\info\exclude
目录 0 2011-05-04 15:30 machinelearninginaction\.git\logs\
文件 7468 2012-03-01 15:44 machinelearninginaction\.git\logs\HEAD
目录 0 2011-05-04 15:31 machinelearninginaction\.git\logs\refs\
目录 0 2011-05-04 15:30 machinelearninginaction\.git\logs\refs\heads\
文件 7468 2012-03-01 15:44 machinelearninginaction\.git\logs\refs\heads\master
目录 0 2011-05-04 15:31 machinelearninginaction\.git\logs\refs\remotes\
目录 0 2011-05-04 15:31 machinelearninginaction\.git\logs\refs\remotes\origin\
文件 6480 2012-03-01 15:44 machinelearninginaction\.git\logs\refs\remotes\origin\master
目录 0 2012-03-01 15:44 machinelearninginaction\.git\ob
目录 0 2011-12-19 17:28 machinelearninginaction\.git\ob
文件 933 2011-12-19 17:28 machinelearninginaction\.git\ob
............此处省略621个文件信息
- 上一篇:sublie text3.zip
- 下一篇:python数据分析参考案例
评论
共有 条评论