资源简介
Apriori 关联性分析的 Python 实现(含数据集),结构清晰易懂。
代码片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 26 17:00:38 2017
@author: Q
"""
import numpy as np
def loadDataSet():
    """Return a small hard-coded transaction list for trying out the algorithm.

    Returns:
        A list of four transactions, each a list of integer item ids.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
def createC1(dateSet):
    """Build the candidate 1-itemsets (C1) from a collection of transactions.

    Args:
        dateSet: Iterable of transactions; each transaction is an iterable of
            hashable, mutually orderable items.

    Returns:
        A list holding one ``frozenset`` per distinct item, ordered by item
        value. Frozensets are used so the itemsets can serve as dict keys.
    """
    # A set removes duplicates in O(1) per item instead of re-scanning a list.
    distinct_items = {item for line in dateSet for item in line}
    return [frozenset([item]) for item in sorted(distinct_items)]
def scanData(data, ck, minSupport):
    """Find the candidate itemsets that reach the minimum support.

    Args:
        data: Sized collection of transactions, each one a set of items.
        ck: Iterable of candidate itemsets (``frozenset`` objects).
        minSupport: Minimum fraction of transactions that must contain a
            candidate for it to be kept.

    Returns:
        A tuple ``(retList, supportData)``: ``retList`` lists the candidates
        whose support is at least ``minSupport``; ``supportData`` maps every
        candidate that occurs in at least one transaction to its support.
    """
    # Count, for each candidate, how many transactions contain it.
    ssCnt = {}
    for tid in data:
        for can in ck:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = len(data)
    retList = []
    supportData = {}
    # ssCnt is empty whenever data is empty, so the division is never by zero.
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # Record the support of infrequent candidates too, not just the kept ones.
        supportData[key] = support
    return retList, supportData
def aprioriGen(Lk, k):
    """Generate candidate k-itemsets by joining (k-1)-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in sorted
    order, are identical; the candidate is their union.

    Args:
        Lk: List of ``frozenset`` itemsets, each of size ``k - 1``, whose
            items are mutually orderable.
        k: Size of the itemsets to generate.

    Returns:
        A list of ``frozenset`` candidates of size ``k``.
    """
    # Sort BEFORE slicing: a frozenset iterates in arbitrary order, so taking
    # the first k-2 items of the unsorted sequence would compare unrelated
    # prefixes and could drop or duplicate candidates.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList
def apriori(dataSet, minSupport=0.5):
    """Generate all frequent itemsets of a transaction collection.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable,
            mutually orderable items.
        minSupport: Minimum support an itemset needs to count as frequent.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the last entry of ``L`` is always an
        empty list, which is what stops the search. ``supportData`` maps the
        itemsets counted by ``scanData`` to their support.
    """
    # Materialise the transactions once so a one-shot iterable is not
    # exhausted by building the size-1 candidates.
    D = [set(transaction) for transaction in dataSet]
    c1 = createC1(D)
    l1, supportData = scanData(D, c1, minSupport)
    L = [l1]
    k = 2
    # Keep growing itemsets until a level yields no frequent itemsets.
    while L[k - 2]:
        ck = aprioriGen(L[k - 2], k)
        lk, supk = scanData(D, ck, minSupport)
        supportData.update(supk)
        L.append(lk)
        k += 1
    return L, supportData
def generaterRules(L, supportData, minConf=0.7):
    """Generate association rules from frequent itemsets.

    Args:
        L: List of levels of frequent itemsets; ``L[0]`` (the single-item
            level) is skipped because no rule can be built from one item.
        supportData: Mapping from itemset to support.
        minConf: Confidence threshold passed on to the rule helpers.

    Returns:
        The list of rules collected by ``calcConf`` and ``rulesFromConseq``.
    """
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            # Start from every single item of the itemset as a consequent.
            H1 = [frozenset([item]) for item in freqSet]
            if i > 1:
                # Itemsets with more than two items may also yield rules with
                # larger consequents, which are built recursively.
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList
def calcConf(freqSet, H, suppurtData, brl, minConf=0.7):
    """Keep the consequents whose rule exceeds the confidence threshold.

    For each consequent ``conseq`` in ``H`` the rule
    ``freqSet - conseq -> conseq`` is scored as
    ``support(freqSet) / support(freqSet - conseq)``.

    Args:
        freqSet: The frequent itemset the rules are derived from.
        H: Iterable of candidate consequents (``frozenset`` objects).
        suppurtData: Mapping from itemset to support; must contain
            ``freqSet`` and every ``freqSet - conseq``.
        brl: List that accepted rules are appended to, as
            ``(antecedent, consequent, confidence)`` tuples.
        minConf: Threshold; a rule is kept only when its confidence is
            strictly greater than this value.

    Returns:
        The list of consequents whose rules were accepted.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = suppurtData[freqSet] / suppurtData[antecedent]
        if conf > minConf:
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH
def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively generate rules with progressively larger consequents.

    Args:
        freqSet: The frequent itemset the rules are derived from.
        H: List of candidate consequents, all of the same size.
        supportData: Mapping from itemset to support.
        brl: List that accepted rules are appended to (by ``calcConf``).
        minConf: Confidence threshold passed on to ``calcConf``.

    Returns:
        None; results are accumulated in ``brl``.
    """
    # The join step can produce no candidates at all; without this guard the
    # H[0] lookup below would raise IndexError.
    if not H:
        return
    m = len(H[0])
    # A consequent must leave at least one item for the antecedent.
    if len(freqSet) >= m + 1:
        Hmp1 = calcConf(freqSet, H, supportData, brl, minConf)
        # At least two surviving consequents are needed to merge into larger ones.
        if len(Hmp1) > 1:
            Hmp1 = aprioriGen(Hmp1, m + 1)
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)
# Driver: mine the mushroom data set and print every frequent itemset that
# contains the item '2'.
# Each line of the file is one transaction of whitespace-separated items.
# The context manager closes the file, which the bare open() never did.
with open('mushroom.dat') as mushroom_file:
    data = [line.split() for line in mushroom_file]

L, support = apriori(data, minSupport=0.3)
for level in L:
    for item in level:
        # Items are strings here because they come straight from str.split().
        if '2' in item:
            print(item)
#print(L)
#
#data = loadDataSet()
##c1 = cr
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 8 2017-12-03 20:38 Apriori\.git\COMMIT_EDITMSG
文件 299 2017-12-03 20:39 Apriori\.git\config
文件 73 2017-12-03 20:38 Apriori\.git\description
文件 23 2017-12-03 20:38 Apriori\.git\HEAD
文件 478 2017-12-03 20:38 Apriori\.git\hooks\applypatch-msg.sample
文件 896 2017-12-03 20:38 Apriori\.git\hooks\commit-msg.sample
文件 189 2017-12-03 20:38 Apriori\.git\hooks\post-update.sample
文件 424 2017-12-03 20:38 Apriori\.git\hooks\pre-applypatch.sample
文件 1642 2017-12-03 20:38 Apriori\.git\hooks\pre-commit.sample
文件 1348 2017-12-03 20:38 Apriori\.git\hooks\pre-push.sample
文件 4898 2017-12-03 20:38 Apriori\.git\hooks\pre-rebase.sample
文件 544 2017-12-03 20:38 Apriori\.git\hooks\pre-receive.sample
文件 1239 2017-12-03 20:38 Apriori\.git\hooks\prepare-commit-msg.sample
文件 3610 2017-12-03 20:38 Apriori\.git\hooks\update.sample
文件 225 2017-12-03 20:38 Apriori\.git\index
文件 240 2017-12-03 20:38 Apriori\.git\info\exclude
文件 154 2017-12-03 20:38 Apriori\.git\logs\HEAD
文件 154 2017-12-03 20:38 Apriori\.git\logs\refs\heads\master
文件 143 2017-12-03 20:39 Apriori\.git\logs\refs\remotes\origin\master
文件 71486 2017-12-03 20:38 Apriori\.git\ob
文件 127 2017-12-03 20:38 Apriori\.git\ob
文件 1307 2017-12-03 20:38 Apriori\.git\ob
文件 91 2017-12-03 20:38 Apriori\.git\ob
文件 41 2017-12-03 20:38 Apriori\.git\refs\heads\master
文件 41 2017-12-03 20:39 Apriori\.git\refs\remotes\origin\master
文件 3278 2017-11-28 07:32 Apriori\Apriori.py
文件 570408 2011-07-13 09:49 Apriori\mushroom.dat
目录 0 2017-12-03 20:39 Apriori\.git\logs\refs\remotes\origin
目录 0 2017-12-03 20:38 Apriori\.git\logs\refs\heads
目录 0 2017-12-03 20:39 Apriori\.git\logs\refs\remotes
............此处省略21个文件信息
- 上一篇:snmporin.py
- 下一篇:python界面图书管理系统—GUI界面版
评论
共有 条评论