资源简介

Apriori 关联性分析的 Python 实现(含数据集),结构清晰易懂。

资源截图

代码片段和文件信息

# -*- coding: utf-8 -*-
"""
Created on Sun Nov 26 17:00:38 2017

@author: Q
"""

import numpy as np
def loadDataSet():
    """Return a small hard-coded list of transactions for experimenting.

    Returns:
        A list of four transactions, each a list of integer items.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]

def createC1(dateSet):
    """Build the candidate 1-itemsets from a collection of transactions.

    Args:
        dateSet: An iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list with one single-element ``frozenset`` per distinct item,
        ordered by ascending item value.
    """
    # A set removes duplicates in O(1) per item instead of scanning a list.
    distinct_items = {item for line in dateSet for item in line}
    return [frozenset([item]) for item in sorted(distinct_items)]
    
def scanData(data, ck, minSupport):
    """Find the candidate itemsets that meet the minimum support.

    Args:
        data: A sized collection of transactions (``len(data)`` is used as
            the denominator of the support).
        ck: An iterable of candidate itemsets (``frozenset`` objects).
        minSupport: Minimum fraction of transactions that must contain a
            candidate for it to be kept (inclusive).

    Returns:
        A tuple ``(retList, supportData)``. ``retList`` holds the candidates
        whose support is at least ``minSupport``. ``supportData`` maps every
        candidate that occurs in at least one transaction to its support,
        including the ones below the threshold.
    """
    ssCnt = {}
    for tid in data:
        for can in ck:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1
    numItems = len(data)
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        supportData[key] = support
    return retList, supportData

    
def aprioriGen(Lk, k):
    """Generate candidate k-itemsets from a list of (k-1)-itemsets.

    Two itemsets are joined (set union) when their first ``k - 2`` items,
    taken in sorted order, are equal.

    Args:
        Lk: A list of ``frozenset`` itemsets, each expected to hold ``k - 1``
            mutually comparable items.
        k: Size of the itemsets to generate.

    Returns:
        A list of ``frozenset`` candidates, one per joinable pair.
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing first would compare arbitrary items and could miss or
    # duplicate candidates.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

def apriori(dataSet, minSupport=0.5):
    """Generate the frequent itemsets of a transaction data set.

    Args:
        dataSet: A re-iterable collection of transactions (it is traversed
            twice), each an iterable of hashable items.
        minSupport: Minimum support passed on to ``scanData``.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the last entry of ``L`` is the empty
        list that ended the search. ``supportData`` collects the support
        dictionaries returned by ``scanData`` for every level.
    """
    c1 = createC1(dataSet)
    D = list(map(set, dataSet))
    l1, supportData = scanData(D, c1, minSupport)
    L = [l1]
    k = 2
    # Stop as soon as a level yields no frequent itemsets.
    while L[k - 2]:
        ck = aprioriGen(L[k - 2], k)
        lk, supk = scanData(D, ck, minSupport)
        supportData.update(supk)
        L.append(lk)
        k += 1
    return L, supportData
def generaterRules(L, supportData, minConf=0.7):
    """Generate association rules from the frequent itemsets.

    Args:
        L: List of frequent-itemset lists, as returned first by ``apriori``.
            ``L[0]`` is skipped because the loop starts at index 1.
        supportData: Mapping from itemset to support.
        minConf: Confidence threshold forwarded to the helper functions.

    Returns:
        The list of rules collected by ``calcConf`` / ``rulesFromConseq``.
    """
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            # Start from every single-item consequent of the itemset.
            H1 = [frozenset([item]) for item in freqSet]
            if i > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList
def calcConf(freqSet, H, suppurtData, brl, minConf=0.7):
    """Keep the consequents whose rule meets the minimum confidence.

    For each consequent ``conseq`` in ``H`` the confidence of the rule
    ``(freqSet - conseq) -> conseq`` is
    ``suppurtData[freqSet] / suppurtData[freqSet - conseq]``.

    Args:
        freqSet: The frequent itemset (``frozenset``) the rules come from.
        H: Iterable of candidate consequents (``frozenset`` objects).
        suppurtData: Mapping from itemset to support; must contain
            ``freqSet`` and every ``freqSet - conseq``.
        brl: List that receives a ``(antecedent, consequent, confidence)``
            tuple for every accepted rule (mutated in place).
        minConf: Minimum confidence, inclusive.

    Returns:
        The list of consequents whose rule was accepted.

    Raises:
        KeyError: If a required itemset is missing from ``suppurtData``.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = suppurtData[freqSet] / suppurtData[antecedent]
        # Inclusive comparison: a rule exactly at the minimum confidence
        # qualifies, matching how minSupport is treated in scanData.
        if conf >= minConf:
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH

def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively generate rules with progressively larger consequents.

    The consequents in ``H`` are evaluated with ``calcConf``; the survivors
    are merged by ``aprioriGen`` into consequents one item larger and the
    function recurses on them.

    Args:
        freqSet: The frequent itemset (``frozenset``) the rules come from.
        H: List of candidate consequents, all of the same size.
        supportData: Mapping from itemset to support.
        brl: List that accumulates the accepted rules (mutated in place).
        minConf: Confidence threshold forwarded to ``calcConf``.
    """
    # aprioriGen can return an empty list (no two survivors share a
    # prefix); without this guard H[0] would raise IndexError.
    if not H:
        return
    m = len(H[0])
    # Only evaluate consequents that leave a non-empty antecedent.
    if len(freqSet) >= m + 1:
        Hmp1 = calcConf(freqSet, H, supportData, brl, minConf)
        if len(Hmp1) > 1:
            Hmp1 = aprioriGen(Hmp1, m + 1)
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)

        
        
    
# Read the transactions: one whitespace-separated transaction per line.
# The with-block closes the file once it has been read.
with open('mushroom.dat') as f:
    data = [line.split() for line in f]
L, support = apriori(data, minSupport=0.3)
# Print every frequent itemset that contains the item '2'.
for level in L:
    for item in level:
        if '2' in item:
            print(item)
#print(L)
#    
#data = loadDataSet()
##c1 = cr

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件          8  2017-12-03 20:38  Apriori\.git\COMMIT_EDITMSG

     文件        299  2017-12-03 20:39  Apriori\.git\config

     文件         73  2017-12-03 20:38  Apriori\.git\description

     文件         23  2017-12-03 20:38  Apriori\.git\HEAD

     文件        478  2017-12-03 20:38  Apriori\.git\hooks\applypatch-msg.sample

     文件        896  2017-12-03 20:38  Apriori\.git\hooks\commit-msg.sample

     文件        189  2017-12-03 20:38  Apriori\.git\hooks\post-update.sample

     文件        424  2017-12-03 20:38  Apriori\.git\hooks\pre-applypatch.sample

     文件       1642  2017-12-03 20:38  Apriori\.git\hooks\pre-commit.sample

     文件       1348  2017-12-03 20:38  Apriori\.git\hooks\pre-push.sample

     文件       4898  2017-12-03 20:38  Apriori\.git\hooks\pre-rebase.sample

     文件        544  2017-12-03 20:38  Apriori\.git\hooks\pre-receive.sample

     文件       1239  2017-12-03 20:38  Apriori\.git\hooks\prepare-commit-msg.sample

     文件       3610  2017-12-03 20:38  Apriori\.git\hooks\update.sample

     文件        225  2017-12-03 20:38  Apriori\.git\index

     文件        240  2017-12-03 20:38  Apriori\.git\info\exclude

     文件        154  2017-12-03 20:38  Apriori\.git\logs\HEAD

     文件        154  2017-12-03 20:38  Apriori\.git\logs\refs\heads\master

     文件        143  2017-12-03 20:39  Apriori\.git\logs\refs\remotes\origin\master

     文件      71486  2017-12-03 20:38  Apriori\.git\objects\2c\14ebf236b7b241e0b2ae6f7de08290e5c30f78

     文件        127  2017-12-03 20:38  Apriori\.git\objects\81\90e6230eb4a074f9c28795b13c5e66e579e3c8

     文件       1307  2017-12-03 20:38  Apriori\.git\objects\91\315aa837057c666ec1425d104cdc54e1483f1d

     文件         91  2017-12-03 20:38  Apriori\.git\objects\e6\2669c3a5d9512b713f337d55714723964b0d5d

     文件         41  2017-12-03 20:38  Apriori\.git\refs\heads\master

     文件         41  2017-12-03 20:39  Apriori\.git\refs\remotes\origin\master

     文件       3278  2017-11-28 07:32  Apriori\Apriori.py

     文件     570408  2011-07-13 09:49  Apriori\mushroom.dat

     目录          0  2017-12-03 20:39  Apriori\.git\logs\refs\remotes\origin

     目录          0  2017-12-03 20:38  Apriori\.git\logs\refs\heads

     目录          0  2017-12-03 20:39  Apriori\.git\logs\refs\remotes

............此处省略21个文件信息

评论

共有 条评论