资源简介
FP-growth 发现频繁项集的 Python 实现(含数据集),结构清晰易懂
代码片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 28 21:49:24 2017
@author: Q
"""
import numpy as np
def loadSimpDat():
    """Return the small hard-coded transaction data set.

    Returns:
        A list of six transactions; each transaction is a list of
        single-character item names.
    """
    # Commas and ASCII quotes restored: without them the literal is not
    # valid Python (adjacent strings would otherwise be concatenated).
    simpDat = [
        ['r', 'z', 'h', 'j', 'p'],
        ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
        ['z'],
        ['r', 'x', 'n', 'o', 's'],
        ['y', 'r', 'x', 'z', 'q', 't', 'p'],
        ['y', 'z', 'x', 'e', 'q', 's', 't', 'm'],
    ]
    return simpDat
def createInitSet(dataSet):
    """Collapse a list of transactions into a ``{frozenset: count}`` dict.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items.

    Returns:
        A dict mapping ``frozenset(transaction)`` to the number of times that
        item set occurs in ``dataSet``. Transactions that contain the same
        items in a different order share one key.
    """
    retDict = {}
    for trans in dataSet:
        key = frozenset(trans)  # build the key once instead of twice
        retDict[key] = retDict.get(key, 0) + 1
    return retDict
class treeNode:
    """A single node of the FP-tree.

    Attributes are plain public fields:
        name:     the item this node stands for.
        count:    occurrence count accumulated on this node.
        nodelink: next node carrying the same item (``None`` until linked).
        parent:   parent node, or ``None`` for the root.
        children: dict mapping item name -> child ``treeNode``.
    """

    def __init__(self, nameValue, numOccur, parentNode):
        self.name = nameValue
        self.count = numOccur
        self.nodelink = None
        self.parent = parentNode
        self.children = {}

    def inc(self, numOccur):
        """Add ``numOccur`` to this node's count."""
        self.count += numOccur

    def disp(self, ind=1):
        """Print this node and its subtree, indenting one step per level."""
        print(' ' * ind, self.name, ' ', self.count)
        for child in self.children.values():
            child.disp(ind + 1)
def createTree(dataSet, minSup=1):
    """Build the FP-tree and its header table.

    Args:
        dataSet: dict mapping a transaction (an iterable, hashable key such
            as a ``frozenset``) to the number of times it occurs.
        minSup: minimum total count an item needs in order to be kept.

    Returns:
        ``(retTree, headerTable)`` where ``retTree`` is the root ``treeNode``
        and ``headerTable`` maps each kept item to ``[count, firstNode]``.
        Returns ``(None, None)`` when no item reaches ``minSup``.
    """
    # First pass: total count of every item over all transactions.
    headerTable = {}
    for trans, occurrences in dataSet.items():
        for item in trans:
            headerTable[item] = headerTable.get(item, 0) + occurrences

    # Keep only the items that meet the minimum support.
    headerTable = {k: v for k, v in headerTable.items() if v >= minSup}
    freqItemSet = set(headerTable)
    if not freqItemSet:
        return None, None

    # Extend each entry to [count, link]; the link is filled by updateTree.
    for k in headerTable:
        headerTable[k] = [headerTable[k], None]

    retTree = treeNode('Null Set', 0, None)
    # Second pass: insert every transaction's frequent items into the tree.
    for transSet, count in dataSet.items():
        localD = {}
        for tran in transSet:
            if tran in freqItemSet:
                localD[tran] = headerTable[tran][0]
        if localD:
            # Most frequent item first. sorted() is stable, so items with
            # equal counts keep the iteration order of the transaction.
            orderedItems = [
                pair[0]
                for pair in sorted(
                    localD.items(), key=lambda pair: pair[1], reverse=True
                )
            ]
            updateTree(orderedItems, retTree, headerTable, count)
    return retTree, headerTable
def updateTree(items, inTree, headerTable, count):
    """Insert one ordered transaction into the tree, starting at ``inTree``.

    Walks down the tree one item at a time. An existing child has ``count``
    added to it; a missing child is created and appended to that item's
    node-link chain in ``headerTable``.

    Args:
        items: items of the transaction, already in insertion order.
        inTree: node under which the first item is placed.
        headerTable: dict mapping item -> ``[count, firstNode]``; the second
            slot is updated in place when a node for the item is first made.
        count: number of occurrences this transaction contributes.
    """
    # Iterative form of the original recursion: avoids re-slicing the list
    # at every level and does nothing for an empty item list.
    node = inTree
    for item in items:
        if item in node.children:
            node.children[item].inc(count)
        else:
            node.children[item] = treeNode(item, count, node)
            if headerTable[item][1] is None:
                # First node for this item: it becomes the chain head.
                headerTable[item][1] = node.children[item]
            else:
                updateHeader(headerTable[item][1], node.children[item])
        node = node.children[item]
def updateHeader(nodeToTest, targetNode):
    """Append ``targetNode`` to the end of a node-link chain.

    Args:
        nodeToTest: a node of the chain; the chain is followed through
            ``nodelink`` until a node whose ``nodelink`` is ``None``.
        targetNode: node to attach as that last node's ``nodelink``.
    """
    while nodeToTest.nodelink is not None:
        nodeToTest = nodeToTest.nodelink
    nodeToTest.nodelink = targetNode
def ascendT
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 10 2017-12-03 20:34 FP-growth\.git\COMMIT_EDITMSG
文件 301 2017-12-03 20:34 FP-growth\.git\config
文件 73 2017-12-03 20:34 FP-growth\.git\description
文件 23 2017-12-03 20:34 FP-growth\.git\HEAD
文件 478 2017-12-03 20:34 FP-growth\.git\hooks\applypatch-msg.sample
文件 896 2017-12-03 20:34 FP-growth\.git\hooks\commit-msg.sample
文件 189 2017-12-03 20:34 FP-growth\.git\hooks\post-update.sample
文件 424 2017-12-03 20:34 FP-growth\.git\hooks\pre-applypatch.sample
文件 1642 2017-12-03 20:34 FP-growth\.git\hooks\pre-commit.sample
文件 1348 2017-12-03 20:34 FP-growth\.git\hooks\pre-push.sample
文件 4898 2017-12-03 20:34 FP-growth\.git\hooks\pre-rebase.sample
文件 544 2017-12-03 20:34 FP-growth\.git\hooks\pre-receive.sample
文件 1239 2017-12-03 20:34 FP-growth\.git\hooks\prepare-commit-msg.sample
文件 3610 2017-12-03 20:34 FP-growth\.git\hooks\update.sample
文件 145 2017-12-03 20:34 FP-growth\.git\index
文件 240 2017-12-03 20:34 FP-growth\.git\info\exclude
文件 156 2017-12-03 20:34 FP-growth\.git\logs\HEAD
文件 156 2017-12-03 20:34 FP-growth\.git\logs\refs\heads\master
文件 143 2017-12-03 20:34 FP-growth\.git\logs\refs\remotes\origin\master
文件 132 2017-12-03 20:34 FP-growth\.git\ob
文件 56 2017-12-03 20:34 FP-growth\.git\ob
文件 1558 2017-12-03 20:34 FP-growth\.git\ob
文件 41 2017-12-03 20:34 FP-growth\.git\refs\heads\master
文件 41 2017-12-03 20:34 FP-growth\.git\refs\remotes\origin\master
文件 4249 2017-11-29 22:19 FP-growth\FPgrowth.py
目录 0 2017-12-03 20:34 FP-growth\.git\logs\refs\remotes\origin
目录 0 2017-12-03 20:34 FP-growth\.git\logs\refs\heads
目录 0 2017-12-03 20:34 FP-growth\.git\logs\refs\remotes
目录 0 2017-12-03 20:34 FP-growth\.git\refs\remotes\origin
目录 0 2017-12-03 20:34 FP-growth\.git\logs\refs
............此处省略18个文件信息
评论
共有 条评论