"""
Apriori Algorithm
output.txt
a,d,e,NaN
b,c,d,NaN
a,b,c,d
b,c,NaN,NaN
a,b,d,NaN
d,e,NaN,NaN
a,b,c,NaN
c,d,e,NaN
a,b,c,NaN
"""
from itertools import combinations
import pandas as pd
# Load the transactions from output.txt. header=None tells pandas the file
# has no header line, so the first line is read as data.
trans=pd.read_csv('output.txt',header=None)
# Echo the parsed table.
print (trans)
def apriori(trans, support=0.01, minlen=1):
    """Return the item combinations whose support is at least ``support``.

    The search is exhaustive: every combination of ``minlen`` up to all
    distinct items is scored, with no candidate pruning, so the work grows
    exponentially with the number of distinct items.

    Args:
        trans: pandas DataFrame with one transaction per row and one item
            per cell. Missing cells (NaN/None) are ignored; a row with no
            items at all does not count as a transaction.
        support: minimum fraction of transactions that must contain every
            item of a combination for it to be reported.
        minlen: smallest combination size to consider.

    Returns:
        A pandas DataFrame with columns ``Pattern`` (item labels joined by
        commas) and ``Support`` (fraction of transactions). The index of
        each row is its position in the full enumeration of candidates.
    """
    # Flatten the frame to one item per entry (indexed by column, then row),
    # drop the missing cells, one-hot encode the items and fold the entries
    # back together per original row: one row per transaction, one column
    # per distinct item, each cell holding the item's count in that row.
    ts = pd.get_dummies(trans.unstack().dropna()).groupby(level=1).sum()
    n_transactions, n_items = ts.shape

    pattern = []
    for size in range(minlen, n_items + 1):
        # Iterating a DataFrame yields its column labels, i.e. the items.
        for cols in combinations(ts, size):
            # A transaction matches when every selected item count is non-zero.
            hits = ts[list(cols)].all(axis=1).sum()
            # str() keeps the join working for non-string item labels.
            label = ",".join(map(str, cols))
            pattern.append([label, float(hits) / n_transactions])

    sdf = pd.DataFrame(pattern, columns=['Pattern', 'Support'])
    return sdf[sdf.Support >= support]
# Mine the loaded transactions with the default thresholds and print the result.
print (apriori(trans))
# Rebuild the per-transaction item-count table at module level: unstack()
# flattens the frame, dropna() removes the missing cells, get_dummies()
# one-hot encodes the items and groupby(level=1).sum() folds the entries
# back together per original row.
ts=pd.get_dummies(trans.unstack().dropna()).groupby(level=1).sum()
# NOTE: shape is (rows, columns), so collen receives the row count of ts and
# rowlen its column count, despite what the names suggest.
collen,rowlen=ts.shape
"""
output.txt
a,b,c,NaN
a,d,e,NaN
b,c,d,NaN
a,b,c,d
b,c,NaN,NaN
a,b,d,NaN
d,e,NaN,NaN
a,b,c,NaN
c,d,e,NaN
a,b,c,NaN
"""
from itertools import combinations
import pandas as pd
# Load the transactions from output.txt. header=None tells pandas the file
# has no header line, so the first line is read as data.
trans=pd.read_csv('output.txt',header=None)
# Echo the parsed table.
print (trans)
def apriori(trans, support=0.01, minlen=1):
    """Return the item combinations whose support is at least ``support``.

    The search is exhaustive: every combination of ``minlen`` up to all
    distinct items is scored, with no candidate pruning, so the work grows
    exponentially with the number of distinct items.

    Args:
        trans: pandas DataFrame with one transaction per row and one item
            per cell. Missing cells (NaN/None) are ignored; a row with no
            items at all does not count as a transaction.
        support: minimum fraction of transactions that must contain every
            item of a combination for it to be reported.
        minlen: smallest combination size to consider.

    Returns:
        A pandas DataFrame with columns ``Pattern`` (item labels joined by
        commas) and ``Support`` (fraction of transactions). The index of
        each row is its position in the full enumeration of candidates.
    """
    # Flatten the frame to one item per entry (indexed by column, then row),
    # drop the missing cells, one-hot encode the items and fold the entries
    # back together per original row: one row per transaction, one column
    # per distinct item, each cell holding the item's count in that row.
    ts = pd.get_dummies(trans.unstack().dropna()).groupby(level=1).sum()
    n_transactions, n_items = ts.shape

    pattern = []
    for size in range(minlen, n_items + 1):
        # Iterating a DataFrame yields its column labels, i.e. the items.
        for cols in combinations(ts, size):
            # A transaction matches when every selected item count is non-zero.
            hits = ts[list(cols)].all(axis=1).sum()
            # str() keeps the join working for non-string item labels.
            label = ",".join(map(str, cols))
            pattern.append([label, float(hits) / n_transactions])

    sdf = pd.DataFrame(pattern, columns=['Pattern', 'Support'])
    return sdf[sdf.Support >= support]
# Mine the loaded transactions with the default thresholds and print the result.
print (apriori(trans))
# Rebuild the per-transaction item-count table at module level: unstack()
# flattens the frame, dropna() removes the missing cells, get_dummies()
# one-hot encodes the items and groupby(level=1).sum() folds the entries
# back together per original row.
ts=pd.get_dummies(trans.unstack().dropna()).groupby(level=1).sum()
# NOTE: shape is (rows, columns), so collen receives the row count of ts and
# rowlen its column count, despite what the names suggest.
collen,rowlen=ts.shape
No comments:
Post a Comment