Friday, November 18, 2016

Apriori Algorithm

Apriori Algorithm

"""
Contents of output.txt (one transaction per row; NaN pads rows with fewer than four items):
a,b,c,NaN
a,d,e,NaN
b,c,d,NaN
a,b,c,d
b,c,NaN,NaN
a,b,d,NaN
d,e,NaN,NaN
a,b,c,NaN
c,d,e,NaN
a,b,c,NaN
"""

from itertools import combinations
import pandas as pd


# Load the transaction table. header=None tells pandas the file has no header
# row, so the first line is treated as data.
trans = pd.read_csv('output.txt', header=None)
print(trans)
def apriori(trans,support=0.01,minlen=1):
    """Score item combinations and return those meeting a minimum support.

    Every combination of the distinct items, from ``minlen`` items up to all
    of them, is scored exhaustively. No candidate pruning is performed, so the
    number of combinations grows exponentially with the number of distinct
    items.

    Args:
        trans: DataFrame with one transaction per row and one item per cell.
            Missing cells (NaN/None) are ignored; a row made up only of
            missing cells does not count as a transaction.
        support: Minimum fraction of transactions that must contain every
            item of a pattern for the pattern to be returned.
        minlen: Smallest number of items in a candidate pattern.

    Returns:
        DataFrame with the columns ``Pattern`` (item labels joined by commas)
        and ``Support`` (fraction of transactions containing all the items),
        restricted to rows whose support is at least ``support``. The index
        keeps each pattern's position in the full enumeration. The frame is
        empty when ``trans`` holds no items.
    """
    result_columns = ['Pattern', 'Support']

    if trans.empty:
        return pd.DataFrame([], columns=result_columns)

    # Flatten to one entry per cell, indexed by (column label, row label),
    # and discard the padding cells.
    items = trans.unstack().dropna()
    if items.empty:
        return pd.DataFrame([], columns=result_columns)

    # One indicator column per distinct item; grouping on index level 1 (the
    # original row label) folds the cells back into one row per transaction.
    onehot = pd.get_dummies(items).groupby(level=1).sum()
    n_transactions, n_items = onehot.shape

    scored = []
    for size in range(minlen, n_items + 1):
        for combo in combinations(onehot, size):
            # A transaction supports the pattern only if every item is present.
            hits = onehot[list(combo)].all(axis=1).sum()
            # str() lets non-string item labels be joined as well.
            label = ",".join(map(str, combo))
            scored.append([label, float(hits) / n_transactions])

    table = pd.DataFrame(scored, columns=result_columns)
    return table[table.Support >= support]
# Mine the loaded transactions with the default thresholds and show the result.
print(apriori(trans))

# Build the per-transaction item-indicator table at module level and record
# its shape (number of transactions, number of distinct items).
ts = pd.get_dummies(trans.unstack().dropna()).groupby(level=1).sum()
collen, rowlen = ts.shape



No comments:

Post a Comment