This question is for frequent pattern mining algorithm Apriori and closed patter
ID: 3737700 • Letter: T
Question
This question is for frequent pattern mining algorithm Apriori and closed pattern mining algorithm like CLOSET.
Implement Apriori algorithm to mine frequent pattern from a transaction dataset
Implement an algorithm to mine closed frequent pattern from the same dataset. You can either write a code to extract closed patterns from the result that you got in Part 1 or code CLOSET.
Input Format
The input dataset is a transaction dataset.
The first line of the input corresponds to the minimum support.
Each following line of the input corresponds to one transaction. Items in each transaction are seperated by a space.
Please refer to the sample input below. In sample input 0, the minimum support is 2, and the dataset contains 3 transactions and 5 item types (A, B, C, D and E).
Constraints
NA
Output Format
The output are the frequent patterns you mined out from the input dataset.
Each line in the output should be of the format :
Support [frequent pattern]
Frequent patterns should be listed in a descending order based on support. e.g. 3 [C] is listed before 2 [A].
Ties should be resolved based on lexicographical order. e.g. 2 [A] is listed before 2 [A C]
Items within each pattern should be listed in lexicographical order as well seperated by a single space. e.g. 2 [B C D]
First print the frequent patterns and then closed pattern. Seperate the output for two parts by an empty line. In sample output 0, first 9 lines correspond to frequent patterns and last 3 lines correspond to closed pattern.
Sample Input 0
Sample Output 0
Sample Input 1
Sample Output 1
Note: The solution is expected in python 3.
Explanation / Answer
import sys
import itertools
my_list = []
for line in sys.stdin:
data = line.strip().split(' ')
my_list.append(data)
items = []
min_support = int(my_list[0][0])
del my_list[0]
# single frequent items
for i in range(len(my_list)):
for j in range(len(my_list[i])):
items.append(my_list[i][j])
items = sorted(list(set(items)))
item_count = {}
for key in items:
for i in range(len(my_list)):
if key in my_list[i]:
if key in item_count:
item_count[key] += 1
else:
item_count[key] = 1
shortlisted_items = []
useless_items = []
for key in item_count:
if item_count[key] >= min_support:
shortlisted_items.append(key)
else:
useless_items.append(key)
shortlisted_items.sort()
for key in useless_items:
del item_count[key]
# single frequent items end
# generating all possible combinations
master_list = []
for j in range(0, len(my_list)):
temp_list = []
for L in range(2, len(my_list[j]) + 1):
subset = list(itertools.combinations(sorted(my_list[j]), L))
temp_list.append(subset)
master_list.append(temp_list)
super_final_list = []
for j in range(len(master_list)):
for k in range(len(master_list[j])):
temp_list = []
for l in range(len(master_list[j][k])):
letter = ''
for m in range(len(master_list[j][k][l])):
letter += str(master_list[j][k][l][m]) + ' '
temp_list.append(letter.strip())
super_final_list.append(sorted(set(temp_list)))
master_list = super_final_list[:]
del super_final_list
# generating all possible combinations end
def has_infrequent_subset(c, L):
c = c.split(' ')
if (len(c) == 2): # c[i] in L[0] for i in range(len(c))):
for i in range(len(c)):
if str(c[i]) not in L:
return True
return False
else:
subset = list(itertools.combinations(sorted(c), len(c) - 1))
temp_list = []
for m in range(len(subset)):
letter = ''
for j in range(len(subset[m])):
letter += str(subset[m][j]) + ' '
temp_list.append(letter.strip())
for s in temp_list:
if s not in L:
return True
return False
def apriori_gen(L):
Ck = []
for i in range(len(L)):
for j in range(i + 1, len(L)):
if len(L[i].split(' ')) == 1: # len(list(L[i]))==1:
c = str(L[i]) + ' ' + str(L[j])
if has_infrequent_subset(c, L):
continue
else:
Ck.append(c)
else:
if L[i].split(' ')[:-1] == L[j].split(' ')[:-1]:
c = ''
for item in L[i].split(' '):
c += item + ' '
c += L[j].split(' ')[-1]
if has_infrequent_subset(c, L):
continue
else:
Ck.append(c)
return Ck
L = []
L.append(shortlisted_items)
C = []
for k in itertools.count(1, 1):
if not L[k - 1]:
break
Ck = apriori_gen(L[k - 1])
temp_dict = {}
temp_list = []
for c in Ck:
for i in range(len(master_list)):
for j in range(len(master_list[i])):
if c == master_list[i][j]:
if c in temp_dict:
temp_dict[c] += 1
else:
temp_dict[c] = 1
for key in temp_dict:
if temp_dict[key] >= min_support:
temp_list.append(key)
item_count[key] = temp_dict[key]
L.append(sorted(temp_list))
count_list = []
for key, value in item_count.items():
count_list.append(value)
count_list = sorted(list(set(count_list)), reverse=True)
segregated_item_list = []
for j in count_list:
temp_list = []
for key, value in item_count.items():
if value == j:
temp_list.append(key)
segregated_item_list.append(sorted(temp_list))
for i in segregated_item_list:
for j in i:
print(item_count[j], '[' + j.strip() + ']')
print()
# Closed Pattern Mining
closed_patterns = []
for key, value in item_count.items():
closed_patterns.append(key.strip().split())
closed_patterns = sorted(closed_patterns, key=len, reverse=True)
i = 0
while i < len(closed_patterns):
x = ''
for v in closed_patterns[i]:
x += v + ' '
x = x.strip()
for j in range(1, len(closed_patterns[i])):
subset = list(itertools.combinations(sorted(closed_patterns[i]), j))
temp_list = []
for m in range(len(subset)):
letter = ''
for j in range(len(subset[m])):
letter += str(subset[m][j]) + ' '
temp_list.append(letter.strip())
for item in temp_list:
if item_count[item] == item_count[x]:
if item.strip().split(' ') in closed_patterns:
closed_patterns.remove(item.strip().split(' '))
i += 1
closed_patterns = sorted(closed_patterns)
final_closed_patterns = []
for itemset in closed_patterns:
letter = ''
for item in itemset:
letter += item + ' '
final_closed_patterns.append(letter.strip())
closed_patterns = final_closed_patterns[:]
del final_closed_patterns
for i in segregated_item_list:
for items in i:
if items in closed_patterns:
print(item_count[items], '[' + items.strip() + ']')
print()
# Max Pattern Minning
max_patterns = []
for key, value in item_count.items():
max_patterns.append(key.strip().split())
max_patterns = sorted(max_patterns, key=len, reverse=True)
i = 0
while i < len(max_patterns):
for j in range(1, len(max_patterns[i])):
subset = list(itertools.combinations(sorted(max_patterns[i]), j))
temp_list = []
for m in range(len(subset)):
letter = ''
for j in range(len(subset[m])):
letter += str(subset[m][j]) + ' '
temp_list.append(letter.strip().split())
for item in temp_list:
if item in max_patterns:
max_patterns.remove(item)
i += 1
max_patterns = sorted(max_patterns)
final_max_patterns = []
for itemset in max_patterns:
letter = ''
for item in itemset:
letter += item + ' '
final_max_patterns.append(letter.strip())
max_patterns = final_max_patterns[:]
del final_max_patterns
for i in segregated_item_list:
for items in i:
if items in max_patterns:
print(item_count[items], '[' + items.strip() + ']')
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.