# Usage: python count.py inputfile 10 outputfile updatefile opfile
#        python count.py inputfile 10 | cut -f 1 | sort | uniq -c
#
#   inputfile   tab-separated file; the item is the second column
#   10          item threshold: items seen fewer times than this are rare
#   outputfile  copy of inputfile with rare items replaced by "Unclassified"
#               (optional; written to stdout when omitted)
#   updatefile  second tab-separated file relabeled with the same rare items
#   opfile      relabeled copy of updatefile
#               (updatefile and opfile are optional but must be given together)

import sys
from collections import Counter

UNCLASSIFIED = "Unclassified"

USAGE = (
    "usage: python count.py inputfile threshold "
    "[outputfile [updatefile opfile]]\n"
)


def _split_row(row):
    """Return ``(fields, eol)`` for one raw line.

    The line terminator is split off before the tab split so that an item in
    the last column does not carry a trailing newline (which would make it
    compare unequal to the same item elsewhere and would be lost on relabel).
    """
    eol = '\n' if row.endswith('\n') else ''
    body = row[:-1] if eol else row
    return body.split('\t'), eol


def count_items(rows):
    """Count how often each second-column item occurs in *rows*.

    rows: iterable of raw tab-separated lines (e.g. an open text file).
    Returns a ``Counter`` mapping item -> number of rows carrying it.
    Rows without a second column (such as blank lines) are not counted.
    """
    counts = Counter()
    for row in rows:
        fields, _ = _split_row(row)
        if len(fields) > 1:
            counts[fields[1]] += 1
    return counts


def rare_items(counts, threshold):
    """Return the set of items whose count is strictly below *threshold*."""
    return set(item for item, seen in counts.items() if seen < threshold)


def relabel_rows(rows, rare):
    """Yield *rows* with every rare second-column item relabeled.

    rows: iterable of raw tab-separated lines.
    rare: container of items to replace with ``UNCLASSIFIED``; pass a set so
          the per-row membership test stays constant-time.
    Rows that are not relabeled, including rows without a second column,
    are yielded unchanged. Relabeled rows keep their original line ending.
    """
    for row in rows:
        fields, eol = _split_row(row)
        if len(fields) > 1 and fields[1] in rare:
            fields[1] = UNCLASSIFIED
            yield '\t'.join(fields) + eol
        else:
            yield row


def main(argv):
    """Run the script for the command line *argv* (``argv[0]`` is the program).

    Returns the process exit status: 0 on success, 2 on a usage error.
    Raises ValueError if the threshold is not an integer, and the usual
    I/O errors if a file cannot be opened.
    """
    args = argv[1:]
    # Accepted forms: input+threshold, plus outputfile, plus updatefile+opfile.
    if len(args) not in (2, 3, 5):
        sys.stderr.write(USAGE)
        return 2

    input_path = args[0]
    threshold = int(args[1])

    # First pass: item frequencies over the input file.
    with open(input_path) as fh:
        rare = rare_items(count_items(fh), threshold)

    # Second pass: rewrite the input with rare items relabeled.
    with open(input_path) as fh:
        if len(args) >= 3:
            with open(args[2], 'w') as out:
                out.writelines(relabel_rows(fh, rare))
        else:
            sys.stdout.writelines(relabel_rows(fh, rare))

    # Optional: apply the same relabeling to a second file.
    if len(args) == 5:
        with open(args[3]) as fh, open(args[4], 'w') as out:
            out.writelines(relabel_rows(fh, rare))

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
