library(biomaRt)
# Evaluate a multi-class labelling result and write summary metrics.
#
# Input : <dir>c10_cost1_unclassified_curated_<type>_<ngram>gram_labelcheck,
#         read with read.table(). Column 1 is counted as the true label and
#         column 2 as the predicted label (this is how fn/fp are assigned
#         below). Further columns, if any, are ignored.
# Output: <dir>c10_cost1_unclassified_curated_<type>_<ngram>gram_performance.txt
#         containing four lines:
#           1. summed tp / tn / fp / fn over the reported classes
#           2. macro-averaged precision / recall / F1
#           3. micro-averaged precision / recall / F1
#           4. overall accuracy (rows where column 1 equals column 2)
dir <- "word_data/"
type <- "celltype"
ngram <- "2-4"

# `k` is only referenced by the commented-out alternative paths below; the
# single-iteration loop is kept so those paths can be re-enabled unchanged.
for (k in 10:10) {
  result_file <- paste0(
    dir, "c10_cost1_unclassified_curated_", type, "_", ngram,
    "gram_labelcheck"
  )
  #result_file=paste("~/Documents/Automatic-Annotation/data_201702/c20_celltype_3-5gram_tree100_depth",k,"_labelcheck",sep = "")
  test_result <- read.table(file = result_file)
  #output_file=paste("~/Documents/Automatic-Annotation/data_201702/c20_celltype_3-5gram_tree100_depth",k,"_rf_performance.txt",sep = "")
  output_file <- paste0(
    dir, "c10_cost1_unclassified_curated_", type, "_", ngram,
    "gram_performance.txt"
  )

  # Class ids run from 0 to num_class inclusive.
  num_class <- if (type == "celltype") 132 else 127
  classes <- 0:num_class
  n_bins <- length(classes)

  # Fail early with a clear message instead of an opaque error inside `if`.
  n_rows <- nrow(test_result)
  if (ncol(test_result) < 2 || n_rows == 0) {
    stop("'", result_file, "' must contain at least one row and two columns",
         call. = FALSE)
  }
  truth <- test_result[[1]]
  predicted <- test_result[[2]]
  if (anyNA(truth) || anyNA(predicted)) {
    stop("'", result_file, "' contains missing labels", call. = FALSE)
  }

  accuracy <- sum(truth == predicted) / n_rows

  # Map every label to its position in `classes` (NA when the label lies
  # outside 0..num_class) and tally per class in one pass; tabulate() ignores
  # the NA entries.
  truth_idx <- match(truth, classes)
  pred_idx <- match(predicted, classes)
  agree <- which(truth_idx == pred_idx)  # which() drops NA comparisons

  # Counts are kept as doubles so that the pasted sums are formatted the same
  # way as before.
  tp_all <- as.numeric(tabulate(truth_idx[agree], nbins = n_bins))
  fn_all <- as.numeric(tabulate(truth_idx, nbins = n_bins)) - tp_all
  fp_all <- as.numeric(tabulate(pred_idx, nbins = n_bins)) - tp_all
  tn_all <- n_rows - tp_all - fn_all - fp_all

  # Report only classes that occur in at least one of the two columns
  # (equivalently: classes for which not every row is a true negative).
  seen <- tn_all != n_rows
  label <- classes[seen]
  tp_array <- tp_all[seen]
  tn_array <- tn_all[seen]
  fp_array <- fp_all[seen]
  fn_array <- fn_all[seen]

  # Per-class scores; a class without any true positive scores 0 throughout.
  has_tp <- tp_array > 0
  precision <- ifelse(has_tp, tp_array / (tp_array + fp_array), 0)
  recall <- ifelse(has_tp, tp_array / (tp_array + fn_array), 0)
  f1score <- ifelse(
    has_tp,
    (2 * precision * recall) / (precision + recall),
    0
  )

  # Per-class table; only written out if the write.matrix() call below is
  # re-enabled.
  confusion_matrix <- cbind(
    format(label),
    tp_array,
    tn_array,
    fp_array,
    fn_array,
    format(precision, digits = 3),
    format(recall, digits = 3),
    format(f1score, digits = 3)
  )

  perform1 <- paste(
    "sum: ",
    sum(tp_array), sum(tn_array), sum(fp_array), sum(fn_array)
  )
  perform2 <- paste(
    "macro average: ",
    format(mean(precision), digits = 3),
    format(mean(recall), digits = 3),
    format(mean(f1score), digits = 3)
  )

  # Micro averages pool the counts over all reported classes. With no true
  # positive at all the ratios would be 0/0, so report 0 -- the same
  # convention the per-class scores use.
  total_tp <- sum(tp_array)
  if (total_tp > 0) {
    micro_precision <- total_tp / (total_tp + sum(fp_array))
    micro_recall <- total_tp / (total_tp + sum(fn_array))
    micro_f1 <- 2 * micro_precision * micro_recall /
      (micro_precision + micro_recall)
  } else {
    micro_precision <- 0
    micro_recall <- 0
    micro_f1 <- 0
  }
  perform3 <- paste(
    "micro average: ",
    format(micro_precision, digits = 3),
    format(micro_recall, digits = 3),
    format(micro_f1, digits = 3)
  )

  #write.matrix(confusion_matrix, file=output_file,sep = " ")
  write(perform1, file = output_file, sep = " ")
  write(perform2, file = output_file, sep = " ", append = TRUE)
  write(perform3, file = output_file, sep = " ", append = TRUE)
  write(accuracy, file = output_file, sep = " ", append = TRUE)
}
