library(ggplot2)
library(ggrepel)

# Scores the 10 cross-validation folds of the SVM results and summarises the
# accuracy of every class.
#
# For each fold f the script
#   * reads the SVM output table and the true-label table,
#   * takes, for every sample row, the class whose score column is largest,
#   * writes the 1-based indices of wrongly / correctly predicted samples
#     and the fold accuracies (overall first, then one value per class),
#   * adds the per-class hit / miss counts to the running totals.
# Afterwards the mean and standard deviation over the folds are computed per
# class, stored in `l2_class` and plotted to a PDF.
#
# Layout of the SVM output table as this script uses it: row 1, columns 2..k
# hold the class ids; rows 2..n, columns 2..k hold one score per class;
# column 1 is never read.
# NOTE(review): this looks like libsvm probability output -- confirm.

# ---- Configuration ----
dir <- "word_data"  # "word_data" or "char_data"
type <- "antigen"   # "celltype" or "antigen"
label <- "2"        # "1" or "2"
num <- "3-5"        # n-gram tag used in the file names, e.g. "1" to "10"

n_folds <- 10
base_dir <- "~/Documents/Automatic-Annotation"
svm_dir <- paste0(base_dir, "/SVM-result/", dir, "/")

# Number of classes used to size the running totals (class ids are 0-based,
# so id k is stored at position k + 1).
class_num <- if (type == "celltype") 87 else 76

true_class_total <- array(0, dim = class_num)
false_class_total <- array(0, dim = class_num)

# ---- Per-fold scoring ----
fold_acc <- vector("list", n_folds)  # one accuracy vector per fold

for (f in seq_len(n_folds)) {
  result_file <- paste0(
    svm_dir, "output_c10_test", f, "_human_", type,
    "_intl", label, "f", num, "gram.txt"
  )
  label_file <- paste0(
    base_dir, "/label_feature/c10_test", f, "_human_", type,
    "_intl", label, ".txt"
  )
  test_result <- read.table(file = result_file)
  test_label <- read.table(file = label_file)
  test_result.df <- data.frame(test_result)

  # Number of classes present in this fold's output.
  num_class <- ncol(test_result.df) - 1
  stopifnot(num_class >= 1)

  class_ids <- unlist(test_result.df[1, -1, drop = FALSE], use.names = FALSE)
  scores <- as.matrix(test_result.df[-1, -1, drop = FALSE])
  n_samples <- nrow(scores)
  stopifnot(nrow(test_label) >= n_samples)

  # True label of sample i is row i of the label file.
  labels <- test_label[seq_len(n_samples), 1]

  # Label ids index the counters as id + 1; an id outside the counters would
  # silently corrupt the tallies, so fail loudly instead.
  max_id <- min(num_class, class_num) - 1
  if (anyNA(labels) || any(labels < 0 | labels > max_id)) {
    stop(
      "Fold ", f, ": label ids must lie in 0..", max_id,
      call. = FALSE
    )
  }

  # Column with the largest score per sample. Ties go to the first column so
  # exactly one class is predicted per sample (comparing several tied
  # classes inside `if` is an error in current R).
  best_col <- if (n_samples > 0) {
    max.col(scores, ties.method = "first")
  } else {
    integer(0)
  }
  predicted <- class_ids[best_col]
  correct <- predicted == labels

  # 1-based sample indices (row of the result table minus the header row).
  true_index <- which(correct)
  false_index <- which(!correct)

  # Hits / misses per true class for this fold and for the running totals.
  true_class <- tabulate(labels[correct] + 1, nbins = num_class)
  false_class <- tabulate(labels[!correct] + 1, nbins = num_class)
  true_class_total <- true_class_total +
    tabulate(labels[correct] + 1, nbins = class_num)
  false_class_total <- false_class_total +
    tabulate(labels[!correct] + 1, nbins = class_num)

  fold_prefix <- paste0(
    svm_dir, "c10_test", f, "_human_", type, "_l", label, "_", num, "gram"
  )

  # Write the indices of the wrongly and correctly predicted samples.
  false_file <- paste0(fold_prefix, "_falseindex")
  write.table(
    false_index, file = false_file,
    col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\n"
  )
  true_file <- paste0(fold_prefix, "_trueindex")
  write.table(
    true_index, file = true_file,
    col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\n"
  )

  # Fold accuracies: overall accuracy first, then one value per class.
  # A class without any correct prediction is reported as 0, which also
  # avoids 0 / 0 for classes that have no samples in this fold.
  filename <- paste0(fold_prefix, "_accuracy")
  overall_acc <- sum(true_class) / (sum(true_class) + sum(false_class))
  per_class_acc <- ifelse(
    true_class == 0, 0, true_class / (true_class + false_class)
  )
  acc <- c(overall_acc, per_class_acc)
  write.table(
    acc, file = filename,
    col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\n"
  )

  fold_acc[[f]] <- acc
}

# One row per fold; column 1 is the overall accuracy, columns 2.. the classes.
acc_mat <- do.call(rbind, fold_acc)
dim(acc_mat)

# ---- Mean accuracy and standard deviation of each class over the folds ----
class_mean <- apply(acc_mat, 2, mean)
class_sd <- apply(acc_mat, 2, sd)
class_mean

# Name each entry: "Average" for the overall accuracy, then the class names
# taken from column 2 of the class file.
class_file <- paste0(
  base_dir, "/label_feature/c10_human_", type, "_l", label, "_class.txt"
)
class1 <- read.table(file = class_file, sep = "\t")
l2_class <- data.frame(
  Class = c("Average", as.character(class1[, 2])),
  MeanAccuracy = class_mean,
  SD = class_sd,
  stringsAsFactors = FALSE
)

# ---- Plot ----
dir_out <- svm_dir
fo_img <- sprintf(
  "%sc10_%s_l%s_class_%sgram_acc.pdf", dir_out, type, label, num
)
# NOTE(review): the title always says "word", even when dir is "char_data".
plot_title <- paste0(
  "Average accuracy of each class (", nrow(class1), " classes in Label ",
  label, ") for human ", type, " data Total average is ", l2_class[1, 2],
  "(word, ", num, "gram)"
)

pdf(fo_img, width = 20, height = 9, pointsize = 9)
g <- ggplot(l2_class, aes(x = Class, y = MeanAccuracy, colour = Class)) +
  geom_errorbar(
    aes(ymin = MeanAccuracy - SD, ymax = MeanAccuracy + SD),
    width = .5
  ) +
  geom_line() +
  geom_point() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(
      family = "Helvetica", color = "#666666", face = "bold",
      size = 16, hjust = 0.5
    )
  ) +
  labs(title = plot_title)
plot(g)
dev.off()

# extract bad cases
# Rows of `l2_class` whose mean accuracy is below the threshold are written,
# one name per line, to the bad-case file. The first row of `l2_class` is the
# overall "Average", so that name is written too when the overall mean is
# below the threshold.
# The "08" in the file name corresponds to the 0.8 threshold.
bad_threshold <- 0.8
filename <- paste0(
  "~/Documents/Automatic-Annotation/Analyze/human_", type, "_l", label,
  "_", num, "gram", "_badcase08_name.txt"
)

l2_class[, 2]
ind <- which(l2_class[, 2] < bad_threshold)
ind
badcase <- l2_class[ind, 1]
badcase
vec_badcase <- as.vector(badcase)
write(vec_badcase, file = filename, sep = "\n")
class1[, 2]

# Map each bad-case name to its 0-based class id (row of `class1` minus one).
# Names that match no row of `class1` (such as "Average") contribute nothing.
bad_class_id <- unlist(lapply(
  vec_badcase,
  function(bad_name) which(class1[, 2] == bad_name) - 1
))
bad_class_id  # exact class ids (0-based)

# Misclassified-sample counts of all bad classes (ids are 0-based, the
# counters 1-based), followed by the correct counts of every class.
false_class_total[bad_class_id + 1]
true_class_total

# Accuracy per class pooled over all folds; classes without any sample give
# NaN and are therefore never selected by which().
acc <- as.vector(true_class_total / (false_class_total + true_class_total))
id <- which(acc < bad_threshold)
sum(false_class_total)
sum(true_class_total)