from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score

import numpy as np
np.random.seed(0)


def _write_lines(path, items):
    """Write each element of ``items`` to ``path``, one element per line.

    Elements are rendered with ``%s`` formatting followed by a newline.
    The file is opened in write mode (truncating any existing content) and
    is closed even if a write fails part-way through.
    """
    with open(path, 'w') as handle:
        handle.writelines("%s\n" % item for item in items)


def main():
    """Sweep random-forest ``max_depth`` from 101 to 150 and dump predictions.

    Loads the training and test feature/label matrices with ``np.loadtxt``,
    then for every depth fits a ``RandomForestClassifier`` (300 estimators,
    ``random_state=0``) on the training data and writes two text files into
    the current working directory:

    * ``predict_c20_celltype_3-5gram_tree100_depth<depth>.txt`` -- the
      predicted label for each test row, one per line.
    * ``label_c20_celltype_3-5gram_tree100_depth<depth>.txt`` -- the test
      labels as loaded, one per line (identical for every depth).
    """
    x_train = np.loadtxt('word_data/c20_unclassified_training_celltype_3-5gramfeature.txt')
    y_train = np.loadtxt('label_feature/c20_unclassified_training_celltype_intl2.txt')
    x_test = np.loadtxt('word_data/c20_unclassified_curated_celltype_3-5gramfeature.txt')
    y_test = np.loadtxt('label_feature/c20_unclassified_curated_celltype_intl2.txt')

    for depth in range(101, 151):
        clf = RandomForestClassifier(max_depth=depth, n_estimators=300, random_state=0)
        clf.fit(x_train, y_train)
        # Only hard predictions are written out, so class probabilities are
        # not computed here.
        clf_pred = clf.predict(x_test)

        # NOTE(review): the file names say "tree100" although the forest is
        # built with n_estimators=300. The names are left unchanged so that
        # anything reading these files keeps working -- confirm which value
        # is intended.
        suffix = '_c20_celltype_3-5gram_tree100_depth' + str(depth) + '.txt'
        _write_lines('predict' + suffix, clf_pred)
        _write_lines('label' + suffix, y_test)


if __name__ == '__main__':
    main()
