from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.externals.six.moves import zip
import numpy as np

def write_lines(path, values):
    """Write every element of ``values`` to ``path``, one element per line.

    Each element is formatted with ``"%s"`` followed by a newline. The file is
    opened in ``'w'`` mode (any existing content is replaced) inside a ``with``
    block, so the handle is closed even if formatting or writing raises.

    Args:
        path: Destination file name.
        values: Iterable of items to write; may be empty, which produces an
            empty file.
    """
    with open(path, 'w') as out:
        out.writelines("%s\n" % value for value in values)


def dump_staged_predictions(model, features, variant, depth):
    """Write one prediction file per boosting stage of a fitted ensemble.

    For every array yielded by ``model.staged_predict(features)`` a file named
    ``predict_c20_celltype_3-5gram_adaboost_<variant>_stage<k>_tree100_depth<depth>.txt``
    is written, where ``k`` counts stages from 0.

    Args:
        model: Fitted estimator exposing ``staged_predict``.
        features: Feature matrix handed to ``staged_predict``.
        variant: Label inserted in the file name (``'real'`` or ``'discrete'``).
        depth: Tree depth inserted in the file name.
    """
    for stage, predictions in enumerate(model.staged_predict(features)):
        write_lines(
            'predict_c20_celltype_3-5gram_adaboost_%s_stage%d_tree100_depth%d.txt'
            % (variant, stage, depth),
            predictions,
        )


if __name__ == '__main__':

    x_train = np.loadtxt('word_data/c20_unclassified_training_celltype_3-5gramfeature.txt')
    y_train = np.loadtxt('label_feature/c20_unclassified_training_celltype_intl2.txt')
    x_test = np.loadtxt('word_data/c20_unclassified_curated_celltype_3-5gramfeature.txt')
    y_test = np.loadtxt('label_feature/c20_unclassified_curated_celltype_intl2.txt')

    # Sweep the depth of the base decision tree from 2 to 10 inclusive.
    for depth in range(2, 11):
        # "real" variant: the ``algorithm`` argument is left at the library default.
        bdt_real = AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=depth),
            n_estimators=100,
            random_state=0,
            learning_rate=1,
        )
        # "discrete" variant: explicitly uses the SAMME algorithm.
        bdt_discrete = AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=depth),
            n_estimators=100,
            random_state=0,
            learning_rate=1.5,
            algorithm="SAMME",
        )
        bdt_real.fit(x_train, y_train)
        bdt_discrete.fit(x_train, y_train)

        # Both ensembles are evaluated on the held-out set so that every
        # prediction file lines up row-for-row with the y_test label file
        # written below. (Previously the discrete model was scored on x_train.)
        # Each ensemble is dumped independently rather than zipped together, so
        # no stage is dropped if one ensemble ends up with fewer stages.
        dump_staged_predictions(bdt_real, x_test, 'real', depth)
        dump_staged_predictions(bdt_discrete, x_test, 'discrete', depth)

        # Reference labels for this depth, in the same one-value-per-line format.
        write_lines(
            'label_c20_celltype_3-5gram_adaboost_tree100_depth%d.txt' % depth,
            y_test,
        )
