#!/bin/sh
# ca_datacount EXPERIMENT_LIST
#
# Feeds the embedded R program below to R on stdin. The program reads the
# tab-separated EXPERIMENT_LIST, keeps columns 1, 2, 3 and 5 (named SRX,
# Organism, AntigenClass and CellClass), and writes these PNG files into the
# current directory (YYYYMMDD is the run date):
#   organism_datatypes_YYYYMMDD.png     counts per organism and data type
#   antigen_classYYYYMMDD.png           counts per antigen class and organism
#   cell_class_<type>_YYYYMMDD.png      counts per cell type class, one file
#                                       per data type
#
# Defined with the POSIX "name()" form: the "function" keyword is not
# understood by every /bin/sh (e.g. dash).
ca_datacount() {
  /home/okishinya/R-3.5.1/bin/R --vanilla --args "$1" << 'DDD'
    args <- commandArgs(trailingOnly = TRUE)
    expF <- as.character(args[1])
    if (is.na(expF) || !file.exists(expF)) {
      stop("experiment list not found: ", expF, call. = FALSE)
    }
    suppressPackageStartupMessages(library(tidyverse))

    # Take the timestamp once so that every title and file name of this run
    # agrees, even if the run crosses midnight.
    now <- Sys.time()
    date_label <- paste("(", format(now, "%b %d, %Y"), ")")
    date_stamp <- format(now, "%Y%m%d")

    df <- read.table(expF, header = FALSE, stringsAsFactors = FALSE,
                     sep = "\t", fill = TRUE, quote = "")
    df <- df[, c(1, 2, 3, 5)]

    # Genome assembly -> display name. The order of this vector defines the
    # order of organisms in every plot.
    organism_by_genome <- c(
      "hg19" = "H. sapiens",
      "mm9" = "M. musculus",
      "rn6" = "R. norvegicus",
      "dm3" = "D. melanogaster",
      "ce10" = "C. elegans",
      "sacCer3" = "S. cerevisiae"
    )
    organism <- unname(organism_by_genome)

    df <- df[df$V2 %in% names(organism_by_genome), ]
    colnames(df) <- c("SRX", "Organism", "AntigenClass", "CellClass")
    # Exact lookup instead of regex replacement: after the filter above every
    # value is one of the keys.
    df$Organism <- unname(organism_by_genome[df$Organism])

    # Only the first four colours are used (data type plot); all entries must
    # carry the leading "#" to be valid hex colours.
    color <- c("#EF7169", "#B79F00", "#05BA38", "#0ABFC4", "#609CFF", "#F664E2")

    expType <- c("ChIP-Seq", "ATAC-Seq", "DNase-seq", "Bisulfite-Seq")
    non_chip <- setdiff(expType, "ChIP-Seq")

    # Count rows of `data` per (organism, class) pair and return them in long
    # form with columns Organism, <class_name>, Counts. Every combination of
    # `organism` x `class_levels` is present (zero when absent). Values that
    # are NA or not in `class_levels` are not counted.
    count_by_organism <- function(data, class_values, class_levels, class_name) {
      tab <- table(
        factor(data$Organism, levels = organism),
        factor(class_values, levels = class_levels)
      )
      long <- as.data.frame(tab, responseName = "Counts",
                            stringsAsFactors = FALSE)
      names(long)[1:2] <- c("Organism", class_name)
      long
    }

    # Theme settings shared by all plots; title, subtitle and strip settings
    # are added per plot.
    common_theme <- theme(
      plot.margin = unit(c(2, 2, 2, 2), "lines"),
      text = element_text(size = 20, family = "Helvetica"),
      axis.title.y = element_text(vjust = 5),
      axis.title.x = element_text(vjust = -2),
      legend.title = element_text(size = 20),
      legend.text = element_text(size = 15, margin = margin(t = 10))
    )

    # Organism_Datatype
    # Everything that is not ATAC-Seq, DNase-seq or Bisulfite-Seq is counted
    # as ChIP-Seq.
    data_type <- ifelse(df$AntigenClass %in% non_chip, df$AntigenClass, "ChIP-Seq")
    genome_expType <- count_by_organism(df, data_type, expType, "expType")
    genome_expType$Data_types <- factor(genome_expType$expType, levels = rev(expType))
    genome_expType$Organism <- factor(genome_expType$Organism, levels = rev(organism))

    g <- ggplot(genome_expType, aes(x = Organism, y = Counts, fill = Data_types)) +
      ggtitle("# of data by types of experiment", subtitle = date_label) +
      geom_bar(stat = "identity") +
      scale_fill_manual(values = rev(color[1:4])) +
      coord_flip() +
      guides(fill = guide_legend(reverse = TRUE, title = "Data types")) +
      common_theme +
      theme(plot.title = element_text(size = 25, vjust = 5),
            plot.subtitle = element_text(size = 20, vjust = 5))
    ggsave(filename = paste0("organism_datatypes_", date_stamp, ".png"),
           plot = g, dpi = 200, width = 10, height = 4)

    # Antigen_class
    labeli <- as_labeller(c(`H. sapiens` = "HS", `M. musculus` = "MM", `R. norvegicus` = "RN", `D. melanogaster` = "DM", `C. elegans` = "CE", `S. cerevisiae` = "SC"))
    antigens <- c("Histone", "RNA polymerase", "TFs and others", "Input control", "Unclassified", "No description", "ATAC-Seq", "DNase-seq", "Bisulfite-Seq")

    antigen_count <- count_by_organism(df, df$AntigenClass, antigens, "Antigens")
    antigen_count$Antigens <- factor(antigen_count$Antigens, levels = rev(antigens))
    antigen_count$Organism <- factor(antigen_count$Organism, levels = organism)

    g <- ggplot(antigen_count, aes(x = Antigens, y = Counts, fill = Organism)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      xlab("Antigen Class") +
      ggtitle("# of Antigen Classes in ChIP-Atlas", subtitle = date_label) +
      facet_grid(rows = vars(Organism), scales = "free_y", switch = "y",
                 space = "free_y", labeller = labeli) +
      common_theme +
      theme(strip.text = element_text(size = 15),
            plot.title = element_text(size = 25, hjust = 0.5, vjust = 5),
            plot.subtitle = element_text(size = 20, hjust = -0.1, vjust = 5))
    # NOTE: unlike the other outputs there is no "_" before the date; the
    # name is kept as is because other tooling may rely on it.
    ggsave(filename = paste0("antigen_class", date_stamp, ".png"),
           plot = g, dpi = 200, width = 10, height = 18, limitsize = FALSE)

    # Cell_type
    trailing_cells <- c("Others", "Unclassified", "No description")
    for (type in expType) {
      if (type == "ChIP-Seq") {
        sub_df <- df[!df$AntigenClass %in% non_chip, ]
      } else {
        # which() drops NA comparisons instead of producing all-NA rows.
        sub_df <- df[which(df$AntigenClass == type), ]
      }

      # Alphabetical cell classes, with the catch-all classes always last.
      cells <- unique(sub_df$CellClass)
      cells <- cells[order(cells)]
      cells <- cells[!is.na(cells)]
      cells <- c(cells[!cells %in% trailing_cells], trailing_cells)

      cell_count <- count_by_organism(sub_df, sub_df$CellClass, cells, "Cells")
      cell_count$Cells <- factor(cell_count$Cells, levels = rev(cells))
      cell_count$Organism <- factor(cell_count$Organism, levels = organism)
      cell_count <- cell_count[cell_count$Counts > 0, ]
      if (nrow(cell_count) == 0) {
        # Nothing to facet on; skip rather than abort the remaining plots.
        message("no data for ", type, "; skipping cell type plot")
        next
      }

      g <- ggplot(cell_count, aes(x = Cells, y = Counts, fill = Organism)) +
        ggtitle(paste("# of Cell Type Classes by", type), subtitle = date_label) +
        geom_bar(stat = "identity") +
        coord_flip() +
        xlab("Cell type class") +
        facet_grid(rows = vars(Organism), scales = "free_y", switch = "y",
                   space = "free_y", labeller = labeli) +
        common_theme +
        theme(strip.text = element_text(size = 15),
              plot.title = element_text(size = 25, hjust = 0.2, vjust = 5),
              plot.subtitle = element_text(size = 20, hjust = -0.1, vjust = 5))
      ggsave(filename = paste0("cell_class_", type, "_", date_stamp, ".png"),
             plot = g, dpi = 200, width = 10, height = 25, limitsize = FALSE)
    }
DDD
}

# Driver: regenerate today's summary plots in the current directory.
#
# LANG is switched to en_US.UTF-8 for the duration of the run (presumably so
# the "%b" month names in the plot subtitles are rendered in English --
# confirm) and exported so that the R child process actually inherits it even
# when LANG was not part of the environment.
LANG_backup=$LANG
LANG="en_US.UTF-8"
export LANG
expTab="/home/okishinya/chipatlas/lib/assembled_list/experimentList.tab"

# Remove PNGs already produced today; "./" keeps file names that start with
# "-" from being read as rm options.
today=$(date +%Y%m%d)
rm -f ./*"${today}.png"

ca_datacount "$expTab" > /dev/null
status=$?

LANG=$LANG_backup

# Propagate R's exit status; a bare "exit" would report the status of the
# LANG assignment above, i.e. always success.
exit "$status"
