#$ -S /bin/bash
#$ -cwd
export PATH=${PATH}

EXPERIMENT=$1
REVISION=$2

if [ ${REVISION} = "hg19" ] || [ ${REVISION} = "hg38" ]; then
  SPECIES="H_sapiens"
elif [ ${REVISION} = "mm9" ] || [ ${REVISION} = "mm10" ]; then
  SPECIES="M_musculus"
fi

cd /home/zou/chipatlas/results/${REVISION}/bmap/

echo -e "EXPERIMENT\t${EXPERIMENT}" > ./log/${EXPERIMENT}
echo -e "REVISION\t${REVISION}" >> ./log/${EXPERIMENT}


# number of processors

PNUM=4
# accession numbers for an experiment and runs
# rum modes (SINGLE or PAIRED) are also included

# SRXからSRRを取得 #
# RUNS=$(cat /home/okishinya/chipatlas/lib/metadata/SRA_Metadata_RunInfo.tab | awk -F"\t" -v OFS=" " -v EXPERIMENT="${EXPERIMENT}" '$1==EXPERIMENT {
#   for (i=3;i<=NF;i++) {
#     if ($2==0) printf "%s ", $i",SINGLE"
#     else printf "%s ", $i",PAIRED"
#   }
# }')

RUNS=$(cat /home/okishinya/chipatlas/lib/metadata/SRA_Metadata_RunInfo.tab | awk -F"\t" -v OFS=" " -v EXPERIMENT="${EXPERIMENT}" '$1==EXPERIMENT {
  for (i=3;i<=NF;i++) {
    if ($2==0) printf "%s ", $i
    else printf "%s ", $i
  }
}')

# fastq
RUN_OPTION=""
paired=0
single=0
for ACCESSION in ${RUNS}
do
  fasterq-dump -e ${PNUM} --split-files ${ACCESSION}  
  if [ -f ./${ACCESSION}_2.fastq ]; then
    paired=1
    if [ ${paired} = 0 ] || [ ${single} = 0 ]; then
      RUN_OPTION="$RUN_OPTION -pfastq ${ACCESSION}_1.fastq.gz ${ACCESSION}_2.fastq.gz"
      pigz -f -p ${PNUM} ${ACCESSION}_1.fastq
      pigz -f -p ${PNUM} ${ACCESSION}_2.fastq
    else
      echo "SINGLE と PAIRED が混在している\t異常終了" >> ./log/${EXPERIMENT}
      for ACCESSION in ${RUNS}
      do
        rm ${ACCESSION}*
      done
      exit
    fi
  elif [ -f ./${ACCESSION}.fastq ]; then
    single=1
    if [ ${paired} = 0 ] || [ ${single} = 0 ]; then
      RUN_OPTION="$RUN_OPTION -fastq ${ACCESSION}.fastq.gz"
      pigz -f -p ${PNUM} ${ACCESSION}.fastq
    else
      echo "SINGLE と PAIRED が混在している\t異常終了" >> ./log/${EXPERIMENT}
      for ACCESSION in ${RUNS}
      do
        rm ${ACCESSION}*
      done
      exit
    fi
  fi
done


echo ${RUN_OPTION}

# .fastq.gz ができていることを確認
# 異常がある場合プロセスを終了する
for ACCESSION in ${RUNS}
do
  if [ ${paired} = 1 ]; then
    if [ ! -f ./${ACCESSION}_1.fastq.gz ] || [ ! -f ./${ACCESSION}_1.fastq.gz ]; then
      echo -e "fasterq\t異常終了" >> ./log/${EXPERIMENT}
      exit
    else
      echo -e "fasterq\t正常終了" >> ./log/${EXPERIMENT}
    fi
  elif [ ${single} = 1 ]; then
    if [ ! -f ./${ACCESSION}.fastq.gz ]; then
      echo -e "fasterq\t異常終了" >> ./log/${EXPERIMENT}
      exit
    else
      echo -e "fasterq\t正常終了" >> ./log/${EXPERIMENT}
    fi
  fi
done


# mapping
BMap -species ${SPECIES} -revision ${REVISION} -SCF 2 -SCL 32 -thread ${PNUM} ${RUN_OPTION} -sum ${EXPERIMENT}.sum.gz -bisulalign ${EXPERIMENT}.bisulalign

for RUN in ${RUNS}
do
  srr=$(echo ${RUN} | cut -d"," -f1)
  rm ${srr}*.fastq.gz
done

pigz -f -p ${PNUM} ${EXPERIMENT}.bisulalign

# .bisulalign.gz && .sum.gz ができていることを確認
# 異常がある場合プロセスを終了する
if [ ! -f ./${EXPERIMENT}.bisulalign.gz ] || [ ! -f ./${EXPERIMENT}.sum.gz ]; then
  echo -e "BMap\t異常終了" >> ./log/${EXPERIMENT}
  exit
else
  echo -e "BMap\t正常終了" >> ./log/${EXPERIMENT}
fi

#launch xvfb for a virtual x-client
/usr/bin/Xvfb :99 -screen 0 1280x1024x24 &
temp=$DISPLAY
export DISPLAY=:99

#summarize the mapping result
MapSum -sum ${EXPERIMENT}.sum.gz -outprefix ${EXPERIMENT}
rm ${EXPERIMENT}.mapsum.png

# .mapsum.html ができていることを確認
# 異常がある場合プロセスを終了する
if [ ! -f ./${EXPERIMENT}.mapsum.html ]; then
  echo -e "MapSum\t異常終了" >> ./log/${EXPERIMENT}
  exit
else
  echo -e "MapSum\t正常終了" >> ./log/${EXPERIMENT}
fi

MPTC -species ${SPECIES} -revision ${REVISION} -graph ${EXPERIMENT}.graph -track ${EXPERIMENT} -thread ${PNUM} -bisulalign ${EXPERIMENT}.bisulalign.gz

# .graph ができていることを確認
# 異常がある場合プロセスを終了する
if [ ! -f ./${EXPERIMENT}.graph ]; then
  echo -e "MPTC\t異常終了" >> ./log/${EXPERIMENT}
  exit
else
  echo -e "MPTC\t正常終了" >> ./log/${EXPERIMENT}
fi

rm ${EXPERIMENT}.bisulalign.gz
rm ${EXPERIMENT}.sum.gz

CalcCoverage -graph ${EXPERIMENT}.graph -outprefix ${EXPERIMENT}
rm -r ${EXPERIMENT}.stat.image

# .stat.html ができていることを確認
# 異常がある場合プロセスを終了する
if [ ! -f ./${EXPERIMENT}.stat.html ]; then
  echo -e "CalcCoverage\t異常終了" >> ./log/${EXPERIMENT}
  exit
else
  echo -e "CalcCoverage\t正常終了" >> ./log/${EXPERIMENT}
fi

MethExport -graph ${EXPERIMENT}.graph -cpg true -methyl ${EXPERIMENT}.cpg.methyl.bedGraph.gz -cover ${EXPERIMENT}.cpg.cover.bedGraph.gz -tab ${EXPERIMENT}.cpg.methexport.tab.gz -methpipe ${EXPERIMENT}.cpg.methpipe.tab.gz

# .cpg.cover.bedGraph.gz .cpg.methpipe.tab.gz .cpg.methyl.bedGraph.gz ができていることを確認
# 異常がある場合プロセスを終了する
if [ ! -f ./${EXPERIMENT}.cpg.cover.bedGraph.gz ] || [ ! -f ./${EXPERIMENT}.cpg.methpipe.tab.gz ] || [ ! -f ./${EXPERIMENT}.cpg.methyl.bedGraph.gz ]; then
  echo -e "MethExport\t異常終了" >> ./log/${EXPERIMENT}
  exit
else
  echo -e "MethExport\t正常終了" >> ./log/${EXPERIMENT}
fi

rm ${EXPERIMENT}.graph
rm ${EXPERIMENT}.cpg.methexport.tab.gz

# hypo- hyper- partially methylated region を抽出
gunzip -c ${EXPERIMENT}.cpg.methpipe.tab.gz | grep -v "LAMBDA" | sort -k 1,1 -k 2,2n -k 3,3n -k 6,6 > ${EXPERIMENT}.cpg.methpipe.sorted.tab
# .cpg.methpipe.sorted.tab
# $1              $2              $3          $4                       $5                                                              $6
# chromosome    location of C     strand   sequence context      estimated methylation level (C/(C+T) mapping to that position)       reads overlapping withthat site

hmr -v -o ${EXPERIMENT}.hmr ${EXPERIMENT}.cpg.methpipe.sorted.tab
pmd -v -o ${EXPERIMENT}.pmd ${EXPERIMENT}.cpg.methpipe.sorted.tab
hypermr -v -o ${EXPERIMENT}.hypermr ${EXPERIMENT}.cpg.methpipe.sorted.tab

# hmr/pmd
# $4              $5       $6
# 通し番号       CpGs     always +

# hyper
# $4              $5                                              $6
# hyper: CpGs     accumulative methylation level of all CpGs     always +

# .hmr .pmd .hypermr ができていることを確認
# 異常がある場合プロセスを終了する
if [ ! -f ./${EXPERIMENT}.hmr ] || [ ! -f ./${EXPERIMENT}.pmd ] || [ ! -f ./${EXPERIMENT}.hypermr ]; then
  echo -e "hypo- hyper-me-region 抽出\t異常終了" >> ./log/${EXPERIMENT}
  exit
else
  echo -e "hypo- hyper-me-region 抽出\t正常終了" >> ./log/${EXPERIMENT}
fi

rm ${EXPERIMENT}.cpg.methpipe.sorted.tab
rm ${EXPERIMENT}.cpg.methpipe.tab.gz

# BigWig 作成
gunzip -c ${EXPERIMENT}.cpg.methyl.bedGraph.gz | grep -v "LAMBDA" > ${EXPERIMENT}.cpg.methyl.bedGraph
rm ${EXPERIMENT}.cpg.methyl.bedGraph.gz
bedSort ${EXPERIMENT}.cpg.methyl.bedGraph ${EXPERIMENT}.cpg.methyl.sorted.bedGraph
rm ${EXPERIMENT}.cpg.methyl.bedGraph
bedGraphToBigWig ${EXPERIMENT}.cpg.methyl.sorted.bedGraph ~/chipatlas/lib/genome_size/${REVISION}.chrom.sizes ${EXPERIMENT}.cpg.methyl.bw

gunzip -c ${EXPERIMENT}.cpg.cover.bedGraph.gz | grep -v "LAMBDA" > ${EXPERIMENT}.cpg.cover.bedGraph
rm ${EXPERIMENT}.cpg.cover.bedGraph.gz
bedSort ${EXPERIMENT}.cpg.cover.bedGraph ${EXPERIMENT}.cpg.cover.sorted.bedGraph
rm ${EXPERIMENT}.cpg.cover.bedGraph
bedGraphToBigWig ${EXPERIMENT}.cpg.cover.sorted.bedGraph ~/chipatlas/lib/genome_size/${REVISION}.chrom.sizes ${EXPERIMENT}.cpg.cover.bw

# .cpg.cover.bw .cpg.methyl.bw ができていることを確認
# 異常がある場合プロセスを終了する
if [ ! -f ./${EXPERIMENT}.cpg.cover.bw ] || [ ! -f ./${EXPERIMENT}.cpg.methyl.bw ]; then
  echo -e "BigWig\t異常終了" >> ./log/${EXPERIMENT}
  exit
else
  echo -e "BigWig\t正常終了" >> ./log/${EXPERIMENT}
fi

rm ${EXPERIMENT}.cpg.methyl.sorted.bedGraph
rm ${EXPERIMENT}.cpg.cover.sorted.bedGraph

export DISPLAY=$temp
killall Xvfb


mv ${EXPERIMENT}.cpg.cover.bw cover/
mv ${EXPERIMENT}.cpg.methyl.bw methyl/
mv ${EXPERIMENT}.hmr hmr/
mv ${EXPERIMENT}.pmd pmd/
mv ${EXPERIMENT}.hypermr hypermr/
mv ${EXPERIMENT}.mapsum.html mapsum/     # hit in genome / no. of reads
mv ${EXPERIMENT}.stat.html stat/       # basic statics (read depth, Me-level etc.)



# 異常のものだけ log を残す
failed=$(cat ./log/${EXPERIMENT} | grep "異常" | wl)
if [ ${failed} = 0 ]; then
  rm ./log/${EXPERIMENT}
fi


