#!/bin/sh
#$ -S /bin/sh
# Usage: splitByAttributes.sh <inBed> <Genome> <agL> <ctL> <Qval> <agS> <ctS>
# sh chipatlas/sh/makePeakBrowser/splitByAttributes.sh tmpDirForPeakBrowser/sortedBed/ce10/ce10.20.bed ce10 His Brs 05 xxx xxx
# qsub -l short -l s_vmem=8G,mem_req=8G -e a.log.txt -o a.log.txt chipatlas/sh/makePeakBrowser/splitByAttributes.sh tmpDirForPeakBrowser/sortedBed/ce10/ce10.20.bed ce10 His Brs 05 xxx xxx
# Positional arguments. In the class / sub-class fields the literal "xxx"
# stands for "not restricted" (see the file-name and filtering logic below).
inBed="$1"     # input BED file to be filtered
Genome="$2"    # genome assembly name (e.g. ce10, dm6)
agL="$3"       # antigen class, compared with the first 3 characters of the metadata antigen
ctL="$4"       # cell-type class, or xxx
Qval="$5"      # label used in the output file name and handed on to makeBed9GFF3.sh (e.g. 05, bs)
agS="$6"       # antigen sub-class, or xxx
ctS="$7"       # cell-type sub-class, or xxx

# Sample values for manual testing. The here-document is fed to the no-op
# ":" builtin, so none of the lines below are executed.
: << 'XXX'
  inBed=tmpDirForPeakBrowser/sortedBed/dm6/dm6.bs.bed
  agL=BSF
  ctL=xxx
  Qval=bs
  agS=hypermr
  ctS="xxx"
  Genome=dm6
XXX

# Build the output file name:
#   <agL>.<ctL or ALL>.<Qval>.<agS or AllAg>.<ctS or AllCell>.bed
#
# Arguments: $1 antigen class, $2 cell-type class, $3 Qval label,
#            $4 antigen sub-class, $5 cell-type sub-class
# Outputs:   the file name on stdout
#
# The values are passed tab-separated through a quoted printf, so they are
# neither glob-expanded by the shell nor broken by an "@" inside a name.
make_out_fn() {
  printf '%s\t%s\t%s\t%s\t%s\n' "$1" "$2" "$3" "$4" "$5" | awk -F '\t' '
  # Replace characters that are awkward in file names with fixed tokens.
  # The token spellings (including "PULUS" and "KOMMA") are part of the
  # generated file names and must stay exactly as they are.
  # Bracket expressions are used because a bare "+" is not a valid ERE
  # and is rejected by some awk implementations.
  function subStr(str) {
    gsub(/[(]/, "BRACKETL", str)
    gsub(/[)]/, "BRACKETR", str)
    gsub(/[+]/, "PULUS", str)
    gsub(",", "KOMMA", str)
    gsub(/[.]/, "PERIOD", str)
    gsub("/", "SLASH", str)
    # A run of spaces becomes one underscore, which is what the previous
    # unquoted echo produced for names containing repeated spaces.
    gsub(/ +/, "_", str)
    return str
  }
  {
    Lc = ($2 == "xxx") ? "ALL" : $2
    ag = ($4 == "xxx") ? "AllAg" : subStr($4)
    ct = ($5 == "xxx") ? "AllCell" : subStr($5)
    print $1 "." Lc "." $3 "." ag "." ct ".bed"
  }'
}

outFn=$(make_out_fn "$agL" "$ctL" "$Qval" "$agS" "$ctS")

# When both the antigen sub-class and the cell-type sub-class are "xxx",
# additionally write the records out as chunk files for in silico ChIP.
#
# Globals:   agS, ctS, agL, Genome, outFn (all read only)
# Arguments: $1 - optional input file; standard input is read when omitted
# Outputs:   tmpDirForPeakBrowser/inSilicoChIP/$Genome/$outFn.<suffix>,
#            each chunk holding at most 50,000,000 lines of columns 1-4
#
# Defined with the POSIX "name()" form: the "function" keyword is not
# available in every /bin/sh.
split_4_inSilicoChIP() {
  if [ "$agS" = "xxx" ] && [ "$ctS" = "xxx" ]; then
    if [ "$agL" = "BSF" ]; then
      # For BSF, append column 5 to column 4 so that it survives the cut below.
      cat -- "${1:--}" | awk -F '\t' -v OFS='\t' '{
        $4 = $4 "." $5
        print
      }'
    else
      cat -- "${1:--}"
    fi | cut -f1-4 | split -l 50000000 - "tmpDirForPeakBrowser/inSilicoChIP/$Genome/$outFn."
  else
    # No chunk files for this combination: just drain the input so that the
    # upstream writer of the pipe is not cut off.
    cat -- "${1:--}" > /dev/null
  fi
}

# Extract only the records whose ID (BED column 4) matches the requested
# antigen / cell-type conditions.
# tmpDirForPeakBrowser/public/dm6/BSF.Emb.bs.hmr.AllCell.bed
# sh chipatlas/sh/makePeakBrowser/splitByAttributes.sh tmpDirForPeakBrowser/sortedBed/dm6/dm6.bs.bed dm6 BSF xxx bs "hypermr" "xxx"
#
# Pipeline:
#   1. awk loads chipatlas/lib/metadata/metadataForPeakBrowser.tsv, collects
#      the IDs that satisfy the conditions and prints the matching BED rows.
#      When both sub-classes are "xxx" it also stores the number of printed
#      rows in tmpDirForPeakBrowser/linNum/$Genome.$outFn.
#   2. tee saves the rows to tmpDirForPeakBrowser/splitByAttributes/$Genome/$outFn.
#   3. split_4_inSilicoChIP consumes the same stream.

cat -- "$inBed" | awk -F '\t' \
  -v Genome="$Genome" -v agL="$agL" -v ctL="$ctL" \
  -v agS="$agS" -v ctS="$ctS" -v outFn="$outFn" '
BEGIN {
  # Genome name with the digits removed (e.g. dm6 -> dm); it is used as a
  # regular expression against metadata column 2.
  gen = Genome
  gsub(/[0-9]/, "", gen)
  # Metadata columns as used below: $1 = ID looked up from BED column 4,
  # $2 = genome, $3 = antigen compared as "<class>@ <sub-class>",
  # $4 = cell type compared as "<class>@ <sub-class>".
  # The class is taken from the first 3 characters of $3 / $4.
  while ((getline < "chipatlas/lib/metadata/metadataForPeakBrowser.tsv") > 0) {
    if ($2 ~ gen) {
      if (ctL == "xxx") {
        if (agL != "BSF") {
          if (agS == "xxx") {                   # His  ALL  AllAg  AllCell
            if (agL == substr($3, 1, 3)) x[$1]++
          } else {                              # His  ALL  H3ac   AllCell
            if ($3 == agL "@ " agS) x[$1]++
          }
        } else {                                # BSF  ALL  AllAg/hmr  AllCell
          if (agL == substr($3, 1, 3)) x[$1]++
        }
      } else {
        if (agL != "BSF") {
          if (agS == "xxx" && ctS == "xxx") {   # His  Brs  AllAg  AllCell
            if (agL == substr($3, 1, 3) && ctL == substr($4, 1, 3)) x[$1]++
          } else if (ctS == "xxx") {            # His  Brs  H3ac   AllCell
            if ($3 == agL "@ " agS && ctL == substr($4, 1, 3)) x[$1]++
          } else if (agS == "xxx") {            # His  Brs  AllAg  MCF-7
            if (agL == substr($3, 1, 3) && $4 == ctL "@ " ctS) x[$1]++
          }
        } else {
          if (ctS == "xxx") {                   # BSF  Liv  AllAg/hmr  AllCell
            if (agL == substr($3, 1, 3) && ctL == substr($4, 1, 3)) x[$1]++
          } else if (agS == "xxx") {            # BSF  Liv  AllAg  MCF-7
            if (agL == substr($3, 1, 3) && $4 == ctL "@ " ctS) x[$1]++
          }
        }
      }
    }
  }
}
# Membership test instead of x[$4] > 0: same result, but it does not insert
# an empty array element for every ID that was not selected.
($4 in x) {
  # For BSF with a sub-class given, BED column 5 must equal that sub-class.
  if (agL != "BSF" || agS == "xxx" || $5 == agS) {
    i++
    print
  }
}
END {
  # The concatenated file name is parenthesised: an unparenthesised
  # concatenation after ">" is ambiguous and rejected by some awks.
  if (agS == "xxx" && ctS == "xxx") print i + 0 > ("tmpDirForPeakBrowser/linNum/" Genome "." outFn)
}' | tee "tmpDirForPeakBrowser/splitByAttributes/$Genome/$outFn" | split_4_inSilicoChIP

# Convert the result to BED9+GFF3 for Peak Browser and build its index
# (performed by makeBed9GFF3.sh). Results with more than 10,000,000 lines are
# submitted as a separate 32G job; smaller ones are processed in this process.
splitBed="tmpDirForPeakBrowser/splitByAttributes/$Genome/$outFn"

if [ "$agS" = "xxx" ] && [ "$ctS" = "xxx" ]; then
  # For this combination the line count was already stored by the filtering step.
  N=$(cat "tmpDirForPeakBrowser/linNum/$Genome.$outFn")
else
  N=$(cat "$splitBed" | wc -l)
fi
# Normalise to a plain integer: strips the padding some wc implementations
# print and turns an unreadable count into 0 (which selects the local run,
# as the failing comparison did before).
N=$(( ${N:-0} ))

# Same argument list for both ways of launching the converter; every value is
# quoted so that an empty or space-containing one keeps its position.
set -- "$splitBed" "$Genome" "$agL" "$ctL" "$Qval" "$agS" "$ctS"

if [ "$N" -gt 10000000 ]; then
  qsub -e /dev/null -o /dev/null -l s_vmem=32G -l mem_req=32G chipatlas/sh/makePeakBrowser/makeBed9GFF3.sh "$@"
else
  sh chipatlas/sh/makePeakBrowser/makeBed9GFF3.sh "$@"
fi
exit

