#!/bin/sh
#$ -S /bin/sh

# Tab-separated experiment table; every stage below reads it.
expTab='/home/okishinya/chipatlas/lib/assembled_list/experimentList.tab'
# Name prefix of the intermediate fragment files. It is a relative name, so
# the files are created in the current working directory.
tmpF='expList_tmp'

# emit_id_fragment
#   stdin : tab-separated experiment rows (field 1 and field 2 are used)
#   stdout: one line per row: <row number> TAB "id": "<f1>_<f2>", "experimentId": "<f1>"
#
# The fields are written with print rather than being spliced into a printf
# format string, so a "%" inside a field is emitted literally instead of
# being interpreted as a conversion specification.
emit_id_fragment() {
  awk -F'\t' '{
    print NR "\t\"id\": \"" $1 "_" $2 "\", \"experimentId\": \"" $1 "\""
  }'
}

# Output redirection comes first so the fragment file is created (possibly
# empty) even when the input table cannot be opened.
emit_id_fragment > "$tmpF"1 < "$expTab"

# emit_genome_fragment
#   stdin : tab-separated experiment rows (field 2 is used)
#   stdout: one line per row: <row number> TAB "genome": "<f2>"
#
# The field is printed as data, never as part of a printf format string, so
# a "%" in it cannot break the output.
emit_genome_fragment() {
  awk -F'\t' '{
    print NR "\t\"genome\": \"" $2 "\""
  }'
}

# Output redirection comes first so the fragment file is created (possibly
# empty) even when the input table cannot be opened.
emit_genome_fragment > "$tmpF"2 < "$expTab"

# emit_antigen_fragment
#   stdin : tab-separated experiment rows (field 3 and field 4 are used)
#   stdout: one line per row:
#           <row number> TAB "antigen": {"class": "<f3>", "id": "<f4>"}
#
# Both fields are backslash-escaped for JSON (backslash and double quote),
# the same escaping this script applies to the title column, and they are
# printed as data so that a "%" cannot be taken as a printf conversion.
emit_antigen_fragment() {
  awk -F'\t' '
  # Return s with every backslash and double quote preceded by a backslash.
  function esc(s) { gsub(/[\\\042]/, "\\\\&", s); return s }
  {
    print NR "\t\"antigen\": {\"class\": \"" esc($3) "\", \"id\": \"" esc($4) "\"}"
  }'
}

# Output redirection comes first so the fragment file is created (possibly
# empty) even when the input table cannot be opened.
emit_antigen_fragment > "$tmpF"3_4 < "$expTab"

# emit_cell_fragment
#   stdin : tab-separated experiment rows (fields 5, 6 and 7 are used)
#   stdout: one line per row:
#           <row number> TAB "cell": {"class": "<f5>", "id": "<f6>"[, "desc": {...}]}
#
# Field 7 is a "|"-separated list of key=value items. It becomes the "desc"
# object unless it is empty or its first item is the literal string NA.
#
# Differences from the previous multi-stage pipeline, all of which only
# affect rows that used to be mangled:
#   * an item is split at its FIRST "=", so values that themselves contain
#     "=" are kept whole instead of being cut at the second "=";
#   * the JSON is built directly, without placeholder tokens that could
#     collide with the data and without an interval regex such as .{10};
#   * keys, values, class and id are all backslash-escaped;
#   * no intermediate file is written and read back, which also removes a
#     getline loop that never terminated when that file was unreadable.
emit_cell_fragment() {
  awk -F'\t' '
  # Return s with every backslash and double quote preceded by a backslash.
  function esc(s) { gsub(/[\\\042]/, "\\\\&", s); return s }
  {
    desc = ""
    n = split($7, item, "|")
    # Only the first item is checked against NA.
    if (item[1] != "NA") {
      for (i = 1; i <= n; i++) {
        eq = index(item[i], "=")
        if (eq > 0) {
          key = substr(item[i], 1, eq - 1)
          val = substr(item[i], eq + 1)
        } else {
          key = item[i]
          val = ""
        }
        desc = desc (i > 1 ? ", " : "") "\"" esc(key) "\": \"" esc(val) "\""
      }
    }
    out = "\"cell\": {\"class\": \"" esc($5) "\", \"id\": \"" esc($6) "\""
    if (desc != "") out = out ", \"desc\": {" desc "}"
    print NR "\t" out "}"
  }'
}

# Output redirection comes first so the fragment file is created (possibly
# empty) even when the input table cannot be opened.
emit_cell_fragment > "$tmpF"5_6_7 < "$expTab"

# emit_proclog_fragment RUNINFO PEAKS
#   RUNINFO: tab-separated table; column 1 is the experiment id, column 2 is
#            the layout code (0 = Single-end, 1 = Paired-end).
#   PEAKS  : tab-separated table; columns 1-3 form the lookup key
#            <id>_<genome>_<suffix>, column 4 is the count.
#   stdin  : tab-separated experiment rows (fields 1, 2, 3 and 8 are used;
#            field 8 is a comma-separated list of statistics).
#   stdout : one line per row: <row number> TAB "procLog": {...}
#
# Rows whose field 3 is "Bisulfite-Seq" report coverage and hmr/pmd/hypermr
# counts; all other rows report %dupl and the 05/10/20 peak counts.
#
# Fixes relative to the previous inline version:
#   * An experiment that is absent from RUNINFO now gets layout "NA". It
#     used to be reported as "Single-end" because an uninitialised awk value
#     compares equal to 0.
#   * A missing statistic in field 8 is emitted as "NA" (quoted), like a
#     missing peak count, instead of leaving a hole that is invalid JSON.
emit_proclog_fragment() {
  for _tbl in "$1" "$2"; do
    [ -r "$_tbl" ] || printf 'warning: cannot read %s\n' "$_tbl" >&2
  done
  awk -F'\t' -v runinfo="$1" -v peaks="$2" '
  # Pass a value through unchanged, or substitute a quoted NA when it is empty.
  function num(v) { return (v == "") ? "\"NA\"" : v }
  BEGIN {
    # getline returns -1 on error, so test for > 0 to avoid looping forever.
    while ((getline < runinfo) > 0) l[$1] = $2
    close(runinfo)
    while ((getline < peaks) > 0) p[$1 "_" $2 "_" $3] = $4
    close(peaks)
  }
  {
    split($8, x, ",")

    if (!($1 in l))      layout = "NA"
    else if (l[$1] == 0) layout = "Single-end"
    else if (l[$1] == 1) layout = "Paired-end"
    else                 layout = "NA"

    key = $1 "_" $2
    head = "\"procLog\": {\"layout\": \"" layout "\", \"#reads\": " num(x[1]) ", \"%mapped\": " num(x[2])
    if ($3 != "Bisulfite-Seq") {
      tail = ", \"%dupl\": " num(x[3]) ", \"#peaks\": {\"05\": " num(p[key "_05"]) ", \"10\": " num(p[key "_10"]) ", \"20\": " num(p[key "_20"]) "}"
    } else {
      tail = ", \"coverage\": " num(x[3]) ", \"#mr\": {\"hypo\": " num(p[key "_hmr"]) ", \"partial\": " num(p[key "_pmd"]) ", \"hyper\": " num(p[key "_hypermr"]) "}"
    }
    print NR "\t" head tail "}"
  }'
}

# Output redirection comes first so the fragment file is created (possibly
# empty) even when the input table cannot be opened.
emit_proclog_fragment \
  /home/okishinya/chipatlas/lib/metadata/SRA_Metadata_RunInfo.tab \
  /home/okishinya/chipatlas/lib/assembled_list/peakNumber.tsv \
  > "$tmpF"8 < "$expTab"

# Title fragment: for each row write "<row number> TAB", followed by
# "title": "<field 9>" unless field 9 is exactly "-". Backslashes and double
# quotes in the title are preceded by a backslash.
cat "$expTab" | awk -F'\t' '{
  title = $9
  gsub(/[\\\042]/, "\\\\&", title)
  line = NR "\t"
  if (title != "-") line = line "\"title\": \"" title "\""
  print line
}' > "$tmpF"9

# emit_metadata_fragment
#   stdin : tab-separated experiment rows; fields 10..NF are key=value items
#   stdout: one line per row:
#           <row number> TAB "metadata": {"k": "v", ...}
#           or just <row number> TAB when the row carries no metadata
#           (fewer than 10 fields, or a single empty 10th field).
#
# Differences from the previous multi-stage pipeline, all of which only
# affect rows that used to be mangled:
#   * an item is split at its FIRST "=", so values containing "=" (URLs,
#     formulas, ...) are no longer cut at the second "=";
#   * the JSON is built directly, without placeholder tokens that could
#     collide with the data and without an interval regex such as .{10};
#   * keys are backslash-escaped as well as values.
emit_metadata_fragment() {
  awk -F'\t' '
  # Return s with every backslash and double quote preceded by a backslash.
  function esc(s) { gsub(/[\\\042]/, "\\\\&", s); return s }
  {
    meta = ""
    if (NF > 10 || $10 != "") {
      for (i = 10; i <= NF; i++) {
        eq = index($i, "=")
        if (eq > 0) {
          key = substr($i, 1, eq - 1)
          val = substr($i, eq + 1)
        } else {
          key = $i
          val = ""
        }
        meta = meta (i > 10 ? ", " : "") "\"" esc(key) "\": \"" esc(val) "\""
      }
    }
    if (meta != "") print NR "\t\"metadata\": {" meta "}"
    else            print NR "\t"
  }'
}

# Output redirection comes first so the fragment file is created (possibly
# empty) even when the input table cannot be opened.
emit_metadata_fragment > "$tmpF"10_ < "$expTab"

# assemble_experiment_list PREFIX COUNT UPDATED
#   PREFIX : path prefix of the fragment files PREFIX1, PREFIX2, PREFIX3_4,
#            PREFIX5_6_7, PREFIX8, PREFIX9 and PREFIX10_. Each line of a
#            fragment file is "<row number> TAB <JSON fragment>".
#   COUNT  : number of experiments (rows 1..COUNT are emitted)
#   UPDATED: string stored under meta.updated
#   stdout : the unformatted JSON document
#
# The first five fragments are always joined with ", "; the title (9) and
# metadata (10_) fragments are appended only when they are non-empty.
#
# getline returns -1 when a file cannot be read, which is "true" in a bare
# while condition; every read loop therefore tests for > 0 so that a missing
# fragment file ends the loop instead of hanging the job.
assemble_experiment_list() {
  awk -F'\t' -v prefix="$1" -v total="$2" -v updated="$3" '
  BEGIN {
    f = prefix "1";     while ((getline < f) > 0) id[$1]      = $2; close(f)
    f = prefix "2";     while ((getline < f) > 0) genome[$1]  = $2; close(f)
    f = prefix "3_4";   while ((getline < f) > 0) antigen[$1] = $2; close(f)
    f = prefix "5_6_7"; while ((getline < f) > 0) cell[$1]    = $2; close(f)
    f = prefix "8";     while ((getline < f) > 0) proclog[$1] = $2; close(f)
    f = prefix "9";     while ((getline < f) > 0) title[$1]   = $2; close(f)
    f = prefix "10_";   while ((getline < f) > 0) meta[$1]    = $2; close(f)

    total += 0   # force a numeric comparison below
    printf "{\n\"meta\": {\"updated\": \"%s\"}, \n\"experiments\": [\n", updated
    for (i = 1; i <= total; i++) {
      rec = id[i] ", " genome[i] ", " antigen[i] ", " cell[i] ", " proclog[i]
      if (length(title[i]) > 0) rec = rec ", " title[i]
      if (length(meta[i]) > 0)  rec = rec ", " meta[i]
      # Every object but the last is followed by a comma.
      printf "{\n%s\n}%s\n", rec, (i < total ? "," : "")
    }
    printf "]\n}\n"
  }'
}

# Count rows with awk so that a final line without a trailing newline is
# counted too (wc -l counts newline characters only).
wcl=$(awk 'END { print NR }' < "$expTab")

# NOTE(review): $time is not assigned anywhere in this file; presumably it is
# supplied through the environment of the job -- confirm. When it is unset,
# meta.updated is an empty string.
outJson="chipatlas/lib/assembled_list/experimentList.json"

# Format into a scratch file first and replace the real file only when jq
# succeeds, so a JSON error cannot truncate the existing list.
if assemble_experiment_list "$tmpF" "$wcl" "$time" | jq . > "$outJson.tmp"; then
  mv "$outJson.tmp" "$outJson"
else
  printf '%s\n' "error: could not build $outJson; existing file left untouched" >&2
  rm -f "$outJson.tmp"
fi

# Remove exactly the fragment files used above. A glob on the prefix would
# expand to every file in the directory if the prefix were ever empty.
rm -f "$tmpF"1 "$tmpF"2 "$tmpF"3_4 "$tmpF"5_6_7 "$tmpF"8 "$tmpF"9 "$tmpF"10_

# cat experimentList.json| jq '{meta: {updated: .meta.updated}, experiments: .experiments[0]}' 
# cat experimentList.json | jq '.experiments | map(select(.antigen.class != "Bisulfite-Seq" and .genome == "hg19"))[0]'
# cat experimentList.json | jq '.experiments | map(select(.antigen.class == "Bisulfite-Seq" and .genome == "hg19"))[0]'

# End the script explicitly. With no operand, exit returns the status of the
# last command that was executed.
exit
