#!/usr/bin/perl

# version 01

use strict;
use warnings;

# Writes to stdout, for every selected sample/motif pair, a snippet of shell
# code that extracts the top 1000 motif-containing peaks and computes
# strand-specific aggregation plots with the ChIP-Seq tools
# (chipcenter, chipscore, chipcor, ...).
#
# Usage: <script> DIR INP TAB < SAMPLES
#
#   DIR      directory holding the per-sample "<sample>.sga" files
#   INP      tab-separated file; column 1 is compared against column 3 of
#            each SAMPLES line, column 2 is the key looked up in TAB
#            (presumably a motif / TF name -- confirm with the data)
#   TAB      tab-separated file; column 1 = key, column 2 = reference feature
#            name (passed to -A), column 3 = reference SGA file
#   SAMPLES  tab-separated lines read from STDIN; column 1 = sample file name
#            ("<sample>.sga"), column 3 = feature, column 5 = selection flag
#            (only lines whose flag equals "T" are processed)

# Repeat-masker SGA file merged with the reference before filtering.
my $rmsk = "/export/data/mga/hg19/rmsk/rmsk_hg19.SGA";

die "Usage: $0 <dir> <inp> <tab> < samples\n" unless @ARGV >= 3;
my ($dir, $inp, $tab) = @ARGV;

# Returns the shell code for one sample/motif pair.
# The text of the here-document is emitted verbatim, so it must stay
# flush-left and its shell syntax must not be reformatted.
sub shell_code {
    my ($sample, $motif, $ref_file, $ref_feature, $target) = @_;
    return <<CMD;

# --- Sample: $sample; Feature $ref_feature

# extract top 1000 peaks with motif

sort -s -m -k1,1 -k3,3n $ref_file $rmsk | counts_filter > ref_masked.sga
chipcenter -z -s 75  -r TRG -c 50 $target 2>/dev/null > trg_centered.sga
sort -s -m -k1,1 -k3,3n -k4,4 ref_masked.sga trg_centered.sga | compactsga > ref_trg.sga
chipscore -o -A $ref_feature -B TRG -b -100 -e 100 -t 0 ref_trg.sga 2>/dev/null \\
 | sort -k6,6nr | head -1000 | sort -k1,1 -k3,3n -k4,4 > peaks.sga

# generating aggregation plots

featreplace -f TRG $target > trg.sga
sort -s -m -k1,1 -k3,3n -k4,4 peaks.sga trg.sga > ref_trg.sga
chipcor -o -A "$ref_feature" -B "TRG +" -b -1000 -e 1000 -w 20 -c 50 -n 2 ref_trg.sga  2>/dev/null > results/${sample}_${motif}_f.tsv
chipcor -o -A "$ref_feature" -B "TRG -" -b -1000 -e 1000 -w 20 -c 50 -n 2 ref_trg.sga  2>/dev/null > results/${sample}_${motif}_r.tsv
# rm *.sga
CMD
}

# Feature -> list of motif keys, kept in file order so that the generated
# code appears in the same order as a sequential scan of INP would give.
my %motifs_for;
open(my $inp_fh, '<', $inp) or die "Cannot open '$inp': $!\n";
while (my $line = <$inp_fh>) {
    chomp $line;
    my ($feature, $motif) = split /\t/, $line;
    next unless defined $motif;    # blank or single-column line
    push @{ $motifs_for{$feature} }, $motif;
}
close($inp_fh);

# Motif key -> reference feature name / reference SGA file.
my (%ref_feature_of, %ref_file_of);
open(my $tab_fh, '<', $tab) or die "Cannot open '$tab': $!\n";
while (my $line = <$tab_fh>) {
    chomp $line;
    my ($motif, $ref_feature, $ref_file) = split /\t/, $line;
    next unless defined $ref_file;    # incomplete line
    $ref_feature_of{$motif} = $ref_feature;
    $ref_file_of{$motif}    = $ref_file;
}
close($tab_fh);

# major loop over samples

while (my $line = <STDIN>) {
    # chomp so the flag still matches when it is the last column
    chomp $line;
    my @fields = split /\t/, $line;
    next unless defined($fields[4]) && $fields[4] eq "T";

    # Strip only a literal ".sga" extension at the end of the name.
    (my $sample = $fields[0]) =~ s/\.sga$//;
    my $target = "$dir/$sample.sga";

    # minor loop over target motifs

    my $motifs = $motifs_for{ $fields[2] } or next;
    for my $motif (@$motifs) {
        my $ref_file    = $ref_file_of{$motif};
        my $ref_feature = $ref_feature_of{$motif};

        # Without a table entry the generated commands would be malformed.
        unless (defined $ref_file && defined $ref_feature) {
            warn "No entry for '$motif' in '$tab'; skipping sample '$sample'\n";
            next;
        }

        # print shell code to stdout

        print shell_code($sample, $motif, $ref_file, $ref_feature, $target);
    }
}
