#!/bin/csh

#Version  4/16/2007
# Run protein distance programs as a command
#Synopsis: protdist.csh infile dmethod transratio gc categorization method bseed replicates blocksize percent\
#             tconmeth power subrep global negbranch outgroup jumble jseed numjum\
#             termout printdata outfile treefile

#Convert arguments to variables
# All 23 positional arguments named in the synopsis are required. Stop with
# a usage message when any are missing, instead of carrying on with empty
# settings (an empty OUTFILE or TREEFILE would make the final results be
# written to "../").
if ($#argv < 23) then
   echo "Usage: protdist.csh infile dmethod transratio gc categorization method bseed replicates blocksize percent tconmeth power subrep global negbranch outgroup jumble jseed numjum termout printdata outfile treefile"
   exit 1
endif

set INFILE         = $1
set DMETHOD        = $2
set TRANSRATIO     = $3
set GC             = $4
set CATEGORIZATION = $5
set METHOD         = $6
set BSEED          = $7
set REPLICATES     = $8
set BLOCKSIZE      = $9
set PERCENT        = $10
set TCONMETH       = $11
# NOTE(review): POWER is captured but not referenced anywhere else in this
# script; it is kept so the argument positions stay unchanged.
set POWER          = $12
set SUBREP         = $13
set GLOBAL         = $14
set NEGBRANCH      = $15
set OUTGROUP       = $16
set JUMBLE         = $17
set JSEED          = $18
set NUMJUM         = $19
set TERMOUT        = $20
set PRINTDATA      = $21
set OUTFILE        = $22
set TREEFILE       = $23

# RM_CMD - command to be used for removing files and directories.
# Start with the bare command name, then let /bin/rm and finally
# /usr/bin/rm take precedence when they exist, so /usr/bin/rm wins if both
# are present.
set RM_CMD = rm
if (-e /bin/rm) set RM_CMD = /bin/rm
if (-e /usr/bin/rm) set RM_CMD = /usr/bin/rm

# On some systems, mv is aliased to mv -i. Calling mv through an explicit
# path avoids the alias, so the script never prompts the user when it
# overwrites a file. A value of MV_CMD that is already set is left alone.
if ( ! ${?MV_CMD} ) then
   if (-e /usr/bin/mv) then
      set MV_CMD = /usr/bin/mv
   else if (-e /bin/mv) then
      set MV_CMD = /bin/mv
   else
      set MV_CMD = mv
   endif
endif


# Make a temporary directory in which to run the program.
# The directory name carries the shell's process id ($$) so that concurrent
# runs do not share a working directory.
# NOTE(review): the DNADIST prefix looks inherited from a DNA version of
# this script; it is kept unchanged here.
set TEMPDIR = DNADIST.$$
mkdir $TEMPDIR
if ($status != 0) then
   echo "protdist.csh: unable to create temporary directory $TEMPDIR"
   exit 1
endif

# Copy the input before changing directory, so that a relative INFILE path
# is still resolved against the calling directory. Give up (and remove the
# scratch directory) if the copy fails, rather than running the analysis
# without any data.
cp $INFILE $TEMPDIR/infile.temp
if ($status != 0) then
   echo "protdist.csh: unable to copy input file $INFILE"
   $RM_CMD -rf $TEMPDIR
   exit 1
endif
cd $TEMPDIR

#----------------- generate keyboard input to send to PROTDIST -----

# Start the run report (MSGFILE) with a title and a section banner.
echo 'Protein Distance Matrix Phylogeny Methods' > MSGFILE
echo '' >> MSGFILE
echo '---------------------  DISTANCE MATRIX ---------------------' >> MSGFILE
echo '' >> MSGFILE

# Choose method for constructing distance matrix.
# The switch only records the model's display name and how many "p" lines
# must be queued in MATRIXPARAMFILE for it; the report line and the "p"
# lines are written once, after the switch. Any DMETHOD value that is not
# listed (including "J") keeps the Jones-Taylor-Thornton settings.
# NOTE(review): presumably each "p" advances PROTDIST's model selection by
# one step - confirm against the PROTDIST menu.
set MODELNAME = "Jones-Taylor-Thornton"
set PPRESSES = 0
switch ($DMETHOD)
  case "H": # Henikoff-Tiller PMB matrix
     set MODELNAME = "Henikoff-Tiller PMB matrix"
     set PPRESSES = 1
     breaksw
  case "D": # Dayhoff PAM
     set MODELNAME = "Dayhoff PAM"
     set PPRESSES = 2
     breaksw
  case "K": # Kimura
     set MODELNAME = "Kimura"
     set PPRESSES = 3
     breaksw
  case "C": # Categories
     set MODELNAME = "Categories"
     set PPRESSES = 5
     breaksw
endsw

echo "Distance matrix constructed using $MODELNAME method" >> MSGFILE

# Queue the required number of "p" lines (none for Jones-Taylor-Thornton,
# in which case MATRIXPARAMFILE is not touched here).
@ PCOUNT = 0
while ($PCOUNT < $PPRESSES)
   echo p >> MATRIXPARAMFILE
   @ PCOUNT++
end

# The settings below are only written when the Categories method was chosen.
if ($DMETHOD == "C") then

   # Transition/transversion ratio: only queued when it is not the literal
   # text 2.0; the value is always reported.
   if ($TRANSRATIO != 2.0) then
      echo t >> MATRIXPARAMFILE
      echo $TRANSRATIO >> MATRIXPARAMFILE
   endif
   echo "Transition/Transversion ratio = "$TRANSRATIO >> MSGFILE

   # Genetic code: the switch picks the name to report and the key to
   # queue. An empty key means UNIVERSAL, for which nothing is queued
   # ("u" and any unlisted GC value).
   set GCNAME = 'UNIVERSAL'
   set GCKEY = ""
   switch ($GC)
     case "m":
       set GCNAME = 'MITOCHONDRIAL'
       set GCKEY = m
       breaksw
     case "v":
       set GCNAME = 'VERTEBRATE MITOCHONDRIAL'
       set GCKEY = v
       breaksw
     case "f":
       set GCNAME = 'FLY MITOCHONDRIAL'
       set GCKEY = f
       breaksw
     case "y":
       set GCNAME = 'YEAST MITOCHONDRIAL'
       set GCKEY = y
       breaksw
   endsw
   echo "Using Genetic Code: $GCNAME" >> MSGFILE
   if ("$GCKEY" != "") then
      echo u >> MATRIXPARAMFILE
      echo $GCKEY >> MATRIXPARAMFILE
   endif

   # Categorization method: same scheme. An empty key means
   # George-Hunt-Barker, for which nothing is queued ("G" and any unlisted
   # CATEGORIZATION value).
   set CATNAME = 'George-Hunt-Barker'
   set CATKEY = ""
   switch ($CATEGORIZATION)
     case "C":
       set CATNAME = 'Chemical'
       set CATKEY = c
       breaksw
     case "H":
       set CATNAME = 'Hall'
       set CATKEY = h
       breaksw
   endsw
   echo "Categories of amino acids: $CATNAME" >> MSGFILE
   if ("$CATKEY" != "") then
      echo a >> MATRIXPARAMFILE
      echo $CATKEY >> MATRIXPARAMFILE
   endif

endif

#----------------- generate resampled datasets, if specified  -----
# Choose resampling method

# Random seeds, odd, of the form 4n + 1
@ tempbseed = ( ( ( $BSEED / 4 ) * 4 ) + 1 )

# First pass: write the report line(s) specific to each method and note how
# seqboot.csh has to be called:
#   none     - no resampling was handled here
#   weights  - bootstrap / jackknife, last seqboot.csh arguments "yes 0 weights"
#   datasets - permutations, last seqboot.csh arguments "no 0 infile"
set RESAMPLE = none
switch ($METHOD)
  case "n":
    echo " " >> MSGFILE
    cp infile.temp infile
    breaksw
  case "b":
    echo RESAMPLING\: Bootstrap,  $REPLICATES REPLICATES, SEED\=$BSEED >>MSGFILE
    if ( $BLOCKSIZE > 1 ) echo 'Resampling in blocks of ' $BLOCKSIZE >> MSGFILE
    set RESAMPLE = weights
    breaksw
  case "d":
    echo RESAMPLING\: Delete-half Jacknifing, $REPLICATES REPLICATES, SEED\=$BSEED >>MSGFILE
    set RESAMPLE = weights
    breaksw
  case "ps":
    echo RESAMPLING\: Permute species for each character, $REPLICATES REPLICATES, SEED\=$BSEED >>MSGFILE
    set RESAMPLE = datasets
    breaksw
  case "po":
    echo RESAMPLING\: Permute character order, $REPLICATES REPLICATES, SEED\=$BSEED >>MSGFILE
    set RESAMPLE = datasets
    breaksw
  case "pw":
    echo RESAMPLING\: Permute within species, $REPLICATES REPLICATES, SEED\=$BSEED >>MSGFILE
    set RESAMPLE = datasets
    breaksw
endsw

# Second pass: run seqboot.csh and queue the multiple-data-set keystrokes.
# $METHOD is passed straight through as the seqboot.csh method code; at this
# point it can only be one of b, d, ps, po or pw.
if ($RESAMPLE == weights) then
    if ( $PERCENT < 100 ) echo 'Partial Resampling: ' $PERCENT  'percent of sites sampled' >> MSGFILE
    # The original data stay in place as infile.
    cp infile.temp infile
    seqboot.csh infile.temp s $tempbseed $METHOD $REPLICATES $PERCENT $BLOCKSIZE yes 0 weights
    echo m >> MATRIXPARAMFILE
    echo w >> MATRIXPARAMFILE
    echo $REPLICATES >> MATRIXPARAMFILE
else if ($RESAMPLE == datasets) then
    # Here infile is the last seqboot.csh argument instead of being a copy
    # of the original data.
    seqboot.csh infile.temp s $tempbseed $METHOD $REPLICATES $PERCENT $BLOCKSIZE no 0 infile
    echo m >> MATRIXPARAMFILE
    echo d >> MATRIXPARAMFILE
    echo $REPLICATES >> MATRIXPARAMFILE
endif

#accept current settings and do the analysis
echo y  >> MATRIXPARAMFILE

#----------------- Run PROTDIST   -----

# PROTDIST reads its answers from MATRIXPARAMFILE. Its outfile is then
# renamed to infile so that it becomes the input of the tree-building step.
nice protdist < MATRIXPARAMFILE
$MV_CMD outfile infile


echo '---------------------  CONSTRUCTING TREE(S) ---------------------' >> MSGFILE
echo '' >> MSGFILE

#----------------- generate keyboard input to send to distance tree program -----

# Choose the program (and variant) used to build the tree from the
# distance matrix. FITCH with no extra keystroke is the starting point, so
# "F" and any unlisted TCONMETH value need no case of their own.
set PROGRAM = fitch
switch ($TCONMETH)
  case "w": # weighbor, weighted Neighbor-Joining
    set PROGRAM = weighbor
    breaksw
  case "f": # FITCH, Minimum evolution
    echo d >> TREEPARAMFILE
    breaksw
  case "K": # KITSCH, Fitch-Margoliash method
    set PROGRAM = kitsch
    breaksw
  case "k": # KITSCH, Minimum evolution method
    set PROGRAM = kitsch
    echo d >> TREEPARAMFILE
    breaksw
  case "N": # NEIGHBOR, Neighbor-joining
    set PROGRAM = neighbor
    breaksw
  case "U": # NEIGHBOR with an extra "n" keystroke
    # NOTE(review): presumably "n" switches NEIGHBOR to UPGMA - confirm
    # against the NEIGHBOR menu.
    set PROGRAM = neighbor
    echo n >> TREEPARAMFILE
    breaksw
endsw

if ($PROGRAM == "weighbor") then
    # WEIGHBOR takes its settings from command-line options, so no
    # keystroke file is needed for it.
    echo 'Please cite:' >> MSGFILE
    echo 'WEIGHBOR - Weighted Neighbor Joining.'  >> MSGFILE
    echo 'William J. Bruno, Nicholas D. Socci, and Aaron L. Halpern'  >> MSGFILE
    echo 'Weighted Neighbor Joining: A Likelihood-Based Approach to'  >> MSGFILE
    echo 'Distance-Based Phylogeny Reconstruction,'   >> MSGFILE
    echo 'Mol. Biol. Evol. 17 (1): 189-197 (2000).' >> MSGFILE
    echo " " >> MSGFILE

    # Read the length of the sequence alignment: the second field on the
    # first line of the saved copy of the input file.
    set HEADER = `head -1 infile.temp`
    set SEQLENGTH = $HEADER[2]
    echo SEQLENGTH = $SEQLENGTH

    # Run the program; whatever the backquoted command prints (the timing
    # report) is saved in TIMEFILE and appended to the output below.
    echo `time nice +8 $PROGRAM -L $SEQLENGTH -b 20 -i infile -o outtree -v > $TERMOUT` > TIMEFILE
    $MV_CMD weighbor.out outfile
    echo Execution times on `hostname`\: `cat TIMEFILE` >> outfile

else
    #----------------- Build keystrokes for FITCH, KITSCH or NEIGHBOR -----

    # Multiple data sets: queue the keystrokes that match the resampling
    # method, and remember whether resampling is in effect for later on.
    set RESAMPLED = 0
    switch ($METHOD)
      case "b":
      case "d":
        set RESAMPLED = 1
        echo m >> TREEPARAMFILE
        echo w >> TREEPARAMFILE
        echo $REPLICATES >> TREEPARAMFILE
        breaksw
      case "ps":
      case "po":
      case "pw":
        set RESAMPLED = 1
        echo m >> TREEPARAMFILE
        echo d >> TREEPARAMFILE
        echo $REPLICATES >> TREEPARAMFILE
        breaksw
    endsw

    # Jumble - When multiple datasets are analyzed, the tree program
    # automatically jumbles and prompts for a random number seed for
    # jumbling. Otherwise, jumbling must be explicitly requested with "j".
    # (The earlier wording of this note named DNAPARS, apparently carried
    # over from another script.)

    # Random seed, odd, of the form 4n + 1
    @ tempjseed = ( ( ( $JSEED / 4 ) * 4 ) + 1 )

    if ($METHOD != "n") then
       # Every METHOD other than "n": only the seed and count are queued.
       echo JUMBLING SEQUENCE ORDER $NUMJUM ITERATIONS, SEED\=$tempjseed >> MSGFILE
       echo $tempjseed  >> TREEPARAMFILE
       echo $NUMJUM >> TREEPARAMFILE
    else if ($JUMBLE == J) then
       # METHOD "n": jumble only on request, and ask for it with "j".
       echo JUMBLING SEQUENCE ORDER $NUMJUM ITERATIONS, SEED\=$tempjseed >> MSGFILE
       echo  j >> TREEPARAMFILE
       echo $tempjseed  >> TREEPARAMFILE
       echo $NUMJUM >> TREEPARAMFILE
    endif

    # Subreplicates
    if ($SUBREP == "n") echo 's' >> TREEPARAMFILE

    # Global rearrangements
    if ($GLOBAL == "y") echo 'g' >> TREEPARAMFILE

    # Negative branch lengths
    if ($NEGBRANCH == "y") echo '-' >> TREEPARAMFILE

    # Outgroup: the first field on the first line of infile (by now the
    # PROTDIST output) is taken as the number of sequences. An outgroup
    # outside 2..NUMSEQ is reset to 1 and no keystrokes are queued for it.
    set MATRIXHEAD = `head -1 infile`
    set NUMSEQ = $MATRIXHEAD[1]
    if (($OUTGROUP <= 1) || ($OUTGROUP > $NUMSEQ)) then
       set OUTGROUP = 1
    else
       echo o >> TREEPARAMFILE
       echo $OUTGROUP >> TREEPARAMFILE
    endif

    # Should sequence data be printed?
    if ($PRINTDATA == y) echo 1 >> TREEPARAMFILE

    # When resampling, turn off printing trees to outfile
    if ($RESAMPLED) echo 3 >> TREEPARAMFILE

    #accept current settings and do the analysis
    echo y  >> TREEPARAMFILE

    #----------------- Run FITCH, KITSCH or NEIGHBOR   -----

    # Whatever the backquoted command prints (the timing report) is saved
    # in TIMEFILE and appended to the program's outfile.
    echo `time nice +8 $PROGRAM < TREEPARAMFILE > $TERMOUT` > TIMEFILE
    echo Execution times on `hostname`\: `cat TIMEFILE` >> outfile
endif

#----------- Return results to calling directory----------------
# When using resampling, filter the treefile through
# consense to generate an unrooted consensus tree.
# Results go to ../$OUTFILE and ../$TREEFILE, i.e. they are resolved
# against the directory the script was started in.
switch ($METHOD)
  case "b":
  case "d":
  case "ps":
  case "po":
  case "pw":
    echo " " >> outfile
    echo '-------------------------------------------' >> outfile
    consense.csh outtree e 1 $OUTGROUP n outfile.consense ../$TREEFILE
    cat MSGFILE outfile outfile.consense  > ../$OUTFILE
    echo '' >> ../$OUTFILE
    echo '' >> ../$OUTFILE
    # Closing notice telling the user that the tree is a consensus tree.
    foreach NOTE ('>>>> THIS TREEFILE IS A CONSENSUS TREE' \
                  '>>>> TO GENERATE BRANCH LENGTHS' \
                  '>>>> USE TREE FILE AS INPUT FOR DNAML OR OTHER PROGRAM' \
                  '>>>> USING THE USERTREE OPTION')
       echo "$NOTE" >> ../$OUTFILE
    end
    breaksw
  default:
    # No resampling: hand back the tree and the report as they are.
    $MV_CMD outtree ../$TREEFILE
    cat MSGFILE outfile > ../$OUTFILE
    breaksw
endsw

# Leave the scratch directory and remove it together with its contents.
cd ..
$RM_CMD -rf $TEMPDIR

echo $PROGRAM completed.





