#!/bin/csh

#Version  6/9/2008
# Run restml as a command
#Synopsis: restml.csh infile utree ufn method bseed replicates percent\
#             allsites speedy global sitenum sitelen outgroup jumble jseed numjum\
#             termout printdata outfile treefile

#Convert arguments to variables
#
# All 20 positional arguments are required. Stop right away if any are
# missing: without this check OUTFILE/TREEFILE would be empty and the
# results would be lost only after the whole analysis had been run.
if ($#argv < 20) then
   echo "restml.csh: expected 20 arguments, got $#argv"
   echo "Usage: restml.csh infile utree ufn method bseed replicates percent"
   echo "          allsites speedy global sitenum sitelen outgroup jumble jseed numjum"
   echo "          termout printdata outfile treefile"
   exit 1
endif

set INFILE        = $1     # data file; copied into the temporary directory
set UTREE         = $2     # 'y' = evaluate the user tree(s) found in UFN
set UFN           = $3     # user tree file (only read when UTREE is 'y')
set METHOD        = $4     # resampling: n (none), b, d, ps, po or pw
set BSEED         = $5     # resampling seed (adjusted to the form 4n + 1)
set REPLICATES    = $6     # number of resampled datasets
set PERCENT       = $7     # percent of sites sampled; passed to seqboot.csh
set ALLSITES      = $8     # 'y' = send 'a' to restml (all sites detected)
set SPEEDY        = $9     # 'n' = send 's' to restml (thorough analysis)
set GLOBAL        = $10    # 'y' = send 'g' to restml (global rearrangements)
set SITENUM       = $11    # number appended to each dataset header line
set SITELEN       = $12    # site length; values outside 1..50 become 6
set OUTGROUP      = $13    # outgroup species number (1 if out of range)
set JUMBLE        = $14    # 'J' = jumble input order when not resampling
set JSEED         = $15    # jumble seed (adjusted to the form 4n + 1)
set NUMJUM        = $16    # number of jumble iterations
set TERMOUT       = $17    # file that receives restml's terminal output
set PRINTDATA     = $18    # 'y' = send '1' to restml (print the data)
set OUTFILE       = $19    # report file, written under the starting directory
set TREEFILE      = $20    # tree file, written under the starting directory


# RM_CMD - command to be used for removing files and directories.
# Start with plain "rm" (found through the search path) and override it
# with an absolute path when one exists. /usr/bin/rm is tested last so
# that it wins over /bin/rm when both are present.
set RM_CMD = rm
if (-e /bin/rm) set RM_CMD = /bin/rm
if (-e /usr/bin/rm) set RM_CMD = /usr/bin/rm


# Make a temporary directory in which to run the program.
# The process id ($$) keeps concurrent runs in the same directory apart.
set TEMPDIR = RESTML.$$
mkdir $TEMPDIR
if ($status != 0) then
   echo "restml.csh: cannot create temporary directory $TEMPDIR"
   exit 1
endif

# Remember where we started.
# Use the shell's own $cwd rather than the PWD environment variable:
# PWD is inherited from whatever launched this script and may be
# missing or out of date, while csh always keeps cwd current.
set STARTDIR = $cwd
echo 'Starting Directory: ' $STARTDIR


# Work on a private copy of the data file
cp $INFILE $TEMPDIR/infile.temp

if ($UTREE == 'y') then
   # Turn off bootstrapping when evaluating a user tree.
   # If bootstrapping is on, an empty file may be generated
   # which could cause drawtree or drawgram to loop infinitely
   set METHOD = n

   # Make sure that treefile begins with number of trees on first
   # line of file. If first line in file has parentheses, the
   # number must be added.
   # We use the cut command to truncate the line, because a tree with
   # lots of branches can create a line that is too long for the shell to handle.
   #
   # grep -c is used instead of comparing grep's output with "": it
   # always prints exactly one number, so the test below stays a valid
   # expression even when the first line of the tree contains blanks.
   set HAS_PAREN = `head -1 $UFN | cut -c1-50 | grep -c '('`
   if ($HAS_PAREN > 0) then
      # One tree per line containing ';' is assumed when counting
      grep ';' $UFN | wc -l >> $TEMPDIR/intree
   endif
   cat $UFN >> $TEMPDIR/intree
endif

# Everything from here on runs inside the temporary directory
cd $TEMPDIR

# Start the message file with a banner and a blank line
echo "---------------------  RESTML  ---------------------" >> MSGFILE
echo "" >> MSGFILE

# When a user tree is evaluated, 'u' is the first keystroke queued
# in TREEPARAMFILE
if ("$UTREE" == "y") then
   echo 'u' >> TREEPARAMFILE
endif


#----------------- generate resampled datasets, if specified  -----
# Choose resampling method
#
# METHOD is one of:
#    n   no resampling; the data file is used as it is
#    b   bootstrap
#    d   delete-half jackknife
#    ps  permute species for each character
#    po  permute character order
#    pw  permute within species
# Any other value is rejected with an error message.

# NOTE!!: Unlike most other programs in PHYLIP, RESTML can NOT use weights
# when analyzing multiple datasets. Therefore, seqboot.csh must generate bootstrapped
# datasets, rather than weights.
# Also note that the 'r' option to seqboot.csh tells it that the datafile
# contains an extra number, telling the number of  restriction enzymes used.
# NOTE(review): the call below passes 'm', not 'r', as the second
# argument to seqboot.csh - confirm against seqboot.csh which is intended.

# Random seeds, odd, of the form 4n + 1
@ tempbseed = ( ( ( $BSEED / 4 ) * 4 ) + 1 )

# Pick the text that describes the resampling method in the message file.
# For "n" there is nothing to resample, so the data file is used directly.
set RESAMPLE_DESC = ""
switch ($METHOD)
  case "n":
    echo " " >> MSGFILE
    cp infile.temp infile
    breaksw
  case "b":
    set RESAMPLE_DESC = "Bootstrap,"
    breaksw
  case "d":
    set RESAMPLE_DESC = "Delete-half Jacknifing,"
    breaksw
  case "ps":
    set RESAMPLE_DESC = "Permute species for each character,"
    breaksw
  case "po":
    set RESAMPLE_DESC = "Permute character order,"
    breaksw
  case "pw":
    set RESAMPLE_DESC = "Permute within species,"
    breaksw
  default:
    # Without this case no infile would be produced and the script
    # would fail further down with an unrelated-looking error, leaving
    # the temporary directory behind.
    echo "restml.csh: unknown resampling method '$METHOD'"
    cd ..
    $RM_CMD -rf $TEMPDIR
    exit 1
endsw

# All resampling methods share the same steps; only the description
# and the method code handed to seqboot.csh differ.
if ($METHOD != "n") then
   # The message shows the seed as given; seqboot.csh receives the
   # adjusted value in tempbseed.
   echo RESAMPLING\: $RESAMPLE_DESC $REPLICATES REPLICATES, SEED\=$BSEED >>MSGFILE

   # Partial resampling is only reported for bootstrap and jackknife
   if (($METHOD == "b") || ($METHOD == "d")) then
      if ( $PERCENT < 100 ) echo 'Partial Resampling: ' $PERCENT  'percent of sites sampled' >> MSGFILE
   endif

   seqboot.csh infile.temp m $tempbseed $METHOD $REPLICATES $PERCENT 1 no 0 infile

   # Tell restml that infile holds multiple datasets, and how many
   echo m >> TREEPARAMFILE
   echo $REPLICATES >> TREEPARAMFILE
endif

# Jumble - When multiple datasets are analyzed, RESTML automatically
# jumbles, and prompts for a random number seed for jumbling. Otherwise,
# jumbling must be explicitly set.
# Nothing is queued here when a user tree is being evaluated.

if ($UTREE != 'y') then
   # Random seed, odd, of the form 4n + 1
   @ tempjseed = ( ( ( $JSEED / 4 ) * 4 ) + 1 )

   # Jumble keystrokes are needed whenever resampling is on, and
   # otherwise only when jumbling was asked for with JUMBLE = J.
   set WANT_JUMBLE = 0
   if ($METHOD != "n") then
      set WANT_JUMBLE = 1
   else if ($JUMBLE == J) then
      set WANT_JUMBLE = 1
   endif

   if ($WANT_JUMBLE == 1) then
      echo JUMBLING SEQUENCE ORDER $NUMJUM ITERATIONS, SEED\=$tempjseed >> MSGFILE
      echo j >> TREEPARAMFILE
      echo $tempjseed >> TREEPARAMFILE
      echo $NUMJUM >> TREEPARAMFILE
   endif
endif



#----------------- generate keyboard input to send to restml program -----
# Each echo into TREEPARAMFILE queues one line of keyboard input; the
# matching echo into MSGFILE records the choice in the report.

# Input file is not interleaved
echo 'i' >> TREEPARAMFILE

# All sites are detected (nothing is queued or reported otherwise)
if ($ALLSITES == 'y') then
   echo 'a' >> TREEPARAMFILE
   echo 'Assuming all sites detected' >> MSGFILE
endif

# Speedier but rougher analysis: 's' is queued only when SPEEDY is 'n'
if ($SPEEDY != 'n') then
   echo 'Doing speedy but rougher analysis' >> MSGFILE
else
   echo 's' >> TREEPARAMFILE
   echo 'Doing thorough analysis' >> MSGFILE
endif

# Global rearrangements
if ($GLOBAL != 'y') then
   echo 'No global rearrangements done' >> MSGFILE
else
   echo 'g' >> TREEPARAMFILE
   echo 'Doing global rearrangements' >> MSGFILE
endif


# Site length: values outside 1..50 are replaced by 6
if (($SITELEN < 1) || ($SITELEN > 50)) set SITELEN = 6
echo 'l' >> TREEPARAMFILE
echo $SITELEN >> TREEPARAMFILE


# Outgroup
# The first number on the first line of infile is taken as the number
# of sequences. An outgroup that is not in the range 2..NUMSEQ is reset
# to 1 and no outgroup keystrokes are queued.
set HEADER_FIELDS = `head -1 infile`
set NUMSEQ = $HEADER_FIELDS[1]
if (($OUTGROUP <= 1) || ($OUTGROUP > $NUMSEQ)) then
   set OUTGROUP = 1
else
   echo o >> TREEPARAMFILE
   echo $OUTGROUP >> TREEPARAMFILE
endif


# Should sequence data be printed?
if ($PRINTDATA == y) then
   echo 1 >> TREEPARAMFILE
endif

# When resampling, turn off printing trees to outfile
switch ($METHOD)
  case b:
  case d:
  case ps:
  case po:
  case pw:
    echo 3 >> TREEPARAMFILE
    breaksw
endsw

# Accept current settings and do the analysis
echo y >> TREEPARAMFILE


#----------------- Run RESTML  -----

# RESTML expects a third number to appear on the first line of infile,
# indicating the number of restriction enzymes used. This has to be added
# to the first line of EACH dataset, when there are multiple datasets in
# infile.
# The commented-out lines below are an earlier approach that rewrote only
# the first line of the file; they are kept for reference.
#set FIRSTLINE = `head -1 infile`
#set NUMSEQ = $FIRSTLINE[1]
#echo $FIRSTLINE $SITENUM > infile
#tail +2 infile.bak >> infile

# Append SITENUM after the first two numbers of every line that starts
# with one or more blanks followed by two integers.
# NOTE(review): a header line with no leading blank is not matched by this
# pattern and is left unchanged - confirm that input headers always start
# with a blank.
mv infile infile.bak
sed -e "s/^  *[0-9][0-9]*  *[0-9][0-9]*/& $SITENUM/" < infile.bak > infile
$RM_CMD infile.bak

# Run restml at reduced priority (nice +8), feeding it the queued
# keystrokes and sending its standard output to $TERMOUT. Whatever the
# backquoted command itself prints is saved in TIMEFILE - presumably the
# summary produced by the shell's time command; TODO confirm.
echo `time nice +8 restml < TREEPARAMFILE > $TERMOUT` > TIMEFILE
# Add the host name and the saved timing text to outfile
# (presumably the report written by restml - verify).
echo Execution times on `hostname`\: `cat TIMEFILE` >> outfile

#----------- Return results to calling directory----------------
# Without resampling, the tree file and the report are handed back as
# they are. With resampling, the treefile is filtered through consense
# to generate an unrooted consensus tree.
if (($METHOD != 'b') && ($METHOD != 'd') && ($METHOD != 'ps') && ($METHOD != 'po') && ($METHOD != 'pw')) then
   mv outtree $STARTDIR/$TREEFILE
   cat MSGFILE outfile > $STARTDIR/$OUTFILE
else
   # Separator between the restml output and the consensus output
   echo " " >> outfile
   echo '-------------------------------------------' >> outfile

   # "n" is passed in the position of the rooted-tree argument
   consense.csh outtree e 1 $OUTGROUP n outfile.consense constree
   cat MSGFILE outfile outfile.consense > $STARTDIR/$OUTFILE

   # Save the raw trees next to the consensus tree
   mv outtree $STARTDIR/$TREEFILE.alltrees
   mv constree $STARTDIR/$TREEFILE

   # Tell the reader what kind of tree file was produced
   echo '' >> $STARTDIR/$OUTFILE
   echo '' >> $STARTDIR/$OUTFILE
   echo '>>>> THIS TREEFILE IS A CONSENSUS TREE' >> $STARTDIR/$OUTFILE
   echo '>>>> TO GENERATE BRANCH LENGTHS' >> $STARTDIR/$OUTFILE
   echo '>>>> USE TREE FILE AS INPUT FOR DNAML OR OTHER PROGRAM' >> $STARTDIR/$OUTFILE
   echo '>>>> USING THE USERTREE OPTION' >> $STARTDIR/$OUTFILE
endif

# Leave the temporary directory and remove it
cd ..
$RM_CMD -rf $TEMPDIR

echo RESTML completed.





