#!/bin/sh
# xylem_shuffle.sh                  August  27,  2009
# This is a front end for shuffle. It converts the sequences in INFILE
# from a GDE flat file to FASTA format, runs shuffle on them, appends
# "-rand" to each sequence name, and writes the result to OUTFILE as a
# GDE flat file using the same flag character as INFILE.
#
# Usage: xylem_shuffle.sh INFILE SEED WINDOW OVERLAP OUTFILE

#echo 'Running xylem_shuffle.sh'

# Command-line arguments (all positional):
INFILE="${1}"     # sequences to shuffle (read as a GDE flat file)
SEED="${2}"       # passed to shuffle as -s
WINDOW="${3}"     # passed to shuffle as -w
OVERLAP="${4}"    # passed to shuffle as -o
OUTFILE="${5}"    # where the shuffled sequences are written

# The process ID is used as the prefix of the temporary file names.
JOBID="$$"


# RM_CMD - command to be used for removing files and directories.
# An absolute path is preferred (/usr/bin/rm first, then /bin/rm);
# if neither file exists, fall back to whatever "rm" is found on PATH.
if [ -f /usr/bin/rm ]; then
   RM_CMD=/usr/bin/rm
elif [ -f /bin/rm ]; then
   RM_CMD=/bin/rm
else
   RM_CMD=rm
fi

# SFLAG is the first character of the first line in the file.
# # = DNA, % = protein, " = text

SFLAG=`head -n 1 "$INFILE" | cut -c1`

# Remove the intermediate files by name whenever the script exits,
# including after an interrupt, so that a failed run leaves nothing
# behind and unrelated files matching $JOBID.* are never touched.
cleanup () {
   "${RM_CMD:-rm}" -f "$JOBID.infile" "$JOBID.tmp1" "$JOBID.tmp2" "$JOBID.tmp3"
}
trap cleanup 0
trap 'exit 1' 1 2 15

# Convert from a GDE flatfile to a FASTA file
sed "s/^[#%]/>/" < "$INFILE" > "$JOBID.infile"

# Run shuffle, then drop the message lines at the top of its output,
# which might confuse programs that read fasta files.
# NOTE(review): btail.sh is assumed to copy its input starting at the
# given line number (3), i.e. to drop the first 2 lines - confirm.
shuffle -s"$SEED" -w"$WINDOW" -o"$OVERLAP" < "$JOBID.infile" > "$JOBID.tmp1"
btail.sh 3 "$JOBID.tmp1" "$JOBID.tmp2"

# Truncate the name lines after the first blank and
# add "-rand" to each name to indicate that the sequences
# have been randomized. (cut is applied to every line, so this
# relies on the sequence lines containing no blanks.)
cut -f1 -d" " < "$JOBID.tmp2" | sed 's/^>.*/&-rand/' > "$JOBID.tmp3"

# Convert the output into GDE flat file format, using the
# flag characters for either DNA or protein
sed "s/^>/$SFLAG/" < "$JOBID.tmp3" > "$OUTFILE"

# The temporary files are removed by the exit trap set above.

 
