#!/bin/csh

#Version 5/23/2008
# Run fasta programs
#Synopsis: fastaout.csh -m # outfile [outname]

# Process id of this shell; it is embedded in the names of the working
# files created below so that concurrent runs do not collide.
set PID = $$

# RM_CMD - command to be used for removing files and directories.
# An absolute path is preferred when one exists; otherwise fall back to
# a bare "rm" and let the shell resolve it at run time.
if (-e /usr/bin/rm) then
   set RM_CMD = /usr/bin/rm
else if (-e /bin/rm) then
   set RM_CMD = /bin/rm
else
   set RM_CMD = rm
endif

# Validate the command line and set global variables.
#   $2  number following -m; the value 3 routes the output to GDE
#   $3  file holding the output to be processed (OUTFILE)
#   $4  optional base name; when given, results are saved as
#       <outname>.names and <outname>.fasta instead of being opened
#       in a text editor
#
# Variables set here:
#   DESTINATION  one of GDE, TEXTEDIT or FILES
#   GDEOUT       (GDE only) name of the file handed to gde
#   OUTNAME      (FILES only) base name for the saved files
#   OUTFILE      input file name

# Without an outfile every branch below fails, and the GDE branch would
# block forever because "wc -l" with no file operand reads stdin.
if ($#argv < 3) then
   echo "Usage: fastaout.csh -m # outfile [outname]"
   exit 1
endif

# Arguments are quoted so that values containing blanks or glob
# characters are treated as a single word.
if ("$2" == 3) then
   set DESTINATION = GDE
   set GDEOUT = $PID.gen
else if ("$4" == "") then
   set DESTINATION = TEXTEDIT
else
   set DESTINATION = FILES
   set OUTNAME = "$4"
endif

set OUTFILE = "$3"

# Parse out the accession numbers or names of hits
# and write to name.$PID. 

# Normally, Pearson/FASTA format
# files contain a simple name in the form ">name". FASTA
# programs put the name at the beginning of lines
# of the form "name - rest of the line"
# GenPept files are a special case. They have complex
# name lines, with various ID's. The GenBank accession
# number has to be parsed out of these lines. The lines look like
# >gi|5881102|gb|AAD55053.1|AF170915_1 (AF170915) green fluorescent protein [Expression vector FRMwg]
# FASTY makes matters worse by truncating the line to make room for
# statistics, which sometimes cuts off the accession # that's in
# parentheses. So we have to be able to deal with two types of
# lines, one with the ACCESSION number in parentheses, the other
# in the form "|AF170915_1"



# Deliver the results according to DESTINATION:
#   TEXTEDIT - open the list of hit names and the full output in a text editor
#   FILES    - save the hit names to $OUTNAME.names and the output to
#              $OUTNAME.fasta
#   GDE      - convert the pairwise alignment to GenBank format and open
#              it in gde
#
# Hit names are taken as the first blank-delimited field of every line
# containing " - " (the "name - rest of the line" form), skipping lines
# that start with ">>" or "#".  Plain grep with simple patterns is used:
# the former egrep pattern '^* - *' began with a repetition operator,
# which is undefined in extended regular expressions, and egrep itself
# is obsolescent.
switch ($DESTINATION)
  case "TEXTEDIT":
       # Special code for text editors used by GDE and scripts called by GDE
       # Nedit crashes in some Linux systems due to libraries set in BIRCHLIBS.
       # nedit_wrapper unsets LD_LIBRARY_PATH before calling nedit.
       # gedit opens all files in a single window. gedit_wrapper.sh forces
       # gedit to open each file in different window.
       # choose_edit_wrapper.sh returns the name of the wrapper to use
       # for each editor, or just returns GDE_TEXTEDIT if there is no
       # wrapper.
       setenv GDE_TEXTEDIT `choose_edit_wrapper.sh`
       mv "$OUTFILE" outfile.$PID
       # extract names of hits into a file and open the file in text editor
       grep ' - ' outfile.$PID | grep -v '^>>' | grep -v '^#' |\
            cut -d" " -f1 > name.$PID
       # -z is true for a zero-length file: only open the name list when
       # at least one hit was found, otherwise just discard it.
       if (! -z name.$PID) then
          # The working file is removed once the editor exits.
          ($GDE_TEXTEDIT name.$PID -geometry 15x40; $RM_CMD name.$PID) &
       else
          $RM_CMD name.$PID
       endif
       # read outfile into texteditor; removed once the editor exits
       ($GDE_TEXTEDIT outfile.$PID; $RM_CMD outfile.$PID) &
       breaksw
  case "FILES":
       # extract names of hits into a file
       grep ' - ' "$OUTFILE" | grep -v '^>>' | grep -v '^#' |\
               cut -d" " -f1 > "${OUTNAME}.names"
       mv "$OUTFILE" "${OUTNAME}.fasta"
       breaksw
  case "GDE":
       # We can no longer use tail +n because the syntax is no longer consistent
       # between Unix and Linux, so count the lines and use tail -n instead.
       @ NUMLINES = (`wc -l "$OUTFILE" | sed -e "s/^[ ]*//" | cut -f1 -d" "`)

       # store sequence names: lines 3 and 4 of the output each begin with
       # the name of one of the two aligned sequences
       head -4 "$OUTFILE" | tail -2 > namelines.$PID
       set NAME1 = `head -1 namelines.$PID |cut -f1 -d ' '`
       set NAME2 = `tail -1 namelines.$PID |cut -f1 -d ' '`

       # remove leading and trailing lines from outfile and convert periods
       # to dashes, to represent gaps
       # NOTE(review): assumes the output has more than 7 lines; a shorter
       # file makes TAILLINES zero or negative - confirm callers never
       # pass such a file.
       @ TAILLINES =  ( $NUMLINES - 7 )
       tail -$TAILLINES "$OUTFILE" | grep -v Elapsed | sed "s/[.]/-/g" > seqfile.$PID

       # Build a two-record ">name" file: each header is followed by the
       # corresponding sequence (item 1, then item 2) pulled out of
       # seqfile by readseq.  Names are quoted so that characters such as
       # * ? [ in a name are not treated as filename patterns.
       echo ">$NAME1" > wrpfile.$PID
       readseq  -i1 -fPlain -pipe seqfile.$PID | cat >> wrpfile.$PID

       echo ">$NAME2" >> wrpfile.$PID
       readseq  -i2 -fPlain -pipe seqfile.$PID | cat >> wrpfile.$PID

       # re-format as GenBank file for GDE input
       readseq  -a -f2 -o=$GDEOUT wrpfile.$PID
       # $GDEOUT is removed once gde exits
       (gde $GDEOUT; $RM_CMD $GDEOUT)&

       # Clean up the intermediate files.  $GDEOUT is named $PID.gen, so
       # it does not match this pattern and stays available to gde.
       $RM_CMD *.$PID
       breaksw
endsw



