#!/bin/csh
# FEATURES - a c-shell script (version  14 June 09)
# FEATURES is a front end for GETOB. FEATURES provides an interactive
# menu interface, as well as automating the process of resolving
# nested database expressions

# This script automates, as much as is possible, two tasks. First, getob 
# expects all of its input to come from files, as described in getob.doc.
# The parameter menu lets the user input some of the data directly at the 
# keyboard, for cases in which only one data element (eg. accession number)
# is needed. Otherwise, FEATURES prompts the user for a filename.
# The second thing that FEATURES does is to run getob and check to see
# whether there are unresolved expressions in the output. If there are,
# getob is called as many times as is necessary to resolve all expressions.

#####################################################################
# set parameter defaults
####################################################################
set mode = "interactive"
set NICELEVEL = '+10'
set jobid = $$
set featype = ""
set features = ""
set entrytype = "n"
set entries = ""
set dbtype = "g"
set database = ""
set where = "a"
set outfile  = ""
set resolve = ""
set acno = "OFF"
set needtosplit = "OFF"
set options   = ""

# RM_CMD - command to be used for removing files and directories
if (-e /usr/bin/rm) then
   set RM_CMD = /usr/bin/rm
else
   if (-e /bin/rm) then
      set RM_CMD = /bin/rm
   else
      set RM_CMD = rm
   endif
endif


#####################################################################
# Read options from the command line and set parameters,
# or prompt user for parameters at terminal.
####################################################################
  set numargs = $#argv

if ($numargs != 0) then
  #---------------------------------------------------
  #parameters given in command line
  set mode = command
  set files = ""

  #One argument form: a FEATURES expression is given as the input, returning ONLY
  #the resultant sequence to the standard output.
  if ($numargs == 1) then
     # -h : print usage message
     if ($1 == "-h") then
        echo "Usage:    features"
        echo "          features expression"
        echo "          features [-f featurekey | -F keyfile]"
        echo "                   [-n name     |-a accession    | -e expression |"
        echo "                    -N namefile |-A accfile      | -E expfile]"
        echo "                   [-u dataset  | -U dataset     | -g ]"
        exit
      else
	set resolve = "r"
	set features = ""
	set entries = $1
        #Make sure the expression begins with '@'
        if (`echo $entries |cut -c1-1` == @) then
        else
          set entries = "@"$entries
        endif

	echo "OBJECTS" > FEA.$jobid.inf #make dummy infile
	echo "SITES" >> FEA.$jobid.inf  #for getob
	echo $entries > FEA.$jobid.nam
        #accession number will be used for output filenames
	set base = `cut -c2-80 FEA.$jobid.nam | cut -f1 -d':'` 
	set acno = "ON"
     endif
  else
  #Multiple argument form: feature keys, entries, expressions, and databases are
  #specified on the command line. Message, sequence and expression files are written.
      set index = 1
      while ($index < $numargs)
	switch ($argv[$index])
	  case "-f":
	    @ index++
	    set features = $argv[$index]
	    set resolve = ""
	    echo "OBJECTS" > FEA.$jobid.inf
	    echo $features >> FEA.$jobid.inf 
	    echo "SITES" >> FEA.$jobid.inf
	    breaksw
	  case "-F":
	    @ index++
	    set features = $argv[$index]
	    set resolve = ""
	    set features = $argv[$index]
	    echo "OBJECTS" > FEA.$jobid.inf
	    cat $features >> FEA.$jobid.inf 
	    echo "SITES" >> FEA.$jobid.inf
	    breaksw
	  case "-n":
	    @ index++
	    set entries = $argv[$index]
	    set base = $entries  #entry name used for output file names
	    echo $entries > FEA.$jobid.nam
	    set acno = "OFF"
	    breaksw
	  case "-a":
	    @ index++
	    set entries = $argv[$index]
	    set base = $entries #accession number used for output file names
	    echo $entries > FEA.$jobid.nam
	    set acno = "ON"
	    breaksw
	  case "-e":
	    set resolve = "r"
	    set features = ""
	    @ index++
	    set entries = $argv[$index]

            #Make sure the expression begins with '@'
            if (`echo $entries |cut -c1-1` == @) then
            else
              set entries = "@"$entries
            endif

	    echo "OBJECTS" > FEA.$jobid.inf #make dummy infile
	    echo "SITES" >> FEA.$jobid.inf  #for getob
	    echo $entries > FEA.$jobid.nam
            #accession number will be used for output filenames
	    set base = `cut -c2-80 FEA.$jobid.nam |cut -f1 -d':'` 
	    set acno = "ON"
	    breaksw
	  case "-N":
	    @ index++
	    set entries = $argv[$index]
	    set base = $entries:t
	    set base = $base:r    #raw filename used for output filenames
	    cat $entries > FEA.$jobid.nam
	    set acno = "OFF"
	    breaksw
	  case "-A":
	    @ index++
	    set entries = $argv[$index]
	    set base = $entries:t
	    set base = $base:r    #raw filename used for output filenames
	    cat $entries > FEA.$jobid.nam
	    set acno = "ON"
	    breaksw
	  case "-E":
	    set resolve = "r"
	    set features = ""
	    @ index++
	    set entries = $argv[$index]
	    set base = $entries:t
	    set base = $base:r    #raw filename used for output filenames
	    cat $entries > FEA.$jobid.nam
	    echo "OBJECTS" > FEA.$jobid.inf #make dummy infile
	    echo "SITES" >> FEA.$jobid.inf  #for getob
	    set acno = "ON"
	    breaksw
	  case "-u":
	    set dbtype = u
	    @ index++
	    set database = $argv[$index]
	    set dbname = $database:r
	    set extension = $database:e
	    if  ($extension == gen) then 
		 #If user-defined database is one or more
		 #GenBank entries, it will have to be 
		 #split by splitdb for use by programs.
		set needtosplit = ON
	    else 
		set needtosplit = OFF
	    endif
	    set anofile = $dbname.ano
	    set seqfile = $dbname.wrp
	    set indfile = $dbname.ind
	    breaksw
	  case "-U":
	    set dbtype = U
	    @ index++
	    set database = $argv[$index]
	    set dbname = $database:r
            set base = $dbname
	    set extension = $database:e
	    if  ($extension == gen) then 
		 #If user-defined database is one or more
		 #GenBank entries, it will have to be 
		 #split by splitdb for use by programs.
		set needtosplit = ON
	    else 
		set needtosplit = OFF
	    endif
	    set anofile = $dbname.ano
	    set seqfile = $dbname.wrp
	    set indfile = $dbname.ind
	    breaksw
	  endsw
	  @ index++
      end # while

      #Make sure all necessary data has been entered
      if ($resolve != r && $features == "") then
           echo '>>> Features must be specified' 
           exit
         endif
        # csh subtlety: $entries must be quoted to allow for blanks in
        # the string
        if ("$entries" == "" && $dbtype != "U") then 
           echo '>>> Entries must be specified'
           exit
        endif

  endif
else 
  #---------------------------------------------------------------
  # Interactive parameter input
  set mode = interactive
  set complete = 0
  while ($complete == 0)
      #Display current parameter settings
      echo '___________________________________________________________________'
      echo '                     FEATURES - Version   28 Jan 05                '
      echo '    Please cite: Fristensky (1993) Nucl. Acids Res. 21:5997-6003'
      echo '___________________________________________________________________'
      echo 'Features: ' $features
      echo 'Entries:  ' $entries
      echo 'Dataset:  ' $database
      echo '___________________________________________________________________'
      echo '   Parameter              Description                      Value'
      echo '-------------------------------------------------------------------'
      echo '1).................... FEATURES TO EXTRACT ....................> '$featype
      echo '  f:Type a feature at the keyboard '
      echo '  F:Read a list of features from a file'
      echo '2)....................ENTRIES TO BE PROCESSED (choose one).....> '$entrytype
      echo '  Keyboard input - n:name     a:accession #     e:expression'
      echo '  File input     - N:name(s)  A:accession #(s)  E:expression(s)' 
      echo '3)....................WHERE TO GET IT .........................> '$dbtype
      echo '  u:Genbank dataset   g:complete GenBank database'
      echo '  U: same as u, but all entries'
      echo '4)....................WHERE TO SEND IT ........................> '$where
      echo '  s:Each feature to a separate file  a:All output to same file'
      echo '   ---------------------------------------------------------------'
      echo '   Type number of your choice or 0 to continue:'

      #Prompt for parameter to change
      set paramnum = $<
      switch ($paramnum)
        case 0:
          set complete = 1
          breaksw
        case 1:
          if ($resolve == r) then
             echo '>>> No features needed to resolve expressions.'
          else
            echo 'Enter one of {fF}:'
            set temp = $<
            if ($temp == f | $temp == F) then 
               set featype = $temp
               switch ($featype)
                 case f:
                   set resolve = ""
                   echo 'Enter feature:'
                   set features = $<
                   echo "OBJECTS" > FEA.$jobid.inf
                   echo $features >> FEA.$jobid.inf 
                   echo "SITES" >> FEA.$jobid.inf               
                   breaksw
                 case F:
                   set resolve = ""
                   echo 'Enter filename:'
                   set features = $<
                   cat $features > FEA.$jobid.inf
                   breaksw
                 endsw
            else
              echo '>>> Invalid choice'
            endif # f,F
          endif # resolve == r
          breaksw # 1
        case 2:
          echo 'Enter one of {naeNAE}:'
          set temp = $<
          if ($temp == n | $temp == a | $temp == e |\
              $temp == N | $temp == A | $temp == E) then 
             set entrytype = $temp
             switch ($entrytype)
               case n:
                 echo 'Enter name:'
                 set entries = $<
                 set base = $entries  #entry name used for output file names
                 echo $entries > FEA.$jobid.nam
                 set acno = "OFF"
                 breaksw
               case a:
                 echo 'Enter accession number:'
                 set entries = $<
                 set base = $entries #accession number used for
                                     # output file names
                 echo $entries > FEA.$jobid.nam
                 set acno = "ON"
                 breaksw
               case e:
                 set resolve = "r"
                 set features = ""
                 echo 'Enter expression:'
                 set entries = $<

                 #Make sure the expression begins with '@'
                 if (`echo $entries |cut -c1-1` == @) then
                 else
                   set entries = "@""$entries"
                 endif

                 echo "OBJECTS" > FEA.$jobid.inf #make dummy infile
                 echo "SITES" >> FEA.$jobid.inf  #for getob
                 echo $entries > FEA.$jobid.nam
                 #accession number will be used for output filenames
                 set base = `cut -c2-80 FEA.$jobid.nam |cut -f1 -d':'` 
                 set acno = "ON"
                 breaksw
               case N:
                 echo 'Enter filename:'
                 set entries = $<
                 set base = $entries:t
                 set base = $base:r    #raw filename used for output filenames
                 cat $entries > FEA.$jobid.nam
                 set acno = "OFF"
                 breaksw
               case A:
                 echo 'Enter filename:'
                 set entries = $<
                 set base = $entries:t
                 set base = $base:r    #raw filename used for output filenames
                 cat $entries > FEA.$jobid.nam
                 set acno = "ON"
                 breaksw
               case E:
                 set resolve = "r"
                 set features = ""
                 echo 'Enter filename:'
                 set entries = $<
                 set base = $entries:t
                 set base = $base:r    #raw filename used for output filenames
                 cat $entries > FEA.$jobid.nam
                 echo "OBJECTS" > FEA.$jobid.inf #make dummy infile
                 echo "SITES" >> FEA.$jobid.inf  #for getob
                 set acno = "ON"
                 breaksw
               endsw
          else 
            echo '>>> Invalid choice'
          endif #naeNAE
          breaksw
        case 3:
          echo 'Enter one of {uUg}:'
          set temp = $<
          switch ($temp)
             case u:
               set dbtype = u
               echo 'Enter filename:'
               set database = $<
               set dbname = $database:r
               set extension = $database:e
               if  ($extension == gen) then
                   #If user-defined database is one or more
                   #GenBank entries, it will have to be
                   #split by splitdb for use by programs.
                   set needtosplit = ON
               else
                   set needtosplit = OFF
               endif
               set anofile = $dbname.ano
               set seqfile = $dbname.wrp
               set indfile = $dbname.ind
               breaksw
             case U:
               set dbtype = U
               echo 'Enter filename:'
               set database = $<
               set dbname = $database:r
               set base = $dbname
               set extension = $database:e
               if  ($extension == gen) then
                   #If user-defined database is one or more
                   #GenBank entries, it will have to be
                   #split by splitdb for use by programs.
                   set needtosplit = ON
               else
                   set needtosplit = OFF
               endif
               set anofile = $dbname.ano
               set seqfile = $dbname.wrp
               set indfile = $dbname.ind
               breaksw
             case g:
               set dbtype = g
               breaksw
             default:
               echo '>>> Invalid choice'
               breaksw
             endsw
           breaksw
        case 4:
          echo 'Enter one of {sa}:'
          set temp = $<
          if ($temp == s | $temp == a) then 
             set where = $temp
          else
            echo '>>> Invalid choice'
          endif
          breaksw
        endsw

      #Make sure all necessary data has been entered
      if ($complete == 1) then
         if ($resolve != r && $features == "") then
           echo '>>> Features must be specified (option 1)' 
           set complete = 0
         endif
        # csh subtlety: $entries must be quoted to allow for blanks in
        # the string
        if ("$entries" == "" && $dbtype != "U") then 
           echo '>>> Entries must be specified (option 2)'
           set complete = 0
        endif
      endif
    end #while 
  endif # argv 


  #set options
  if ($resolve == r) then 
     set options = "$options -r"
  endif
  if ($acno == ON) then
     set options = "$options -c"
  endif 
  if ($where == s) then 
     set options = "$options -f"
  endif

#####################################################################
#  For a given database, extract the specified features.
####################################################################

  if ($mode == interactive) then
    set termout = /dev/tty
    echo 'Messages will be written to '$base.msg
    if ($where == a) then
        echo 'Final sequence output will be written to ' $base.out
     else
        echo 'Output for each entry will be written to individual files.'
        echo 'Filenames will be in the form:    name.obj'
     endif
     if ($resolve != r) then
        echo 'Expressions will be written to '$base.exp
     endif
  else
    set termout = /dev/null
  endif # $mode == interactive

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  # The first pass will usually resolve all feature expressions. 
  #Retrieve GenBank entries, if necessary
  if ($dbtype == g) then
     if ($resolve == r) then
        #Unresolved expressions will begin with '@' in first column.
        #Find lines with unresolved expressions and extract the
        #accession numbers for use by fetch.
        egrep -e ^@ FEA.$jobid.nam > UNRESOLVED.$jobid.fea
        # first, extract ACCESSION number from between @ and :
        # If it is in the form ACCESSION.VERSION, truncate the version field
        cut -c2-80 UNRESOLVED.$jobid.fea |cut -f1 -d':' |cut -f1 -d'.' > UNRESOLVED.$jobid.nam
        set fetchfile = UNRESOLVED.$jobid.nam
     else
        set fetchfile = FEA.$jobid.nam
     endif
     set anofile = $base.ano
     set seqfile = $base.wrp
     set indfile = $base.ind
     fetch $fetchfile FEA.$jobid.gen > $termout
     if (-e FEA.$jobid.gen) then
        nice $NICELEVEL splitdb FEA.$jobid.gen $anofile $seqfile $indfile
        $RM_CMD -f FEA.$jobid.gen
        else
          exit
        endif
     endif

  #Extract features from the entries
if ($mode == interactive)  echo 'Extracting features...'
  if ($needtosplit == ON) then
     nice $NICELEVEL splitdb $database $anofile $seqfile $indfile
     endif
  if ($dbtype == 'U') cp $indfile FEA.$jobid.nam
  
  # quick test to make sure that FEA.$jobid.nam and $anofile aren't empty
  @ NUMLINES = (`wc -l FEA.$jobid.nam | sed -e "s/^[ ]*//" | cut -f1 -d" "`) 
  grep 'LOCUS' $anofile > FEA.$jobid.test
  if ("$NUMLINES" != 0 && ! -z FEA.$jobid.test) then
  
     if ($where == a) then
	if ($numargs == 1) then
           nice $NICELEVEL getob $options FEA.$jobid.inf FEA.$jobid.nam $anofile $seqfile\
        	  $indfile /dev/null FEA.$jobid.out /dev/null  > $base.$jobid.term

	else
	  nice $NICELEVEL getob $options FEA.$jobid.inf FEA.$jobid.nam $anofile $seqfile $indfile\
        	  $base.msg FEA.$jobid.out $base.exp  > $base.$jobid.term
	endif
	else
	  nice $NICELEVEL getob $options FEA.$jobid.inf FEA.$jobid.nam $anofile $seqfile $indfile\
        	  $base.msg  $base.exp  > $base.$jobid.term
     endif
  endif
  
  # Clean up a bit. Database files could be huge!
  if ($dbtype == g | $needtosplit == ON) then 
     $RM_CMD -f $anofile $seqfile $indfile
     endif

  #Pull out all lines containing indirect references
  # RESTORE THESE LINES, AND DELETE THE SUBSEQUENT 5 LINES,
  # ONCE WE GET A WORKING EQUIVALENT OT NFETCH.
#  if ($where == a) then
#     egrep -e ^@ FEA.$jobid.out > UNRESOLVED.$jobid.fea
#  else
#     egrep -e ^@  *.obj > UNRESOLVED.$jobid.fea
#  endif
  if ($where == a) then
     egrep -v -e ^@ FEA.$jobid.out > temp.$jobid.fea
  else
     egrep -v -e ^@  *.obj > temp.$jobid.fea
  endif
  cat  temp.$jobid.fea > FEA.$jobid.out

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
  # This part resolves any unresolved expressions that can be resolved.
  # Only in very rare cases will this loop execute more than once.

  # Prevent loop from executing by generating a zero length file.
  # We'll have to leave these features unresolved until we get
  # a program to retrieve GenBank entries across the network.

#  while !(-z UNRESOLVED.$jobid.fea) #is not empty
  while ( 1 == 0) # never true
    if ($where == s) then
       echo ">>> Can't resolve references sent to separate files."
       echo '>>> Re-run FEATURES separately for each unresolved reference'
       echo ">>> See $base.msg to find unresolved references."
       exit 
    else
      if ($mode == interactive) echo 'Resolving Feature expression(s)...'
      #extract accession numbers to be retrieved
      # first, extract ACCESSION number from between @ and :
      # If it is in the form ACCESSION.VERSION, truncate the version field
      cut -c2-80 UNRESOLVED.$jobid.fea | cut -f1 -d':' | cut -f1 -d'.' > FEA.$jobid.nam
 
      #retrieve the sequences into a new file, and create
      #a database subset to be used by getob. If $GB is set, then 
      #entries will be fetched from the online GenBank database.
      #Otherwise, features will attempt to get entries from the
      #user-generated database subset.
      if (${?GB} | ${?XYLEM_RHOST}) then
         set fetchoptions = ""
      else 
         set fetchoptions = "-G $database"
      endif
#      fetch $fetchoptions FEA.$jobid.nam FEA.$jobid.gen >$termout
#      lfetch $fetchoptions FEA.$jobid.nam FEA.$jobid.gen >$termout
      # At present we don't have a program to retrieve GenBank entries 
      # from a remote database, so we just create a dummy output file,
      # This means that features which reference other entries will not
      # be resolved.
      echo '' > FEA.$jobid.gen     
      # test for empty GenBank file
      #grep 'LOCUS' FEA.$jobid.gen > FEA.$jobid.test
      #if !(-z FEA.$jobid.test) then
      @ COUNT = `grep -c 'LOCUS' FEA.$jobid.gen`
      echo 'COUNT: '$COUNT
      if ($COUNT > 0) then
      
	  splitdb FEA.$jobid.gen FEA.$jobid.ano FEA.$jobid.wrp FEA.$jobid.ind

	  #run getob again to resolve indirect references
	  echo 'Extracting features...' > $termout
	  mv FEA.$jobid.out UNRESOLVED.$jobid.out
	  if ($numargs == 1) then
            nice $NICELEVEL getob -r -c FEA.$jobid.inf UNRESOLVED.$jobid.out FEA.$jobid.ano\
                      FEA.$jobid.wrp FEA.$jobid.ind /dev/null FEA.$jobid.out  >>\
                	$base.$jobid.term

	  else
            nice $NICELEVEL getob -r -c FEA.$jobid.inf UNRESOLVED.$jobid.out FEA.$jobid.ano\
                	FEA.$jobid.wrp FEA.$jobid.ind FEA.$jobid.msg FEA.$jobid.out\
                	>> $base.$jobid.term
            cat FEA.$jobid.msg >> $base.msg 
	  endif
      else
          echo "" > FEA.$jobid.out
      endif
      
      #Pull out all lines containing indirect references 
      #ie. lines beginning with '@' 
      egrep -e ^@ FEA.$jobid.out > UNRESOLVED.$jobid.fea
     endif
     end # while

 if ($where == a) then
    #One argument form, command mode
    #Sequence output only goes to standard output.
    if ($numargs == 1) then
      cat FEA.$jobid.out
    #Multi-argument form or interactive mode
    else
      cat FEA.$jobid.out > $base.out
    endif
  endif

#####################################################################
#  Clean up.
####################################################################
$RM_CMD -f FEA.$jobid.* UNRESOLVED.$jobid.* *.$jobid.term
