#!/bin/sh

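# usage: csv2phyl.sh csvfile   {output goes to csvfile with .csv replaced by .phyl}
#        csv2phyl.sh           {reads stdin, for piping output to another command}
# The name field (i.e. the first field) of each line of the .csv file is
# held in the temporary file $JOBID.NAMEFILE1; the remaining fields are
# treated as the data.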
# Allow '>' redirections to overwrite existing files. ("unset noclobber"
# is csh syntax; in sh it merely unsets a variable of that name and does
# not touch the shell option.) The subshell probe keeps a shell that does
# not know the -C option from aborting the script.
(set +C) 2>/dev/null && set +C

# The process ID of this shell is used as a prefix that makes the names of
# the temporary files unique to this run.
JOBID=$$

 # RM_CMD - command to be used for removing files and directories
# Use the first rm found at a well-known absolute location; when neither
# location has one, fall back to a bare "rm" resolved through PATH.
if [ -f /usr/bin/rm ]; then
  RM_CMD=/usr/bin/rm
elif [ -f /bin/rm ]; then
  RM_CMD=/bin/rm
else
  RM_CMD=rm
fi



# Stage the input as $JOBID.INFILE with every double-quote character
# removed, and decide where the result will be written:
#   no arguments - read the CSV data from stdin, write to stdout
#   csvfile      - read csvfile, write to csvfile with its .csv/.CSV
#                  extension replaced by .phyl
if [ $# -eq 0 ]; then
  PIPEOUTPUT=1
  INFILE=$JOBID.INFILE
  # tr already reads standard input; no cat or /dev/stdin needed.
  tr -d '"' > "$JOBID.INFILE"
  OUTFILE=/dev/stdout
else
  PIPEOUTPUT=0
  # Stop early rather than build an output file from empty input.
  if [ ! -r "$1" ]; then
    printf '%s: cannot read input file: %s\n' "$0" "$1" >&2
    exit 1
  fi
  # "$1" is quoted so that file names containing blanks or glob
  # characters are passed through intact.
  tr -d '"' < "$1" > "$JOBID.INFILE"
  # Create an output filename OUTFILE by truncating the .CSV or .csv
  # file extension
  BASENAME=`printf '%s\n' "$1" | sed -e 's/\.[cC][sS][vV]$//'`
  OUTFILE=$BASENAME.phyl
fi
  

# Split each input line at its first comma: the first field (the name)
# goes to $JOBID.NAMEFILE1, everything after it (the markers) goes to
# $JOBID.DATAFILE1.
cut -d, -f1 "$JOBID.INFILE" > "$JOBID.NAMEFILE1"
cut -d, -f2- "$JOBID.INFILE" > "$JOBID.DATAFILE1"

# Phylip requires names that are exactly 10 characters wide: append ten
# blanks to every name, then keep only the first ten characters.
sed -e 's/.*/&          /' "$JOBID.NAMEFILE1" | cut -c1-10 > "$JOBID.NAMEFILE2"

# Phylip needs the number of lines and the number of columns of the data
# on the first line of its input file.
# The line count is kept in LINENUM, not LINENO: the original author found
# that LINENO is already in use on some Linux systems.
# Reading through stdin makes wc print only the number; tr removes any
# padding some wc implementations put in front of it.
LINENUM=`wc -l < "$JOBID.DATAFILE1" | tr -d ' \t'`

# The column count takes two steps. $JOBID.DATAFILE2 is the data with all
# commas stripped out. $JOBID.TESTFILE is the first line of that file with
# any linefeed and carriage return characters deleted, so the number of
# characters in $JOBID.TESTFILE is the number of columns.
tr -d ',' < "$JOBID.DATAFILE1" > "$JOBID.DATAFILE2"
head -1 < "$JOBID.DATAFILE2" | tr -d '\012\015' > "$JOBID.TESTFILE"
COLNO=`wc -m < "$JOBID.TESTFILE" | tr -d ' \t'`


# write_phylip NLINES NCOLS NAMEFILE DATAFILE OUTFILE
# Write a Phylip-format file to OUTFILE: a first line "NLINES NCOLS",
# followed by each line of NAMEFILE joined directly to the corresponding
# line of DATAFILE.
write_phylip() {
  echo "$1 $2" > "$5"
  # The delimiter has to be quoted. An unquoted \0 loses its backslash to
  # the shell and reaches paste as the digit 0, which paste would then
  # insert between every name and its data. Quoted, '\0' is paste's
  # notation for an empty delimiter.
  paste -d '\0' "$3" "$4" >> "$5"
}

# Create the Phylip-format input file by writing LINENUM and COLNO onto
# the first line, and pasting the padded names and the data onto the
# remaining lines.
write_phylip "$LINENUM" "$COLNO" "$JOBID.NAMEFILE2" "$JOBID.DATAFILE2" "$OUTFILE"

# Clean up: remove exactly the temporary files this script creates.
# A "$JOBID.*" glob would also delete any unrelated file in the current
# directory whose name starts with this process ID followed by a dot,
# for example an input file called 1234.csv when the PID is 1234.
"$RM_CMD" -f \
  "$JOBID.INFILE" \
  "$JOBID.NAMEFILE1" "$JOBID.NAMEFILE2" \
  "$JOBID.DATAFILE1" "$JOBID.DATAFILE2" \
  "$JOBID.TESTFILE"
