package blrevcomp;
import java.util.*;

/**
 * A nucleotide sequence (DNA or RNA) that can be read from and written to
 * FASTA-style text files.
 */
public class BioSeq {
  /** Number of elements allocated for {@link #seq} when this object is created. */
  public int MAXSEQ = 1000000;
  /** Sequence name, taken from the header line. */
  public String Name = "";
  /** Flag character that indicates the beginning of a sequence. */
  public char SFlag = '>';
  /** Sequence characters; only the first {@link #LEN} entries are in use. */
  public char[] seq = new char[MAXSEQ];
  /** Number of sequence characters currently stored in {@link #seq}. */
  public int LEN = 0;
  /** Kinds of nucleic acid a sequence can consist of. */
  enum NAType {DNA, RNA}
  /** Tells whether the sequence is DNA or RNA; assigned by readFasta. */
  NAType Stype;
  /** Character translation tables for the sequence; assigned by readFasta. */
  Nuchar Nuc;



/**
 * Character translation tables used when a nucleotide sequence is written out.
 * <p>
 * {@code INuc.get(c)} returns the character to be written if the sequence is being
 * printed from the input strand.<br>
 * {@code CNuc.get(c)} returns the character to be written if the sequence is being
 * printed from the complementary strand.<br>
 * The case of nucleotides in the input sequence is preserved in the output sequence.
 * <p>Examples:<br>
 * <blockquote>
 * {@code INuc.get('A')} returns 'A'<br>
 * {@code INuc.get('a')} returns 'a'<br>
 * {@code CNuc.get('Y')} returns 'R'<br>
 * {@code CNuc.get('d')} returns 'h'<br>
 * {@code CNuc.get('W')} returns 'W'<br>
 * {@code CNuc.get('N')} returns 'N'<br>
 * </blockquote>
 * <p>
 * All standard IUPAC ambiguity characters are supported, as defined in
 * Cornish-Bowden (1985) <u>Nucl. Acids Res.</u> 13:3021-3030.
 * Gap characters ('-') map to themselves in both tables, so they are retained
 * in the output.
 * <p>
 * Both tables are empty until {@code Nuchar(NAType)} has been called on the instance.
 */
public class Nuchar {
   /** Input strand: every supported character maps to itself. */
   public HashMap<Character,Character> INuc = new HashMap<Character,Character>();
   /** Complementary strand: every supported character maps to its complement. */
   public HashMap<Character,Character> CNuc = new HashMap<Character,Character>();

    /**
     * Fills {@link #INuc} and {@link #CNuc}.
     * <p>
     * Note that, despite its name, this is an ordinary method and not a
     * constructor: it has to be called explicitly on an existing instance.
     *
     * @param Stype nucleic acid type; 'A' is complemented to 'T' for DNA and to
     *              'U' for any other type. Must not be null.
     */
    void Nuchar(NAType Stype) {
        // The only pairing that depends on the nucleic acid type.
        final char partnerOfA = Stype.equals(NAType.DNA) ? 'T' : 'U';

        // Unambiguous bases.
        pair('A', partnerOfA);
        pair('G', 'C');
        pair('C', 'G');
        pair('T', 'A');
        pair('U', 'A');

        // Two-base ambiguity codes.
        pair('R', 'Y');   // A|G  <-> C|T
        pair('Y', 'R');
        pair('M', 'K');   // A|C  <-> G|T
        pair('K', 'M');
        pair('W', 'W');   // A|T is its own complement
        pair('S', 'S');   // C|G is its own complement

        // Three-base ambiguity codes.
        pair('D', 'H');   // A|G|T <-> A|C|T
        pair('H', 'D');
        pair('V', 'B');   // A|C|G <-> C|G|T
        pair('B', 'V');

        // Any base, and the gap character.
        pair('N', 'N');
        pair('-', '-');
    }

    /**
     * Registers {@code code} and its lower-case form in both tables.
     *
     * @param code    upper-case character as it appears on the input strand
     * @param partner upper-case character to write for the complementary strand
     */
    private void pair(char code, char partner) {
        final char lowerCode = Character.toLowerCase(code);
        INuc.put(code, code);
        INuc.put(lowerCode, lowerCode);
        CNuc.put(code, partner);
        CNuc.put(lowerCode, Character.toLowerCase(partner));
    }
}

  /**
   * Tells whether the current line of the input starts a new sequence.
   * Fasta sequences begin with '&gt;'; GDE flatfile DNA/RNA sequences begin with '#'.
   *
   * @param In input file whose current line is examined
   * @return true if the end of the input has not been reached and the current
   *         line begins with either character; false otherwise
   */
  boolean newSeqFound(BufferedTextInputFile In) {
    // At end of input there is no line to look at.
    if (In.EOF) {
      return false;
    }
    return In.currentLine.startsWith(">") || In.currentLine.startsWith("#");
  }

    /**
     * Reads the next sequence in Pearson (FASTA) format:
     * <pre>
     * &gt;name
     * sequence
     * sequence
     * ...
     * </pre>
     * Lines are skipped until a header line is found (see {@code newSeqFound}).
     * The first character of the header is stored in {@code SFlag}; the text
     * between it and the first space (or the end of the line) becomes {@code Name}.
     * All following lines up to the next header line or the end of the input are
     * stored in {@code seq}, with whitespace characters dropped. The next header
     * line, if any, is left as the current line of {@code In}.
     * <p>
     * {@code seq} is enlarged as needed, so sequences longer than its initial
     * size can be read.
     * <p>
     * {@code Stype} and {@code Nuc} are reassigned on every call, whether or not
     * a sequence was found.
     *
     * @param In Input file
     * @return true if a header line was found, false if the end of the input
     *         was reached first
     */
  public boolean readFasta(BufferedTextInputFile In) {
    // Reinitialize each time so nothing from a previous sequence is left over.
    LEN = 0;
    Name = "";
    boolean found = false;

    // Advance to the next header line.
    while (!In.EOF && !newSeqFound(In)) {
        In.nextLine();
    }

    if (newSeqFound(In)) {
        found = true;

        // Header: flag character, then the name up to the first space.
        SFlag = In.currentLine.charAt(0);
        int nameEnd = In.currentLine.indexOf(' ');
        if (nameEnd == -1) {
            nameEnd = In.currentLine.length();
        }
        Name = In.currentLine.substring(1, nameEnd);
        In.nextLine();

        // Sequence lines, up to the next header or the end of the input.
        while (!In.EOF && !newSeqFound(In)) {
            for (int i = 0; i < In.currentLine.length(); i++) {
                char ch = In.currentLine.charAt(i);
                // Blanks, tabs and stray carriage returns are not part of the sequence.
                if (Character.isWhitespace(ch)) {
                    continue;
                }
                // Grow the buffer instead of running off its end.
                if (LEN == seq.length) {
                    seq = Arrays.copyOf(seq, seq.length * 2 + 1);
                }
                seq[LEN] = ch;
                LEN++;
            }
            In.nextLine();
        }
    }

    Stype = getNAType();
    Nuc = new Nuchar();
    Nuc.Nuchar(Stype);
    return found;
  } // readFasta

  /**
   * Writes the stored sequence characters one after another, without any
   * translation and without a line terminator.
   *
   * @param Out BufferedTextOutputFile
   */
  void writeSingleLine(BufferedTextOutputFile Out) {
    int idx = 0;
    while (idx < LEN) {
        Out.writeChar(seq[idx]);
        idx++;
    }
  } // writeSingleLine


  /**
   * Writes the sequence in raw format, {@code WIDTH} characters per line.
   * <p>
   * Every character is looked up in {@code N} before it is written. Characters
   * that have no entry in {@code N} are written unchanged. Nothing is written
   * when the sequence is empty.
   *
   * @param Out BufferedTextOutputFile
   * @param STRAND 'i': input strand, 'c': complementary strand. Not used by this
   *        method; the strand is determined by {@code N}.
   * @param SENSE 1 : original direction (first to last character); any other
   *        value, conventionally -1 : reverse direction (last to first)
   * @param N Nuchar.INuc for the input strand; Nuchar.CNuc for the complementary strand
   * @param WIDTH number of characters per output line; a value below 1 puts
   *        the whole sequence on a single line
   */
  public void writeRAW(BufferedTextOutputFile Out, char STRAND, int SENSE, HashMap N, int WIDTH) {
    // Walk forward from the first character or backward from the last one.
    // The step is derived from the same test as the start position so the two
    // can never disagree.
    final boolean forward = (SENSE == 1);
    final int step = forward ? 1 : -1;
    int pos = forward ? 0 : LEN - 1;

    // A non-positive width would never make progress in the loop below.
    final int lineWidth = (WIDTH > 0) ? WIDTH : LEN;

    int printed = 0;
    while (printed < LEN) {
        // Subtracting rather than adding keeps this safe for very large widths.
        final int thisLine = Math.min(lineWidth, LEN - printed);

        for (int i = 0; i < thisLine; i++) {
            final char base = seq[pos];
            char out = base;
            Object mapped = N.get(base);
            if (mapped instanceof Character) {
                out = (Character) mapped;
            }
            Out.writeChar(out);
            pos += step;
        }
        Out.writeln("");
        printed += thisLine;
    }
  } // writeRAW

     

   /**
    * Writes the sequence in Pearson/FASTA format: a header line made of the
    * flag character, the sequence name and {@code SUFFIX}, followed by the
    * sequence itself as written by {@code writeRAW}.
    *
    * @param Out - Output file
    * @param SUFFIX - Added to the name of the sequence:
    * <ul>
    * <li>inverse complement - .opp</li>
    * <li>complement - .comp</li>
    * <li>reverse strand - .flip</li>
    * </ul>
    * @param STRAND - 'i' - input strand; any other value - complement
    * @param SENSE  1 - forward direction; -1 reverse direction
    * @param WIDTH - number of nucleotides to print per line
    */
  public void writeFasta(BufferedTextOutputFile Out, String SUFFIX, char STRAND, int SENSE, int WIDTH) {
    // Header line.
    Out.writeln(SFlag + Name + SUFFIX);

    // Sequence lines, translated through the table that belongs to the strand.
    HashMap<Character,Character> table = (STRAND == 'i') ? Nuc.INuc : Nuc.CNuc;
    writeRAW(Out, STRAND, SENSE, table, WIDTH);
  }
  
  /**
   * Classifies the stored sequence by looking for uracil.
   *
   * @return {@code NAType.RNA} if any of the first {@code LEN} characters of
   *         {@code seq} is 'U' or 'u'; {@code NAType.DNA} otherwise, which
   *         includes the empty sequence
   */
  NAType getNAType() {
    for (int i = 0; i < LEN; i++) {
        char base = seq[i];
        if (base == 'U' || base == 'u') {
            return NAType.RNA;
        }
    }
    return NAType.DNA;
  }
} // BioSeq
