/*
 * GenBankFile.java
 *
 * Created on January 30, 2008, 11:58 AM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

import org.biolegato.core.data.sequence.Sequence;
import org.biolegato.core.data.sequence.Sequence.Type;
import org.biolegato.core.data.sequence.Sequence.Topology;
import org.biolegato.core.data.sequence.Sequence.Strandedness;
import org.biolegato.core.plugintypes.DataFormat;
import java.io.File;
import java.io.Reader;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.StringTokenizer;

/**
 * Data format plugin that reads and writes GenBank flat-file records.
 *
 * @author alvare
 */
public class GenBankFile2008 extends DataFormat {

    /** Creates a new instance of GenBankFile2008; the constructor performs no initialisation. */
    public GenBankFile2008 () {
    }

    /**
     * Translates a sequence from the BioLegato internal format into GenBank text.
     * <p>
     * When the sequence is flagged as modified, or carries no original GenBank
     * header text ("originalgb"), a new header (LOCUS, ACCESSION, DESCRIPTION,
     * VERSION) is generated from the sequence fields.  Otherwise the stored
     * original header is written back verbatim.  In both cases the ORIGIN
     * section is regenerated from the "sequence" field and the record is closed
     * with "//".
     *
     * @param seq  the sequence to convert (may be null)
     * @return  the GenBank text, or an empty string if <code>seq</code> is null
     */
    public String translateTo (Sequence seq) {
        if (seq == null) {
            return "";
        }

        // NOTE: append is faster than + or concat operators
        StringBuilder result = new StringBuilder();
        String sequence = gbFieldText(seq, "sequence");
        // compare the header by its text: the field may hold a StringBuffer,
        // and StringBuffer.equals("") is never true
        String originalHeader = gbFieldText(seq, "originalgb");

        if (Boolean.TRUE.equals(seq.getField("modified")) || originalHeader.length() == 0) {
            // LOCUS line: name padded/truncated to 16 columns, length right justified in 11
            String paddedName = (gbFieldText(seq, "name") + "                ").substring(0, 16);
            String paddedLength = "           " + sequence.length();

            result.append("LOCUS       ").append(paddedName).append(" ");
            result.append(paddedLength.substring(Math.max(0, paddedLength.length() - 11)));

            Object type = seq.getField("type");
            if ( ! Type.PROTEIN.equals(type)) {
                Strandedness strandedness = (Strandedness) seq.getField("strandedness");

                // a missing strandedness is written as single stranded (the helper's default)
                result.append(" bp ")
                      .append(strandedness == null ? "ss" : getGBStrandedness(strandedness))
                      .append("-")
                      .append(type == Type.DNA ? "DNA     " : "RNA     ");
            } else {
                result.append(" aa            ");
            }

            result.append(seq.getField("topology") != Topology.CIRCULAR ? "linear   "
                                                                        : "circular ");

            if (seq.containsKey("classification")) {
                result.append(seq.getField("classification")).append(" ");
            } else {
                result.append("CON "); // division code + space
            }

            if (seq.containsKey("creation-date")) {
                result.append(seq.getField("creation-date"));
            } else {
                // fixed English locale: with the default locale the month
                // abbreviation and its upper-casing would vary between machines
                result.append(new SimpleDateFormat("dd-MMM-yyyy", java.util.Locale.ENGLISH)
                        .format(new Date()).toUpperCase(java.util.Locale.ENGLISH));
            }
            result.append("\n");

            String accession = gbFieldText(seq, "accession");
            if (accession.length() != 0) {
                result.append("ACCESSION   ").append(accession).append("\n");
            }

            // NOTE(review): GenBank flat files normally label this line DEFINITION;
            // the keyword is kept as DESCRIPTION to preserve existing output -- confirm
            String description = gbFieldText(seq, "description");
            if (description.length() != 0) {
                result.append("DESCRIPTION ").append(description).append("\n");
            }

            String gi = gbFieldText(seq, "GI");
            if (gi.length() != 0) {
                // the VERSION line is only written when a GI number is available
                result.append("VERSION     ")
                      .append(accession.length() != 0 ? accession + ".1" : "A00001.1")
                      .append(" GI:").append(gi).append("\n");
            }
        } else {
            result.append(originalHeader);
        }

        // ORIGIN section: 60 residues per line in blocks of 10, each line
        // prefixed with the 1-based position of its first residue
        result.append("ORIGIN");
        for (int count = 0; count < sequence.length(); count += 60) {
            result.append("\n").append(GBRightJustify("" + (count + 1)));
            for (int spaceCount = count;
                 spaceCount < count + 60 && spaceCount < sequence.length();
                 spaceCount += 10) {
                result.append(" ").append(sequence.substring(spaceCount,
                        Math.min(sequence.length(), spaceCount + 10)));
            }
        }
        result.append("\n//\n");

        return result.toString();
    }

    /**
     * Reads a sequence field as text, treating a missing (null) field as empty.
     *
     * @param seq  the sequence to read from
     * @param key  the name of the field
     * @return  the field's string form, or "" if the field is null
     */
    private static String gbFieldText (Sequence seq, String key) {
        Object field = seq.getField(key);
        return (field == null ? "" : field.toString());
    }

    /**
     * Translates GenBank text into BioLegato sequences.
     * <p>
     * The input is read line by line.  A line that starts with a space or tab
     * continues the value of the current keyword; any other non-empty line
     * starts a new keyword (stored under its lower-cased name), except "//"
     * which ends the current record.  LOCUS lines are split into the name,
     * type, strandedness, topology, classification and creation-date fields;
     * the GI number is extracted from VERSION lines; the text after ORIGIN is
     * stored as "sequence" with digits and whitespace removed.  All lines of a
     * record other than its ORIGIN section and its "//" terminator are kept in
     * the "originalgb" field.
     *
     * @param data  the reader to parse the GenBank text from
     * @return  the sequences parsed, in file order (empty if none were found)
     * @throws java.io.IOException  if reading from <code>data</code> fails
     */
    public Sequence[] translateFrom (java.io.BufferedReader data) throws
            java.io.IOException {
        String name = "";           // keyword whose value is currently being collected
        String value = "";          // text following the keyword on the current line
        StringBuffer valueBuffer = new StringBuffer();
        String line;
        String temp;
        StringBuffer original = new StringBuffer();
        int index;
        int tabIndex;
        int spaceIndex;
        boolean terminator;
        LinkedList<Sequence> result = new LinkedList<Sequence>();
        StringTokenizer tokenz;
        Hashtable<String, Object> sequence = new Hashtable<String, Object>();

        while ((line = data.readLine()) != null) {
            terminator = false;

            // get the positions of both the first space and first tab characters.
            spaceIndex = line.indexOf(' ');
            tabIndex = line.indexOf('\t');

            // determine the substring index: the first whitespace character.
            // If either character is absent (-1) use the other one, otherwise
            // use whichever comes first.
            if (tabIndex < 0 || spaceIndex < 0) {
                index = Math.max(tabIndex, spaceIndex);
            } else {
                index = Math.min(tabIndex, spaceIndex);
            }

            if (index == 0 &&  ! name.equals("")) {
                // continuation line of the current keyword
                // NOTE: append is faster than + or concat operators
                valueBuffer.append(line.trim());
            } else if ( ! line.equals("") && index != 0) {

                // a new keyword (or the terminator) closes the previous keyword
                if ( ! name.equals("")) {
                    value = valueBuffer.toString();
                    if (name.equals("sequence")) {
                        // strip the position numbers and the block spacing
                        value = value.replaceAll("[\\d\\s]", "");
                    }
                    sequence.put(name, value);
                    name = "";
                }

                if (line.trim().equals("//")) {
                    terminator = true;
                    sequence.put("originalgb", original);
                    result.add(new Sequence(sequence));
                    sequence.clear();
                    original = new StringBuffer();
                } else {
                    valueBuffer = new StringBuffer();
                    if (index > 0) {
                        // NOTE: append is faster than + or concat operators
                        name = line.substring(0, index).trim().toLowerCase();
                        value = line.substring(index + 1).trim();
                        valueBuffer.append(value);
                    } else {
                        // keyword with nothing after it: do not reuse the
                        // text left over from an earlier line
                        name = line.trim().toLowerCase();
                        value = "";
                    }
                    if (name.equals("locus")) {
                        if ( ! sequence.isEmpty()) {
                            // previous record was never closed with "//"
                            sequence.put("originalgb", original);
                            result.add(new Sequence(sequence));
                            sequence.clear();
                            original = new StringBuffer();
                            org.biolegato.core.main.BLMain.warning("Genbank entry missing // terminator", "GenBankFile2008.class");
                        }
                        tokenz = new StringTokenizer(value);
                        if (tokenz.hasMoreTokens()) {
                            sequence.put("name", tokenz.nextToken());
                        }
                        if (tokenz.hasMoreTokens()) {
                            tokenz.nextToken();    // this is the length (ignorable)
                        }
                        if (tokenz.hasMoreTokens()) {
                            temp = tokenz.nextToken();
                            if (temp.equals("aa")) {    // this should be bp or aa
                                // No token is skipped here: the writer in this
                                // class emits the topology directly after "aa",
                                // so the next token is handled below.
                                sequence.put("type", Type.PROTEIN);
                            } else if (temp.equals("bp") && tokenz.hasMoreTokens()) {
                                temp = tokenz.nextToken();
                                if (temp.contains("-")) {
                                    // e.g. "ds-DNA": strandedness, then type
                                    sequence.put("strandedness",
                                                 toStrandedness(
                                            temp.substring(0, temp.indexOf("-"))));
                                    sequence.put("type", toType(temp.substring(
                                            temp.indexOf("-") + 1)));
                                } else {
                                    sequence.put("type", toType(temp));
                                }
                            }
                        }

                        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
                        //***********************************************************************************************************//
                        //* NOTE: any tokens following the type SEEM to be optional, do not rely on them being in the files read in *//
                        //***********************************************************************************************************//
                        ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
                        if (tokenz.hasMoreTokens() &&
                            tokenz.nextToken().toLowerCase().equals("circular")) {
                            sequence.put("topology", Sequence.Topology.CIRCULAR);
                        } else {
                            sequence.put("topology", Sequence.Topology.LINEAR);
                        }

                        if (tokenz.hasMoreTokens()) {
                            sequence.put("classification", tokenz.nextToken());
                        }

                        if (tokenz.hasMoreTokens()) {
                            sequence.put("creation-date", tokenz.nextToken());
                        }
                        // the LOCUS line is fully consumed; nothing to collect
                        name = "";
                    } else if (name.equals("version") && value.contains("GI:")) {
                        sequence.put("GI",
                                     value.substring(value.indexOf("GI:") + 3).trim());
                    } else if (name.equals("origin")) {
                        // everything up to the next keyword is sequence data
                        name = "sequence";
                        valueBuffer = new StringBuffer();
                    }
                }
            }

            // Keep the line as part of the record's original text.  The ORIGIN
            // section is excluded, and so is the "//" terminator: at this point
            // the buffer already belongs to the next record.
            if ( ! terminator &&  ! name.equals("sequence")) {
                original.append(line).append("\n");
            }
        }

        // flush a final record that was not closed with "//"
        if ( ! sequence.isEmpty()) {
            sequence.put("originalgb", original);
            result.add(new Sequence(sequence));
            sequence.clear();
            org.biolegato.core.main.BLMain.warning("Genbank entry missing // terminator", "GenBankFile2008.class");
        }
        return result.toArray(new Sequence[0]);
    }

    /**
     * Decides whether the file chooser filter should show the given file.
     * Directories are always accepted; other files are accepted when their
     * absolute path ends (ignoring case) in ".gen", ".gp" or ".gb".
     *
     * @param	file	the file to test
     * @return	whether or not the file is accepted
     * @see javax.swing.filechooser.FileFilter#accept
     */
    public boolean accept (File file) {
        if (file.isDirectory()) {
            return true;
        }

        final String path = file.getAbsolutePath().toLowerCase();
        return path.endsWith(".gen") || path.endsWith(".gp") || path.endsWith(".gb");
    }

    /**
     * Returns the short name that identifies this file format.
     *
     * @return  the format name, "genbank"
     */
    @Override
    public String getName () {
        final String formatName = "genbank";
        return formatName;
    }

    /**
     * Returns the human-readable description of this format, listing the
     * file extensions it covers.
     *
     * @return	the string description of the DataFormat
     * @see javax.swing.filechooser.FileFilter#getDescription
     */
    public String getDescription () {
        final String description = "GenBank file (*.gb,*.gp,*.gen)";
        return description;
    }

    /**
     * Converts Bio Legato's internal strandedness to its GenBank prefix.
     *
     * @param	value	the value to convert (may be null)
     * @return	"ds" for DOUBLE, "ms" for MIXED, and "ss" for every other
     *		value, including null
     */
    private static String getGBStrandedness (Strandedness value) {
        // switching on a null enum reference throws a NullPointerException,
        // so a missing strandedness is mapped to the default explicitly
        if (value == null) {
            return "ss";
        }

        switch (value) {
            case DOUBLE:
                return "ds";
            case MIXED:
                return "ms";
            default:
                return "ss";
        }
    }

    /**
     * Maps a GenBank strandedness prefix onto BioLegato's strandedness type.
     * The comparison ignores case; anything other than "ss" or "ds" is
     * reported as MIXED.
     *
     * @param string the GenBank prefix to convert.
     * @return SINGLE for "ss", DOUBLE for "ds", otherwise MIXED
     */
    private static Strandedness toStrandedness (String string) {
        if (string.equalsIgnoreCase("ss")) {
            return Strandedness.SINGLE;
        }
        if (string.equalsIgnoreCase("ds")) {
            return Strandedness.DOUBLE;
        }
        return Strandedness.MIXED;
    }

    /**
     * Maps a GenBank molecule type onto BioLegato's sequence type.  Any text
     * containing "rna" (in any case) is RNA; everything else is DNA.
     *
     * @param string the GenBank molecule type to convert.
     * @return RNA if the text mentions "rna", otherwise DNA
     */
    private static Type toType (String string) {
        final boolean mentionsRna = string.toLowerCase().contains("rna");
        return mentionsRna ? Type.RNA : Type.DNA;
    }

    /**
     * Right justifies a position number for the ORIGIN section by padding it
     * on the left with spaces to a width of 9 characters.  Text that is
     * already 9 characters or longer is returned unchanged.
     *
     * @param string the string to right justify.
     * @return the right justified string.
     */
    private String GBRightJustify (String string) {
        final int width = 9;
        StringBuilder padded = new StringBuilder();

        for (int column = string.length(); column < width; column++) {
            padded.append(' ');
        }
        return padded.append(string).toString();
    }

    /**
     * Used to auto-detect Bio Legato formats: reports whether the first
     * non-whitespace characters available from the reader spell "LOCUS"
     * (in any mix of upper and lower case).
     * <p>
     * The reader is marked before each character read while skipping leading
     * whitespace and is reset to the last mark afterwards.  Any failure while
     * reading is printed to the standard error stream and treated as "not
     * this format".
     *
     * @param test the reader to examine
     * @return whether or not the format is correct
     */
    @Override
    public boolean isFormat (Reader test) {
        final String upper = "LOCUS";
        final String lower = "locus";
        boolean matched = false;

        try {
            int current;

            // skip leading whitespace, re-marking so that the final reset
            // returns to just before the first non-whitespace character
            do {
                test.mark(10);
                current = test.read();
            } while (current == ' ' || current == '\t' || current == '\n'
                     || current == '\r');

            // compare one character at a time, reading the next character
            // only after the previous one has matched
            int position = 0;
            while (position < upper.length()
                   && (current == upper.charAt(position)
                       || current == lower.charAt(position))) {
                position++;
                if (position < upper.length()) {
                    current = test.read();
                }
            }
            matched = (position == upper.length());

            test.reset();
        } catch (Throwable e) {
            e.printStackTrace();
        }
        return matched;
    }

}
