/*
 * Cell.java
 *
 * Created on August 28, 2008, 2:44 PM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */
package org.biolegato.gdesupport.data;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.biolegato.gdesupport.files.CharacterColourMaskReader;
import org.biolegato.main.BLMain;

/**
 * This class is used to contain all sequence related functions and data.
 *
 * <p>
 *  This class was documented as using hashtables to pass and store the
 *  parameters of a sequence: a set of intrinsic parameters plus any number of
 *  additional parameters.  NOTE: the class as written here keeps its parameters
 *  in ordinary fields rather than in a hashtable.  The parameter keys used by
 *  all known BioLegato plugins and classes are:
 * </p>
 *
 * <table border="1">
 *  <tr><th>Key</th>            <th>Intrinsic</th>  <th>Default setting</th>    <th>Description</th></tr>
 *  <tr><td>accession</td>      <td>No</td>         <td>N/A</td>                <td>The accession of the sequence</td></tr>
 *  <tr><td>comments</td>       <td>No</td>         <td>N/A</td>                <td>Comments about the sequence</td></tr>
 *  <tr><td>creation-date</td>  <td>No</td>         <td>N/A</td>                <td>The date the sequence was first synthesized</td></tr>
 *  <tr><td>creator</td>        <td>No</td>         <td>N/A</td>                <td>The creator of the sequence</td></tr>
 *  <tr><td>direction</td>      <td>Yes</td>        <td>From 5' to 3'</td>      <td>The direction of the sequence (from 3' to 5' or from 5' to 3')</td></tr>
 *  <tr><td>description</td>    <td>No</td>         <td>N/A</td>                <td>A description of the sequence</td></tr>
 *  <tr><td>GI</td>             <td>No</td>         <td>N/A</td>                <td>The GI number of the sequence</td></tr>
 *  <tr><td>name</td>           <td>Yes</td>        <td>New sequence</td>       <td>The sequence's name</td></tr>
 *  <tr><td>sequence</td>       <td>Yes</td>        <td><i>blank</i></td>       <td>The raw sequence</td></tr>
 *  <tr><td>strandedness</td>   <td>Yes</td>        <td>Single</td>             <td>The strandedness of a sequence</td></tr>
 *  <tr><td>topology</td>       <td>Yes</td>        <td>Linear</td>             <td>The sequence topology</td></tr>
 *  <tr><td>type</td>           <td>Yes</td>        <td>DNA</td>                <td>The type of data the sequence is</td></tr>
 * </table>
 *
 * @author Graham Alvare
 * @author Brian Fristensky
 */
public final class Seq implements Cloneable, Serializable {

    /**
     * Enumerates the kinds of data a sequence can hold.
     * Keeping the kind in an enum guarantees type safety; further
     * constants may be added here as they become necessary.
     */
    public static enum Type {

        /**
         * Sequence type DNA
         */
        DNA("DNA"),
        /**
         * Sequence type RNA
         */
        RNA("RNA"),
        /**
         * Sequence type protein
         */
        PROTEIN("Protein"),
        /**
         * Sequence type colour mask
         */
        MASK("Colour mask"),
        /**
         * Sequence type is text
         * (represented as '"')
         */
        TEXT("Text");

        /**
         * The nicely formatted text returned by {@link #toString()}.
         */
        private final String label;

        /**
         * Creates a type constant with its display text.
         *
         * @param label the text to return from {@link #toString()}.
         */
        private Type(String label) {
            this.label = label;
        }

        /**
         * Returns the nicely formatted string representation of the type.
         *
         * @return "DNA", "RNA", "Protein", "Colour mask" or "Text"
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Enumerates the possible directions of a sequence.
     * Applies to every sequence that may have a direction; it can be
     * ignored for non-sequence data types (such as text) and for any data
     * that has no direction or does not need to distinguish one.
     */
    public static enum Direction {

        /**
         * Direction of the sequence goes from 3' to 5'
         */
        FROM3TO5("From 3' to 5'"),
        /**
         * Direction of the sequence goes from 5' to 3'
         */
        FROM5TO3("From 5' to 3'");

        /**
         * The nicely formatted text returned by {@link #toString()}.
         */
        private final String label;

        /**
         * Creates a direction constant with its display text.
         *
         * @param label the text to return from {@link #toString()}.
         */
        private Direction(String label) {
            this.label = label;
        }

        /**
         * Returns the nicely formatted string representation of the direction.
         *
         * @return "From 3' to 5'" or "From 5' to 3'"
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Enumerates the possible topologies of a sequence.
     * Applies to every sequence that may have a topology; it can be
     * ignored for non-sequence data types (such as text) and for any data
     * that has no topology or does not need to distinguish one.
     */
    public static enum Topology {

        /**
         * Linear topology
         */
        LINEAR("Linear"),
        /**
         * Circular topology
         */
        CIRCULAR("Circular");

        /**
         * The nicely formatted text returned by {@link #toString()}.
         */
        private final String label;

        /**
         * Creates a topology constant with its display text.
         *
         * @param label the text to return from {@link #toString()}.
         */
        private Topology(String label) {
            this.label = label;
        }

        /**
         * Returns the nicely formatted string representation of the topology.
         *
         * @return "Linear" or "Circular"
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Enumerates the possible strandedness values of a sequence.
     * Applies to every sequence that may have strandedness; it can be
     * ignored for non-sequence data types (such as text) and for any data
     * that has no strandedness or does not need to distinguish one.
     */
    public static enum Strandedness {

        /**
         * Single stranded sequence
         */
        SINGLE("Single stranded"),
        /**
         * Double stranded sequence
         * (represented as 'D')
         */
        DOUBLE("Double stranded"),
        /**
         * Mixed stranded sequence
         * (represented as 'M')
         */
        MIXED("Mixed strandedness");

        /**
         * The nicely formatted text returned by {@link #toString()}.
         */
        private final String label;

        /**
         * Creates a strandedness constant with its display text.
         *
         * @param label the text to return from {@link #toString()}.
         */
        private Strandedness(String label) {
            this.label = label;
        }

        /**
         * Returns the nicely formatted string representation of the strandedness.
         *
         * @return "Single stranded", "Double stranded" or "Mixed strandedness"
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Classifies the individual characters within a sequence.
     * Intended for intrinsic functions that need to tell alignment gaps,
     * ambiguous characters and unambiguous characters apart (for example
     * certain colour maps).
     */
    public static enum CharacterClass {

        /** Alignment gap class. */
        ALIGNMENT,

        /** Ambiguous character class. */
        AMBIGUOUS,

        /** Unambiguous character class. */
        UNAMBIGUOUS
    }
    // Package-level access only for all of the fields below.
    //
    // because the access is restricted to only the package level, there should
    // not be any major issues with regard to code bugs
    //
    // http://java.dzone.com/articles/getter-setter-use-or-not-use-0
    // http://www.artima.com/intv/sway2.html
    //
    // NOTE: fields marked transient are skipped by default Java serialization,
    // and this class declares no readObject method, so on a deserialized object
    // they hold the Java defaults (0, false, null) rather than the initializer
    // values written below.

    /** The group ID of the sequence; -1 unless set (presumably "not in a group" -- confirm). */
    transient int groupID = -1;
    /** The type of data stored in the sequence; DNA by default. */
    Type type = Type.DNA;
    /** The name of the sequence. */
    String name;
    /** The text of the sequence (a mutable buffer, handed out as-is by getSequence()). */
    StringBuffer sequence;
    /** The direction of the sequence; from 5' to 3' by default. */
    Direction direction = Direction.FROM5TO3;
    /** The strandedness of the sequence; single stranded by default. */
    Strandedness strandedness = Strandedness.SINGLE;
    /** The topology of the sequence; linear by default. */
    Topology topology = Topology.LINEAR;
    /** The colour mask of the sequence; starts as DEFAULT_MASK, which may itself be null. */
    transient ColourMask mask = Seq.DEFAULT_MASK;
    // In addition to the above reasoning:
    // -----------------------------------
    // because these variables do not have to be validated (they can be any value even null)
    // and they are only changed within the package there is no reason for accessor methods
    // accessor methods for the below would only cause a slow-down (i.e. additional overhead)
    //
    // also the variables are given explicitly package level access only, so as to minimize
    // the amount of code that can change them.

    /** Protection flag for alignment gap characters (presumably guards them against edits -- confirm). */
    transient boolean protect_align = false;
    /** Protection flag for ambiguous characters (presumably guards them against edits -- confirm). */
    transient boolean protect_ambig = true;
    /** Protection flag for unambiguous characters (presumably guards them against edits -- confirm). */
    transient boolean protect_unambig = true;
    /** The description of the sequence; may be null. */
    transient String description = null;
    /** The original genbank file entry of the sequence; null if none, and reset to null by modified(). */
    StringBuilder original = null;

/////////////////
//*************//
//* CONSTANTS *//
//*************//
/////////////////
    /**
     * The list of all colour masks loaded in the system.
     *
     * <p>
     *  The list is filled once, when this class is initialized.  Masks are read
     *  from the location named by the {@code BL_MASKS} environment variable or,
     *  when that variable is unset or empty, from the location named by the
     *  {@code seqcolourdir} BioLegato property.  When neither is set the list
     *  is empty.
     * </p>
     */
    static final List<ColourMask> colourMasks = loadColourMasks();

    /**
     * Builds the initial list of colour masks.
     *
     * Loading is best effort: any exception is printed to standard error and
     * whatever was loaded up to that point is returned, so class
     * initialization never fails because of a bad mask location.
     *
     * @return a new, modifiable list of the colour masks that could be loaded.
     */
    private static List<ColourMask> loadColourMasks() {
        List<ColourMask> masks = new ArrayList<ColourMask>();

        try {
            String envLocation = System.getenv("BL_MASKS");

            if (envLocation != null && !"".equals(envLocation)) {
                loadMasks(new File(BLMain.envreplace(envLocation)), masks);
            } else if (BLMain.getProperty("seqcolourdir") != null && !"".equals(BLMain.getProperty("seqcolourdir"))) {
                loadMasks(new File(BLMain.envreplace(BLMain.getProperty("seqcolourdir"))), masks);
            }
        } catch (Exception th) {
            th.printStackTrace(System.err);
        }
        return masks;
    }

    /**
     * Loads all of the colour masks contained in a file or directory.
     *
     * Directories are searched recursively; only readable regular files
     * whose name ends in ".csv" (in any letter case) are parsed.  A file that
     * fails with an I/O error is reported on standard error and skipped.
     *
     * @param location the file object to read colour masks from.
     * @param masks the list to add the loaded colour masks to.
     */
    private static void loadMasks(File location, List<ColourMask> masks) {
        if (!location.exists() || !location.canRead()) {
            return;
        }

        if (location.isDirectory()) {
            // listFiles() returns null (not an empty array) when the directory
            // cannot be listed, e.g. because of an I/O error
            File[] children = location.listFiles();

            if (children != null) {
                for (File child : children) {
                    loadMasks(child, masks);
                }
            }
        } else if (location.isFile() && location.getAbsolutePath().toLowerCase().endsWith(".csv")) {
            try {
                masks.add(CharacterColourMaskReader.readColourMaskFile(location));
            } catch (IOException ioe) {
                ioe.printStackTrace(System.err);
            }
        }
    }
    /**
     * The default colour mask for BioLegato.
     *
     * This is the result of getDefaultMask() at class initialization: the
     * entry of colourMasks whose toString() equals "default" (ignoring case),
     * or null when no such mask was loaded.  Static initializers run in
     * textual order, so this declaration must stay below colourMasks.
     */
    public static final ColourMask DEFAULT_MASK = getDefaultMask();
    /**
     * Used for serialization purposes (the serialized-form version of this class).
     */
    private static final long serialVersionUID = 7526472295622777024L;

///////////////
//***********//
//* METHODS *//
//***********//
///////////////
    /**
     * Constructs an empty, unnamed DNA sequence.
     *
     * Equivalent to calling the three-argument constructor with the DNA type,
     * an empty name and a new, empty sequence buffer.
     */
    public Seq() {
        this(Seq.Type.DNA, "", new StringBuffer());
    }

    /**
     * Constructs new instances of sequence.
     **
     * @param type the type of data to store
     * @param name the name of the data sequence
     * @param sequence the text of the sequence
     */
    public Seq(Type type, String name, StringBuffer sequence) {
        this.name = name;
        this.type = type;
        this.sequence = sequence;

        if (this.sequence != null && sequence.length() == 0) {
            protect_align = false;
            protect_ambig = false;
            protect_unambig = false;
        }
    }

    /**
     * Constructs a new sequence with explicit direction, topology and strandedness.
     *
     * The type, name and text are handled by the three-argument constructor.
     *
     * @param type the type of data to store
     * @param name the name of the data sequence
     * @param sequence the text of the sequence
     * @param direction the direction of the sequence
     * @param topology the topology of the sequence
     * @param strandedness the strandedness of the sequence
     */
    public Seq(Type type, String name, StringBuffer sequence, Direction direction,
            Topology topology, Strandedness strandedness) {
        this(type, name, sequence);

        this.direction = direction;
        this.topology = topology;
        this.strandedness = strandedness;
    }

    /**
     * Constructs a new sequence that also remembers its original file entry.
     *
     * Everything except the original entry is handled by the six-argument
     * constructor.
     *
     * @param type the type of data to store
     * @param name the name of the data sequence
     * @param sequence the text of the sequence
     * @param direction the direction of the sequence
     * @param topology the topology of the sequence
     * @param strandedness the strandedness of the sequence
     * @param original the original genbank file entry of the sequence
     *                 (stored as given, not copied)
     */
    public Seq(Type type, String name, StringBuffer sequence, Direction direction, Topology topology,
            Strandedness strandedness, StringBuilder original) {
        this(type, name, sequence, direction, topology, strandedness);
        this.original = original;
    }

    /**
     * Constructs a new sequence with a group ID and a description.
     *
     * Everything except the group ID and description is handled by the
     * six-argument constructor.
     *
     * @param type the type of data to store
     * @param name the name of the data sequence
     * @param sequence the text of the sequence
     * @param direction the direction of the sequence
     * @param topology the topology of the sequence
     * @param strandedness the strandedness of the sequence
     * @param groupID the group ID for the sequence
     * @param description the description of the sequence
     */
    public Seq(Type type, String name, StringBuffer sequence, Direction direction,
            Topology topology, Strandedness strandedness, int groupID, String description) {
        this(type, name, sequence, direction, topology, strandedness);

        this.description = description;
        this.groupID = groupID;
    }

    /**
     * Constructs a new sequence as an independent copy of another sequence.
     *
     * <p>
     *  The sequence text and the original file entry are copied into fresh
     *  buffers, so later edits to the copy do not alter the source sequence
     *  (and vice versa).  The name, type, topology, direction, strandedness,
     *  protection flags, description and colour mask are carried over.
     * </p>
     * <p>
     *  The group ID is not carried over; the copy keeps the default of -1.
     * </p>
     *
     * @param data the sequence to copy (must not be null)
     */
    public Seq(Seq data) {
        this.name = data.name;
        this.type = data.type;
        this.topology = data.topology;
        this.direction = data.direction;
        this.strandedness = data.strandedness;
        this.protect_align = data.protect_align;
        this.protect_ambig = data.protect_ambig;
        this.protect_unambig = data.protect_unambig;
        this.description = data.description;
        this.mask = data.mask;

        // NOTE(review): groupID is left at its default, as before -- presumably
        // group membership is managed by the code that owns the sequences; confirm.

        // StringBuffer/StringBuilder are mutable: sharing them would let edits
        // to the copy silently change the source sequence
        this.sequence = (data.sequence == null)
                ? null : new StringBuffer(data.sequence.toString());
        this.original = (data.original == null)
                ? null : new StringBuilder(data.original.toString());
    }

//////////////////////////
//**********************//
//* SEQUENCE FUNCTIONS *//
//**********************//
//////////////////////////

    /**
     * Marks the sequence as modified by discarding the stored original entry.
     * After this call getOriginal() returns null.
     */
    public final void modified() {
        this.original = null;
    }
    
    /**
     * Returns the type of data stored in the sequence.
     *
     * @return the sequence type.
     */
    public final Type getType() {
        return type;
    }

    /**
     * Returns the name of the sequence.
     *
     * @return the sequence name.
     */
    public final String getName() {
        return name;
    }

    /**
     * Returns the text of the sequence.
     *
     * The returned buffer is the internal one, not a copy, so changes made
     * through it are changes to this sequence.
     *
     * @return the sequence text buffer.
     */
    public final StringBuffer getSequence() {
        return sequence;
    }

    /**
     * Returns the direction of the sequence.
     *
     * @return the sequence direction.
     */
    public final Direction getDirection() {
        return direction;
    }

    /**
     * Returns the topology of the sequence.
     *
     * @return the sequence topology.
     */
    public final Topology getTopology() {
        return topology;
    }

    /**
     * Returns the strandedness of the sequence.
     *
     * @return the sequence strandedness.
     */
    public final Strandedness getStrandedness() {
        return strandedness;
    }

    /**
     * Returns the protection flag for alignment gap characters.
     * (Presumably this guards alignment gaps against edits -- confirm
     * against the editing code.)
     *
     * @return the current value of the alignment protection flag.
     */
    public final boolean getProtectAlignment() {
        return protect_align;
    }

    /**
     * Returns the protection flag for ambiguous characters.
     * (Presumably this guards ambiguous characters against edits -- confirm
     * against the editing code.)
     *
     * @return the current value of the ambiguous character protection flag.
     */
    public final boolean getProtectAmbiguous() {
        return protect_ambig;
    }

    /**
     * Returns the protection flag for unambiguous characters.
     * (Presumably this guards unambiguous characters against edits -- confirm
     * against the editing code.)
     *
     * @return the current value of the unambiguous character protection flag.
     */
    public final boolean getProtectUnambiguous() {
        return protect_unambig;
    }

    /**
     * Returns the colour mask of the sequence.
     *
     * @return the colour mask; may be null (the field starts as DEFAULT_MASK,
     *         which is null when no default mask was loaded).
     */
    public final ColourMask getMask() {
        return mask;
    }

    /**
     * Returns the group ID of the sequence.
     *
     * @return the group ID; -1 unless one was set.
     */
    public final int getGroupID() {
        return groupID;
    }

    /**
     * Returns the stored original file entry of the sequence.
     *
     * The returned builder is the internal one, not a copy.
     *
     * @return the original entry, or null if none was supplied or
     *         modified() has been called.
     */
    public final StringBuilder getOriginal() {
        return original;
    }

    /**
     * Returns the description of the sequence.
     *
     * @return the description; may be null.
     */
    public final String getDescription() {
        return description;
    }

    /**
     * Creates a multi-line, human-readable dump of the sequence and its fields.
     *
     * The first line is the header "--- SEQUENCE DATA ---"; each following
     * field is printed on its own line as "    key = value".  A blank line
     * follows the group ID and another follows the sequence text.  The
     * description and the original file entry are not included.
     *
     * @return the string representation of the sequence.
     */
    @Override
    public String toString() {
        // the header ends with a newline so that the first field starts on
        // its own line, like every other field
        StringBuilder result = new StringBuilder("--- SEQUENCE DATA ---\n");

        // NOTE: append is faster than + or concat operators
        result.append("    type = ").append(type);
        result.append("\n    name = ").append(name);
        result.append("\n    group = ").append(groupID).append("\n");
        result.append("\n    topology = ").append(topology);
        result.append("\n    direction = ").append(direction);
        result.append("\n    strandedness = ").append(strandedness);
        result.append("\n    protect_align = ").append(protect_align);
        result.append("\n    protect_ambig = ").append(protect_ambig);
        result.append("\n    protect_unambig = ").append(protect_unambig);
        result.append("\n    sequence = ").append(sequence).append("\n");
        result.append("\n    mask = ").append(mask).append("\n");

        return result.toString();
    }

    /**
     * Clones the current sequence object.
     *
     * The copy is produced by the copy constructor Seq(Seq) instead of
     * super.clone(); see that constructor for exactly which fields are
     * carried over.
     *
     * @return a cloned copy of the sequence (always a Seq instance).
     */
    @Override
    public Object clone() {
        // Seq is final, so building the copy with the copy constructor cannot
        // yield an object of the wrong runtime class
        return new Seq(this);
    }

    /**
     * Detects the type of a sequence from the characters it contains.
     *
     * The text is scanned from the start, ignoring letter case, and the
     * first decisive character settles the result: 'U' means RNA, while any
     * of 'E', 'F', 'J', 'L', 'O', 'Q', 'X' or 'Z' means protein.  Text with
     * no decisive character (including empty text) is reported as DNA.
     *
     * @param data the sequence to detect the type for.
     * @return the sequence type
     */
    public static Type detectType(StringBuffer data) {
        final int length = data.length();

        for (int index = 0; index < length; index++) {
            switch (Character.toUpperCase(data.charAt(index))) {
                case 'U':
                    return Seq.Type.RNA;
                case 'E':
                case 'F':
                case 'J':
                case 'L':
                case 'O':
                case 'Q':
                case 'X':
                case 'Z':
                    return Seq.Type.PROTEIN;
                default:
                    // not decisive -- keep scanning
                    break;
            }
        }
        return Seq.Type.DNA;
    }

    /**
     * Finds the default colour mask among the loaded colour masks.
     *
     * The default mask is the first entry of colourMasks whose toString()
     * equals "default", ignoring case.  A progress line is printed to
     * standard output before the search.  (Kept as a method because, per
     * the original author, this was the only way the initialization of the
     * default mask could be made to work properly.)
     *
     * @return the default colour mask, or null if the mask list is null or
     *         contains no mask named "default".
     */
    public static ColourMask getDefaultMask() {
        if (colourMasks == null) {
            return null;
        }

        System.out.println("--- reading masks ---");
        for (ColourMask candidate : colourMasks) {
            if ("default".equalsIgnoreCase(candidate.toString())) {
                return candidate;
            }
        }
        return null;
    }
}
