/*
 * Cell.java
 *
 * Created on August 28, 2008, 2:44 PM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */
package org.biolegato.gdesupport.canvas.data;

import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * This class is used to contain all sequence related functions and data.
 *
 * <p>
 *  This class uses hashtables to pass and store parameters of the sequence.
 *  The hashtable has many intrinsic parameters; however accepts any number of
 *  additional parameters.  The current values used by all known BioLegato plugins
 *  and classes are:
 * </p>
 *
 * <table border="1">
 *  <tr><th>Key</th>            <th>Intrinsic</th>  <th>Default setting</th>    <th>Description</th></tr>
 *  <tr><td>accession</td>      <td>No</td>         <td>N/A</td>                <td>The accession of the sequence</td></tr>
 *  <tr><td>comments</td>       <td>No</td>         <td>N/A</td>                <td>Comments about the sequence</td></tr>
 *  <tr><td>creation-date</td>  <td>No</td>         <td>N/A</td>                <td>The date the sequence was first synthesized</td></tr>
 *  <tr><td>creator</td>        <td>No</td>         <td>N/A</td>                <td>The creator of the sequence</td></tr>
 *  <tr><td>direction</td>      <td>Yes</td>        <td>From 5' to 3'</td>      <td>The direction of the sequence (from 3' to 5' or from 5' to 3')</td></tr>
 *  <tr><td>description</td>    <td>No</td>         <td>N/A</td>                <td>A description of the sequence</td></tr>
 *  <tr><td>GI</td>             <td>No</td>         <td>N/A</td>                <td>The GI number of the sequence</td></tr>
 *  <tr><td>modified</td>       <td>Yes</td>        <td>false</td>              <td>Used to indicate the sequence was modified</td></tr>
 *  <tr><td>name</td>           <td>Yes</td>        <td>New Sequence</td>       <td>The sequence's name</td></tr>
 *  <tr><td>sequence</td>       <td>Yes</td>        <td><i>blank</i></td>       <td>The raw sequence</td></tr>
 *  <tr><td>strandedness</td>   <td>Yes</td>        <td>Single stranded</td>    <td>The strandedness of a sequence</td></tr>
 *  <tr><td>topology</td>       <td>Yes</td>        <td>Linear</td>             <td>The sequence topology</td></tr>
 *  <tr><td>type</td>           <td>Yes</td>        <td>DNA</td>                <td>The type of data the sequence is</td></tr>
 * </table>
 *
 * @author Graham Alvare
 * @author Brian Fristensky
 */
public class Cell extends HashMap<String, Object> implements Cloneable, Serializable {

    /**
     * The kinds of data a cell may hold.
     * Storing the type as an enum (rather than a string) ensures proper type casting.
     * Feel free to add more types as necessary.
     */
    public enum Type {

        /** Sequence type DNA; displayed as "DNA". */
        DNA("DNA"),
        /** Sequence type RNA; displayed as "RNA". */
        RNA("RNA"),
        /** Sequence type protein; displayed as "Protein". */
        PROTEIN("Protein"),
        /** Sequence type colour mask; displayed as "Colour mask". */
        MASK("Colour mask"),
        /**
         * Sequence type is text; displayed as "Text".
         * (noted by the original authors as being represented as '"')
         */
        TEXT("Text");

        /** The nicely formatted name returned by {@link #toString()}. */
        private final String label;

        Type(String label) {
            this.label = label;
        }

        /**
         * Prints a nicely formatted string representation of this type.
         *
         * @return the display name of the type (for example "Colour mask").
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Used for typing/storing sequence direction.
     * This enum may be ignored if you are dealing with non-sequence
     * data types (such as text), or any type of data that either
     * doesn't have or doesn't need to distinguish direction.
     */
    public enum Direction {

        /** Direction of the sequence goes from 3' to 5'. */
        FROM3TO5("From 3' to 5'"),
        /** Direction of the sequence goes from 5' to 3'. */
        FROM5TO3("From 5' to 3'");

        /** The nicely formatted name returned by {@link #toString()}. */
        private final String label;

        Direction(String label) {
            this.label = label;
        }

        /**
         * Prints a nicely formatted string representation of this direction.
         *
         * @return "From 3' to 5'" or "From 5' to 3'".
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Used for typing/storing sequence topology.
     * This enum may be ignored if you are dealing with non-sequence
     * data types (such as text), or any type of data that either
     * doesn't have or doesn't need to distinguish topology.
     */
    public enum Topology {

        /** Linear topology. */
        LINEAR("Linear"),
        /** Circular topology. */
        CIRCULAR("Circular");

        /** The nicely formatted name returned by {@link #toString()}. */
        private final String label;

        Topology(String label) {
            this.label = label;
        }

        /**
         * Prints a nicely formatted string representation of this topology.
         *
         * @return "Linear" or "Circular".
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Used for typing/storing sequence strandedness.
     * This enum may be ignored if you are dealing with non-sequence
     * data types (such as text), or any type of data that either
     * doesn't have or doesn't need to distinguish strandedness.
     */
    public enum Strandedness {

        /** Single stranded sequence. */
        SINGLE("Single stranded"),
        /**
         * Double stranded sequence.
         * (noted by the original authors as being represented as 'D')
         */
        DOUBLE("Double stranded"),
        /**
         * Mixed stranded sequence.
         * (noted by the original authors as being represented as 'M')
         */
        MIXED("Mixed strandedness");

        /** The nicely formatted name returned by {@link #toString()}. */
        private final String label;

        Strandedness(String label) {
            this.label = label;
        }

        /**
         * Prints a nicely formatted string representation of this strandedness.
         *
         * @return "Single stranded", "Double stranded" or "Mixed strandedness".
         */
        @Override
        public String toString() {
            return label;
        }
    }

    /**
     * Used for typing characters within a sequence.
     * This is especially useful for intrinsic functions which rely upon detecting alignment gaps,
     * ambiguous characters and unambiguous characters (such as certain colour maps).
     */
    public enum CharacterClass {

        /** Alignment gap class. */
        ALIGNMENT,
        /** Ambiguous character class. */
        AMBIGUOUS,
        /** Unambiguous character class. */
        UNAMBIGUOUS
    }

    /**
     * Used for serialization purposes.
     */
    private static final long serialVersionUID = 7526472295622777024L;

    /**
     * Letters whose presence makes {@link #detectType()} classify a sequence as protein.
     */
    private static final String PROTEIN_ONLY_LETTERS = "FEJLOQXZ";

    /**
     * Stores the default values for each intrinsic Cell data key (read-only).
     */
    private static final Map<String, Object> DEFAULTS = buildDefaults();

    /**
     * This linked list stores all sequence listeners.
     * NOTE(review): the field is not transient, so registered listeners are
     * written out together with the cell when it is serialized - confirm that
     * this is intended and that listener implementations are serializable.
     */
    private final List<CellListener> listeners = new LinkedList<CellListener>();

    /**
     * Builds the table of default values for the intrinsic fields.
     *
     * @return an unmodifiable map from (lower case) field name to default value.
     */
    private static Map<String, Object> buildDefaults() {
        final Map<String, Object> values = new HashMap<String, Object>();

        values.put("modified", Boolean.FALSE);   // by default the sequence has not been modified
        values.put("name", "New Sequence");
        values.put("direction", Direction.FROM5TO3);
        values.put("strandedness", Strandedness.SINGLE);
        values.put("topology", Topology.LINEAR);
        values.put("type", Type.DNA);
        values.put("protect_align", Boolean.FALSE);
        values.put("protect_ambig", Boolean.TRUE);
        values.put("protect_unambig", Boolean.TRUE);
        return Collections.unmodifiableMap(values);
    }

    /**
     * Constructs a new Cell with an empty "sequence" field.
     */
    public Cell () {
        this("");
    }

    /**
     * Constructs a new Cell from a raw sequence string.
     * <p>
     *  An empty string switches off all three "protect_*" flags.  If the string
     *  is null no "sequence" field is stored at all.  The "type" field is
     *  detected from the sequence (when one is stored), and the cell always
     *  starts out with "modified" set to false.
     * </p>
     *
     * @param string the string to use for the "sequence" field in the sequence data.
     */
    public Cell (String string) {
        if (string != null) {
            if ("".equals(string)) {
                put("protect_align", Boolean.FALSE);
                put("protect_ambig", Boolean.FALSE);
                put("protect_unambig", Boolean.FALSE);
            }
            put("sequence", string);
        }
        detectType();
        // put() flags the cell as modified; a freshly built cell must not be
        super.put("modified", Boolean.FALSE);
    }

    /**
     * Constructs a new Cell from a map of field values.
     * <p>
     *  The cell is first built from the map's "sequence" entry (so that the type
     *  is detected), then every entry of the map is copied in, which lets an
     *  explicit "type" entry override the detected one.  A missing or null
     *  "sequence" entry is treated as an empty sequence.
     * </p>
     *
     * @param data the map of values to use for the sequence.
     * @throws NullPointerException if data is null or contains a null key.
     */
    public Cell (Map<String, Object> data) {
        this(sequenceOf(data));

        for (Map.Entry<String, Object> entry : data.entrySet()) {
            put(entry.getKey(), entry.getValue());
        }
        super.put("modified", Boolean.FALSE);
    }

    /**
     * Extracts the raw sequence text from a map of field values.
     *
     * @param data the map to read the "sequence" entry from.
     * @return the entry's string form, or an empty string if it is missing or null.
     */
    private static String sequenceOf (Map<String, Object> data) {
        final Object sequence = data.get("sequence");
        return (sequence != null ? sequence.toString() : "");
    }

//////////////////////////
//**********************//
//* SEQUENCE FUNCTIONS *//
//**********************//
//////////////////////////

    /**
     * Adds a sequence listener to keep track of sequence modifications.
     * Null listeners are ignored, because they could never be notified.
     *
     * @param listener the sequence listener to add.
     */
    public void addListener(CellListener listener) {
        if (listener != null) {
            listeners.add(listener);
        }
    }

    /**
     * Removes a sequence listener from keeping track of sequence modifications.
     *
     * @param listener the listener to remove.
     */
    public void removeListener(CellListener listener) {
        listeners.remove(listener);
    }

    /**
     * Obtains a value for a field of the sequence.
     * <p>
     *  Field names are case insensitive: the key is converted to a string and
     *  lower-cased (independently of the default locale) before the lookup.
     *  If the cell does not contain the field, the default value for the field
     *  is returned instead (see {@link #getDefault(String)}); a null key is
     *  handled like any other missing field.
     * </p>
     *
     * @param key the field name for the sequence data
     * @return the value of the field, or its default if the field is not stored.
     */
    @Override
    public Object get (Object key) {
        if (key == null) {
            // null keys are never stored (see put), so this is a missing field
            return getDefault(null);
        }

        final String skey = key.toString().toLowerCase(Locale.ROOT);
        return (containsKey(skey) ? super.get(skey) : getDefault(skey));
    }

    /**
     * Changes a value of a field in the sequence.
     * <p>
     *  The key is stored in lower case, a null value is replaced by the
     *  default value of the field, the "modified" field is set to true, and
     *  every registered listener is notified with the lower-cased key.
     *  Listeners may add or remove listeners while being notified; such
     *  changes take effect from the next modification onwards.
     * </p>
     * <p>
     *  NOTE: unlike the general Map contract, this method returns the value
     *  that was stored, not the value previously associated with the key.
     * </p>
     *
     * @param key the name of the field in the sequence.
     * @param value the new value of the field.
     * @return the value which was actually stored for the field.
     * @throws NullPointerException if key is null.
     */
    @Override
    public Object put (String key, Object value) {
        // ensure the key is not null
        if (key == null) {
            throw new NullPointerException("Cannot call Cell.put(null, " + value
                    + "): the key must not be null");
        }

        final String field = key.toLowerCase(Locale.ROOT);
        // ensure the value is not null
        final Object stored = (value != null ? value : getDefault(field));

        super.put("modified", Boolean.TRUE);
        super.put(field, stored);

        // iterate over a snapshot, so that a listener may unregister itself
        // from within its callback without breaking the iteration
        for (CellListener listener : listeners.toArray(new CellListener[listeners.size()])) {
            listener.sequenceChanged(this, field);
        }
        return stored;
    }

    /**
     * Inserts a string into a field within the sequence.
     * The column is clamped to the current text: a column at or before the
     * start prepends the string, a column at or past the end appends it.
     *
     * @param field the field to alter.
     * @param column the column number to do the insertion at.
     * @param string the text to insert.
     * @return true if the operation was successful, otherwise false
     *         (the current implementation always returns true).
     */
    public boolean insertField (final String field, final int column, final String string) {
        final String current = get(field).toString();
        // clamp the offset into [0, length] so one expression covers the
        // beginning, middle and end insertion cases
        final int offset = Math.max(0, Math.min(column, current.length()));

        put(field, current.substring(0, offset) + string + current.substring(offset));

        // always return true
        return true;
    }

    /**
     * This function obtains the default value of a field for a Cell.
     * This is done by accessing the defaults table; the lookup is case
     * insensitive.
     *
     * @param key the name of the field (a null key has no default).
     * @return the default value of the field, or a blank string if the field
     *         has no default.
     */
    public static Object getDefault (String key) {
        if (key == null) {
            return "";
        }

        final Object value = DEFAULTS.get(key.toLowerCase(Locale.ROOT));
        // the defaults table never maps a key to null, so null means "no default"
        return (value != null ? value : "");
    }

    /**
     * Creates a string representation of the Cell and its fields.
     * The result is a header followed by one tab-indented "key=value" line
     * for each field stored in the cell.
     *
     * @return the string representation of the sequence data.
     */
    @Override
    public String toString () {
        final StringBuilder result = new StringBuilder("--- SEQUENCE DATA ---");

        // iterate through each field in the sequence and print it
        for (String key : keySet()) {
            result.append('\t').append(key).append('=').append(get(key)).append('\n');
        }
        return result.toString();
    }

    /**
     * Clones the current sequence object.
     * The copy is made with the map constructor, so it holds the same field
     * values, starts with "modified" set to false and has no listeners.
     *
     * @return a cloned copy of the sequence.
     */
    @Override
    public Object clone () {
        // since Cell is a Map (as a child of HashMap), it can seed the map constructor
        return new Cell(this);
    }

    /**
     * This function detects the type of the stored sequence and sets the
     * "type" field appropriately.
     * <p>
     *  A sequence containing the letter U is classified as RNA; otherwise a
     *  sequence containing any of the letters F, E, J, L, O, Q, X or Z is
     *  classified as protein; anything else is classified as DNA.  The check
     *  is case insensitive.  Nothing happens if no "sequence" field is stored.
     * </p>
     */
    private void detectType() {
        if (!containsKey("sequence")) {
            return;
        }

        final String data = get("sequence").toString().toUpperCase(Locale.ROOT).trim();
        Cell.Type result = Cell.Type.DNA;

        if (data.indexOf('U') >= 0) {
            result = Cell.Type.RNA;
        } else if (containsAny(data, PROTEIN_ONLY_LETTERS)) {
            result = Cell.Type.PROTEIN;
        }
        put("type", result);
    }

    /**
     * Tests whether a text contains at least one character of a given set.
     *
     * @param text the text to search.
     * @param characters the characters to look for.
     * @return true if any of the characters occurs in the text.
     */
    private static boolean containsAny (String text, String characters) {
        for (int index = 0; index < characters.length(); index++) {
            if (text.indexOf(characters.charAt(index)) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Classifies a character according to the type of this sequence.
     * <p>
     *  For DNA and RNA cells the nucleotide tables are used, for protein cells
     *  the amino acid tables.  Characters which are in neither table, and all
     *  characters of cells of any other type, are classified as alignment gaps.
     * </p>
     *
     * @param test the character to classify.
     * @return the class of the character.
     */
    public CharacterClass charType (char test) {
        final Object type = get("type");

        if (Type.DNA.equals(type) || Type.RNA.equals(type)) {
            return nucleotideClass(test);
        }
        if (Type.PROTEIN.equals(type)) {
            return proteinClass(test);
        }
        return CharacterClass.ALIGNMENT;
    }

    /**
     * Classifies a character of a DNA or RNA sequence.
     *
     * @param test the character to classify.
     * @return the class of the character.
     */
    private static CharacterClass nucleotideClass (char test) {
        switch (test) {
            case 'R': case 'Y': case 'W': case 'S': case 'M': case 'K': case 'H':
            case 'B': case 'V': case 'I': case 'D': case 'N': case 'r': case 'y':
            case 'w': case 's': case 'm': case 'k': case 'h': case 'b': case 'v':
            case 'i': case 'd': case 'n':
                return CharacterClass.AMBIGUOUS;
            case 'A': case 'C': case 'G': case 'T': case 'U':
            case 'a': case 'c': case 'g': case 't': case 'u':
                return CharacterClass.UNAMBIGUOUS;
            default:
                return CharacterClass.ALIGNMENT;
        }
    }

    /**
     * Classifies a character of a protein sequence.
     *
     * @param test the character to classify.
     * @return the class of the character.
     */
    private static CharacterClass proteinClass (char test) {
        switch (test) {
            case 'B': case 'Z': case 'X': case '*': case 'b': case 'z': case 'x':
                return CharacterClass.AMBIGUOUS;
            case 'F': case 'M': case 'P': case 'Y': case 'N': case 'E': case 'R':
            case 'J': case 'L': case 'V': case 'T': case 'H': case 'K': case 'C':
            case 'G': case 'O': case 'I': case 'S': case 'A': case 'Q': case 'D':
            case 'W': case 'f': case 'm': case 'p': case 'y': case 'n': case 'e':
            case 'r': case 'j': case 'l': case 'v': case 't': case 'h': case 'k':
            case 'c': case 'g': case 'o': case 'i': case 's': case 'a': case 'q':
            case 'd': case 'w':
                return CharacterClass.UNAMBIGUOUS;
            default:
                return CharacterClass.ALIGNMENT;
        }
    }
}
