/*
 * FastAFile.java
 *
 * Created on January 30, 2008, 11:58 AM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

import java.util.Map;
import java.util.regex.Pattern;
import org.biolegato.core.data.sequence.Sequence;
import org.biolegato.core.plugintypes.DataFormat;
import java.io.File;
import java.io.Reader;
import java.util.HashMap;
import java.util.LinkedList;

/**
 * Data format plugin that reads and writes sequences in the FastA file format.
 *
 * A record consists of a header line starting with ">" (name, optionally followed
 * by a space and a description) and one or more lines of sequence data.
 *
 * @author Graham Alvare
 * @author Brian Fristensky
 */
public class FastAFile extends DataFormat {

    /**
     * Matches every character that is NOT kept as sequence data
     * (only letters, '*' and '-' are kept).  Compiled once instead of per line.
     */
    private static final Pattern NON_RESIDUE = Pattern.compile("[^A-Za-z\\*\\-]");

    /**
     * Creates a new instance of FastAFile
     */
    public FastAFile () {
    }

    /**
     * Translates a sequence into the FastA file format.
     *
     * The output is a header line (">" + name, then a space and the description
     * when a non-empty description is present) followed by the sequence data on
     * its own line.  The space separator matters: translateFrom splits the header
     * into name and description at the first space.
     *
     * @param seq the sequence to translate
     * @return the resulting string (empty if seq is null)
     */
    public String translateTo(Sequence seq) {
        StringBuilder result = new StringBuilder();

        // translate the sequence
        if (seq != null) {
            Object description = seq.getField("description");

            result.append(">").append(seq.getField("name"));
            if (description != null && !"".equals(description)) {
                // separate name and description so the header can be parsed back
                result.append(' ').append(description);
            }
            result.append("\n").append(seq.getField("sequence")).append("\n");
        }
        return result.toString();
    }

    /**
     * Translates data in the FastA file format to sequence objects.
     *
     * Lines starting with "#" or ";" are skipped.  Every other non-header line
     * is stripped of all characters except letters, '*' and '-' and appended to
     * the current record.  Lines that contribute no sequence data before any
     * record has started (e.g. leading blank lines) are ignored, so they do not
     * produce an empty unnamed sequence.
     *
     * @param data the buffered reader to parse
     * @return the translated sequences
     * @throws java.io.IOException if reading from data fails
     */
    public Sequence[] translateFrom (java.io.BufferedReader data) throws
            java.io.IOException {
        String line;
        LinkedList<Sequence> result = new LinkedList<Sequence>();
        Map<String, Object> fields = new HashMap<String, Object>();
        // null until the first sequence line of the current record is seen
        StringBuilder residues = null;

        while ((line = data.readLine()) != null) {
            line = line.trim();
            if (line.startsWith(">")) {
                // a new header terminates the record in progress (if any)
                if (!fields.isEmpty() || residues != null) {
                    result.add(buildSequence(fields, residues));
                    // A fresh map is used for each record: it is not visible here
                    // whether Sequence copies the map it is given, so the map
                    // handed over must not be cleared or reused afterwards.
                    fields = new HashMap<String, Object>();
                    residues = null;
                }
                parseHeader(line, fields);
            } else if (!line.startsWith("#") && !line.startsWith(";")) {
                String cleaned = NON_RESIDUE.matcher(line).replaceAll("");

                if (residues == null) {
                    if (fields.isEmpty() && cleaned.length() == 0) {
                        // nothing started yet and nothing to add: skip the line
                        continue;
                    }
                    residues = new StringBuilder();
                }
                residues.append(cleaned);
            }
        }
        if (!fields.isEmpty() || residues != null) {
            result.add(buildSequence(fields, residues));
        }
        return result.toArray(new Sequence[0]);
    }

    /**
     * Parses a FastA header line into the "type", "description" and "name" fields.
     *
     * @param line   the trimmed header line (starting with ">")
     * @param fields the field map of the record being built
     */
    private void parseHeader(String line, Map<String, Object> fields) {
        // drop the ">" and any whitespace between it and the name
        String name = line.substring(1).trim();
        String description = "";
        int space = name.indexOf(' ');

        // locate a description (if applicable)
        if (space > 0) {
            description = name.substring(space + 1);
            name = name.substring(0, space);
        }

        // remove all GenBank | fields except GI number if applicable
        // (keeps only the text between the first and second '|')
        int bar = name.indexOf('|');
        if (bar >= 0) {
            name = name.substring(bar + 1);
            bar = name.indexOf('|');
            if (bar >= 0) {
                name = name.substring(0, bar);
            }
        }

        // insert data into sequence object (default type is DNA)
        fields.put("type", Sequence.Type.DNA);
        fields.put("description", description);
        fields.put("name", name);
    }

    /**
     * Finishes a record: stores the accumulated sequence data (if any sequence
     * line was seen), detects the sequence type and creates the Sequence object.
     *
     * @param fields   the fields collected for the record
     * @param residues the accumulated sequence data, or null if the record had no sequence lines
     * @return the sequence object created from the fields
     */
    private Sequence buildSequence(Map<String, Object> fields, StringBuilder residues) {
        if (residues != null) {
            fields.put("sequence", residues.toString());
        }
        detectType(fields);
        return new Sequence(fields);
    }

    /**
     * Determines whether or not a specified file is of type FastA file (based on extension).
     * Currently the only extensions supported are ".wrp", ".fasta", and ".fsa".
     * Directories are always accepted.
     *
     * @param file the file to test
     * @return true if the file is of type FastA file (otherwise false)
     * @see javax.swing.filechooser.FileFilter#accept
     */
    public boolean accept (File file) {
        if (file.isDirectory()) {
            return true;
        }
        String path = file.getAbsolutePath().toLowerCase();
        return (path.endsWith(".wrp") ||
                path.endsWith(".fasta") ||
                path.endsWith(".fsa"));
    }

    /**
     * Obtains the internal name of the file format
     * (this is great for hashtables storage and searching).
     *
     * @return the name of the file format
     */
    @Override
    public String getName () {
        return "fasta";
    }

    /**
     * Returns a description of the file format that can be displayed to the user.
     *
     * @return the string description of the file format
     * @see javax.swing.filechooser.FileFilter#getDescription
     */
    public String getDescription () {
        return "FastA file (*.wrp,*.fasta,*.fsa)";
    }

    /**
     * Used to auto-detect Bio Legato formats.
     *
     * Skips leading spaces, tabs and line breaks and reports whether the first
     * other character is ">".  The reader is marked before each read and reset
     * afterwards, so that character is left unread.  If the reader fails (for
     * example because it does not support mark/reset) the error is printed and
     * the result is based on the last character read.
     *
     * @param test the reader to parse data from
     * @return whether or not the format is correct (false if test is null)
     */
    @Override
    public boolean isFormat (Reader test) {
        int check = ' ';

        if (test == null) {
            return false;
        }
        try {
            while (check == ' ' || check == '\t' || check == '\n' || check == '\r') {
                test.mark(2);
                check = test.read();
            }
            test.reset();
        } catch (java.io.IOException e) {
            // best effort: detection must not abort the caller
            e.printStackTrace();
        }
        return (check == '>');
    }

    /**
     * This function detects the type of a sequence and sets the type field for that sequence appropriately.
     *
     * The sequence data is classified as RNA if it contains a 'U', otherwise as
     * protein if it contains any of F, E, J, L, O, Q, X or Z, otherwise as DNA.
     * Nothing is changed if the map is null or has no "sequence" entry.
     *
     * NOTE(review): because 'U' is tested first, data containing both 'U' and
     * protein-only letters is classified as RNA - confirm this is intended.
     *
     * @param seq the field map of the sequence to set the type for.
     */
    private void detectType(Map<String, Object> seq) {
        String data;
        Sequence.Type result = Sequence.Type.DNA;

        if (seq != null && seq.containsKey("sequence")) {
            data = seq.get("sequence").toString().toUpperCase().trim();
            if (data.indexOf('U') >= 0) {
                result = Sequence.Type.RNA;
            } else if (data.indexOf('F') >= 0 || data.indexOf('E') >= 0 || data.indexOf('J') >= 0
                    || data.indexOf('L') >= 0 || data.indexOf('O') >= 0 || data.indexOf('Q') >= 0
                    || data.indexOf('X') >= 0 || data.indexOf('Z') >= 0) {
                result = Sequence.Type.PROTEIN;
            }
            seq.put("type", result);
        }
    }

}
