package org.biolegato.gdesupport.files;
/*
 * FastAFile.java
 *
 * Created on January 30, 2008, 11:58 AM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

import java.io.File;
import java.io.IOException;
import java.io.Reader;
import org.biolegato.gdesupport.data.Seq;
import org.biolegato.gdesupport.data.Dataset;

/**
 * FastA file format parser.
 *
 * NOTE: this parser automatically detects whether each sequence is protein, RNA or DNA.
 *
 * @author Graham Alvare
 * @author Brian Fristensky
 */
public class FastAFile extends DataFormat {

    /**
     * Matches every character that is NOT kept in sequence data, i.e. anything
     * other than an ASCII letter, '*' or '-'.  Compiled once because it is
     * applied to every sequence line that is read.
     */
    private static final java.util.regex.Pattern NON_SEQUENCE_CHARS =
            java.util.regex.Pattern.compile("[^A-Za-z\\*\\-]");

    /**
     * Creates a new instance of FastAFile
     */
    public FastAFile() {
    }

    /**
     * Translates a sequence into the FastA file format.
     *
     * Writes a header line (">" followed by the sequence name) and then one
     * line of sequence data.  Nothing is written if <code>seq</code> is null.
     * The description is not written.
     *
     * NOTE(review): <code>offset</code> and <code>length</code> are passed
     * unchanged as the start and end arguments of
     * {@link Appendable#append(CharSequence, int, int)}, so
     * <code>length</code> is effectively an exclusive end index rather than
     * a character count.  Confirm against the callers before changing this.
     *
     * @param result the destination for the translated sequence
     * @param seq the sequence to translate
     * @param offset the index of the first sequence character to write
     * @param length the end index (exclusive) of the sequence characters to write
     * @throws IOException any exceptions raised by the destination are passed on
     */
    public void translateTo(Appendable result, Seq seq, int offset, int length) throws IOException {
        if (seq != null) {
            result.append(">").append(seq.getName());
            result.append("\n").append(seq.getSequence(), offset, length).append("\n");
        }
    }

    /**
     * Translates data in the FastA file format to sequence objects.
     *
     * Every line starting with ">" begins a new sequence; the lines that
     * follow it (except those starting with "#" or ";") are stripped of all
     * characters other than letters, '*' and '-' and concatenated to form
     * the sequence.  Each completed sequence is added to the dataset at
     * consecutive positions starting from the dataset's current size.
     * Lines that appear before the first header are ignored.
     *
     * @param datamodel the location to store the translated sequences
     * @param data the buffered reader to parse
     * @throws IOException any exceptions that occur while reading the stream are passed
     */
    public void translateFrom(Dataset datamodel, java.io.BufferedReader data) throws
            IOException {
        int y = datamodel.getSize();
        String name = "";
        String line;
        StringBuffer sequencebuffer = null;

        while ((line = data.readLine()) != null) {
            line = line.trim();
            if (line.startsWith(">")) {
                // a new header terminates the sequence collected so far
                if (sequencebuffer != null) {
                    datamodel.addSequence(y, new Seq(Seq.detectType(sequencebuffer), name, sequencebuffer));
                    y++;
                }
                name = parseName(line.substring(1));
                sequencebuffer = new StringBuffer();
            } else if (sequencebuffer != null && !line.startsWith("#") && !line.startsWith(";")) {
                // sequencebuffer is null until the first header has been seen;
                // anything before that point (e.g. leading blank lines) is skipped
                // instead of triggering a NullPointerException
                sequencebuffer.append(NON_SEQUENCE_CHARS.matcher(line).replaceAll(""));
            }
        }

        // store the last sequence of the stream (if any header was found)
        if (sequencebuffer != null) {
            datamodel.addSequence(y, new Seq(Seq.detectType(sequencebuffer), name, sequencebuffer));
        }
    }

    /**
     * Extracts the sequence name from the text of a FastA header line.
     *
     * The name is the text up to the first space (the remainder is the
     * description, which is discarded).  If the name contains '|' separated
     * fields, only the first two fields are kept.
     *
     * @param header the header line without its leading '>'
     * @return the sequence name
     */
    private static String parseName(String header) {
        // tolerate whitespace between '>' and the name
        String name = header.trim();

        // drop the description (if applicable)
        int space = name.indexOf(' ');
        if (space > 0) {
            name = name.substring(0, space);
        }

        // remove all GenBank | fields except GI number if applicable
        int firstBar = name.indexOf('|');
        if (firstBar >= 0) {
            int secondBar = name.indexOf('|', firstBar + 1);
            if (secondBar >= 0) {
                name = name.substring(0, secondBar);
            }
        }
        return name;
    }

    /**
     * Determines whether a specified file is of type FastA file (based on extension).
     * Currently the only extensions supported are ".wrp", ".fasta", and ".fsa".
     * Directories are always accepted.
     *
     * @param file the file to test
     * @return true if the file is of type FastA file (otherwise false)
     * @see javax.swing.filechooser.FileFilter#accept
     */
    public boolean accept(File file) {
        if (file.isDirectory()) {
            return true;
        }
        // lower-case once, independently of the default locale
        String path = file.getAbsolutePath().toLowerCase(java.util.Locale.ROOT);
        return path.endsWith(".wrp")
                || path.endsWith(".fasta")
                || path.endsWith(".fsa");
    }

    /**
     * Returns a description of the file format that can be displayed to the user.
     *
     * @return the string description of the file format
     * @see javax.swing.filechooser.FileFilter#getDescription
     */
    public String getDescription() {
        return "FastA file (*.wrp,*.fasta,*.fsa)";
    }

    /**
     * Used to auto-detect Bio Legato formats.
     *
     * Skips leading spaces, tabs and line breaks and reports whether the
     * first remaining character is '>'.  The reader is reset to the mark
     * placed just before that character.  If the reader is null or fails
     * (for example because it does not support mark/reset), the stack trace
     * is printed and the result is based on the last character that was
     * read successfully.
     *
     * @param test the reader to parse data from
     * @return whether the format is correct
     */
    @Override
    public boolean isFormat(Reader test) {
        int check = ' ';
        if (test == null) {
            return false;
        }
        try {
            while (check == ' ' || check == '\t' || check == '\n' || check
                    == '\r') {
                test.mark(2);
                check = test.read();
            }
            test.reset();
        } catch (Exception e) {
            // best-effort detection: report the failure but do not propagate it;
            // JVM Errors are deliberately not caught
            e.printStackTrace();
        }
        return (check == '>');
    }
}
