#!/usr/bin/env python
# 
# Copyright 2009 Mark Fiers, Plant & Food Research
# 
# This file is part of Moa - http://github.com/mfiers/Moa
# 
# Moa is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your
# option) any later version.
# 
# Moa is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
# License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with Moa.  If not, see <http://www.gnu.org/licenses/>.
# 
"""
Splits a multifasta file.

Ideally this would be done with seqretsplit from EMBOSS, but that
results in a discrepancy between the fasta header and the filename.

"""

import os
import sys
import time
import copy
import optparse

def fastareader(f):
    """Iterate over the records of a (multi)fasta file.

    f is either a path (str), which is opened for reading, or an already
    opened file-like object providing readline() and close().

    Yields (name, head, seq) tuples:
      name -- first whitespace-delimited word of the header line; when it
              starts with 'gi' and contains a '|' it is rewritten to
              'GI' + the second '|'-separated field
      head -- remainder of the header line ('' if there is none); for
              rewritten 'gi' headers the original identifier is prepended
      seq  -- the sequence with all whitespace removed, lowercased

    Records without a name or without any sequence are skipped.

    The handle is closed once the input has been read completely. A file
    opened here from a path is also closed when iteration stops early or
    fails; a handle supplied by the caller is left open in that case.
    """
    opened_here = isinstance(f, str)
    if opened_here:
        F = open(f)
    else:
        # assume f is a file handle
        F = f

    name, head, seq = "", "", []
    finished = False
    try:
        while True:
            l = F.readline()
            if not l:
                break

            l = l.strip()
            if not l:
                continue

            if l[0] == '>':
                if name and seq:
                    yield name, head, "".join(seq)
                seq = []
                ls = l[1:].split(None, 1)
                # Reset both fields for every header so that a record
                # without a description does not inherit the previous
                # one, and a bare '>' line does not raise.
                name = ls[0] if ls else ""
                head = ls[1] if len(ls) > 1 else ""
                # Only rewrite identifiers that really have a second
                # '|'-separated field; other names that merely start
                # with 'gi' are kept as they are.
                if name[:2] == 'gi' and '|' in name:
                    head = name + " " + head
                    name = "GI" + name.split("|", 2)[1]
            else:
                seq.append("".join(l.split()).lower())

        if name and seq:
            yield name, head, "".join(seq)
        finished = True
    finally:
        # Runs on exhaustion, on errors and when the generator is closed
        # early (e.g. the consumer breaks out of its loop).
        if opened_here or finished:
            F.close()

# Command line interface: read a multifasta file (or stdin) and write every
# sequence to its own '<name>.fasta' file in the output directory.
parser = optparse.OptionParser()

parser.set_defaults(out='./fasta')
parser.add_option('-o', '--out', dest='out',
    help = 'output directory to write to')

parser.set_defaults(fasta='-')
parser.add_option('-f', '--fasta', dest='fasta',
    help = 'fasta file to extract from')

# type='int' lets optparse validate the value; left as a string the limit
# could never be compared meaningfully with the sequence counter below.
parser.add_option('-n', '--no', dest='no', type='int',
    help = 'number of fasta sequences to split')

(options, args) = parser.parse_args()

# '-' (the default) means: read the fasta data from stdin.
if options.fasta == '-':
    infileorhandle = sys.stdin
else:
    infileorhandle = options.fasta

# Make sure the output directory exists instead of failing on the first
# write; an already existing directory is fine.
try:
    os.makedirs(options.out)
except OSError:
    if not os.path.isdir(options.out):
        raise

i = 0
for name, head, seq in fastareader(infileorhandle):
    i += 1
    outfile = os.path.join(options.out, name + '.fasta')
    with open(outfile, 'w') as F:
        F.write(">%s %s\n%s\n" % (name, head, seq))
    # Stop once the requested number of sequences has been written; without
    # -n (or with -n 0) all sequences are written.
    if options.no and i >= options.no:
        break


