#!/usr/bin/env python
# 
# Copyright 2009 Mark Fiers, Plant & Food Research
# 
# This file is part of Moa - http://github.com/mfiers/Moa
# 
# Moa is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your
# option) any later version.
# 
# Moa is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
# License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with Moa.  If not, see <http://www.gnu.org/licenses/>.
# 
import os,sys,re,copy

def reader(f):
    """Yield every line of a text file with surrounding whitespace removed.

    f -- path of the file to read.

    Each line is yielded after str.strip(), so the trailing newline and any
    leading/trailing blanks are gone; a blank line is yielded as "".

    The file is opened inside a ``with`` block so that it is closed not only
    when the last line has been read, but also when the consumer stops
    iterating early (for example because it raised an exception) and the
    generator is closed or garbage collected.
    """
    with open(f) as F:
        for line in F:
            yield line.strip()

# Pattern for a single GFF feature line whose ID and Name attributes have the
# form <idhead>__<seqid>__<targetid>__<idrest> and which carries a
# "Target=Sequence:<tseq> <tstart> <tstop>" attribute.  The fragments below
# are joined, in order, into one regular expression.
gffline = re.compile(''.join([
    # Case-insensitive matching, anchored at the start of the line.
    r'(?i)^',
    # The eight whitespace separated columns in front of the attributes.
    r'(?P<seqid>\S+)\s+(?P<source>\S+)\s+(?P<type>\S+)\s+',
    r'(?P<start>\S+)\s+(?P<end>\S+)\s+',
    r'(?P<score>\S+)\s+(?P<strand>\S+)\s+(?P<phase>\S+)\s+',
    # ID attribute; its second part has to repeat the seqid column.
    r'ID=(?P<idhead>.*)__(?P=seqid)__(?P<targetid>.*?)__',
    r'(?P<idrest>.*?[^;]+);',
    # Name attribute, matched through backreferences to the parts of the ID.
    r'Name=(?P=idhead)__(?P=seqid)__(?P=targetid)__(?P=idrest);',
    # Attribute text up to and including the "Target=Sequence:" marker ...
    r'(?P<attrib1>.*Target=Sequence:)',
    # ... followed by the target sequence name and its two coordinates ...
    r'(?P<tseq>\S+) (?P<tstart>\d+) (?P<tstop>\d+)',
    # ... and whatever is left until the end of the line.
    r'(?P<attrib2>.*)$',
]))

# Main conversion: for every feature line of the GFF file named by the first
# command line argument, exchange the reference sequence (column 1 plus the
# start/end columns) with the sequence and coordinates of the Target
# attribute, and write the result to stdout.
for l in reader(sys.argv[1]):
    # Comment/pragma lines are echoed unchanged.  Blank lines are echoed as
    # well: reader() strips every line, so a blank line arrives as "" and
    # indexing l[0] on it would raise an IndexError.
    if not l or l.startswith("#"):
        # A single parenthesised argument prints identically whether print
        # is a statement (Python 2) or a function (Python 3).
        print(l)
        continue
    m = gffline.search(l)

    if not m:
        # Show the offending line on stderr before aborting.
        sys.stderr.write("\n\n%s\n\n" % l)
        raise Exception("Invalid line?? I cannot make here cheese from")

    # groupdict() builds a new dict on every call, so no copy is required.
    d = m.groupdict()

    # Same ID layout as in the input, with seqid and targetid trading places.
    newId = "__".join((d['idhead'], d['targetid'], d['seqid'], d['idrest']))

    # The three pieces are formatted separately so that newId is only ever
    # inserted as a value.  Splicing it into the template before applying
    # "% d" would break on IDs containing '%' (e.g. percent-encoded
    # characters such as %3B).
    print(("%(tseq)s\t"
           "%(source)s\t"
           "%(type)s\t"
           "%(tstart)s\t"
           "%(tstop)s\t"
           "%(score)s\t"
           "%(strand)s\t"
           "%(phase)s\t" % d) +
          "ID=%s;Name=%s;" % (newId, newId) +
          ("%(attrib1)s"
           "%(seqid)s "
           "%(start)s "
           "%(end)s"
           "%(attrib2)s" % d))





