#!/usr/bin/env python
# 
# Copyright 2009 Mark Fiers, Plant & Food Research
# 
# This file is part of Moa - http://github.com/mfiers/Moa
# 
# Moa is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your
# option) any later version.
# 
# Moa is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
# License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with Moa.  If not, see <http://www.gnu.org/licenses/>.
# 
""" 

A more or less advanced version of the GNU join command. It handles more
operators, but is less advanced when it comes to input and output.

Usage:  lJoin LIST1 OPERATOR LIST2 > OUTPUT
or:     lJoin OPERATOR LIST2 < LIST1 > OUTPUT

This allows chaining of lJoin:

    lJoin LIST1 AND LIST2 | lJoin OR LIST2     

Input:

Two Lists: the field on which the join is performed is *always* the first
    word on the line (delimited by either a tab or space). The rest of the line
    is ignored. 

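Operator (case-insensitive): OR / UNION / |, AND / INTERSECT / INTERSECTION / &,
    DIFFERENCE / -. Quote | and & so the shell does not interpret them.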

Output: The output is again a list of only the fields that were joined.
"""

import os
import sys
import optparse
import logging

USAGE = "lJoin list1 operator list2"

parser = optparse.OptionParser(usage=USAGE)
parser.set_defaults(verbose=False)
parser.add_option('-v', dest='verbose', help='verbose',
                  action='store_true')

# Short alias for the logging module, used for all diagnostics below.
l = logging

# Each entry: (accepted spellings, symbol shown in debug output, set method).
# Spellings are compared against the lower-cased operator argument.
OPERATIONS = (
    (('union', 'or', '|'), '|', set.union),
    (('intersect', 'intersection', 'and', '&'), '&', set.intersection),
    (('difference', '-'), '-', set.difference),
)


def read_keys(lines):
    """Return the set of first whitespace-delimited words of *lines*.

    *lines* is any iterable of strings (an open file, a list, ...).
    Blank and whitespace-only lines carry no key and are skipped rather
    than raising IndexError.
    """
    keys = set()
    for line in lines:
        fields = line.split()
        if fields:
            keys.add(fields[0])
    return keys


def read_keys_from_file(path):
    """Open *path*, return its keys (see read_keys) and close the file."""
    with open(path) as handle:
        return read_keys(handle)


def find_operation(operator):
    """Look up the set operation named by *operator* (case-insensitive).

    Returns a ``(symbol, function)`` tuple, where ``function(a, b)``
    combines two sets and ``symbol`` is the one-character form used in
    debug output, or None when *operator* is not recognised.
    """
    wanted = operator.lower()
    for names, symbol, function in OPERATIONS:
        if wanted in names:
            return symbol, function
    return None


def main(argv=None):
    """Run lJoin on *argv* (defaults to the process command line).

    Accepts either ``LIST1 OPERATOR LIST2`` or ``OPERATOR LIST2`` with the
    first list read from stdin. Prints one resulting key per line. Exits
    through ``parser.error`` (usage message, status 2) on a wrong number
    of arguments or an unknown operator.
    """
    (options, args) = parser.parse_args(argv)

    if options.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    logging.basicConfig(
        level=loglevel,
        format="#%(message)s")

    if len(args) == 3:
        list1, operator, list2 = args
        from_stdin = False
    elif len(args) == 2:
        operator, list2 = args
        list1 = 'stdin'
        from_stdin = True
    else:
        parser.error("expected 2 or 3 arguments, got %d" % len(args))

    # Validate the operator before touching any input so a typo fails fast
    # instead of surfacing as a NameError after both lists were read.
    operation = find_operation(operator)
    if operation is None:
        parser.error("unknown operator: %s" % operator)
    symbol, combine = operation

    if from_stdin:
        ls1 = read_keys(sys.stdin)
        l.debug('opened stdin: %s with %d items' % (list1, len(ls1)))
    else:
        ls1 = read_keys_from_file(list1)
        l.debug('opened 1: %s with %d items' % (list1, len(ls1)))

    ls2 = read_keys_from_file(list2)
    l.debug('opened 2: %s with %d items' % (list2, len(ls2)))

    l.debug("Performing %s %s %s" % (list1, symbol, list2))
    result = combine(ls1, ls2)

    l.debug("Result: %d items" % len(result))

    for r in result:
        # Single-argument call form prints identically on Python 2 and 3.
        print(r)


if __name__ == '__main__':
    main()

