#!/usr/bin/python3
#
# Copyright (C) 2006--2010,2012,2014,2016,2017  Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


"""
Print things from LIGO LW XML files.  Inspired by lwtprint from LIGOTools.
"""


from optparse import OptionParser


from lal.utils.cache import CacheEntry
import ligolw
from ligolw import __author__, __date__, __version__
try:
	from ligo.segments import utils as segmentsUtils
except ImportError:
	# try Duncan and Leo's forked version
	from igwn_segments import utils as segmentsUtils


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	"""
	Parse the command line.

	Returns the tuple (options, urls).  options is the object returned
	by OptionParser.parse_args() after the following post-processing:

	- options.table and options.array, when not empty, have had each
	  name passed through ligolw.Table.TableName and
	  ligolw.Array.ArrayName respectively;
	- options.rows has been converted from the rowspec string to the
	  object returned by segmentsUtils.from_range_strings().

	urls is the list of positional arguments followed by the URLs
	found in the LAL cache files named with --input-cache.  If that
	list is empty then [None] is returned in its place so that the
	caller still iterates once (the usage text documents this case as
	reading from stdin).

	Raises ValueError if the rowspec cannot be parsed.
	"""
	parser = OptionParser(
		version = "Name: %%prog\n%s" % __version__,
		usage = "%prog [options] [url ...]",
		description = "Prints the contents of table elements from one or more LIGO Light Weight XML files to stdout in delimited ASCII format.  In addition to regular files, the program can read from many common URLs such as http:// and ftp://.  Gzipped files will be automatically detected and decompressed.  If no filenames or URLs are given, then input is read from stdin."
	)
	parser.add_option("-i", "--input-cache", metavar = "name", action = "append", default = [], help = "Get URLs from the LAL cache file.  Can be provided multiple times to name several caches to iterate over.")
	parser.add_option("-c", "--column", metavar = "name", action = "append", help = "Print only the contents of the given column.  Can be provided multiple times to print multiple columns.  The default is to print all columns from each table.")
	parser.add_option("-d", "--delimiter", metavar = "string", default = ",", help = "Delimit output with the given string.  The default is \",\".")
	parser.add_option("-r", "--rows", metavar = "rowspec", default = ":", help = "Print rows in the given range(s).  The format is first:last[,first:last,...].  Rows are numbered from 0.  A single first:last pair requests rows in the range [first, last).  If the first or last value of a pair is omited it means 0 or infinity respectively.  The default is \":\", or to print all rows.")
	parser.add_option("-t", "--table", metavar = "name", action = "append", default = [], help = "Print rows from this table.  Can be provided multiple times to print rows from multiple tables.  The default is to print the contents of all tables.")
	parser.add_option("-a", "--array", metavar = "name", action = "append", default = [], help = "Print the contents of this array.  Can be provided multiple times to print the elements from multiple arrays.  The default is to print the contents of all arrays.")
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	parser.add_option("--constrain-lsc-tables", action = "store_true", help = "Impose additional constraints on official LSC tables.  Provides format validation and allows RAM requirements to be reduced.")
	options, urls = parser.parse_args()

	# add urls from cache files.  each cache is read inside a with
	# block so that its file handle is closed as soon as the last line
	# has been consumed instead of being left for the garbage collector
	for cache in options.input_cache:
		with open(cache) as cachefile:
			urls.extend([CacheEntry(line).url for line in cachefile])

	# strip table names
	if options.table:
		options.table = list(map(ligolw.Table.TableName, options.table))

	if options.array:
		options.array = list(map(ligolw.Array.ArrayName, options.array))

	# turn row requests into a segment list.  the original exception is
	# chained so that the traceback shows what was wrong with the
	# rowspec
	try:
		options.rows = segmentsUtils.from_range_strings(options.rows.split(","))
	except ValueError as e:
		raise ValueError("invalid rowspec: %s" % str(e)) from e

	# success.  [None] stands in for "no URLs given"
	return options, (urls or [None])


#
# =============================================================================
#
#                         How to find things to print
#
# =============================================================================
#


#
# How to print a table
#


def print_table(table_elem, columns, rows):
	"""
	Print selected rows of a table to stdout, one line per row.

	table_elem is the table element to print.  columns is a sequence
	of column names to print, in order;  if it is empty or None then
	all of the table's columns (table_elem.columnnames) are printed.
	rows is a container of row indexes:  a row is printed only if its
	0-based position in the table is "in" rows.

	Values are converted to text with the ligolw.types.FormatFunc
	entry for their column's type, None is printed as an empty field,
	and fields are joined with the module-level options.delimiter.
	"""
	if not columns:
		columns = table_elem.columnnames

	# one formatting function per requested column, looked up once
	formatters = tuple(ligolw.types.FormatFunc[table_elem.getColumnByName(name).Type] for name in columns)

	for index, row in enumerate(table_elem):
		# skip rows that were not asked for
		if index not in rows:
			continue
		fields = []
		for formatter, name in zip(formatters, columns):
			value = getattr(row, name)
			fields.append("" if value is None else formatter(value))
		print(options.delimiter.join(fields))


#
# How to print an array
#


def print_array(array_elem):
	"""
	Print the contents of an array element to stdout.

	The data are taken from array_elem.array.  If it has one
	dimension, the repr() of each element is printed on a line of its
	own.  If it has two dimensions, it is transposed and each row of
	the result is printed as one line, with the repr() of the
	elements joined by the module-level options.delimiter.

	Raises ValueError for any other number of dimensions.
	"""
	data = array_elem.array
	ndims = len(data.shape)

	# only 1- and 2-dimensional arrays can be printed
	if ndims not in (1, 2):
		raise ValueError("array has more than 2 dimensions")

	if ndims == 1:
		# one element per line
		for element in data:
			print(repr(element))
		return

	# two dimensions:  one line per row of the transposed array
	transposed = data.transpose()
	for line in transposed[:]:
		print(options.delimiter.join(map(repr, line)))


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


# options and urls are module-level names:  print_table() and
# print_array() read options.delimiter from here.
options, urls = parse_command_line()

# --verbose was given on the command line
if options.verbose:
	ligolw.set_verbose(True, application = "ligolw_print")


#
# Print string-valued cells unquoted:  replace the formatting function of
# every string type with plain str().
#


ligolw.types.FormatFunc.update(dict.fromkeys(ligolw.types.StringTypes, str))


#
# Enable appropriate level of table parsing.  If specific table names have
# been asked for, don't parse other tables so as to improve parsing speed
# and reduce memory requirements.  Because we do this, we can assume later
# that we should print all the tables that can be found in the document.
#


if options.table or options.array:
	class ContentHandler(ligolw.PartialContentHandler):
		"""
		Content handler that only loads the Table and Array
		elements whose names were requested with --table or
		--array.
		"""
		def __init__(self, xmldoc):
			def wanted(name, attrs):
				# only Table and Array elements are candidates
				if name not in (ligolw.Table.tagName, ligolw.Array.tagName):
					return False
				# keep the element if its name matches a
				# requested table name or a requested array
				# name
				return ligolw.Table.TableName(attrs["Name"]) in options.table or ligolw.Array.ArrayName(attrs["Name"]) in options.array
			super().__init__(xmldoc, wanted)
else:
	# nothing specific was requested:  parse the entire document
	ContentHandler = ligolw.ContentHandler

# unless --constrain-lsc-tables was given, empty the table-name registry
# (presumably so that no table-specific constraints are applied while
# parsing --- confirm against the ligolw documentation)
if not options.constrain_lsc_tables:
	ligolw.Table.TableByName.clear()


#
# When --column was used, restrict the columns that get loaded to the
# requested ones.
#


if options.column is not None:
	ligolw.Table.loadcolumns = set(options.column)


#
# Loop over input URLs
#


for url in urls:
	xmldoc = ligolw.utils.load_url(url, contenthandler = ContentHandler)
	# .getElements() called without an argument returns every element
	# in the tree.  the first entry is the Document object itself (the
	# root of the tree), so it is always skipped.
	for elem in xmldoc.getElements()[1:]:
		tag = elem.tagName
		if tag == ligolw.Array.tagName:
			print_array(elem)
		elif tag == ligolw.Table.tagName:
			print_table(elem, options.column, options.rows)
	# done with this document
	xmldoc.unlink()
