#!/usr/bin/env python

import pandas as pd
import argparse
from pathlib import Path
import sys

from yamld.write import write_dataframe


def df2yaml(df, inpath, outpath, is_mini, stdout=False):
    """Write a DataFrame as YAML to stdout or to a file.

    The frame is handed to ``write_dataframe`` under the name ``'data'``.

    Args:
        df: DataFrame to write.
        inpath: Path of the file the data was read from; only used to derive
            a default output path when ``outpath`` is falsy.
        outpath: Destination file path. When falsy, the output is written
            next to ``inpath`` with the suffix replaced by ``.yaml``.
        is_mini: Forwarded to ``write_dataframe`` as ``is_mini``.
        stdout: When true, write to ``sys.stdout`` and ignore both paths.
    """
    if stdout:
        write_dataframe(sys.stdout, df, name='data', is_mini=is_mini)
        return

    if outpath:
        outpath = Path(outpath)
    else:
        # Derive the default from the input name with a YAML suffix.  Using
        # '.csv' here would overwrite the input file whenever it is a .csv.
        outpath = Path(inpath).with_suffix('.yaml')

    with open(outpath, 'w') as f:
        write_dataframe(f, df, name='data', is_mini=is_mini)


def parse_arguments(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``file_path``,
        ``output_file``, ``separator``, ``table``, ``stdout``, ``mini``,
        ``nrows``, ``skiprows``, ``columns``, ``header``, ``comment`` and
        ``encoding``.
    """
    parser = argparse.ArgumentParser(description='Read CSV file using Pandas read_csv() function.')

    parser.add_argument('file_path', type=str, help='Path to the CSV file')

    # Optional arguments
    parser.add_argument('-o', '--output_file', type=str, default=None,
                        help='Output YAML file name')
    parser.add_argument('-s', '--separator', type=str, default=',',
                        help='Separator used in the CSV file (default is comma)')

    parser.add_argument("--table", action="store_true",
                        help="use tab as a separator; if provided, -s is ignored")
    parser.add_argument("--stdout", action="store_true", help="output YAML to stdout")
    parser.add_argument("--mini", action="store_true", help="output a mini YAML data")
    parser.add_argument("-n", "--nrows", type=int, default=None,
                        help="Max number of lines to read from the file")

    parser.add_argument("--skiprows", type=int, default=None,
                        help="Number of lines to skip at the start of the file")
    parser.add_argument('-c', '--columns', default=None, nargs='+',
                        help='List of columns to select from the CSV file')

    # The header row is used by default.  '-H' is kept so existing command
    # lines still parse, but since the default is already truthy it could
    # never switch the header off; '--no-header' stores False into the same
    # destination to make that possible.
    parser.add_argument('-H', '--header', dest='header', default=True, action='store_true',
                        help='Treat the first row of the CSV file as the header (default)')
    parser.add_argument('--no-header', dest='header', action='store_false',
                        help='Specify that the CSV file has no header row')

    parser.add_argument('--comment', type=str, default=None,
                        help='Character indicating the start of a comment line in the CSV file')
    parser.add_argument('-e', '--encoding', type=str, default='utf-8',
                        help='Encoding used to read the CSV file (default is utf-8)')

    return parser.parse_args(argv)

def main():
    """Command-line entry point: read a CSV file and write it out as YAML.

    Parses the command line, loads the file with ``pandas.read_csv`` and
    passes the resulting frame to ``df2yaml``.

    A missing file or an empty CSV is reported on stderr and the process
    exits with status 1, so the message never mixes with YAML written to
    stdout and calling scripts can detect the failure.
    """
    args = parse_arguments()

    # --table forces a tab separator and takes precedence over -s/--separator.
    separator = '\t' if args.table else args.separator

    try:
        df = pd.read_csv(
            args.file_path,
            sep=separator,
            nrows=args.nrows,
            skiprows=args.skiprows,
            # First row is the header when args.header is truthy.
            header=0 if args.header else None,
            usecols=args.columns,
            comment=args.comment,
            encoding=args.encoding,
        )
        df2yaml(df, args.file_path, args.output_file, args.mini, args.stdout)
    except FileNotFoundError as err:
        # The error may come from the output path as well as the input, so
        # prefer the path recorded on the exception when there is one.
        missing = err.filename or args.file_path
        print(f"Error: File not found at {missing}", file=sys.stderr)
        sys.exit(1)
    except pd.errors.EmptyDataError:
        print(f"Error: The CSV file at {args.file_path} is empty", file=sys.stderr)
        sys.exit(1)


# Run the command-line entry point only when this file is executed as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    main()
