#!/usr/bin/env python3

# Copyright (C) 2017, Shan Zhang and Weizhi Song.
# zzfanyi@gmail.com and songwz03@gmail.com

# BioSAK is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# BioSAK is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import warnings
import argparse
from BioSAK.BioSAK_config import config_dict


def version(config_dict):
    version_file = open('%s/VERSION' % config_dict['config_file_path'])
    return version_file.readline().strip()


def print_main_help():

    help_message = ''' 
                 ...::: BioSAK v%s :::...

    Genome databases
       get_GTDB_taxon_gnm      ->  Get id of genomes from specified GTDB taxons
       get_genome_GTDB         ->  Batch download GTDB genomes
       get_genome_NCBI         ->  Batch download GenBank genomes
       sampling_GTDB_gnms      ->  Select GTDB genomes from a specified taxon at specified sampling rank
       subset_GTDB_meta        ->  Subset metadata of GTDB reference genomes
       MetaBiosample           ->  Extract metadata of NCBI biosamples

    Metagenomics
       Plot_MAG                ->  plot MAGs, (GC vs depth)
       magabund                ->  Calculate MAG abundance
       mean_MAG_cov            ->  Get mean MAG depth (based on MetaBAT produced depth)
       RunGraphMB              ->  Prepare input files for GraphMB
       get_MAG_reads_long      ->  Extract MAG-specific long reads for reassembling
       get_gnm_size            ->  Get the total length of genome(s)
       get_gene_depth          ->  Get gene depth by contig depth
       MeanMappingDepth        ->  Get mean mapping depth 
       CheckM                  ->  Parse CheckM outputs

    Functional annotation
       KEGG                    ->  KEGG annotation
       COG2020                 ->  COG annotation (v2020, by blastp/diamond)
       arCOG                   ->  COG annotation for archaea (version ar18)
       dbCAN                   ->  CAZy annotation with dbCAN

    16S rRNA related
       top_16S_hits            ->  Classify 16S by top-blast-hits approach
       SILVA_for_BLCA          ->  Prepare BLCA-compatible SILVA SSU database
       GTDB_for_BLCA           ->  Prepare BLCA-compatible GTDB SSU database
       BLCA_op_parser          ->  Make the BLCA outputs bit easier to read
       Tax4Fun2IndOTU          ->  Get functional profile of individual OTUs (to be added)
    
    Sequence manipulator
       gbk2fna/gbk2faa/gbk2ffn ->  Format convertors
       ffn2faa/gfa2fa/get_rc   ->  Format convertors
       fq2fa                   ->  Convert fastq to fasta
       fa2id                   ->  Export sequence id
       slice_seq               ->  Get specified region of a sequence
       rename_seq              ->  Rename sequences in a file
       select_seq              ->  Select sequences by their ids
       split_fasta             ->  Split one fasta file into multiple files
       merge_seq               ->  Merge sequence files, remove duplicated ones if any
       cat_fa                  ->  Combine sequence files, prefix sequence id with file name
    
    Multiple Sequence Alignment
       ConvertMSA             ->  Convert MSA format
       fa2phy                 ->  Convert MSA format (fasta to phylip)
       OneLineAln             ->  One-line fasta format alignments
       SliceMSA               ->  Slice MSA by column 
       
    Sam and Bam
       plot_sam_depth          ->  Plot SAM depth
       reads2bam               ->  Mapping and sorting
       sam2bam                 ->  Sam to BAM with samtools
       split_sam               ->  Split SAM/BAM file by reference
       bam2reads               ->  Extract reads (id) from sam file
    
    Others
       iTOL                    ->  Prepare iTOL-compatible files for tree visualization
       js_cmds                 ->  Commands to job scripts
       exe_cmds                ->  Execute commands with multiprocessing (one cmd per line)
       BestHit                 ->  Keep best blast hits (outfmt 6)
       VisGeneFlk              ->  Visualize gene flanking regions
       split_folder            ->  Split folder
       usearch_uc              ->  Parse Usearch uc file
       get_Pfam_hmms           ->  Get Pfam profiles by id
       Reads_simulator         ->  Simulate NGS reads
       SubsampleLongReads      ->  Subsample Long Reads
       rename_reads_Reago      ->  Rename paired reads for Reago
       cross_link_seqs         ->  Cross link matched regions between two sequences (not working well!)
       PhyloBiAssoc            ->  PhyloBiAssoc
       
    Phylogenetic tree-related modules have been moved to TreeSAK, please go to its github page for details.
    ''' % version(config_dict)

    print(help_message)


if __name__ == '__main__':

    ########################################################################################### initialize subparsers ############################################################################################

    # initialize the options parser
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(help="--", dest='subparser_name')

    # disable warning message
    warnings.filterwarnings('ignore')

    # parse options
    if (len(sys.argv) == 1) or (sys.argv[1] in ['-h', '-help', '--help']):
        print_main_help()
        sys.exit(0)

    elif sys.argv[1] == 'COG2020':
        from BioSAK import COG2020
        COG2020_parser = subparsers.add_parser('COG2020', description='Wrapper for COG annotation (v2020)', usage=COG2020.COG2020_parser_usage)
        COG2020_parser.add_argument('-i',                   required=True,                                  help='path to input sequences (in multi-fasta format)')
        COG2020_parser.add_argument('-x',                   required=False,                                 help='file extension')
        COG2020_parser.add_argument('-m',                   required=True,                                  help='sequence type, "N/n" for "nucleotide", "P/p" for "protein"')
        COG2020_parser.add_argument('-depth',               required=False, default=None,                   help='gene depth file/folder')
        COG2020_parser.add_argument('-pct_by_all',          required=False, action='store_true',            help='normalize by all query genes, rather than those with COG assignment')
        COG2020_parser.add_argument('-db_dir',              required=True,                                  help='DB folder')
        COG2020_parser.add_argument('-diamond',             required=False, action='store_true',            help='run diamond (for big dataset), default is NCBI blastp')
        COG2020_parser.add_argument('-t',                   required=False, default=1,     type=int,        help='number of threads')
        COG2020_parser.add_argument('-evalue',              required=False, default=0.001, type=float,      help='evalue cutoff, default: 0.001')
        args = vars(parser.parse_args())
        COG2020.COG2020(args)

    elif sys.argv[1] == 'arCOG':
        from BioSAK import arCOG
        arCOG_parser = subparsers.add_parser('arCOG', description='Wrapper for arCOG', usage=arCOG.arCOG_parser_usage)
        arCOG_parser.add_argument('-i',                   required=True,                                  help='path to input sequences (in multi-fasta format)')
        arCOG_parser.add_argument('-x',                   required=False,                                 help='file extension')
        arCOG_parser.add_argument('-m',                   required=True,                                  help='sequence type, "N/n" for "nucleotide", "P/p" for "protein"')
        arCOG_parser.add_argument('-depth',               required=False, default=None,                   help='gene depth file/folder')
        arCOG_parser.add_argument('-pct_by_all',          required=False, action='store_true',            help='normalize by all query genes, rather than those with COG assignment')
        arCOG_parser.add_argument('-db_dir',              required=True,                                  help='DB folder')
        arCOG_parser.add_argument('-diamond',             required=False, action='store_true',            help='run diamond (for big dataset), default is NCBI blastp')
        arCOG_parser.add_argument('-t',                   required=False, default=1,     type=int,        help='number of threads')
        arCOG_parser.add_argument('-evalue',              required=False, default=0.001, type=float,      help='evalue cutoff, default: 0.001')
        args = vars(parser.parse_args())
        arCOG.arCOG(args)

    elif sys.argv[1] == 'KEGG':
        from BioSAK import KEGG
        KEGG_parser = subparsers.add_parser('KEGG', description='Wrapper for KEGG annotation', usage=KEGG.KEGG_parser_usage)
        KEGG_parser.add_argument('-seq_in',                 required=False,                                 help='faa file')
        KEGG_parser.add_argument('-ko_in',                  required=False,                                 help='annotation results from BlastKOALA/GhostKOALA, normally with name user_ko.txt')
        KEGG_parser.add_argument('-x',                      required=False,                                 help='file extension')
        KEGG_parser.add_argument('-depth',                  required=False, default=None,                   help='gene depth file/folder')
        KEGG_parser.add_argument('-pct_by_all',             required=False, action='store_true',            help='normalize by all query genes, rather than those with ko assignment')
        KEGG_parser.add_argument('-db_dir',                 required=True,                                  help='folder holds sequence, seq2ko and ko00001.keg files')
        KEGG_parser.add_argument('-diamond',                required=False, action='store_true',            help='run diamond (for big dataset), default is NCBI blastp')
        KEGG_parser.add_argument('-t',                      required=False, default=1,     type=int,        help='number of threads, default: 1')
        KEGG_parser.add_argument('-evalue',                 required=False, default=0.001, type=float,      help='evalue cutoff, default: 0.001')
        args = vars(parser.parse_args())
        KEGG.Annotation_KEGG(args)

    elif sys.argv[1] == 'dbCAN':
        from BioSAK import dbCAN
        dbCAN_parser = subparsers.add_parser('dbCAN', description='Wrapper for running dbCAN', usage=dbCAN.dbCAN_parser_usage)
        dbCAN_parser.add_argument('-i',                     required=True,                                  help='path to input sequences (in multi-fasta format)')
        dbCAN_parser.add_argument('-x',                     required=False,                                 help='file extension')
        dbCAN_parser.add_argument('-m',                     required=False, default='P',                    help='The type of input sequences, "N/n" for "nucleotide", "P/p" for "protein"')
        dbCAN_parser.add_argument('-depth',                 required=False, default=None,                   help='gene depth file/folder')
        dbCAN_parser.add_argument('-db_dir',                required=True,                                  help='db folder')
        dbCAN_parser.add_argument('-t',                     required=False, type=int, default=1,            help='number of threads')
        args = vars(parser.parse_args())
        dbCAN.dbCAN(args)

    elif sys.argv[1] == 'CheckM':
        from BioSAK import CheckM
        CheckM_parser = subparsers.add_parser('CheckM', description='Parse CheckM outputs', usage=CheckM.CheckM_usage)
        CheckM_parser.add_argument('-i',    required=True,                       help='CheckM produced quality file')
        CheckM_parser.add_argument('-o',    required=True,                       help='output quality file (reformatted)')
        CheckM_parser.add_argument('-g',    required=False,                      help='MAG folder')
        CheckM_parser.add_argument('-x',    required=False, default='fasta',     help='bin file extension')
        CheckM_parser.add_argument('-cpl',  required=False, type=float,          help='completeness cutoff (0-100)')
        CheckM_parser.add_argument('-ctm',  required=False, type=float,          help='contamination cutoff (0-100)')
        CheckM_parser.add_argument('-r',    required=False, action="store_true", help='recalculate contamination')
        args = vars(parser.parse_args())
        CheckM.CheckM(args)

    elif sys.argv[1] == 'iTOL':
        from BioSAK import iTOL
        iTOL_parser = subparsers.add_parser('iTOL', description='Plot tree with iTOL', usage=iTOL.iTOL_usage)
        iTOL_parser.add_argument('-Labels',          required=False, action='store_true',   help='Labels')
        iTOL_parser.add_argument('-ColorStrip',      required=False, action='store_true',   help='ColorStrip')
        iTOL_parser.add_argument('-ColorRange',      required=False, action='store_true',   help='ColorRange')
        iTOL_parser.add_argument('-SimpleBar',       required=False, action='store_true',   help='SimpleBar')
        iTOL_parser.add_argument('-Heatmap',         required=False, action='store_true',   help='Heatmap')
        iTOL_parser.add_argument('-ExternalShape',   required=False, action='store_true',   help='ExternalShape')
        iTOL_parser.add_argument('-Binary',          required=False, action='store_true',   help='Binary')
        iTOL_parser.add_argument('-Connection',      required=False, action='store_true',   help='Connection')
        iTOL_parser.add_argument('-ll',              required=False, default=None,          help='Leaf Label')
        iTOL_parser.add_argument('-gc',              required=False, default=None,          help='Specify Group/column Color (optional)')
        iTOL_parser.add_argument('-cc',              required=False, default=None,          help='Specify Column Color (for ExternalShape format) (optional)')
        iTOL_parser.add_argument('-lg',              required=False, default=None,          help='Leaf to Group')
        iTOL_parser.add_argument('-lv',              required=False, default=None,          help='Leaf to Value')
        iTOL_parser.add_argument('-lm',              required=False, default=None,          help='Leaf to data Matrix')
        iTOL_parser.add_argument('-dr',              required=False, default=None,          help='Donor to Recipient')
        iTOL_parser.add_argument('-scale',           required=False, default=None,          help='Scale Values, in format 0-3-6-9')
        iTOL_parser.add_argument('-lt',              required=False, default=None,          help='Legend Title')
        iTOL_parser.add_argument('-o',               required=True,                         help='Output filename')
        args = vars(parser.parse_args())
        iTOL.iTOL(args)

    elif sys.argv[1] == 'split_folder':
        from BioSAK import split_folder
        split_folder_parser = subparsers.add_parser('split_folder', description='Split folder', usage=split_folder.split_folder_parser_usage)
        split_folder_parser.add_argument('-in',             required=True,                                  help='file folder')
        split_folder_parser.add_argument('-x',              required=True,                                  help='file extension')
        split_folder_parser.add_argument('-n',              required=False, type=int,                       help='number of subfolder')
        args = vars(parser.parse_args())
        split_folder.split_folder(args)

    elif sys.argv[1] == 'BestHit':
        from BioSAK import keep_best_hit
        BestHit_parser = subparsers.add_parser('BestHit', description='Keep blast hits with highest bit score', usage=keep_best_hit.BestHit_parser_usage)
        BestHit_parser.add_argument('-i',                   required=True,                                  help='input blast results (outfmt: 6)')
        BestHit_parser.add_argument('-o',                   required=True,                                  help='output file')
        args = vars(parser.parse_args())
        keep_best_hit.best_hit(args)

    elif sys.argv[1] == 'magabund':
        from BioSAK import magabund
        magabund_parser = subparsers.add_parser('magabund', description='Calculate MAG abundance', usage=magabund.magabund_usage)
        magabund_parser.add_argument('-s',       required=True,                        help='input sam file')
        magabund_parser.add_argument('-m',       required=True,                        help='MAG folder')
        magabund_parser.add_argument('-x',       required=False, default='fasta',      help='MAG file extension, default: fasta')
        magabund_parser.add_argument('-o',       required=True,                        help='output table')
        magabund_parser.add_argument('-a',       required=False, action='store_true',  help='based on aligned length, rather than read length')
        #get_bin_abundance_parser.add_argument('-g',       required=False, default=None,         help='bin grouping info')
        #get_bin_abundance_parser.add_argument('-Cdb',     required=False, default=None,         help='cluster info from dRep (Cdb.csv)')
        args = vars(parser.parse_args())
        magabund.magabund(args)

    elif sys.argv[1] == 'get_genome_NCBI':
        from BioSAK import get_genome_NCBI
        get_genome_NCBI_parser = subparsers.add_parser('get_genome_NCBI', description='Batch download NCBI genomes', usage=get_genome_NCBI.get_genome_NCBI_parser_usage)
        get_genome_NCBI_parser.add_argument('-i',           required=True,                       help='IDs of genomes to download')
        get_genome_NCBI_parser.add_argument('-o',           required=False, default=None,        help='output folder')
        get_genome_NCBI_parser.add_argument('-t',           required=False, default=1, type=int, help='number of threads')
        get_genome_NCBI_parser.add_argument('-force',       required=False, action="store_true", help='force overwrite existing genome directory')
        args = vars(parser.parse_args())
        get_genome_NCBI.download_GenBank_genome(args)

    elif sys.argv[1] == 'get_genome_GTDB':
        from BioSAK import get_genome_GTDB
        get_genome_GTDB_parser = subparsers.add_parser('get_genome_GTDB', description='Batch download GTDB genomes', usage=get_genome_GTDB.get_genome_GTDB_parser_usage)
        get_genome_GTDB_parser.add_argument('-i',           required=True,                          help='genome id')
        get_genome_GTDB_parser.add_argument('-o',           required=True,                          help='output folder')
        get_genome_GTDB_parser.add_argument('-fastani_dir', required=True,                          help='the "fastani" dir')
        get_genome_GTDB_parser.add_argument('-force',       required=False, action="store_true",    help='Force overwrite existing results')
        args = vars(parser.parse_args())
        get_genome_GTDB.get_genome_GTDB(args)

    elif sys.argv[1] == 'get_GTDB_taxon_gnm':
        from BioSAK import get_GTDB_taxon_gnm
        get_GTDB_taxon_gnm_parser = subparsers.add_parser('get_GTDB_taxon_gnm', description='get id of GTDB genomes from specified taxons', usage=get_GTDB_taxon_gnm.get_GTDB_taxon_gnm_parser_usage)
        get_GTDB_taxon_gnm_parser.add_argument('-p',           required=False, default='Genomes',          help='output prefix')
        get_GTDB_taxon_gnm_parser.add_argument('-meta',        required=True,                              help='GTDB reference genome metadata')
        get_GTDB_taxon_gnm_parser.add_argument('-path',        required=True,                              help='GTDB genome_paths.tsv')
        get_GTDB_taxon_gnm_parser.add_argument('-taxon',       required=True,                              help='interested taxons, one taxon per line')
        get_GTDB_taxon_gnm_parser.add_argument('-cpl',         required=False, default=None, type=float,   help='completeness cutoff (0-100), default: None')
        get_GTDB_taxon_gnm_parser.add_argument('-ctm',         required=False, default=None, type=float,   help='contamination cutoff, default: None')
        args = vars(parser.parse_args())
        get_GTDB_taxon_gnm.get_GTDB_taxon_gnm(args)

    elif sys.argv[1] == 'gbk2fna':
        from BioSAK import gbk2fna
        gbk2fna_parser = subparsers.add_parser('gbk2fna', description='gbk to fna (contig sequence)', usage=gbk2fna.gbk2fna_usage)
        gbk2fna_parser.add_argument('-i',  required=True,                       help='input gbk')
        gbk2fna_parser.add_argument('-ix', required=False, default='gbk',       help='input file extension, default: gbk')
        gbk2fna_parser.add_argument('-o',  required=True,                       help='output fna (contig sequences)')
        gbk2fna_parser.add_argument('-ox', required=False, default='fna',       help='output file extension, default: fna')
        gbk2fna_parser.add_argument('-t',  required=False, type=int, default=1, help='number of threads')
        args = vars(parser.parse_args())
        gbk2fna.gbk2fna(args)

    elif sys.argv[1] == 'gbk2ffn':
        from BioSAK import format_converter
        gbk2ffn_parser = subparsers.add_parser('gbk2ffn', description='gbk to ffn', usage=format_converter.sequence_manipulator_usage)
        gbk2ffn_parser.add_argument('-gbk', required=True, help='input gbk file')
        args = vars(parser.parse_args())
        format_converter.gbk2ffn(args)

    elif sys.argv[1] == 'gbk2faa':
        from BioSAK import gbk2faa
        gbk2faa_parser = subparsers.add_parser('gbk2faa', description='gbk to faa', usage=gbk2faa.gbk2faa_usage)
        gbk2faa_parser.add_argument('-i',   required=True,                          help='input gbk')
        gbk2faa_parser.add_argument('-ix',  required=False, default='gbk',          help='input file extension, default: gbk')
        gbk2faa_parser.add_argument('-o',   required=True,                          help='output faa')
        gbk2faa_parser.add_argument('-ox',  required=False, default='faa',          help='output file extension, default: faa')
        gbk2faa_parser.add_argument('-t',   required=False, type=int, default=1,    help='number of threads')
        args = vars(parser.parse_args())
        gbk2faa.gbk2faa(args)

    elif sys.argv[1] == 'ffn2faa':
        from BioSAK import format_converter
        ffn2faa_parser = subparsers.add_parser('ffn2faa', description='ffn to faa', usage=format_converter.sequence_manipulator_usage)
        ffn2faa_parser.add_argument('-ffn', required=True, help='input ffn file')
        args = vars(parser.parse_args())
        format_converter.ffn2faa(args)

    elif sys.argv[1] == 'get_rc':
        from BioSAK import format_converter
        get_rc_parser = subparsers.add_parser('get_rc', description='get reverse complement sequence', usage=format_converter.sequence_manipulator_usage)
        get_rc_parser.add_argument('-seq', required=True, help='input sequence(s)')
        args = vars(parser.parse_args())
        format_converter.get_rc(args)

    elif sys.argv[1] == 'slice_seq':
        from BioSAK import slice_seq
        slice_seq_parser = subparsers.add_parser('slice_seq', description='slice sequence', usage=slice_seq.slice_seq_usage)
        slice_seq_parser.add_argument('-in',    required=True,                        help='sequence file')
        slice_seq_parser.add_argument('-id',    required=True,                        help='sequence id')
        slice_seq_parser.add_argument('-range', required=True,                        help='sequence range, start-end (in bp). e.g. 200-4000')
        slice_seq_parser.add_argument('-rc',    required=False, action='store_true',  help='write out reverse complement sequence')
        slice_seq_parser.add_argument('-out',   required=True,                        help='output file')
        args = vars(parser.parse_args())
        slice_seq.slice_seq(args)

    elif sys.argv[1] == 'select_seq':
        from BioSAK import select_seq
        select_seq_parser = subparsers.add_parser('select_seq', description='select sequences by id', usage=select_seq.select_seq_usage)
        select_seq_parser.add_argument('-seq',       required=True,                        help='sequence file')
        select_seq_parser.add_argument('-id',        required=True,                        help='sequence ids,one id per line')
        select_seq_parser.add_argument('-option',    required=True, type=int,              help='choose from 0 and 1')
        select_seq_parser.add_argument('-out',       required=True,                        help='output file')
        select_seq_parser.add_argument('-fq',        required=False, action="store_true",  help='in fastq format, default: fa')
        select_seq_parser.add_argument('-oneline',   required=False, action="store_true",  help='put sequence in single line')
        args = vars(parser.parse_args())
        select_seq.select_seq(args)

    elif sys.argv[1] == 'get_Pfam_hmms':
        from BioSAK import get_Pfam_hmms
        get_Pfam_hmms_parser = subparsers.add_parser('get_Pfam_hmms', description='Get Pfam profiles by id', usage=get_Pfam_hmms.get_Pfam_hmms_usage)
        get_Pfam_hmms_parser.add_argument('-pfam',          required=True,                                  help='Pfam db file, normally with name Pfam-A.hmm')
        get_Pfam_hmms_parser.add_argument('-id',            required=True,                                  help='ids of profiles need to be extracted, one id per line')
        args = vars(parser.parse_args())
        get_Pfam_hmms.get_Pfam_hmms(args)

    elif sys.argv[1] == 'get_gene_depth':
        from BioSAK import get_gene_depth
        get_gene_depth_parser = subparsers.add_parser('get_gene_depth', description='Get gene depth by contig depth', usage=get_gene_depth.get_gene_depth_parser_usage)
        get_gene_depth_parser.add_argument('-gbk',          required=False, default=None,                   help='gbk file')
        get_gene_depth_parser.add_argument('-gff',          required=False, default=None,                   help='gff file')
        get_gene_depth_parser.add_argument('-ctg_depth',    required=True,                                  help='contig depth file')
        get_gene_depth_parser.add_argument('-id_column',    required=False, default=1, type=int,            help='contig id column, default is 1')
        get_gene_depth_parser.add_argument('-depth_column', required=False, default=2, type=int,            help='contig depth column, default is 2')
        get_gene_depth_parser.add_argument('-skip_header',  required=False, action='store_true',            help='skip the 1st line in contig depth file')
        args = vars(parser.parse_args())
        get_gene_depth.get_gene_depth(args)

    elif sys.argv[1] == 'rename_seq':
        from BioSAK import rename_seq
        rename_seq_parser = subparsers.add_parser('rename_seq', description='rename contigs in a file', usage=rename_seq.rename_seq_usage)
        rename_seq_parser.add_argument('-in',         required=True,                          help='input sequence file')
        rename_seq_parser.add_argument('-x',          required=False, default='fasta',        help='file extension, default: fasta')
        rename_seq_parser.add_argument('-sep_in',     required=False, default=None,           help='separator for input sequences')
        rename_seq_parser.add_argument('-sep_out',    required=False, default=None,           help='separator for output sequences, default: same as sep_in')
        rename_seq_parser.add_argument('-n',          required=False, default=None, type=int, help='the number of columns to keep')
        rename_seq_parser.add_argument('-prefix',     required=False, default=None,           help='add prefix to sequence')
        rename_seq_parser.add_argument('-oneline',    required=False, action="store_true",    help='put sequence in single line')
        rename_seq_parser.add_argument('-t',          required=False, type=int, default=1,    help='number of threads')
        args = vars(parser.parse_args())
        rename_seq.rename_seq(args)

    elif sys.argv[1] == 'SILVA_for_BLCA':
        from BioSAK import SILVA_for_BLCA
        SILVA_for_BLCA_parser = subparsers.add_parser('SILVA_for_BLCA', description='Prepare BLCA-compatible SILVA SSU database', usage=SILVA_for_BLCA.SILVA_for_BLCA_usage)
        SILVA_for_BLCA_parser.add_argument('-SILVA_ssu',    required=True,                                  help='SILVA SSU sequence file, e.g. SILVA_138_SSURef_NR99_tax_silva.fasta')
        args = vars(parser.parse_args())
        SILVA_for_BLCA.SILVA_for_BLCA(args)

    elif sys.argv[1] == 'GTDB_for_BLCA':
        from BioSAK import GTDB_for_BLCA
        GTDB_for_BLCA_parser = subparsers.add_parser('GTDB_for_BLCA', description='Prepare BLCA-compatible GTDB SSU database', usage=GTDB_for_BLCA.GTDB_for_BLCA_usage)
        GTDB_for_BLCA_parser.add_argument('-GTDB_ssu',      required=True,                                  help='GTDB SSU sequence file, e.g. bac120_ar122_ssu_r89.fna')
        args = vars(parser.parse_args())
        GTDB_for_BLCA.GTDB_for_BLCA(args)

    elif sys.argv[1] == 'Reads_simulator':
        from BioSAK import Reads_simulator
        Reads_simulator_parser = subparsers.add_parser('Reads_simulator', description='Simulate NGS reads', usage=Reads_simulator.Reads_simulator_usage)
        Reads_simulator_parser.add_argument('-p',     required=True,                            help='output prefix')
        Reads_simulator_parser.add_argument('-r',     required=True,                            help='reference genome')
        Reads_simulator_parser.add_argument('-n',     required=False, type=int,   default=None, help='number of read pairs to simulate')
        Reads_simulator_parser.add_argument('-d',     required=False, type=float, default=None, help='desired read depth (X)')
        Reads_simulator_parser.add_argument('-l',     required=True,  type=int,                 help='read length (bp)')
        Reads_simulator_parser.add_argument('-i',     required=True,  type=int,                 help='insert size (bp)')
        Reads_simulator_parser.add_argument('-split', action="store_true",                      help='Export forward and reverse reads to separate files')
        args = vars(parser.parse_args())
        Reads_simulator.Reads_simulator(args)

    elif sys.argv[1] == 'plot_sam_depth':
        from BioSAK import plot_sam_depth
        plot_sam_depth_parser = subparsers.add_parser('plot_sam_depth', description='plot sam depth', usage=plot_sam_depth.plot_sam_depth_usage)
        plot_sam_depth_parser.add_argument('-r',            required=True, type=str,                        help='reference sequence file')
        plot_sam_depth_parser.add_argument('-d',            required=True, type=str,                        help='depth file')
        plot_sam_depth_parser.add_argument('-i',            required=False, type=str, default=None,         help='id of sequence to plot')
        plot_sam_depth_parser.add_argument('-s',            required=False, type=int, default=None,         help='start position to plot')
        plot_sam_depth_parser.add_argument('-e',            required=False, type=int, default=None,         help='end position to plot')
        plot_sam_depth_parser.add_argument('-k',            required=False, type=int, default=100,          help='k-mer mean depth, default: 100')
        plot_sam_depth_parser.add_argument('-l',            required=False, type=str, default=None,         help='position to mark')
        plot_sam_depth_parser.add_argument('-x',            required=False, type=float, default=30,         help='plot width, default: 30')
        plot_sam_depth_parser.add_argument('-y',            required=False, type=float, default=10,         help='plot height, default: 10')
        args = vars(parser.parse_args())
        plot_sam_depth.plot_sam_depth(args)

    elif sys.argv[1] == 'rename_reads_for_Reago':
        from BioSAK import rename_reads_for_Reago
        rename_reads_for_Reago_parser = subparsers.add_parser('rename_reads_for_Reago', description='rename_reads_for_Reago', usage=rename_reads_for_Reago.rename_reads_for_Reago_usage)
        rename_reads_for_Reago_parser.add_argument('-in',  required=True, type=str, help='input fasta file')
        rename_reads_for_Reago_parser.add_argument('-out', required=True, type=str, help='renamed fasta file')
        rename_reads_for_Reago_parser.add_argument('-p',   required=True, type=str, help='prefix of renamed reads')
        rename_reads_for_Reago_parser.add_argument('-d',   required=True, type=int, help='chose from 1 (forward) or 2 (reverse)')
        args = vars(parser.parse_args())
        rename_reads_for_Reago.rename_reads_for_Reago(args)

    elif sys.argv[1] == 'MeanMappingDepth':
        from BioSAK import MeanMappingDepth
        MeanMappingDepth_parser = subparsers.add_parser('MeanMappingDepth', description='get mean mapping depth', usage=MeanMappingDepth.MeanMappingDepth_usage)
        MeanMappingDepth_parser.add_argument('-depth',  required=True,                          help='input depth file from "samtools depth" ')
        MeanMappingDepth_parser.add_argument('-T',      required=False, action="store_true",    help='get overall stats')
        args = vars(parser.parse_args())
        MeanMappingDepth.MeanMappingDepth(args)

    elif sys.argv[1] == 'js_cmds':
        from BioSAK import js_cmds
        js_cmds_parser = subparsers.add_parser('js_cmds', description='put commands into job scripts', usage=js_cmds.js_cmds_usage)
        js_cmds_parser.add_argument('-p',      required=True,                       help='js prefix')
        js_cmds_parser.add_argument('-cmd',    required=True,                       help='cmds file')
        js_cmds_parser.add_argument('-hd',     required=True,                       help='js header')
        js_cmds_parser.add_argument('-n',      required=True, type=int,             help='number of cmds per js')
        js_cmds_parser.add_argument('-auto',   required=False, action="store_true", help='automatically submit next js')
        js_cmds_parser.add_argument('-js_hpc', required=False, default=None,        help='Full path to js folder on HPC')
        js_cmds_parser.add_argument('-force',  required=False, action="store_true", help='force overwrite existing results')
        args = vars(parser.parse_args())
        js_cmds.js_cmds(args)

    elif sys.argv[1] == 'reads2bam':
        from BioSAK import reads2bam
        reads2bam_parser = subparsers.add_parser('reads2bam', usage=reads2bam.reads2bam_usage)
        reads2bam_parser.add_argument('-ref',       required=True,                          help='reference sequences')
        reads2bam_parser.add_argument('-index',     required=False, action="store_true",    help='index reference sequences')
        reads2bam_parser.add_argument('-r1',        required=False, default=None,           help='paired reads r1')
        reads2bam_parser.add_argument('-r2',        required=False, default=None,           help='paired reads r2')
        reads2bam_parser.add_argument('-up',        required=False, default=None,           help='unpaired reads')
        reads2bam_parser.add_argument('-fq',        required=False, action="store_true",    help='reads in fastq format')
        reads2bam_parser.add_argument('-local',     required=False, action="store_true",    help='perform local alignment')
        reads2bam_parser.add_argument('-no_unal',   required=False, action="store_true",    help='Suppress SAM records for reads that failed to align')
        reads2bam_parser.add_argument('-t',         required=False, type=int, default=1,    help='number of threads, default: 1')
        reads2bam_parser.add_argument('-tmp',       required=False, action="store_true",    help='keep temporary files')
        args = vars(parser.parse_args())
        reads2bam.reads2bam(args)

    elif sys.argv[1] == 'sam2bam':
        from BioSAK import sam2bam
        sam2bam_parser = subparsers.add_parser('sam2bam', usage=sam2bam.sam2bam_usage)
        sam2bam_parser.add_argument('-sam', required=True,                          help='sam file')
        sam2bam_parser.add_argument('-t',   required=False, type=int, default=1,    help='number of threads')
        args = vars(parser.parse_args())
        sam2bam.sam2bam(args)

    elif sys.argv[1] == 'fa2tree':
        from BioSAK import fa2tree
        fa2tree_parser = subparsers.add_parser('fa2tree', usage=fa2tree.fa2tree_usage)
        fa2tree_parser.add_argument('-seq', required=True,                      help='sequence file')
        fa2tree_parser.add_argument('-t', required=False, type=int, default=1,  help='number of threads, default: 1')
        args = vars(parser.parse_args())
        fa2tree.fa2tree(args)

    elif sys.argv[1] == 'BLCA_op_parser':
        from BioSAK import BLCA_op_parser
        BLCA_op_parser_parser = subparsers.add_parser('BLCA_op_parser', usage=BLCA_op_parser.BLCA_op_parser_usage)
        BLCA_op_parser_parser.add_argument('-in', required=True, help='BLCA output')
        args = vars(parser.parse_args())
        BLCA_op_parser.BLCA_op_parser(args)

    elif sys.argv[1] == 'VisGeneFlk':
        from BioSAK import VisGeneFlk
        VisGeneFlk_parser = subparsers.add_parser('VisGeneFlk', usage=VisGeneFlk.VisGeneFlk_usage)
        VisGeneFlk_parser.add_argument('-gene',    required=True,                          help='gene id')
        VisGeneFlk_parser.add_argument('-gbk',     required=True,                          help='gbk file')
        VisGeneFlk_parser.add_argument('-len',     required=True, type=int,                help='length (in bp) of flanking sequences to plot')
        VisGeneFlk_parser.add_argument('-scale',   required=False, type=int, default=200,  help='scale for plotting, default: 200bp per cm')
        VisGeneFlk_parser.add_argument('-fmt',     required=False, default='svg',          help='output format (svg or pdf), default: svg')
        args = vars(parser.parse_args())
        VisGeneFlk.VisGeneFlk(args)

    elif sys.argv[1] == 'usearch_uc':
        from BioSAK import usearch_uc
        usearch_uc_parser = subparsers.add_parser('usearch_uc', usage=usearch_uc.usearch_uc_usage)
        usearch_uc_parser.add_argument('-uc', required=True,                        help='uc file from Usearch')
        usearch_uc_parser.add_argument('-n',  required=False, type=int, default=1,  help='minimum number of sequence in a cluster, default: 1')
        usearch_uc_parser.add_argument('-o',  required=True,                        help='output file')
        args = vars(parser.parse_args())
        usearch_uc.usearch_uc(args)

    elif sys.argv[1] == 'top_16S_hits':
        from BioSAK import top_16S_hits
        top_16S_hits_parser = subparsers.add_parser('top_16S_hits', usage=top_16S_hits.top_16S_hits_usage)
        top_16S_hits_parser.add_argument('-p',           required=True,                           help='output prefix')
        top_16S_hits_parser.add_argument('-q',           required=True,                           help='query sequence file')
        top_16S_hits_parser.add_argument('-r',           required=True,                           help='SILVA or GTDB SSU sequence file')
        top_16S_hits_parser.add_argument('-b',           required=False, default=None,            help='blast results between query and reference sequences from previous analysis(if you have)')
        top_16S_hits_parser.add_argument('-evalue',      required=False, default='1e-20',         help='evalue cutoff, default: 1e-20')
        top_16S_hits_parser.add_argument('-top',         required=False, type=int, default=1,     help='Number of top hits to report, default: 1')
        top_16S_hits_parser.add_argument('-t',           required=False, type=int, default=1,     help='number of threads')
        args = vars(parser.parse_args())
        top_16S_hits.top_16S_hits(args)

    elif sys.argv[1] == 'Plot_MAG':
        from BioSAK import plot_mag
        Plot_MAG_parser = subparsers.add_parser('Plot_MAG', usage=plot_mag.Plot_MAG_parser_usage)
        Plot_MAG_parser.add_argument('-i',   required=True,                         help='MAG folder')
        Plot_MAG_parser.add_argument('-x',   required=True,                         help='file extension')
        Plot_MAG_parser.add_argument('-d',   required=True,                         help='contig depth')
        Plot_MAG_parser.add_argument('-sep', required=False, action='store_true',   help='get plot for individual MAGs')
        args = vars(parser.parse_args())
        plot_mag.Plot_MAG(args)

    elif sys.argv[1] == 'get_gnm_size':
        from BioSAK import get_gnm_size
        get_gnm_size_parser = subparsers.add_parser('get_gnm_size', usage=get_gnm_size.get_gnm_size_parser_usage)
        get_gnm_size_parser.add_argument('-i',      required=True,                          help='MAG file/folder')
        get_gnm_size_parser.add_argument('-x',      required=False,                         help='file extension')
        get_gnm_size_parser.add_argument('-total',  required=False, action='store_true',    help='get total size')
        args = vars(parser.parse_args())
        get_gnm_size.get_gnm_size(args)

    elif sys.argv[1] == 'fq2fa':
        from BioSAK import fq2fa
        fq2fa_parser = subparsers.add_parser('fq2fa', usage=fq2fa.fq2fa_usage)
        fq2fa_parser.add_argument('-i', required=True, help='input fastq')
        fq2fa_parser.add_argument('-o', required=True, help='output fasta')
        args = vars(parser.parse_args())
        fq2fa.fq2fa(args)

    elif sys.argv[1] == 'mean_MAG_cov':
        from BioSAK import mean_MAG_cov
        mean_MAG_cov_parser = subparsers.add_parser('mean_MAG_cov', usage=mean_MAG_cov.mean_MAG_cov_usage)
        mean_MAG_cov_parser.add_argument('-d', required=True, help='MetaBAT produced depth file')
        mean_MAG_cov_parser.add_argument('-b', required=True, help='bin folder')
        mean_MAG_cov_parser.add_argument('-x', required=True, help='file extension')
        args = vars(parser.parse_args())
        mean_MAG_cov.mean_MAG_cov(args)

    elif sys.argv[1] == 'GTDB_tree_r207':
        from BioSAK import GTDB_tree_r207
        GTDB_tree_r207_parser = subparsers.add_parser('GTDB_tree_r207', usage=GTDB_tree_r207.GTDB_tree_r207_usage)
        GTDB_tree_r207_parser.add_argument('-p', required=True,                         help='output prefix')
        GTDB_tree_r207_parser.add_argument('-i', required=True,                         help='genome folder')
        GTDB_tree_r207_parser.add_argument('-x', required=True,                         help='genome file extension')
        GTDB_tree_r207_parser.add_argument('-t', required=False, type=int, default=1,   help='number of threads')
        args = vars(parser.parse_args())
        GTDB_tree_r207.GTDB_tree_r207(args)

    elif sys.argv[1] == 'exe_cmds':
        from BioSAK import exe_cmds
        exe_cmds_parser = subparsers.add_parser('exe_cmds', usage=exe_cmds.exe_cmds_usage)
        exe_cmds_parser.add_argument('-c', required=True,                       help='cmds file')
        exe_cmds_parser.add_argument('-t', required=False, type=int, default=1, help='number of threads')
        args = vars(parser.parse_args())
        exe_cmds.exe_cmds(args)

    elif sys.argv[1] == 'split_fasta':
        from BioSAK import split_fasta
        split_fasta_parser = subparsers.add_parser('split_fasta', usage=split_fasta.split_fasta_usage)
        split_fasta_parser.add_argument('-i',  required=True,                          help='input fasta file')
        split_fasta_parser.add_argument('-o',  required=True,                          help='output dir')
        split_fasta_parser.add_argument('-ns', required=False, default=None, type=int, help='number of sequences per file')
        split_fasta_parser.add_argument('-nf', required=False, default=None, type=int, help='number of files to be generated')
        args = vars(parser.parse_args())
        split_fasta.split_fasta(args)

    elif sys.argv[1] == 'split_sam':
        from BioSAK import split_sam
        split_sam_parser = subparsers.add_parser('split_sam', usage=split_sam.split_sam_usage)
        split_sam_parser.add_argument('-p', required=True,                          help='output prefix')
        split_sam_parser.add_argument('-i', required=True,                          help='input SAM/BAM file')
        split_sam_parser.add_argument('-r', required=True,                          help='reference id')
        split_sam_parser.add_argument('-t', required=False, type=int, default=1,    help='number of threads')
        args = vars(parser.parse_args())
        split_sam.split_sam(args)

    elif sys.argv[1] == 'fa2id':
        from BioSAK import fa2id
        fa2id_parser = subparsers.add_parser('fa2id', usage=fa2id.fa2id_usage)
        fa2id_parser.add_argument('-i', required=True,                          help='input fasta file')
        fa2id_parser.add_argument('-o', required=True,                          help='output txt')
        fa2id_parser.add_argument('-fq', required=False, action="store_true",   help='in fastq format, default: fasta')
        args = vars(parser.parse_args())
        fa2id.fa2id(args)

    elif sys.argv[1] == 'sampling_GTDB_gnms':
        from BioSAK import sampling_GTDB_gnms
        sampling_GTDB_gnms_parser = subparsers.add_parser('sampling_GTDB_gnms', usage=sampling_GTDB_gnms.sampling_GTDB_gnms_usage)
        sampling_GTDB_gnms_parser.add_argument('-o',           required=True,                              help='output table')
        sampling_GTDB_gnms_parser.add_argument('-meta',        required=True,                              help='GTDB reference genome metadata')
        sampling_GTDB_gnms_parser.add_argument('-taxon',       required=True,                              help='interested taxon')
        sampling_GTDB_gnms_parser.add_argument('-r',           required=True,                              help='sampling at rank, select from p, c, o, f, g and s')
        sampling_GTDB_gnms_parser.add_argument('-n',           required=False, default=1, type=int,        help='numer of genome to retain per taxon')
        sampling_GTDB_gnms_parser.add_argument('-cpl',         required=False, default=None, type=float,   help='completeness cutoff (0-100), default: None')
        sampling_GTDB_gnms_parser.add_argument('-ctm',         required=False, default=None, type=float,   help='contamination cutoff, default: None')
        sampling_GTDB_gnms_parser.add_argument('-ts',          required=False, action='store_true',        help='only consider type strain')
        sampling_GTDB_gnms_parser.add_argument('-rs',          required=False, action='store_true',        help='only consider representative species')
        args = vars(parser.parse_args())
        sampling_GTDB_gnms.sampling_GTDB_gnms(args)

    elif sys.argv[1] == 'subset_GTDB_meta':
        from BioSAK import subset_GTDB_meta
        subset_GTDB_meta_parser = subparsers.add_parser('subset_GTDB_meta', usage=subset_GTDB_meta.subset_GTDB_meta_usage)
        subset_GTDB_meta_parser.add_argument('-meta',    required=True, help='GTDB reference genome metadata')
        subset_GTDB_meta_parser.add_argument('-id',      required=True, help='id of genomes')
        subset_GTDB_meta_parser.add_argument('-out',     required=True, help='output file')
        args = vars(parser.parse_args())
        subset_GTDB_meta.subset_GTDB_meta(args)

    elif sys.argv[1] == 'get_MAG_reads_long':
        from BioSAK import get_MAG_reads_long
        get_MAG_reads_long_parser = subparsers.add_parser('get_MAG_reads_long', usage=get_MAG_reads_long.get_MAG_reads_long_usage)
        get_MAG_reads_long_parser.add_argument('-r',    required=True,                             help='reads file')
        get_MAG_reads_long_parser.add_argument('-s',    required=True,                             help='sam file')
        get_MAG_reads_long_parser.add_argument('-m',    required=True,                             help='MAG file/folder')
        get_MAG_reads_long_parser.add_argument('-x',    required=False, default='fasta',           help='MAG extension, default: fasta')
        get_MAG_reads_long_parser.add_argument('-l',    required=False, type=int, default=5000,    help='minimal read length (in bp), default: 5000')
        get_MAG_reads_long_parser.add_argument('-o',    required=True,                             help='output folder')
        args = vars(parser.parse_args())
        get_MAG_reads_long.get_MAG_reads_long(args)

    elif sys.argv[1] == 'bam2reads':
        from BioSAK import bam2reads
        bam2reads_parser = subparsers.add_parser('bam2reads', usage=bam2reads.bam2reads_usage)
        bam2reads_parser.add_argument('-b',       required=True,                       help='Sorted Bam file')
        bam2reads_parser.add_argument('-r',       required=True,                       help='Interested region')
        bam2reads_parser.add_argument('-s',       required=False, default=None,        help='Read sequence file')
        bam2reads_parser.add_argument('-o',       required=True,                       help='Output folder')
        bam2reads_parser.add_argument('-force',   required=False, action="store_true", help='Force overwriting')
        args = vars(parser.parse_args())
        bam2reads.bam2reads(args)

    elif sys.argv[1] == 'merge_seq':
        from BioSAK import merge_seq
        merge_seq_parser = subparsers.add_parser('merge_seq', usage=merge_seq.merge_seq_usage)
        merge_seq_parser.add_argument('-1',       required=True,                          help='input file 1')
        merge_seq_parser.add_argument('-2',       required=True,                          help='input file 2')
        merge_seq_parser.add_argument('-o',       required=True,                          help='output sequence file')
        merge_seq_parser.add_argument('-fq',      required=False, action="store_true",    help='in fastq format, default: fasta')
        merge_seq_parser.add_argument('-sl',      required=False, action="store_true",    help='write out sequence in single line format')
        args = vars(parser.parse_args())
        merge_seq.merge_seq(args)

    elif sys.argv[1] == 'cross_link_seqs':
        from BioSAK import cross_link_seqs
        cross_link_seqs_parser = subparsers.add_parser('cross_link_seqs', usage=cross_link_seqs.cross_link_seqs_usage)
        cross_link_seqs_parser.add_argument('-p', required=True,                 help='output prefix')
        cross_link_seqs_parser.add_argument('-1', required=True,                 help='sequence 1, in fasta format')
        cross_link_seqs_parser.add_argument('-2', required=True,                 help='sequence 2, in fasta format')
        cross_link_seqs_parser.add_argument('-i', required=True, type=float,     help='identity cutoff, 0-100')
        cross_link_seqs_parser.add_argument('-l', required=True, type=int,       help='alignment length cutoff, in bp')
        cross_link_seqs_parser.add_argument('-f', required=False, default='PDF', help='plot format, choose from PDF, SVG, EPS and PNG, default: PDF')
        args = vars(parser.parse_args())
        cross_link_seqs.cross_link_seqs(args)

    elif sys.argv[1] == 'RunGraphMB':
        from BioSAK import RunGraphMB
        RunGraphMB_parser = subparsers.add_parser('RunGraphMB', usage=RunGraphMB.RunGraphMB_usage)
        RunGraphMB_parser.add_argument('-gfa',  required=True, help='gfa file')
        RunGraphMB_parser.add_argument('-o',    required=True, help='output folder (i.e., input folder to GraphMB)')
        args = vars(parser.parse_args())
        RunGraphMB.RunGraphMB(args)

    elif sys.argv[1] == 'gfa2fa':
        from BioSAK import format_converter
        gfa2fa_parser = subparsers.add_parser('gfa2fa', description='gfa to fasta', usage=format_converter.sequence_manipulator_usage)
        gfa2fa_parser.add_argument('-gfa', required=True, help='input gfa file')
        gfa2fa_parser.add_argument('-o',   required=True, help='output fasta file')
        args = vars(parser.parse_args())
        format_converter.gfa2fa(args)

    elif sys.argv[1] == 'cat_fa':
        from BioSAK import cat_fa
        cat_fa_parser = subparsers.add_parser('cat_fa', description='combine sequence files', usage=cat_fa.cat_fa_usage)
        cat_fa_parser.add_argument('-i', required=True,                     help='sequence folder')
        cat_fa_parser.add_argument('-x', required=False, default='fasta',   help='file extension, default: fasta')
        cat_fa_parser.add_argument('-s', required=False, default='__',      help='separator, default: __')
        cat_fa_parser.add_argument('-o', required=True,                     help='combined output file')
        args = vars(parser.parse_args())
        cat_fa.cat_fa(args)

    elif sys.argv[1] == 'SubsampleLongReads':
        from BioSAK import SubsampleLongReads
        SubsampleLongReads_parser = subparsers.add_parser('SubsampleLongReads', description='Subsample Long Reads', usage=SubsampleLongReads.SubsampleLongReads_usage)
        SubsampleLongReads_parser.add_argument('-i',        required=True,                          help='input sequence file')
        SubsampleLongReads_parser.add_argument('-s',        required=True,                          help='separate subsample rates (1-100) with comma, e.g. 1,5,10')
        SubsampleLongReads_parser.add_argument('-o',        required=True,                          help='output directory')
        SubsampleLongReads_parser.add_argument('-fq',       required=False, action="store_true",    help='in fastq format, default: fa')
        SubsampleLongReads_parser.add_argument('-oneline',  required=False, action="store_true",    help='put sequence in single line')
        args = vars(parser.parse_args())
        SubsampleLongReads.SubsampleLongReads(args)

    elif sys.argv[1] == 'OneLineAln':
        from BioSAK import OneLineAln
        OneLineAln_parser = subparsers.add_parser('OneLineAln', description='One-line fasta format alignments', usage=OneLineAln.OneLineAln_usage)
        OneLineAln_parser.add_argument('-in',               required=True,                       help='input MSA in fasta format')
        OneLineAln_parser.add_argument('-out',              required=False, default=None,        help='output file')
        OneLineAln_parser.add_argument('-upper',            required=False, action='store_true', help='turn to uppercase')
        args = vars(parser.parse_args())
        OneLineAln.OneLineAln(args)

    elif sys.argv[1] == 'SliceMSA':
        from BioSAK import SliceMSA
        SliceMSA_parser = subparsers.add_parser('SliceMSA', description='Slice MSA by column', usage=SliceMSA.SliceMSA_usage)
        SliceMSA_parser.add_argument('-i',                  required=True,                         help='input MSA in fasta format')
        SliceMSA_parser.add_argument('-fi',                 required=False, default='fasta',       help='format (NOT file extension) of input MSA, default: fasta')
        SliceMSA_parser.add_argument('-s',                  required=True,                         help='columns to export, e.g. 200-300, -100, 50-')
        SliceMSA_parser.add_argument('-o',                  required=True,                         help='output file or folder')
        SliceMSA_parser.add_argument('-fo',                 required=False, default='fasta',       help='format of output MSA, select from fasta and phylip-relaxed, default: fasta')
        SliceMSA_parser.add_argument('-force',              required=False, action="store_true",   help='force overwrite existing output folder')
        args = vars(parser.parse_args())
        SliceMSA.SliceMSA(args)

    elif sys.argv[1] == 'ConvertMSA':
        from BioSAK import ConvertMSA
        ConvertMSA_parser = subparsers.add_parser('ConvertMSA', usage=ConvertMSA.ConvertMSA_usage)
        ConvertMSA_parser.add_argument('-i',       required=True,                       help='input alignment')
        ConvertMSA_parser.add_argument('-xi',      required=False, default='aln',       help='input alignment extension')
        ConvertMSA_parser.add_argument('-fi',      required=True,                       help='input alignment format, e.g., fasta, phylip')
        ConvertMSA_parser.add_argument('-o',       required=True,                       help='output alignment')
        ConvertMSA_parser.add_argument('-xo',      required=False, default='aln',       help='output alignment extension')
        ConvertMSA_parser.add_argument('-fo',      required=True,                       help='output alignment format, e.g., fasta, phylip')
        ConvertMSA_parser.add_argument('-oneline', required=False, action="store_true", help='put sequence in single line, available if -fo is fasta')
        ConvertMSA_parser.add_argument('-nogap',   required=False, action="store_true", help='remove gaps from alignment, available if -fo is fasta')
        ConvertMSA_parser.add_argument('-f',       required=False, action="store_true", help='force overwrite existing output folder')
        args = vars(parser.parse_args())
        ConvertMSA.ConvertMSA(args)

    elif sys.argv[1] == 'fa2phy':
        from BioSAK import fa2phy
        fa2phy_parser = subparsers.add_parser('fa2phy', usage=fa2phy.fa2phy_usage)
        fa2phy_parser.add_argument('-i', required=True, help='input MSA in fasta format')
        fa2phy_parser.add_argument('-o', required=True, help='output MSA in phylip format')
        args = vars(parser.parse_args())
        fa2phy.fa2phy(args)

    elif sys.argv[1] == 'MetaBiosample':
        from BioSAK import MetaBiosample
        MetaBiosample_parser = subparsers.add_parser('MetaBiosample', usage=MetaBiosample.MetaBiosample_usage)
        MetaBiosample_parser.add_argument('-i',     required=True,                          help='biosample id file, one id per line')
        MetaBiosample_parser.add_argument('-a',     required=True,                          help='attributes to extract')
        MetaBiosample_parser.add_argument('-o',     required=True,                          help='output folder')
        MetaBiosample_parser.add_argument('-t',     required=False, type=int, default=1,    help='number of threads, default: 1')
        MetaBiosample_parser.add_argument('-f',     required=False, action="store_true",    help='Force overwrite existing results')
        MetaBiosample_parser.add_argument('-exe',   required=False, action="store_true",    help='execute cmds')
        args = vars(parser.parse_args())
        MetaBiosample.MetaBiosample(args)

    elif sys.argv[1] == 'PhyloBiAssoc':
        from BioSAK import PhyloBiAssoc
        PhyloBiAssoc_parser = subparsers.add_parser('PhyloBiAssoc', usage=PhyloBiAssoc.PhyloBiAssoc_usage)
        PhyloBiAssoc_parser.add_argument('-tree', required=True, help='tree file')
        PhyloBiAssoc_parser.add_argument('-data', required=True, help='data file')
        args = vars(parser.parse_args())
        PhyloBiAssoc.PhyloBiAssoc(args)

    else:
        print('Unrecognized command: %s, program exited' % sys.argv[1])
        exit()


upload_to_pypi_cmd = '''

cd /Users/songweizhi/PycharmProjects/BioSAK
rm -r build dist BioSAK.egg-info
python setup.py sdist bdist_wheel
twine upload dist/*

songweizhi    shan88

pip3 install --upgrade BioSAK

'''
