#!python
import argparse
from pathlib import Path

import pandas as pd

from github_stats_pages import gts_run
from github_stats_pages.logger import app_log as log


def read_csv(csv_file: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Thin wrapper around ``pandas.read_csv`` called with its default options.

    :param csv_file: Path of the CSV file to read.
    :return: The parsed table.
    """
    frame = pd.read_csv(csv_file)
    return frame


# Glob for the per-repository CSV outputs that get moved into ``data/``.
# The leading ``????-??-??`` looks like a date stamp -- presumably the naming
# scheme used by ``gts_run`` when ``save_csv=True``; confirm against gts_run.
_STATS_CSV_PATTERN = "????-??-??-???-???-*stats.csv"


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="CLI to retrieve GitHub traffic statistics"
    )
    parser.add_argument(
        "-u", "--user", required=True, help="user or organization name"
    )
    # NOTE(review): a token passed on the command line can show up in shell
    # history and process listings; consider also accepting it from the
    # environment in a follow-up change.
    parser.add_argument("-t", "--token", required=True, help="API token")
    parser.add_argument("-c", "--csv-file", required=True, help="CSV filename")
    parser.add_argument(
        "--test",
        action="store_true",
        help="Flag to quickly run a few repositories",
    )
    return parser


def _move_stats_files(src_dir: Path, dest_dir: Path) -> int:
    """Move every stats CSV found directly in ``src_dir`` into ``dest_dir``.

    :param src_dir: Directory searched (non-recursively) for files matching
        ``_STATS_CSV_PATTERN``.
    :param dest_dir: Target directory; created if it does not exist yet.
    :return: Number of entries moved.
    """
    # exist_ok avoids the check-then-create race of ``exists()`` + ``mkdir()``
    dest_dir.mkdir(exist_ok=True)

    # Snapshot the matches first so the directory is not modified while the
    # glob generator is still scanning it.
    matches = list(src_dir.glob(_STATS_CSV_PATTERN))
    for csv_path in matches:
        # ``replace`` overwrites an existing target on every platform, whereas
        # ``rename`` raises FileExistsError on Windows when re-running.
        csv_path.replace(dest_dir / csv_path.name)
    return len(matches)


def main() -> None:
    """Retrieve traffic statistics for every repository listed in a CSV.

    Reads the repository table given by ``--csv-file`` (it must provide the
    ``name``, ``fork`` and ``archived`` columns), skips forks and archived
    repositories, calls ``gts_run.run_each_repo`` and
    ``gts_run.get_top_paths`` for each remaining repository, and finally
    moves the resulting stats CSV files from the current working directory
    into ``./data``.
    """
    args = _build_parser().parse_args()

    log.info("[yellow]Running gts_run_all_repos script")

    df = read_csv(args.csv_file)

    # Exclude forks and archived repos
    n_forks = df["fork"].sum()
    n_archived = df["archived"].sum()
    if n_forks > 0:
        log.info(f"Excluding forks: {n_forks}")
    if n_archived > 0:
        log.info(f"Excluding archived: {n_archived}")
    active = df.loc[(~df["fork"]) & (~df["archived"])]
    log.info(f"Number of repositories: {len(active)}")

    repo_names = active["name"]
    if args.test:
        # Positional slice: after filtering, index labels are no longer
        # guaranteed to be contiguous, so select the first five rows by
        # position rather than by label.
        repo_names = repo_names.iloc[:5]

    for repo_name in repo_names:
        log.info(f"[yellow]Working on: {repo_name}")
        gts_run.run_each_repo(args.user, args.token, repo_name, save_csv=True)
        gts_run.get_top_paths(args.user, args.token, repo_name, save_csv=True)

    # Save files in a data folder
    log.info("[yellow]Moving records to data/folder")
    cwd = Path.cwd()
    _move_stats_files(cwd, cwd / "data")

    log.info("[dark_green]gts_run_all_repos script completed!")


if __name__ == "__main__":
    main()
