# -*- coding: utf-8 -*-

"""
This module is meant to be modified by the user to set the config variables.
"""

# Add any import you need for function_val
import numpy as np


########################### Remote options #####################################
# Directory on the remote server that corresponds to this project's directory
remote_dir = "/path/on/remote/server"
# Name of the remote, given either as "host" or as "user@host"
remote_name = "remote_name"
# Directories, files or patterns that the rsync command should exclude
rsync_excludes = [
    "fval_data.npz",
    ".mdrun.xtc_offsets.lock",
    ".mdrun.xtc_offsets.npz",
]


# Sampling variables
# Number of simulations per epoch. Changing it only affects new epochs.
N = 16


# Grompping
# NOTE(review): presumably the GROMACS command used to run grompp - confirm
gmx = "gmx"
# Input files for grompping
mdp = "mdrun.mdp"
topol = "topol.top"
ndx = "index_grompp.ndx"
# NOTE(review): presumably forwarded as grompp's maximum number of allowed warnings - confirm
maxwarn = 0


############################## Function calcs ##################################
# Initial structure file, used for making the selections. String or list of strings.
# With a single value, it is used both as the topology and as the starting structure.
# With multiple files, the first is used as the reference structure and topology,
# but only the subsequent files are used as starting points for the initial epoch.
initial_struct = "start.gro"
# To make the next run faster, this selection is saved to disk.
# The coordinates of the selection are used in function_val.
# Should be a valid MDAnalysis selection string OR a group name in index_file.
# For MDAnalysis selection strings, see
# https://docs.mdanalysis.org/stable/documentation_pages/selections.html
select_str = "protein and resid 638 and not ( type H )"
# Same as above, but the selection used for clustering
select_str_clust = "protein and name CA"
# Index file for selections
index_file = None

# Minimum and maximum values to sample between. Use None to ignore a boundary, or the
# string "start" to use the initial value of the starting structure.
minval = 0.79
maxval = 1.4


def function_val(positions):
    """
      Write here your analysis function. Positions will be
        numpy array of shape (n,m,3) for n frames of m atoms.
        Note that m is the number of atoms in the selection,
        not the whole trajectory.
      The function should return a numpy array of shape (n).

      This example assumes that the selection matches 4 residues with
        the same number of atoms each, ordered residue by residue.
        For each frame it returns the smaller of the res1-res3 and
        res2-res4 distances, measured between the average positions
        of the selected atoms of each residue.
      Any leading dimensions are kept, so a single frame of shape (m,3)
        gives a single value instead of an array.
      If m is not a multiple of 4, the last m%4 atoms are ignored.
      Raises ValueError if the selection has fewer than 4 atoms.
    """
    positions = np.asarray(positions)
    m = positions.shape[-2]
    # 4 residues match, so we separate them
    m_res = m // 4
    if m_res == 0:
        raise ValueError(
            "function_val needs at least 4 atoms in the selection "
            "(one per residue), got %d" % m)

    def centroid(res):
        # Average position of the atoms of residue number res (0-based).
        # Slicing with an ellipsis works for any number of leading
        # dimensions and avoids the copy made by index arrays.
        return np.mean(
            positions[..., res * m_res:(res + 1) * m_res, :], axis=-2)

    res1, res2, res3, res4 = (centroid(res) for res in range(4))

    # distances between average position of res1 to average position of res3
    dist1 = np.linalg.norm(res1 - res3, axis=-1)
    # same for res 2 and 4
    dist2 = np.linalg.norm(res2 - res4, axis=-1)

    return np.minimum(dist1, dist2)


############################## Trajectory transformations #####################

# Whether to centre by a single atom before making anything whole
precenter = False
# The atom to use for precentering. By default (None), the one closest to the box centre is used.
precenter_atom = None
# Whether to make molecules whole after being broken over pbc
unwrap_mols = False
# The selection to make whole
unwrap_sel = "protein"
# A selection of atoms to use as "starters" for unwrapping. Unless mols_in_box=True,
# these atoms are guaranteed to end up inside the box. Makes no difference to the outcome
# if mols_in_box=True. None means to use the smallest indexed atom of each mol.
unwrap_starters = None
# Put the molecule COM back into the box (ignored if not unwrapping).
# Only considers atoms in unwrap_sel.
mols_in_box = False

# Whether to translate clustering coordinates to the initial structure
clust_centre = True
# Whether to fit clustering coordinates to the initial structure, rotationally AND translationally
clust_superpos = True


############################## Advanced options ################################

# The random number seed. None means a totally random run.
rng_seed = None

# List of reps to ignore. Should be a list of tuples (epc,rep),
# e.g. to ignore rep 1 of ep 11 and rep 3 of ep 5: ignore_reps = [(11,1),(5,3)]
ignore_reps = []
# Like above, but just a list of epochs to ignore,
# e.g. to ignore epochs 6 and 7: ignore_epcs = [6,7]
ignore_epcs = []

# Options to handle using only some of the data.
# The function values will be recalculated for every epoch and repetition if these options are changed.
# Amount of data (frames) to ignore from the start of each repetition
ignore_from_start = 0
# The stride for only using every n'th frame
stride = 1

# Histogram building
# At least this much data in each bin of the histogram
data_per_bin = 100
# Maximum number of bins between the boundaries
maxbins = 100
# Minimum number of frames to choose from. If fewer frames are in the bin, this many closest frames
# in value will be used. Defaults to the same value as data_per_bin.
minchoice = data_per_bin
# Maximum height to choose from, as a fraction from the minimum found height to the maximum:
# (maxh-minh)*choice_crit+minh will be the highest allowed histogram bin.
choice_crit = 0.5
# Allow choosing the same frame more than once in the same epoch.
# In some edge cases of multiple choices within a region of low sampling,
# the minchoice values might overlap and still produce duplicates.
allow_choice_duplicates = False

# Cluster histogram
# NOTE(review): presumably the clustering counterparts of data_per_bin and maxbins - confirm
clust_data_per_bin = 1000
clust_maxbins = 10

# Number of epochs before clustering
epochs_pre_clust = 3
# Maximum number of clusters per bin
maxclust = 15
# Choose at most this fraction of choices from clustering
clust_choice_frac = 0.5
# Tolerance for the number of clusters in clustering
clust_tol = 0.1
