Module statkit.metrics
Classification metrics not part of sci-kit learn.
Functions
def binary_classification_report(y_true,
y_pred_proba,
threshold: float = 0.5,
n_iterations: int = 1000,
quantile_range: tuple = (0.025, 0.975),
random_state=None) -> pandas.core.frame.DataFrame
Expand source code
def binary_classification_report(
    y_true,
    y_pred_proba,
    threshold: float = 0.5,
    n_iterations: int = 1000,
    quantile_range: tuple = (0.025, 0.975),
    random_state=None,
) -> DataFrame:
    r"""Compile performance metrics of a binary classifier.

    Evaluates the model according to the following metrics:

    - Accuracy,
    - Average precision (area under the precision-recall curve),
    - \( F_1 \),
    - Area under the receiver operating characteristic curve (ROC AUC),
    - Sensitivity (or, true positive rate, see `sensitivity`),
    - Specificity (or, true negative rate, see `specificity`).

    Args:
        y_true: Ground truth labels (0 or 1).
        y_pred_proba: Predicted probability of the positive class.
        threshold: Dichotomise probabilities greater or equal to this
            threshold as positive.
        n_iterations: Number of bootstrap permutations.
        quantile_range: Confidence interval quantiles.
        random_state: Seed controlling the bootstrap resampling.

    Returns:
        A dataframe with the estimated classification metrics (`point`) and
        (by default 95%) confidence interval (from `lower` to `upper`).
    """
    scores = DataFrame(
        index=[
            "Accuracy",
            "Average precision",
            "$F_1$",
            "ROC AUC",
            "Sensitivity",
            "Specificity",
        ],
        columns=["point", "lower", "upper"],
    )
    kwargs = {
        "n_iterations": n_iterations,
        "random_state": random_state,
        "quantile_range": quantile_range,
    }

    # Rank-based metrics are computed directly on the probabilities.
    rank_scorers = {
        "ROC AUC": roc_auc_score,
        "Average precision": average_precision_score,
    }
    for name, scorer in rank_scorers.items():
        score = bootstrap_score(y_true, y_pred_proba, metric=scorer, **kwargs)
        scores.loc[name] = dict(score)  # type: ignore

    # Metrics that require the probability to be dichotomised. The
    # dichotomisation is loop-invariant, so compute it once up front.
    y_pred = (y_pred_proba >= threshold).astype(int)
    class_scorers = {
        "$F_1$": f1_score,
        "Sensitivity": sensitivity,
        "Specificity": specificity,
        "Accuracy": accuracy_score,
    }
    for name, scorer in class_scorers.items():
        score = bootstrap_score(y_true, y_pred, metric=scorer, **kwargs)
        scores.loc[name] = dict(score)  # type: ignore
    return scores
Evaluates the model according to the following metrics:
- Accuracy,
- Average precision (area under the precision-recall curve),
- \( F_1 \),
- Area under the receiver operating characteristic curve (ROC AUC),
- Sensitivity (or, true positive rate, see sensitivity()),
- Specificity (or, true negative rate, see specificity()).
Args
y_true: Ground truth labels (0 or 1).
y_pred_proba: Predicted probability of the positive class.
threshold: Dichotomise probabilities greater or equal to this threshold as positive.
quantile_range: Confidence interval quantiles.
n_iterations: Number of bootstrap permutations.
Returns
A dataframe with the estimated classification metrics (`point`) and
(by default 95%) confidence interval (from `lower` to `upper`).
def false_negative_rate(y_true, y_prob, threshold: float = 0.5) -> float
Expand source code
def false_negative_rate(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""The number of false negatives out of all positives.

    Also called the miss rate.

    $$r_{\mathrm{fn}} = \frac{f_n}{t_p + f_n} = \frac{f_n}{p}$$

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when the probability is greater than
            or equal to this threshold.

    Returns:
        The miss rate as a float.
    """
    # Coerce plain sequences so the element-wise comparisons below work.
    if not isinstance(y_true, (ndarray, Series)):
        y_true = array(y_true)
    if not isinstance(y_prob, (ndarray, Series)):
        y_prob = array(y_prob)

    # Use >= (not >) so this rate is the exact complement of
    # `true_positive_rate`, which dichotomises with >= as well.
    y_pred = y_prob >= threshold
    positives = sum(y_true)
    # Actual positive, but classified as negative.
    is_fn = (y_true == 1) & (y_pred == 0)
    false_negatives = sum(is_fn)
    return false_negatives / positives
Also called the miss rate. r_{\mathrm{fn}} = \frac{f_n}{t_p + f_n} = \frac{f_n}{p}
Args
y_true- Ground truth label (binarised).
y_prob- Probability of positive class.
threshold- Classify as positive when probability exceeds threshold.
def false_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float
Expand source code
def false_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""The number of false positives out of all negatives.

    Also called the fall-out rate.

    $$r_{\mathrm{fp}} = \frac{f_p}{t_n + f_p} = \frac{f_p}{n}$$

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when the probability is greater than
            or equal to this threshold.

    Returns:
        The fall-out rate as a float.
    """
    # Coerce plain sequences so the element-wise comparisons below work.
    if not isinstance(y_true, (ndarray, Series)):
        y_true = array(y_true)
    if not isinstance(y_prob, (ndarray, Series)):
        y_prob = array(y_prob)

    # Use >= (not >) for consistency with `true_positive_rate` and with the
    # dichotomisation in `binary_classification_report`.
    y_pred = y_prob >= threshold
    negatives = y_true.size - sum(y_true)
    # Actual negative, but classified as positive.
    false_positives = sum((~y_true.astype(bool)) & y_pred)
    return false_positives / negatives
Also called the fall-out rate. $$r_{\mathrm{fp}} = \frac{f_p}{t_n + f_p} = \frac{f_p}{n}$$
Args
y_true- Ground truth label (binarised).
y_prob- Probability of positive class.
threshold- Classify as positive when probability exceeds threshold.
def perplexity(X, probs)
Expand source code
def perplexity(X, probs):
    r"""Per-word perplexity of the data under the given word probabilities.

    $$
    \mathcal{L} = \exp\left(-\frac{1}{m}\sum_{i=1}^m \sum_{j=1}^n
    \frac{x_{ij} \log p_{ij}}{\sum_{j=1}^n x_{ij}}\right)
    $$

    Args:
        X: The word counts (a matrix of shape `(m, n)`).
        probs: The probability of each word (a matrix of shape `(m, n)`).

    Returns:
        The perplexity over the dataset (a scalar), ranging from 1 (best) to
        infinity, with a perplexity equal to `n` corresponding to a uniform
        distribution.
    """
    # Words per row; keepdims gives shape (m, 1) for broadcasting below.
    totals = X.sum(axis=1, keepdims=True)
    log_p = log(probs)
    # A zero count paired with zero probability contributes nothing; patch
    # those -inf entries to 0 so that 0 * -inf does not produce nan.
    log_p = where((X == 0.0) & isneginf(log_p), 0.0, log_p)
    weighted = X * log_p
    # Drop empty rows before dividing, since their totals are zero.
    nonempty = (totals > 0).squeeze()
    per_row = sum(weighted[nonempty] / totals[nonempty], axis=1)
    return exp(-mean(per_row))
\mathcal{L} = \exp\left(-\frac{1}{m}\sum_{i=1}^m \sum_{j=1}^n \frac{x_{ij} \log p_{ij}}{\sum_{j=1}^n x_{ij}}\right)
Args
X: The word counts (a matrix of shape `(m, n)`).
probs: The probability of each word (a matrix of shape `(m, n)`).
Returns
The perplexity over the dataset (a scalar), ranging from 1 (best) to infinity,
with a perplexity equal to `n` corresponding to a uniform distribution.
def sensitivity(y_true, y_prob, threshold: float = 0.5) -> float
Expand source code
def sensitivity(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""Fraction of actual positives that are classified as positive.

    Aliases:

    - True positive rate,
    - Recall,
    - Hit rate.

    $$r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}$$

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when probability exceeds threshold.
    """
    # Alias for `true_positive_rate`, provided for discoverability.
    return true_positive_rate(y_true, y_prob, threshold)
Aliases
- True positive rate,
- Recall,
- Hit rate.
r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}
Args
y_true- Ground truth label (binarised).
y_prob- Probability of positive class.
threshold- Classify as positive when probability exceeds threshold.
def specificity(y_true, y_prob, threshold: float = 0.5) -> float
Expand source code
def specificity(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""Fraction of actual negatives that are classified as negative.

    $$r_{\mathrm{tn}} = \frac{t_n}{t_n + f_p} = \frac{t_n}{n} = 1 - r_{\mathrm{fp}}$$

    Aliases:

    - True negative rate,
    - Selectivity.

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when probability exceeds threshold.
    """
    # The true negative rate is the complement of the fall-out rate.
    fall_out = false_positive_rate(y_true, y_prob, threshold)
    return 1 - fall_out
r_{\mathrm{tn}} = \frac{t_n}{t_n + f_p} = \frac{t_n}{n} = 1 - r_{\mathrm{fp}}
Aliases
- True negative rate,
- Selectivity.
Args
y_true- Ground truth label (binarised).
y_prob- Probability of positive class.
threshold- Classify as positive when probability exceeds threshold.
def true_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float
Expand source code
def true_positive_rate(y_true, y_prob, threshold: float = 0.5) -> float:
    r"""The number of true positives out of all positives (recall).

    Aliases:

    - Sensitivity,
    - Recall,
    - Hit rate.

    $$r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}$$

    Args:
        y_true: Ground truth label (binarised).
        y_prob: Probability of positive class.
        threshold: Classify as positive when the probability is greater than
            or equal to this threshold.
    """
    # Coerce plain sequences to arrays so element-wise operations work.
    y_true = y_true if isinstance(y_true, (ndarray, Series)) else array(y_true)
    y_prob = y_prob if isinstance(y_prob, (ndarray, Series)) else array(y_prob)

    predicted_positive = y_prob >= threshold
    actual_positive = y_true.astype(bool)
    n_positives = sum(y_true)
    n_true_positives = sum(actual_positive & predicted_positive)
    return n_true_positives / n_positives
Aliases
- Sensitivity,
- Recall,
- Hit rate.
r_{\mathrm{tp}} = \frac{t_p}{t_p + f_n} = \frac{t_p}{p}
Args
y_true- Ground truth label (binarised).
y_prob- Probability of positive class.
threshold- Classify as positive when probability exceeds threshold.
def youden_j(y_true, y_pred) -> float
Expand source code
def youden_j(y_true, y_pred) -> float:
    r"""Classifier informedness as a balance between true and false positives.

    Youden's J statistic is defined as:
    $$ J = r_{\mathrm{tp}} - r_{\mathrm{fp}}. $$

    Since the specificity equals \( 1 - r_{\mathrm{fp}} \), this is computed
    as sensitivity + specificity - 1.

    Args:
        y_true: Ground truth labels.
        y_pred: Labels predicted by the classifier.
    """
    return sensitivity(y_true, y_pred) + specificity(y_true, y_pred) - 1
Youden's J statistic is defined as: J = r_{\mathrm{tp}} - r_{\mathrm{fp}}.
Args
y_true- Ground truth labels.
y_pred- Labels predicted by the classifier.
def youden_j_threshold(y_true, y_pred) -> float
Expand source code
def youden_j_threshold(y_true, y_pred) -> float:
    """Classification threshold with highest Youden's J.

    Args:
        y_true: Ground truth labels.
        y_pred: Labels predicted by the classifier.
    """
    false_pos, true_pos, thresholds = roc_curve(y_true, y_pred, pos_label=1)
    # J = tpr - fpr at each candidate threshold; return the threshold with
    # the largest J (ties resolve to the larger threshold).
    return max(zip(true_pos - false_pos, thresholds))[1]
Args
y_true- Ground truth labels.
y_pred- Labels predicted by the classifier.