#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Plots for pyPhi
@author: Sal Garcia <sgarciam@ic.ac.uk> <salvadorgarciamunoz@gmail.com>
Changes on Feb 25th 2026:
- Bokeh 3.x: tooltips removed from figure(); now added via explicit HoverTool
- Extracted shared helpers: _get_lv_labels, _get_xvar_labels, _get_yvar_labels,
_new_output_file, _make_bokeh_palette, _resolve_lpls_space, _mask_by_class,
_add_hover
- Fixed loop-variable collision in predvsobs (i → j)
- Fixed output_file inside loop in mb_r2pb
- Fixed 2-D y_ array in score_line (.flatten())
- Fixed nan_policy on vbar (not supported); use per-series ColumnDataSource
- Fixed bitwise ~ on bool in lineplot
- Added unbound-variable guard in contributions_plot
- Replaced False sentinels with None throughout
- Replaced list(np.arange(...)+1) with range(1, n+1)
- Replaced string concatenation with f-strings
- Standardised colormap keyword args (alpha=1, bytes=True)
- Replaced math.pi with np.pi
- loadings / weighted_loadings: PLS now produces single HTML with X (blue)
and Y (red) bars on a shared axis per LV
"""
from __future__ import annotations
from datetime import datetime
from typing import Optional
import matplotlib
import numpy as np
import pandas as pd
from pyphi import calc as phi
from bokeh.io import output_file, show
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, HoverTool, LabelSet, Legend, LinearAxis, Range1d, Span
from bokeh.plotting import figure
# Names exported by ``from <module> import *``; everything else in this module
# (the underscore-prefixed helpers below) stays private.
__all__ = [
    "r2pv", "loadings", "loadings_map", "weighted_loadings", "vip",
    "score_scatter", "score_line", "diagnostics", "predvsobs",
    "contributions_plot", "mb_weights", "mb_r2pb", "mb_vip",
    "barplot", "lineplot", "plot_spectra", "scatter_with_labels",
]
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _timestr() -> str:
    """Return the current local time as a ``YYYYmmddHHMMSSffffff`` digit string."""
    stamp = datetime.now()
    return f"{stamp:%Y%m%d%H%M%S%f}"
def _new_output_file(prefix: str, title: str) -> None:
    """Direct Bokeh output to ``<prefix>_<timestamp>.html`` with inline resources.

    Args:
        prefix: Leading part of the HTML file name.
        title: Title handed to Bokeh's ``output_file``.
    """
    stamp = _timestr()
    filename = f"{prefix}_{stamp}.html"
    output_file(filename, title=title, mode="inline")
def _add_hover(p, tooltips) -> None:
    """Build a ``HoverTool`` from *tooltips* and register it on figure *p*.

    Hover is attached as an explicit tool (Bokeh 3.x compatible) rather than
    through a ``figure()`` argument.
    """
    hover = HoverTool(tooltips=tooltips)
    p.add_tools(hover)
def _get_lv_labels(mvmobj: dict) -> list[str]:
    """Return one label per column of ``mvmobj["T"]``.

    Labels read ``"LV #k"`` when the model has a ``"Q"`` entry and
    ``"PC #k"`` otherwise, with ``k`` starting at 1.
    """
    n_components = mvmobj["T"].shape[1]
    if "Q" in mvmobj:
        stem = "LV #"
    else:
        stem = "PC #"
    return [stem + str(k + 1) for k in range(n_components)]
def _get_xvar_labels(mvmobj: dict) -> list[str]:
    """Return the X-variable names of a model.

    Uses ``mvmobj["varidX"]`` when present; otherwise generates
    ``"XVar #1" ... "XVar #n"`` with ``n`` the number of rows of
    ``mvmobj["P"]``.
    """
    if "varidX" not in mvmobj:
        count = mvmobj["P"].shape[0]
        return [f"XVar #{k + 1}" for k in range(count)]
    return mvmobj["varidX"]
def _get_yvar_labels(mvmobj: dict) -> list[str]:
    """Return the Y-variable names of a model.

    Uses ``mvmobj["varidY"]`` when present; otherwise generates
    ``"YVar #1" ... "YVar #n"`` with ``n`` the number of rows of
    ``mvmobj["Q"]``.
    """
    if "varidY" not in mvmobj:
        count = mvmobj["Q"].shape[0]
        return [f"YVar #{k + 1}" for k in range(count)]
    return mvmobj["varidY"]
def _make_bokeh_palette(n: int, cmap_name: str = "rainbow") -> list[str]:
    """Sample *n* evenly spaced colours from a matplotlib colormap.

    Args:
        n: Number of colours to produce.
        cmap_name: Key into ``matplotlib.colormaps``. Default ``"rainbow"``.

    Returns:
        A list of ``"#rrggbb"`` strings (alpha channel dropped).
    """
    colormap = matplotlib.colormaps[cmap_name]
    positions = np.linspace(0, 1, n)
    rgba_bytes = colormap(positions, alpha=1, bytes=True)
    hex_colours = []
    for red, green, blue in rgba_bytes[:, :3]:
        hex_colours.append("#{:02x}{:02x}{:02x}".format(red, green, blue))
    return hex_colours
def _resolve_lpls_space(mvmobj: dict, material, zspace: bool) -> dict:
    """Return a shallow copy of the model with keys remapped for plotting.

    The input dict is never modified. Depending on ``mvmobj["type"]``:

    * ``"lpls"``: ``"Ws"`` is set to ``"Ss"``.
    * ``"jrpls"`` / ``"tpls"`` with a *material*: ``"Ws"``, ``"varidX"`` and
      (when ``"r2xpvi"`` exists) ``"r2xpv"`` are taken from that material's
      entry in ``"Ssi"``, ``"varidXi"`` and ``"r2xpvi"``.
    * ``"tpls"`` with no material and *zspace* truthy: ``"varidX"`` is set to
      ``"varidZ"`` and (when ``"r2zpv"`` exists) ``"r2xpv"`` to ``"r2zpv"``.
    * ``"jrpls"`` / ``"tpls"`` otherwise: ``"Ws"`` is set to ``"Ss"``.
    * any other type: the copy is returned unchanged.

    Raises:
        ValueError: If *material* is not in ``mvmobj["materials"]``.
    """
    resolved = mvmobj.copy()
    model_type = resolved["type"]

    if model_type == "lpls":
        resolved["Ws"] = resolved["Ss"]
        return resolved

    if model_type not in ("jrpls", "tpls"):
        return resolved

    # A named material takes priority over the zspace switch.
    if material is not None:
        block_idx = resolved["materials"].index(material)
        resolved["Ws"] = resolved["Ssi"][block_idx]
        resolved["varidX"] = resolved["varidXi"][block_idx]
        if "r2xpvi" in resolved:
            resolved["r2xpv"] = resolved["r2xpvi"][block_idx]
        return resolved

    if model_type == "tpls" and zspace:
        resolved["varidX"] = resolved["varidZ"]
        if "r2zpv" in resolved:
            resolved["r2xpv"] = resolved["r2zpv"]
        return resolved

    resolved["Ws"] = resolved["Ss"]
    return resolved
def _mask_by_class(
    classid_arr: np.ndarray,
    x_arr: np.ndarray,
    y_arr: np.ndarray,
    obs_ids: list[str],
    obs_nums: list[str],
    class_val,
) -> dict:
    """Collect the entries whose class id equals *class_val*.

    Args:
        classid_arr: Class id per observation.
        x_arr: X coordinate per observation.
        y_arr: Y coordinate per observation.
        obs_ids: Observation identifiers, positionally aligned with the arrays.
        obs_nums: Observation numbers, positionally aligned with the arrays.
        class_val: The class to keep.

    Returns:
        A dict with keys ``x``, ``y``, ``ObsID``, ``ObsNum`` and ``Class``
        holding plain lists restricted to the matching observations.
    """
    selected = classid_arr == class_val
    positions = np.nonzero(selected)[0]
    n_selected = int(np.count_nonzero(selected))
    return {
        "x": x_arr[selected].tolist(),
        "y": y_arr[selected].tolist(),
        "ObsID": [obs_ids[pos] for pos in positions],
        "ObsNum": [obs_nums[pos] for pos in positions],
        "Class": [class_val for _ in range(n_selected)],
    }
def _add_origin_lines(p) -> None:
    """Add black, 2-px wide vertical and horizontal lines through zero to *p*."""
    vertical = Span(location=0, dimension="height", line_color="black", line_width=2)
    horizontal = Span(location=0, dimension="width", line_color="black", line_width=2)
    p.renderers.extend([vertical, horizontal])
def _add_hline(p) -> None:
    """Add a black, 2-px wide horizontal line at y = 0 to *p*."""
    zero_line = Span(location=0, dimension="width", line_color="black", line_width=2)
    p.renderers.append(zero_line)
def _add_ci_ellipse(p, T_matrix: np.ndarray, mvmobj: dict, xd: int, yd: int) -> None:
    """Draw dashed confidence-limit curves for two score dimensions on *p*.

    The second-moment matrix of columns ``xd`` and ``yd`` (1-indexed) of
    *T_matrix* is passed, together with the number of rows of
    ``mvmobj["T"]``, to ``phi.scores_conf_int_calc``. The returned upper and
    lower curves are drawn in gold (the ``*95`` outputs) and red (the ``*99``
    outputs).
    """
    first = T_matrix[:, [xd - 1]]
    second = T_matrix[:, [yd - 1]]
    scores_2d = np.concatenate((first, second), axis=1)
    n_rows = scores_2d.shape[0]
    second_moment = (scores_2d.T @ scores_2d) / n_rows
    limits = phi.scores_conf_int_calc(second_moment, mvmobj["T"].shape[0])
    xd95, xd99, yd95p, yd95n, yd99p, yd99n = limits
    curves = (
        (xd95, yd95p, "gold"),
        (xd95, yd95n, "gold"),
        (xd99, yd99p, "red"),
        (xd99, yd99n, "red"),
    )
    for xs, ys, colour in curves:
        p.line(xs, ys, line_color=colour, line_dash="dashed")
def _obs_ids_from_model(mvmobj: dict) -> list[str]:
    """Return the model's observation ids.

    Uses ``mvmobj["obsidX"]`` when present; otherwise generates
    ``"Obs #1" ... "Obs #n"`` with ``n`` the number of rows of
    ``mvmobj["T"]``.
    """
    if "obsidX" not in mvmobj:
        n_obs = mvmobj["T"].shape[0]
        return [f"Obs #{k + 1}" for k in range(n_obs)]
    return mvmobj["obsidX"]
# ---------------------------------------------------------------------------
# Public plotting functions
# ---------------------------------------------------------------------------
[docs]
def r2pv(
    mvm_obj: dict,
    *,
    plotwidth: int = 600,
    plotheight: int = 400,
    addtitle: str = "",
    material=None,
    zspace: bool = False,
) -> None:
    """Stacked bar chart of R² per variable, split by latent variable / component.

    One bar is drawn per variable and each coloured band in the bar is the
    share contributed by one LV/PC. Models that carry a ``"Q"`` entry (PLS
    family) get two charts, X (or Z) on top and Y below, in the same HTML
    file; other models get the X chart only.

    Args:
        mvm_obj (dict): Fitted model from :func:`pyphi.pca`, :func:`pyphi.pls`,
            :func:`pyphi.lpls`, :func:`pyphi.jrpls`, or :func:`pyphi.tpls`.
        plotwidth (int): Width of the X chart in pixels. The Y chart width is
            scaled by the ratio of Y to X variable counts, with a floor of
            350 px. Default ``600``.
        plotheight (int): Height of each chart in pixels. Default ``400``.
        addtitle (str): Text appended to the chart title. Default ``""``.
        material (str): For JRPLS / TPLS models, the material block to
            display. Default ``None``.
        zspace (bool): For TPLS models, if ``True`` the Z-space R² is shown
            in place of the X-space R². Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    model = _resolve_lpls_space(mvm_obj, material, zspace)
    n_components = model["T"].shape[1]
    has_y_space = "Q" in model
    if model["type"] == "tpls" and zspace:
        space = "Z"
    else:
        space = "X"
    component_names = _get_lv_labels(model)
    x_names = _get_xvar_labels(model)

    # Column-oriented table: variable names plus one column per component.
    x_table: dict = {"XVar": x_names}
    x_table.update(
        (name, model["r2xpv"][:, col]) for col, name in enumerate(component_names)
    )
    colours = _make_bokeh_palette(n_components)

    def _stacked_chart(categories, heading, table, cat_key, axis_text, chart_width=plotwidth):
        chart = figure(
            x_range=categories, title=heading,
            tools="save,box_zoom,xpan,reset",
            width=chart_width, height=plotheight,
        )
        _add_hover(chart, f"$name @{cat_key}: @$name")
        stacks = chart.vbar_stack(
            component_names, x=cat_key, width=0.9, color=colours, source=table
        )
        chart.y_range.range_padding = 0.1
        chart.ygrid.grid_line_color = None
        chart.xgrid.grid_line_color = None
        chart.axis.minor_tick_line_color = None
        chart.outline_line_color = None
        chart.yaxis.axis_label = axis_text
        chart.xaxis.major_label_orientation = np.pi / 2
        legend_entries = [
            (name, [glyph]) for name, glyph in zip(component_names, stacks)
        ]
        chart.add_layout(Legend(items=legend_entries), "right")
        return chart

    if not has_y_space:
        _new_output_file("r2xpv", "R2XPV")
        only_chart = _stacked_chart(
            x_names, f"R2X Per Variable {addtitle}", x_table, "XVar", "R2X"
        )
        show(only_chart)
        return

    _new_output_file("r2xypv", f"R2{space}YPV")
    y_names = _get_yvar_labels(model)
    y_table: dict = {"YVar": y_names}
    for col, name in enumerate(component_names):
        y_table[name] = model["r2ypv"][:, col]
    y_width = max(350, int(plotwidth * len(y_names) / len(x_names)))
    top_chart = _stacked_chart(
        x_names, f"R2{space} Per Variable {addtitle}", x_table, "XVar",
        f"R2{space}", plotwidth,
    )
    bottom_chart = _stacked_chart(
        y_names, f"R2Y Per Variable {addtitle}", y_table, "YVar", "R2Y", y_width
    )
    show(column(top_chart, bottom_chart))
[docs]
def loadings(
    mvm_obj: dict,
    *,
    plotwidth: int = 750,
    xgrid: bool = False,
    addtitle: str = "",
    material=None,
    zspace: bool = False,
    ScaleToPlot: bool = False,
) -> None:
    """Bar charts of the model loadings, one chart per LV/PC, in one HTML file.

    Models without a ``"Q"`` entry (PCA) get blue bars of the X-space
    loadings. Models with ``"Q"`` (PLS family) get X-space loadings in blue
    and Y-space loadings (Q) in red on one categorical axis, with a dashed
    vertical line between the two groups. Without ``ScaleToPlot`` the Y bars
    use a second, right-hand y-axis and both axes are symmetric about zero so
    the zeros line up; with ``ScaleToPlot`` each vector is divided by its
    largest absolute value and both share one axis.

    Args:
        mvm_obj (dict): Fitted model from :func:`pyphi.pca`, :func:`pyphi.pls`,
            :func:`pyphi.lpls`, :func:`pyphi.jrpls`, or :func:`pyphi.tpls`.
        plotwidth (int): Width of each chart in pixels. Default ``750``.
        xgrid (bool): If ``True``, draw light-gray vertical grid lines.
            Default ``False``.
        addtitle (str): Text appended to each chart title. Default ``""``.
        material (str): For JRPLS / TPLS models, the material block to plot.
            Default ``None``.
        zspace (bool): For TPLS models, if ``True`` plot Z-space loadings.
            Default ``False``.
        ScaleToPlot (bool): If ``True``, scale each loading vector to
            [-1, 1] before plotting. Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    model = _resolve_lpls_space(mvm_obj, material, zspace)
    model_type = model["type"]
    z_mode = model_type == "tpls" and zspace
    space_lbl = "Z" if z_mode else "X"
    is_pls = "Q" in model
    lv_labels = _get_lv_labels(model)
    x_names = _get_xvar_labels(model)

    if model_type in ("lpls", "jrpls", "tpls"):
        loading_lbl = "Wz*" if z_mode else "S*"
        x_coeff = model["Ws"]
    elif is_pls:
        loading_lbl = "W*"
        x_coeff = model["Ws"]
    else:
        loading_lbl = "P"
        x_coeff = model["P"]

    tools = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    tooltips = [("Variable:", "@names")]
    vertical_grid = "lightgray" if xgrid else None

    def _unit_scale(vec):
        # Divide by the largest magnitude so the vector spans [-1, 1].
        return vec / np.max(np.abs(vec))

    def _symmetric_limit(vec):
        # Half-height of a zero-centred axis, with 15 % head-room.
        lowest, highest = float(vec.min()), float(vec.max())
        return max(abs(lowest), abs(highest)) * 1.15

    charts = []

    if not is_pls:
        _new_output_file(f"Loadings_{space_lbl}_Space", f"{space_lbl} Loadings PCA")
        for lv_num, lbl in enumerate(lv_labels, start=1):
            heights = x_coeff[:, lv_num - 1]
            if ScaleToPlot:
                heights = _unit_scale(heights)
            source = ColumnDataSource(dict(x_=x_names, y_=heights, names=x_names))
            chart = figure(
                x_range=x_names,
                title=f"{space_lbl} Space Loadings {lbl}{addtitle}",
                tools=tools,
                width=plotwidth,
            )
            _add_hover(chart, tooltips)
            chart.vbar(x="x_", top="y_", source=source, width=0.5, color="steelblue")
            chart.ygrid.grid_line_color = None
            chart.xgrid.grid_line_color = vertical_grid
            chart.yaxis.axis_label = f"{loading_lbl} [{lv_num}]"
            _add_hline(chart)
            chart.xaxis.major_label_orientation = np.pi / 2
            charts.append(chart)
        show(column(charts))
        return

    y_names = _get_yvar_labels(model)
    all_names = x_names + y_names
    divider_at = len(x_names)
    _new_output_file("Loadings_XY", f"{space_lbl}/Y Loadings PLS")

    for lv_num, lbl in enumerate(lv_labels, start=1):
        x_heights = x_coeff[:, lv_num - 1]
        y_heights = model["Q"][:, lv_num - 1]
        figure_args = dict(
            x_range=all_names,
            title=f"{space_lbl}/Y Loadings {lbl}{addtitle}",
            tools=tools,
            width=plotwidth,
        )
        if ScaleToPlot:
            # Each space is scaled independently to [-1, 1].
            x_heights = _unit_scale(x_heights)
            y_heights = _unit_scale(y_heights)
        else:
            # Two zero-centred ranges so the zeros of both axes coincide.
            x_limit = _symmetric_limit(x_heights)
            y_limit = _symmetric_limit(y_heights)
            figure_args["y_range"] = Range1d(-x_limit, x_limit)

        x_source = ColumnDataSource(
            dict(var=x_names, top=x_heights.tolist(), names=x_names)
        )
        y_source = ColumnDataSource(
            dict(var=y_names, top=y_heights.tolist(), names=y_names)
        )
        chart = figure(**figure_args)
        _add_hover(chart, tooltips)
        chart.vbar(x="var", top="top", source=x_source, width=0.5,
                   color="steelblue", legend_label=f"{loading_lbl} (X)")
        if ScaleToPlot:
            chart.vbar(x="var", top="top", source=y_source, width=0.5,
                       color="tomato", legend_label="Q (Y)")
            chart.yaxis.axis_label = f"{loading_lbl} / Q (normalised) [{lv_num}]"
        else:
            # Right-hand axis dedicated to the Y loadings.
            chart.extra_y_ranges = {"y_range": Range1d(-y_limit, y_limit)}
            right_axis = LinearAxis(
                y_range_name="y_range",
                axis_label=f"Q [{lv_num}]",
                axis_line_color="tomato",
                major_label_text_color="tomato",
                axis_label_text_color="tomato",
            )
            chart.add_layout(right_axis, "right")
            chart.vbar(x="var", top="top", source=y_source, width=0.5,
                       color="tomato", legend_label="Q (Y)",
                       y_range_name="y_range")
            chart.yaxis[0].axis_label = f"{loading_lbl} [{lv_num}]"

        divider = Span(location=divider_at, dimension="height",
                       line_color="black", line_width=1, line_dash="dashed")
        chart.add_layout(divider)
        chart.ygrid.grid_line_color = None
        chart.xgrid.grid_line_color = vertical_grid
        # Relocate the legend next to the plot so it cannot cover any bars.
        outside_legend = chart.legend[0]
        chart.add_layout(outside_legend, "right")
        outside_legend.click_policy = "hide"
        _add_hline(chart)
        chart.xaxis.major_label_orientation = np.pi / 2
        charts.append(chart)
    show(column(charts))
[docs]
def loadings_map(
    mvm_obj: dict,
    dims: list[int],
    *,
    plotwidth: int = 600,
    addtitle: str = "",
    material=None,
    zspace: bool = False,
    textalpha: float = 0.75,
) -> None:
    """Scatter the loadings of two chosen components against each other.

    For models with a ``"Q"`` entry (PLS family), the ``"Ws"`` loadings
    (dark blue) and the ``"Q"`` loadings (red) are each divided by their
    largest absolute value and overlaid. For other models the ``"P"``
    loadings are plotted as stored. Every point is labelled with its
    variable name and both axes are fixed to [-1.5, 1.5].

    Args:
        mvm_obj (dict): Fitted model from :func:`pyphi.pca`, :func:`pyphi.pls`,
            :func:`pyphi.lpls`, :func:`pyphi.jrpls`, or :func:`pyphi.tpls`.
        dims (list[int]): The two component numbers to plot (1-indexed),
            e.g. ``[1, 2]``.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        addtitle (str): Text appended to the plot title. Default ``""``.
        material (str): For JRPLS / TPLS, the material block to use.
            Default ``None``.
        zspace (bool): For TPLS, if ``True`` use Z-space loadings.
            Default ``False``.
        textalpha (float): Alpha of the variable-name labels (0–1).
            Default ``0.75``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    model = _resolve_lpls_space(mvm_obj, material, zspace)
    is_pls = "Q" in model
    axis_names = _get_lv_labels(model)
    x_names = _get_xvar_labels(model)
    tools = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    tooltips = [("index", "$index"), ("(x,y)", "($x, $y)"), ("Variable:", "@names")]
    _new_output_file("Loadings_Map", "Loadings Map")

    def _unit_scale(vec):
        return vec / np.max(np.abs(vec))

    col_a = dims[0] - 1
    col_b = dims[1] - 1

    # Each layer is (data source, marker colour); drawn in list order.
    if is_pls:
        y_names = _get_yvar_labels(model)
        x_layer = ColumnDataSource(dict(
            x=_unit_scale(model["Ws"][:, col_a]),
            y=_unit_scale(model["Ws"][:, col_b]),
            names=x_names,
        ))
        y_layer = ColumnDataSource(dict(
            x=_unit_scale(model["Q"][:, col_a]),
            y=_unit_scale(model["Q"][:, col_b]),
            names=y_names,
        ))
        layers = [(x_layer, "darkblue"), (y_layer, "red")]
        component_tag = "LV"
    else:
        x_layer = ColumnDataSource(dict(
            x=model["P"][:, col_a],
            y=model["P"][:, col_b],
            names=x_names,
        ))
        layers = [(x_layer, "darkblue")]
        component_tag = "PC"

    p = figure(
        tools=tools,
        width=plotwidth,
        title=(
            f"Loadings Map {component_tag}[{dims[0]}] - "
            f"{component_tag}[{dims[1]}] {addtitle}"
        ),
        x_range=(-1.5, 1.5),
        y_range=(-1.5, 1.5),
    )
    _add_hover(p, tooltips)
    for source, colour in layers:
        p.scatter("x", "y", source=source, size=10, color=colour)
    for source, _ in layers:
        name_labels = LabelSet(
            x="x", y="y", text="names", level="glyph",
            x_offset=5, y_offset=5, source=source,
            text_color="darkgray", text_alpha=textalpha,
        )
        p.add_layout(name_labels)

    p.xaxis.axis_label = axis_names[col_a]
    p.yaxis.axis_label = axis_names[col_b]
    _add_origin_lines(p)
    show(p)
[docs]
def weighted_loadings(
    mvm_obj: dict,
    *,
    plotwidth: int = 750,
    xgrid: bool = False,
    addtitle: str = "",
    material=None,
    zspace: bool = False,
    ScaleToPlot: bool = False,
) -> None:
    """Bar charts of loadings multiplied by per-variable R², one chart per LV/PC.

    Each loading is multiplied by the matching entry of ``"r2xpv"`` (and
    ``"r2ypv"`` for the Y space), so variables that explain more variance
    stand out. Models without ``"Q"`` (PCA) get blue bars only. Models with
    ``"Q"`` (PLS family) get X bars in blue and Y bars in red on one
    categorical axis with a dashed divider; the Y bars use a right-hand
    y-axis unless ``ScaleToPlot`` is set, in which case both share one axis.
    All charts go to a single HTML file.

    Args:
        mvm_obj (dict): Fitted model from :func:`pyphi.pca`, :func:`pyphi.pls`,
            :func:`pyphi.lpls`, :func:`pyphi.jrpls`, or :func:`pyphi.tpls`.
        plotwidth (int): Width of each chart in pixels. Default ``750``.
        xgrid (bool): If ``True``, draw light-gray vertical grid lines.
            Default ``False``.
        addtitle (str): Text appended to each chart title. Default ``""``.
        material (str): For JRPLS / TPLS, the material block to plot.
            Default ``None``.
        zspace (bool): For TPLS, if ``True`` plot Z-space weighted loadings.
            Default ``False``.
        ScaleToPlot (bool): If ``True``, scale each loading vector to
            [-1, 1] *before* it is multiplied by R². Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    model = _resolve_lpls_space(mvm_obj, material, zspace)
    model_type = model["type"]
    is_pls = "Q" in model
    z_mode = model_type == "tpls" and zspace
    space_lbl = "Z" if z_mode else "X"
    if z_mode:
        loading_lbl = "Wz*"
    elif model_type in ("lpls", "jrpls", "tpls"):
        loading_lbl = "S*"
    elif is_pls:
        loading_lbl = "W*"
    else:
        loading_lbl = "P"
    lv_labels = _get_lv_labels(model)
    x_names = _get_xvar_labels(model)
    x_coeff = model["Ws"] if is_pls else model["P"]
    tools = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    tooltips = [("Variable:", "@names")]
    vertical_grid = "lightgray" if xgrid else None

    def _unit_scale(vec):
        # Divide by the largest magnitude so the vector spans [-1, 1].
        return vec / np.max(np.abs(vec))

    def _symmetric_limit(vec):
        # Half-height of a zero-centred axis, with 15 % head-room.
        lowest, highest = float(vec.min()), float(vec.max())
        return max(abs(lowest), abs(highest)) * 1.15

    charts = []

    if not is_pls:
        _new_output_file(f"WeightedLoadings_{space_lbl}_Space",
                         f"{space_lbl} Weighted Loadings PCA")
        for lv_num, lbl in enumerate(lv_labels, start=1):
            col = lv_num - 1
            # Scaling happens first so the R² weighting remains visible.
            coeffs = x_coeff[:, col]
            if ScaleToPlot:
                coeffs = _unit_scale(coeffs)
            heights = model["r2xpv"][:, col] * coeffs
            source = ColumnDataSource(dict(x_=x_names, y_=heights, names=x_names))
            chart = figure(
                x_range=x_names,
                title=f"{space_lbl} Space Weighted Loadings {lbl}{addtitle}",
                tools=tools,
                width=plotwidth,
            )
            _add_hover(chart, tooltips)
            chart.vbar(x="x_", top="y_", source=source, width=0.5, color="steelblue")
            chart.ygrid.grid_line_color = None
            chart.xgrid.grid_line_color = vertical_grid
            chart.yaxis.axis_label = f"{loading_lbl} × R²{space_lbl} [{lv_num}]"
            _add_hline(chart)
            chart.xaxis.major_label_orientation = np.pi / 2
            charts.append(chart)
        show(column(charts))
        return

    y_names = _get_yvar_labels(model)
    all_names = x_names + y_names
    divider_at = len(x_names)
    _new_output_file("WeightedLoadings_XY",
                     f"{space_lbl}/Y Weighted Loadings PLS")

    for lv_num, lbl in enumerate(lv_labels, start=1):
        col = lv_num - 1
        x_load = x_coeff[:, col]
        y_load = model["Q"][:, col]
        if ScaleToPlot:
            # Scale the raw loadings first, then apply the R² weights.
            x_load = _unit_scale(x_load)
            y_load = _unit_scale(y_load)
        x_heights = model["r2xpv"][:, col] * x_load
        y_heights = model["r2ypv"][:, col] * y_load

        figure_args = dict(
            x_range=all_names,
            title=f"{space_lbl}/Y Weighted Loadings {lbl}{addtitle}",
            tools=tools,
            width=plotwidth,
        )
        if not ScaleToPlot:
            # Two zero-centred ranges so the zeros of both axes coincide.
            x_limit = _symmetric_limit(x_heights)
            y_limit = _symmetric_limit(y_heights)
            figure_args["y_range"] = Range1d(-x_limit, x_limit)

        x_source = ColumnDataSource(
            dict(var=x_names, top=x_heights.tolist(), names=x_names)
        )
        y_source = ColumnDataSource(
            dict(var=y_names, top=y_heights.tolist(), names=y_names)
        )
        chart = figure(**figure_args)
        _add_hover(chart, tooltips)
        chart.vbar(x="var", top="top", source=x_source, width=0.5,
                   color="steelblue", legend_label=f"{loading_lbl}×R²{space_lbl}")
        if ScaleToPlot:
            chart.vbar(x="var", top="top", source=y_source, width=0.5,
                       color="tomato", legend_label="Q×R²Y")
            chart.yaxis.axis_label = (
                f"{loading_lbl}×R²{space_lbl} / Q×R²Y (normalised) [{lv_num}]"
            )
        else:
            # Right-hand axis dedicated to the weighted Y loadings.
            chart.extra_y_ranges = {"y_range": Range1d(-y_limit, y_limit)}
            right_axis = LinearAxis(
                y_range_name="y_range",
                axis_label=f"Q×R²Y [{lv_num}]",
                axis_line_color="tomato",
                major_label_text_color="tomato",
                axis_label_text_color="tomato",
            )
            chart.add_layout(right_axis, "right")
            chart.vbar(x="var", top="top", source=y_source, width=0.5,
                       color="tomato", legend_label="Q×R²Y",
                       y_range_name="y_range")
            chart.yaxis[0].axis_label = f"{loading_lbl}×R²{space_lbl} [{lv_num}]"

        divider = Span(location=divider_at, dimension="height",
                       line_color="black", line_width=1, line_dash="dashed")
        chart.add_layout(divider)
        chart.ygrid.grid_line_color = None
        chart.xgrid.grid_line_color = vertical_grid
        # Relocate the legend next to the plot so it cannot cover any bars.
        outside_legend = chart.legend[0]
        chart.add_layout(outside_legend, "right")
        outside_legend.click_policy = "hide"
        _add_hline(chart)
        chart.xaxis.major_label_orientation = np.pi / 2
        charts.append(chart)
    show(column(charts))
[docs]
def vip(
    mvm_obj: dict,
    *,
    plotwidth: int = 600,
    material=None,
    zspace: bool = False,
    addtitle: str = "",
) -> None:
    """Bar chart of VIP (Variable Importance in Projection) values.

    The value for each X variable is the row sum of ``|Ws * r2y|``, i.e. the
    absolute ``"Ws"`` loadings weighted by the ``"r2y"`` of each LV and
    summed over LVs. Bars are ordered from the largest value to the
    smallest. Models without a ``"Q"`` entry (PCA) produce no plot and the
    function returns silently.

    Args:
        mvm_obj (dict): Fitted PLS model from :func:`pyphi.pls`,
            :func:`pyphi.lpls`, :func:`pyphi.jrpls`, or :func:`pyphi.tpls`.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        material (str): For JRPLS / TPLS, the material block to use.
            Default ``None``.
        zspace (bool): For TPLS, if ``True`` use the Z-space variables.
            Default ``False``.
        addtitle (str): Text appended to the plot title. Default ``""``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    model = _resolve_lpls_space(mvm_obj, material, zspace)
    if "Q" not in model:
        return

    x_names = _get_xvar_labels(model)
    ws = model["Ws"]
    # Repeat r2y on every variable row so it weights Ws column by column.
    lv_weights = np.tile(model["r2y"], (ws.shape[0], 1))
    importance = np.abs(ws * lv_weights).sum(axis=1)

    descending = np.argsort(-importance)
    ranked_names = [x_names[pos] for pos in descending]
    ranked_values = importance[descending]

    _new_output_file("VIP", "VIP Coefficient")
    source = ColumnDataSource(
        dict(x_=ranked_names, y_=ranked_values, names=ranked_names)
    )
    chart = figure(
        x_range=ranked_names,
        title=f"VIP {addtitle}",
        tools="save,box_zoom,pan,reset",
        width=plotwidth,
    )
    _add_hover(chart, [("Variable", "@names")])
    chart.vbar(x="x_", top="y_", source=source, width=0.5)
    chart.xgrid.grid_line_color = None
    chart.yaxis.axis_label = "Very Important to the Projection"
    chart.xaxis.major_label_orientation = np.pi / 2
    show(chart)
def _create_classid_(df: pd.DataFrame, column: str, *, nbins: int = 5) -> pd.DataFrame:
    """Internal: turn a numeric column into bin-label classes.

    The non-NaN values of ``df[column]`` are split into *nbins* equal-width
    bins (edges from ``np.histogram``). Each value is replaced by the label
    ``"<low> to <high>"`` of its bin, with edges rounded to 3 decimals; NaN
    entries become ``"Missing Value"``.

    Returns:
        A new two-column DataFrame: the first column of *df* followed by
        *column* holding the labels.
    """
    values = df[column].values
    is_missing = np.isnan(values)
    present = values[~is_missing]

    _, edges = np.histogram(present, bins=nbins)
    bin_labels = [
        f"{np.round(low, 3)} to {np.round(high, 3)}"
        for low, high in zip(edges[:-1], edges[1:])
    ]

    # Widen the last edge so the maximum value lands in the final bin
    # instead of overflowing past it.
    digitize_edges = edges.copy()
    digitize_edges[-1] += 0.1
    bin_of_present = iter(np.digitize(present, digitize_edges) - 1)

    # Walk the original order, consuming one bin index per non-missing value.
    labels = [
        "Missing Value" if missing else bin_labels[next(bin_of_present)]
        for missing in is_missing
    ]

    first_column = df.columns[0]
    result = df[[first_column]].copy()
    result[column] = labels
    return result
[docs]
def _order_binned_classes(classes: list) -> list:
    """Order bin labels by their lower edge, keeping non-bin labels in front.

    A bin label looks like ``"<low> to <high>"``. Any label whose first token
    is not a number (e.g. ``"Missing Value"`` or ``"Model"``) is kept, in its
    original relative order, ahead of the sorted bins.
    """
    leading, bins = [], []
    for label in classes:
        try:
            lower_edge = float(str(label).split()[0])
        except (ValueError, IndexError):
            leading.append(label)
        else:
            bins.append((lower_edge, label))
    bins.sort(key=lambda pair: pair[0])
    return leading + [label for _, label in bins]


def score_scatter(
    mvm_obj: dict,
    xydim: list[int],
    *,
    CLASSID: Optional[pd.DataFrame] = None,
    colorby: Optional[str] = None,
    Xnew=None,
    add_ci: bool = False,
    add_labels: bool = False,
    add_legend: bool = True,
    legend_cols: int = 1,
    addtitle: str = "",
    plotwidth: int = 600,
    plotheight: int = 600,
    rscores: bool = False,
    material=None,
    marker_size: int = 7,
    nbins=None,
    include_model: bool = False,
) -> None:
    """Score scatter plot for two selected latent variables or principal components.

    Plots training scores (or scores for new observations) on two chosen
    components. Supports class-based colouring, confidence ellipses,
    observation labels, and overlaying new observations on top of training
    data.

    Args:
        mvm_obj (dict): Fitted model from :func:`pyphi.pca`, :func:`pyphi.pls`,
            :func:`pyphi.lpls`, :func:`pyphi.jrpls`, or :func:`pyphi.tpls`.
        xydim (list[int]): Two component indices to plot, e.g. ``[1, 2]``
            (1-indexed).
        CLASSID (pd.DataFrame): Optional class-membership DataFrame. First
            column should be observation IDs; ``colorby`` specifies the
            column to colour by. Rows are matched to observations by
            position. Default ``None``.
        colorby (str): Column name in ``CLASSID`` to use for colouring.
            Required if ``CLASSID`` is provided. Default ``None``.
        Xnew (pd.DataFrame or np.ndarray): New observations to project and
            plot. If ``None``, training scores are used. Ignored (with a
            printed notice) for LPLS / JRPLS / TPLS models. Default ``None``.
        add_ci (bool): If ``True``, overlays 95% (gold) and 99% (red)
            confidence ellipses. Default ``False``.
        add_labels (bool): If ``True``, annotates each point with its
            observation ID. Default ``False``.
        add_legend (bool): If ``True`` and ``CLASSID`` is provided, adds a
            click-to-hide legend. Default ``True``.
        legend_cols (int): Number of legend columns when using ``CLASSID``.
            Values below 1 are treated as 1. Default ``1``.
        addtitle (str): Optional string appended to the plot title.
            Default ``""``.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        plotheight (int): Height of the plot in pixels. Default ``600``.
        rscores (bool): For LPLS / JRPLS / TPLS models, if ``True`` plots
            R-space scores instead of X-space scores. Default ``False``.
        material (str): For JRPLS / TPLS, the material block to plot when
            ``rscores=True``. Default ``None`` (plots all materials, coloured
            by material name).
        marker_size (int): Marker size in pixels. Default ``7``.
        nbins (int): If provided, bins a continuous ``colorby`` variable into
            ``nbins`` groups before colouring. Default ``None``.
        include_model (bool): If ``True`` and ``Xnew`` is provided, overlays
            training set scores (grey, labelled "Model") alongside the new
            observations. Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.

    Raises:
        TypeError: If ``Xnew`` is neither a NumPy array nor a DataFrame.
        ValueError: If ``CLASSID`` has fewer rows than there are plotted
            observations.
    """
    # Remember whether colorby was really converted to bin labels: only then
    # is it meaningful to sort classes by bin edge and use the viridis map.
    binned = (
        nbins is not None
        and CLASSID is not None
        and colorby in CLASSID.columns.tolist()
    )
    if binned:
        CLASSID = _create_classid_(CLASSID, colorby, nbins=nbins)

    mvmobj = mvm_obj.copy()
    if mvmobj["type"] in ("lpls", "jrpls", "tpls") and Xnew is not None:
        Xnew = None
        print("score_scatter does not take Xnew for jrpls or lpls for now")

    if Xnew is None:
        ObsID_ = _obs_ids_from_model(mvmobj)
        T_matrix = mvmobj["T"]
        if not rscores:
            if mvmobj["type"] == "lpls":
                ObsID_ = mvmobj["obsidR"]
            elif mvmobj["type"] in ("jrpls", "tpls"):
                ObsID_ = mvmobj["obsidRi"][0]
        else:
            if mvmobj["type"] == "lpls":
                ObsID_ = mvmobj["obsidX"]
                T_matrix = mvmobj["Rscores"]
            elif mvmobj["type"] in ("jrpls", "tpls"):
                if material is None:
                    # All materials stacked; colour each point by its material.
                    ObsID_ = [o for sub in mvmobj["obsidXi"] for o in sub]
                    T_matrix = np.vstack(mvmobj["Rscores"])
                    classes = [
                        m for i, m in enumerate(mvmobj["materials"])
                        for _ in mvmobj["obsidXi"][i]
                    ]
                    CLASSID = pd.DataFrame({"obs": ObsID_, "material": classes})
                    colorby = "material"
                    # Material names are not bin labels.
                    binned = False
                else:
                    idx = mvmobj["materials"].index(material)
                    ObsID_ = mvmobj["obsidXi"][idx]
                    T_matrix = mvmobj["Rscores"][idx]
    else:
        if isinstance(Xnew, np.ndarray):
            X_ = Xnew.copy()
            ObsID_ = [f"Obs #{n}" for n in range(1, Xnew.shape[0] + 1)]
        elif isinstance(Xnew, pd.DataFrame):
            # First DataFrame column holds the observation ids.
            X_ = Xnew.values[:, 1:].astype(float)
            ObsID_ = Xnew.values[:, 0].astype(str).tolist()
        else:
            raise TypeError(
                "Xnew must be a numpy.ndarray or a pandas.DataFrame, "
                f"got {type(Xnew).__name__}"
            )
        pred = phi.pls_pred(X_, mvmobj) if "Q" in mvmobj else phi.pca_pred(X_, mvmobj)
        T_matrix = pred["Tnew"]
        if include_model:
            # Training rows go first so "Model" is the first class found.
            ObsID_model = _obs_ids_from_model(mvmobj)
            T_model = mvmobj["T"].copy()
            if CLASSID is None:
                source_col = ["Model"] * T_model.shape[0] + ["New"] * T_matrix.shape[0]
                ObsID_ = ObsID_model + ObsID_
                CLASSID = pd.DataFrame({"ObsID": ObsID_, "_Source_": source_col})
                colorby = "_Source_"
            else:
                src_model = pd.DataFrame({CLASSID.columns[0]: ObsID_model,
                                          colorby: ["Model"] * T_model.shape[0]})
                CLASSID = pd.concat([src_model, CLASSID], ignore_index=True)
                ObsID_ = ObsID_model + ObsID_
            T_matrix = np.vstack((T_model, T_matrix))

    if rscores and material is not None:
        # r-scores for a specific material are in that material's column space;
        # the user's CLASSID is built against the observation space, ignore it.
        CLASSID = None
        colorby = None
    # When rscores=True and material=None, CLASSID was already replaced above
    # by the internally-built material-colouring dataframe, which is kept.

    ObsNum_ = [str(n) for n in range(1, len(ObsID_) + 1)]
    x_ = T_matrix[:, xydim[0] - 1]
    y_ = T_matrix[:, xydim[1] - 1]
    ax_lbl = "r" if rscores else "t"
    _new_output_file("Score_Scatter", f"Score Scatter {ax_lbl}[{xydim[0]}] - {ax_lbl}[{xydim[1]}]")
    TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    TOOLTIPS = [("Obs #", "@ObsNum"), ("(x,y)", "($x, $y)"), ("Obs: ", "@ObsID")]
    p = figure(tools=TOOLS, width=plotwidth, height=plotheight,
               title=f"Score Scatter {ax_lbl}[{xydim[0]}] - {ax_lbl}[{xydim[1]}] {addtitle}",
               toolbar_location="above")
    _add_hover(p, TOOLTIPS)

    if CLASSID is None:
        src = ColumnDataSource(dict(x=x_, y=y_, ObsID=ObsID_, ObsNum=ObsNum_))
        p.scatter("x", "y", source=src, size=marker_size)
        if add_labels:
            p.add_layout(LabelSet(x="x", y="y", text="ObsID", level="glyph",
                                  x_offset=5, y_offset=5, source=src))
    else:
        # Only the first len(ObsID_) CLASSID rows correspond to plotted points.
        n_obs = len(ObsID_)
        class_frame = CLASSID.iloc[:n_obs]
        classid_ = list(class_frame[colorby])
        if len(classid_) < n_obs:
            raise ValueError(
                f"CLASSID has {len(classid_)} rows but {n_obs} observations "
                "are being plotted"
            )

        # Build the class list ONCE, from the rows actually plotted, and only
        # then derive the palette, so colours and classes stay aligned.
        Classes_ = phi.unique(class_frame, colorby)
        if binned:
            Classes_ = _order_binned_classes(Classes_)
        cmap_name = "viridis" if binned else "rainbow"
        if Classes_ and Classes_[0] in ("Model", "Missing Value"):
            # The leading reference class is drawn in neutral grey.
            palette = ["#e1e1e1"] + _make_bokeh_palette(len(Classes_) - 1, cmap_name)
        else:
            palette = _make_bokeh_palette(len(Classes_), cmap_name)

        legend_it = []
        for cls_val, color_ in zip(Classes_, palette):
            members = [i for i, c in enumerate(classid_) if c == cls_val]
            src = ColumnDataSource(dict(
                x=[float(x_[i]) for i in members],
                y=[float(y_[i]) for i in members],
                ObsID=[ObsID_[i] for i in members],
                ObsNum=[ObsNum_[i] for i in members],
                Class=[cls_val] * len(members),
            ))
            c = p.scatter("x", "y", source=src, color=color_, size=marker_size)
            if add_labels:
                p.add_layout(LabelSet(x="x", y="y", text="ObsID", level="glyph",
                                      x_offset=5, y_offset=5, source=src))
            if add_legend:
                legend_it.append((str(cls_val), [c]))

        if add_legend and legend_it:
            # Same split as before (rounded share per column, remainder in the
            # last one), but never divide by zero and never add an empty legend.
            n_cols = max(1, int(legend_cols))
            per_col = int(np.round(len(legend_it) / n_cols))
            sizes = [per_col] * n_cols
            sizes[-1] = len(legend_it) - sum(sizes[:-1])
            offset = 0
            for size in sizes:
                items = legend_it[offset:offset + size] if size > 0 else []
                offset += size
                if not items:
                    continue
                leg = Legend(items=items)
                p.add_layout(leg, "right")
                leg.click_policy = "hide"

    if add_ci:
        _add_ci_ellipse(p, T_matrix, mvmobj, xydim[0], xydim[1])
    p.xaxis.axis_label = f"{ax_lbl} [{xydim[0]}]"
    p.yaxis.axis_label = f"{ax_lbl} [{xydim[1]}]"
    _add_origin_lines(p)
    show(p)
[docs]
def score_line(
    mvmobj: dict,
    dim,
    *,
    CLASSID: Optional[pd.DataFrame] = None,
    colorby: Optional[str] = None,
    Xnew=None,
    add_ci: bool = False,
    add_labels: bool = False,
    add_legend: bool = True,
    plotline: bool = True,
    plotwidth: int = 600,
    plotheight: int = 600,
) -> None:
    """Score line plot: one component plotted against observation sequence.

    Useful for monitoring or batch data where the observation order is
    meaningful. When ``CLASSID`` is provided, markers are coloured by class
    with a single grey connecting line underneath.

    Args:
        mvmobj (dict): Fitted model from :func:`pyphi.pca` or :func:`pyphi.pls`.
        dim (int or list[int]): Component index to plot (1-indexed). Pass a
            single integer (Python or NumPy) or a one-element sequence,
            e.g. ``1`` or ``[1]``. Only the first element is used.
        CLASSID (pd.DataFrame): Optional class-membership DataFrame. First
            column is observation IDs; ``colorby`` specifies the column used
            for colouring. Default ``None``.
        colorby (str): Column name in ``CLASSID`` to colour by. Required if
            ``CLASSID`` is provided. Default ``None``.
        Xnew (pd.DataFrame or np.ndarray): New observations to project and
            plot. If ``None``, training scores are used. Default ``None``.
        add_ci (bool): If ``True``, overlays 95% (gold) and 99% (red)
            univariate confidence limits as dashed horizontal lines.
            Default ``False``.
        add_labels (bool): If ``True``, annotates each point with its
            observation ID. Default ``False``.
        add_legend (bool): If ``True`` and ``CLASSID`` is provided, adds a
            click-to-hide legend. Default ``True``.
        plotline (bool): If ``True``, draws a line connecting the observations
            in sequence. Default ``True``.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        plotheight (int): Height of the plot in pixels. Default ``600``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.

    Raises:
        TypeError: If ``Xnew`` is not ``None``, a DataFrame or an ndarray.
    """
    # NumPy integer scalars are not ``int`` instances and cannot be iterated,
    # so they must be wrapped explicitly just like plain ints.
    if isinstance(dim, (int, np.integer)):
        dim = [dim]
    elif not isinstance(dim, list):
        dim = list(dim)
    lv = dim[0]

    if Xnew is None:
        ObsID_ = _obs_ids_from_model(mvmobj)
        T_matrix = mvmobj["T"]
    else:
        if isinstance(Xnew, np.ndarray):
            X_ = Xnew.copy()
            ObsID_ = [f"Obs #{n}" for n in range(1, Xnew.shape[0] + 1)]
        elif isinstance(Xnew, pd.DataFrame):
            # First column carries the observation IDs; the rest is data.
            X_ = Xnew.values[:, 1:].astype(float)
            ObsID_ = Xnew.values[:, 0].astype(str).tolist()
        else:
            raise TypeError(
                "score_line: Xnew must be a pandas DataFrame or a numpy ndarray."
            )
        # Models carrying a "Q" entry are projected with pls_pred, all
        # others with pca_pred.
        pred = phi.pls_pred(X_, mvmobj) if "Q" in mvmobj else phi.pca_pred(X_, mvmobj)
        T_matrix = pred["Tnew"]

    ObsNum_ = [f"Obs #{n}" for n in range(1, len(ObsID_) + 1)]
    y_ = T_matrix[:, lv - 1].flatten()
    x_ = list(range(1, len(y_) + 1))
    title = f"Score Line t[{lv}]"

    _new_output_file("Score_Line", title)
    TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    TOOLTIPS = [("Obs#", "@ObsNum"), ("(x,y)", "($x, $y)"), ("Obs: ", "@ObsID")]

    def _add_ci_lines(p):
        # Limits always come from the scores stored in the model, even when
        # the plotted points are projections of ``Xnew``.
        lim95, lim99 = phi.single_score_conf_int(mvmobj["T"][:, [lv - 1]])
        for lim, col in ((lim95, "gold"), (lim99, "red")):
            p.line(x_, lim, line_color=col, line_dash="dashed")
            p.line(x_, -lim, line_color=col, line_dash="dashed")

    def _add_point_labels(p, src):
        p.add_layout(LabelSet(x="x", y="y", text="ObsID", level="glyph",
                              x_offset=5, y_offset=5, source=src))

    if CLASSID is None:
        src = ColumnDataSource(dict(x=x_, y=y_, ObsID=ObsID_, ObsNum=ObsNum_))
        p = figure(tools=TOOLS, width=plotwidth, height=plotheight, title=title)
        _add_hover(p, TOOLTIPS)
        p.scatter("x", "y", source=src, size=10)
        if plotline:
            p.line("x", "y", source=src)
        if add_ci:
            _add_ci_lines(p)
        if add_labels:
            _add_point_labels(p, src)
    else:
        Classes_ = phi.unique(CLASSID, colorby)
        palette = _make_bokeh_palette(len(Classes_))
        classid_arr = np.array(CLASSID[colorby].tolist())
        x_arr = np.array(x_)
        p = figure(tools=TOOLS, toolbar_location="above",
                   width=plotwidth, height=plotheight, title=title)
        _add_hover(p, TOOLTIPS + [("Class:", "@Class")])
        legend_it = []
        # Single continuous line through all observations in global sequence,
        # then per-class coloured markers on top.
        if plotline:
            src_all = ColumnDataSource(dict(x=x_, y=y_.tolist()))
            p.line("x", "y", source=src_all, color="lightgray", line_width=1)
        for cls_val, color_ in zip(Classes_, palette):
            mask = classid_arr == cls_val
            idx = np.where(mask)[0]
            src = ColumnDataSource(dict(
                x=x_arr[mask].tolist(),
                y=y_[mask].tolist(),
                ObsID=[ObsID_[i] for i in idx],
                ObsNum=[ObsNum_[i] for i in idx],
                Class=[cls_val] * int(mask.sum()),
            ))
            c = p.scatter("x", "y", source=src, color=color_, size=10)
            if add_labels:
                _add_point_labels(p, src)
            if add_legend:
                legend_it.append((str(cls_val), [c]))
        if add_ci:
            _add_ci_lines(p)
        if add_legend:
            leg = Legend(items=legend_it, location="top_right")
            p.add_layout(leg, "right")
            leg.click_policy = "hide"

    p.xaxis.axis_label = "Observation"
    p.yaxis.axis_label = f"t [{lv}]"
    show(p)
[docs]
def diagnostics(
    mvmobj: dict,
    *,
    Xnew=None,
    Ynew=None,
    score_plot_xydim=None,
    plotwidth: int = 600,
    ht2_logscale: bool = False,
    spe_logscale: bool = False,
) -> None:
    """Hotelling's T² and SPE diagnostic dashboard.

    Produces a column of linked plots:

    1. **Hotelling's T²** vs. observation sequence, with 95% (gold) and
       99% (red) control limits.
    2. **SPE X-space** vs. observation sequence, with control limits.
    3. **Outlier map**: T² vs. SPE X, with 99% limits as crosshairs.
    4. **SPE Y-space** (PLS models only, when ``Ynew`` is supplied or using
       training data).
    5. **Score scatter** (optional, if ``score_plot_xydim`` is provided).

    Args:
        mvmobj (dict): Fitted model from :func:`pyphi.pca` or :func:`pyphi.pls`.
        Xnew (pd.DataFrame or np.ndarray): New X observations to diagnose.
            If ``None``, training set diagnostics are plotted. Default ``None``.
        Ynew (pd.DataFrame or np.ndarray): New Y observations (optional).
            Used only for PLS models to compute Y-space SPE. Default ``None``.
        score_plot_xydim (list[int]): If provided (e.g. ``[1, 2]``), appends a
            score scatter plot for the specified components, with confidence
            ellipses overlaid. Default ``None``.
        plotwidth (int): Width of each plot in pixels. Default ``600``.
        ht2_logscale (bool): If ``True``, plots Hotelling's T² on a log₁₀
            scale. Default ``False``.
        spe_logscale (bool): If ``True``, plots SPE X on a log₁₀ scale.
            Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.

    Raises:
        TypeError: If ``Xnew`` is not ``None``, a DataFrame or an ndarray.
    """
    add_score_plot = score_plot_xydim is not None

    if Xnew is None:
        # Training-set diagnostics are read straight from the model dict.
        ObsID_ = _obs_ids_from_model(mvmobj)
        Obs_num = np.arange(mvmobj["T"].shape[0]) + 1
        t2_ = mvmobj["T2"].copy()
        spex_ = mvmobj["speX"].copy()
        spey_ = mvmobj.get("speY") if "Q" in mvmobj else None
        if add_score_plot:
            t_x = mvmobj["T"][:, [score_plot_xydim[0] - 1]]
            t_y = mvmobj["T"][:, [score_plot_xydim[1] - 1]]
        else:
            t_x = t_y = None
    else:
        if isinstance(Xnew, np.ndarray):
            X_ = Xnew
            ObsID_ = [f"Obs #{n}" for n in range(1, Xnew.shape[0] + 1)]
        elif isinstance(Xnew, pd.DataFrame):
            # First column carries the observation IDs; the rest is data.
            X_ = Xnew.values[:, 1:].astype(float)
            ObsID_ = Xnew.values[:, 0].astype(str).tolist()
        else:
            raise TypeError(
                "diagnostics: Xnew must be a pandas DataFrame or a numpy ndarray."
            )
        t2_ = phi.hott2(mvmobj, Xnew=Xnew)
        Obs_num = np.arange(t2_.shape[0]) + 1
        if "Q" in mvmobj and Ynew is not None:
            spex_, spey_ = phi.spe(mvmobj, Xnew, Ynew=Ynew)
        else:
            spex_ = phi.spe(mvmobj, Xnew)
            spey_ = None
        if add_score_plot:
            pred = phi.pls_pred(X_, mvmobj) if "Q" in mvmobj else phi.pca_pred(X_, mvmobj)
            T_matrix = pred["Tnew"]
            t_x = T_matrix[:, [score_plot_xydim[0] - 1]]
            t_y = T_matrix[:, [score_plot_xydim[1] - 1]]
        else:
            t_x = t_y = None

    if ht2_logscale:
        t2_ = np.log10(t2_)
    if spe_logscale:
        spex_ = np.log10(spex_)

    # One shared data source keeps selections linked across the panels.
    ObsNum_ = [f"Obs #{n}" for n in range(1, len(ObsID_) + 1)]
    src_dict = dict(x=Obs_num, ObsID=ObsID_, ObsNum=ObsNum_, t2=t2_, spex=spex_)
    if spey_ is not None:
        src_dict["spey"] = spey_
    if add_score_plot:
        src_dict["tx"] = t_x
        src_dict["ty"] = t_y
    source = ColumnDataSource(src_dict)

    TOOLS = "save,wheel_zoom,box_zoom,reset,lasso_select"
    TOOLTIPS = [("Obs #", "@x"), ("(x,y)", "($x, $y)"), ("Obs: ", "@ObsID")]
    _new_output_file("Diagnostics", "Diagnostics")

    def _lim(val, log):
        # Limits are stored in natural units; match the plotted scale.
        return np.log10(val) if log else val

    seq_span = [0, Obs_num[-1]]

    def _control_limits(p, stat, log=False):
        # Horizontal 95% (gold) and 99% (red) limit lines for ``stat``.
        for level, colour in (("95", "gold"), ("99", "red")):
            limit = _lim(mvmobj[f"{stat}_lim{level}"], log)
            p.line(seq_span, [limit] * 2, line_color=colour)

    p_list = []

    p = figure(tools=TOOLS, width=plotwidth, title="Hotelling's T2")
    _add_hover(p, TOOLTIPS)
    p.scatter("x", "t2", source=source)
    _control_limits(p, "T2", ht2_logscale)
    p.xaxis.axis_label = "Observation sequence"
    p.yaxis.axis_label = "HT2"
    p_list.append(p)

    p = figure(tools=TOOLS, width=plotwidth, title="SPE X")
    _add_hover(p, TOOLTIPS)
    p.scatter("x", "spex", source=source)
    _control_limits(p, "speX", spe_logscale)
    p.xaxis.axis_label = "Observation sequence"
    p.yaxis.axis_label = "SPE X-Space"
    p_list.append(p)

    p = figure(tools=TOOLS, width=plotwidth, title="Outlier Map")
    _add_hover(p, TOOLTIPS)
    p.scatter("t2", "spex", source=source)
    p.renderers.extend([
        Span(location=_lim(mvmobj["T2_lim99"], ht2_logscale), dimension="height",
             line_color="red", line_width=1),
        Span(location=_lim(mvmobj["speX_lim99"], spe_logscale), dimension="width",
             line_color="red", line_width=1),
    ])
    p.xaxis.axis_label = "Hotelling's T2"
    p.yaxis.axis_label = "SPE X-Space"
    p_list.append(p)

    if "Q" in mvmobj and spey_ is not None:
        # ``width=plotwidth`` keeps this panel aligned with the others; it
        # previously fell back to Bokeh's default width.
        p = figure(tools=TOOLS, width=plotwidth, height=400, title="SPE Y")
        _add_hover(p, TOOLTIPS)
        p.scatter("x", "spey", source=source, size=10)
        _control_limits(p, "speY")
        p.xaxis.axis_label = "Observation sequence"
        p.yaxis.axis_label = "SPE Y-Space"
        p_list.append(p)

    if add_score_plot:
        p = figure(tools=TOOLS, width=plotwidth, title="Score Scatter")
        _add_hover(p, TOOLTIPS)
        p.scatter("tx", "ty", source=source, size=10)
        # The ellipse is always built from the model's stored scores.
        _add_ci_ellipse(p, mvmobj["T"], mvmobj, score_plot_xydim[0], score_plot_xydim[1])
        p.xaxis.axis_label = f"t [{score_plot_xydim[0]}]"
        p.yaxis.axis_label = f"t [{score_plot_xydim[1]}]"
        _add_origin_lines(p)
        p_list.append(p)

    show(column(p_list))
[docs]
def predvsobs(
    mvmobj: dict,
    X,
    Y,
    *,
    CLASSID: Optional[pd.DataFrame] = None,
    colorby: Optional[str] = None,
    x_space: bool = False,
) -> None:
    """Observed vs. predicted scatter plots for all Y (and optionally X) variables.

    One square scatter plot is produced per variable, with a 45° reference
    line (cyan, dashed). All plots are stacked vertically in a single HTML
    file.

    Args:
        mvmobj (dict): Fitted model from :func:`pyphi.pca` or :func:`pyphi.pls`.
        X (pd.DataFrame, np.ndarray, or dict): Predictor data. For multi-block
            models pass a dict of DataFrames keyed by block name.
        Y (pd.DataFrame or np.ndarray): Observed response values. Must have
            the same number of rows as ``X``. Ignored for PCA models.
        CLASSID (pd.DataFrame): Optional class-membership DataFrame for
            colouring points. First column is observation IDs. Default ``None``.
        colorby (str): Column in ``CLASSID`` to use for colouring. Required
            if ``CLASSID`` is provided. Default ``None``.
        x_space (bool): If ``True``, also plots observed vs. predicted for
            each X variable (X-space reconstruction). For PCA models this is
            always ``True``. Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.

    Raises:
        TypeError: If ``X`` has an unsupported type, or if the model is PLS
            and ``Y`` is neither a DataFrame nor an ndarray.
    """
    if isinstance(X, np.ndarray):
        X_ = X.copy()
        ObsID_ = [f"Obs #{n}" for n in range(1, X.shape[0] + 1)]
    elif isinstance(X, pd.DataFrame):
        X_ = X.values[:, 1:].astype(float)
        ObsID_ = X.values[:, 0].astype(str).tolist()
    elif isinstance(X, dict):
        X_ = X.copy()  # kept as dict — phi.pls_pred/pca_pred expect dict for MB input
        k = list(X.keys())
        ObsID_ = X[k[0]].values[:, 0].astype(str).tolist()
        # NOTE(review): with dict input the X-space panels below reach
        # ``X_.shape`` and will fail; only Y-space plots work for dicts.
    else:
        raise TypeError(
            "predvsobs: X must be a pandas DataFrame, a numpy ndarray or a dict."
        )
    XVar = _get_xvar_labels(mvmobj)

    if isinstance(Y, np.ndarray):
        Y_ = Y.copy()
    elif isinstance(Y, pd.DataFrame):
        Y_ = Y.values[:, 1:].astype(float)
    else:
        Y_ = None  # only an error for PLS models, checked below

    if "Q" in mvmobj:
        if Y_ is None:
            raise TypeError(
                "predvsobs: Y must be a pandas DataFrame or a numpy ndarray "
                "for PLS models."
            )
        pred = phi.pls_pred(X_, mvmobj)
        yhat = pred["Yhat"]
        xhat = pred["Xhat"] if x_space else None
        YVar = _get_yvar_labels(mvmobj)
    else:
        # PCA has no Y-space, so only the X reconstruction can be shown.
        x_space = True
        pred = phi.pca_pred(X_, mvmobj)
        xhat = pred["Xhat"]
        yhat = None

    TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    TOOLTIPS = [("index", "$index"), ("(x,y)", "($x, $y)"), ("Obs: ", "@ObsID")]
    _new_output_file("ObsvsPred", "ObsvsPred")

    def _base_fig(obs, pred_vals, var_name):
        # Square axes spanning the joint range so the 45° line is the diagonal.
        mn = np.nanmin([np.nanmin(obs), np.nanmin(pred_vals)])
        mx = np.nanmax([np.nanmax(obs), np.nanmax(pred_vals)])
        p = figure(tools=TOOLS, width=600, height=600, title=var_name,
                   x_range=(mn, mx), y_range=(mn, mx))
        _add_hover(p, TOOLTIPS)
        p.line([mn, mx], [mn, mx], line_color="cyan", line_dash="dashed")
        p.xaxis.axis_label = "Observed"
        p.yaxis.axis_label = "Predicted"
        return p

    all_panels = []
    if CLASSID is None:
        if yhat is not None:
            for i in range(Y_.shape[1]):
                p = _base_fig(Y_[:, i], yhat[:, i], YVar[i])
                src = ColumnDataSource(dict(x=Y_[:, i], y=yhat[:, i], ObsID=ObsID_))
                p.scatter("x", "y", source=src, size=7, color="darkblue")
                all_panels.append(p)
        if x_space and xhat is not None:
            for i in range(X_.shape[1]):
                p = _base_fig(X_[:, i], xhat[:, i], XVar[i])
                src = ColumnDataSource(dict(x=X_[:, i], y=xhat[:, i], ObsID=ObsID_))
                p.scatter("x", "y", source=src, size=10, color="darkblue")
                all_panels.append(p)
    else:
        Classes_ = phi.unique(CLASSID, colorby)
        palette = _make_bokeh_palette(len(Classes_))
        classid_ = list(CLASSID[colorby])
        # Class membership does not depend on the variable being plotted,
        # so the masks are built once instead of once per panel.
        class_masks = [
            (cls_val, color_, np.array([c == cls_val for c in classid_]))
            for cls_val, color_ in zip(Classes_, palette)
        ]

        def _add_classified(obs_col, pred_col, var_name):
            p = _base_fig(obs_col, pred_col, var_name)
            for cls_val, color_, mask in class_masks:
                # Drop points whose observed value is missing.
                valid = mask & ~np.isnan(obs_col)
                src = ColumnDataSource(dict(
                    x=obs_col[valid], y=pred_col[valid],
                    ObsID=[ObsID_[j] for j in np.where(valid)[0]],
                    Class=[cls_val] * int(valid.sum()),
                ))
                p.scatter("x", "y", source=src, color=color_,
                          legend_label=str(cls_val))
            p.legend.click_policy = "hide"
            p.legend.location = "top_left"
            return p

        if yhat is not None:
            for i in range(Y_.shape[1]):
                all_panels.append(_add_classified(Y_[:, i], yhat[:, i], YVar[i]))
        if x_space and xhat is not None:
            for i in range(X_.shape[1]):
                all_panels.append(_add_classified(X_[:, i], xhat[:, i], XVar[i]))

    show(column(all_panels))
[docs]
def contributions_plot(
    mvmobj: dict,
    X,
    cont_type: str,
    *,
    Y=False,  # kept as False: pyphi uses isinstance(Y, bool) to detect "not provided"
    from_obs=None,
    to_obs=None,
    lv_space=False,  # kept as False: pyphi may use isinstance(lv_space, bool) internally
    plotwidth: int = 800,
    plotheight: int = 600,
    xgrid: bool = False,
) -> None:
    """Bar chart of variable contributions to a monitoring statistic.

    Shows how much each variable contributes to the T², SPE, or score of
    a specific observation (or the difference between two observations).
    For SPE with PLS models, separate X and Y contribution bars are shown.

    Args:
        mvmobj (dict): Fitted model from :func:`pyphi.pca` or :func:`pyphi.pls`.
        X (pd.DataFrame, np.ndarray, or dict): Data containing the observation(s)
            to diagnose. For multi-block models, pass a dict of DataFrames.
        cont_type (str): Type of contribution to compute and plot.
            ``'scores'``: contribution to each latent variable score.
            ``'spe'``: contribution to SPE (X and, for PLS, Y).
            ``'t2'``: contribution to Hotelling's T².
        Y (pd.DataFrame or np.ndarray): Y data. Required for
            ``cont_type='spe'`` with a PLS model. Default ``False``
            (not provided); ``None`` is treated the same way.
        from_obs (int, str, or list): Reference observation for difference-based
            contributions. Can be an integer index, an observation ID string,
            or a list thereof. If ``None``, contributions are computed
            absolutely for ``to_obs``. Default ``None``.
        to_obs (int, str, or list): The observation(s) to diagnose. Can be an
            integer index, observation ID string, or list thereof. This
            argument is required. Default ``None``.
        lv_space (bool): If ``True``, computes contributions in latent
            variable space. Default ``False``.
        plotwidth (int): Width of the plot in pixels. Default ``800``.
        plotheight (int): Height of the plot in pixels. Default ``600``.
        xgrid (bool): If ``True``, shows vertical grid lines. Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``. Prints a
        message and returns early when ``to_obs`` is missing or the
        ``Y`` / ``cont_type`` combination is unsupported.
    """
    def _dict_to_df(d):
        # Join the blocks column-wise, keeping the ID column of the first only.
        parts = list(d.values())
        return pd.concat([parts[0]] + [p.iloc[:, 1:] for p in parts[1:]], axis=1)

    if isinstance(X, dict):
        X = _dict_to_df(X)
    if isinstance(Y, dict):
        Y = _dict_to_df(Y)
    if Y is None:
        Y = False  # normalise to the sentinel used for "not provided"

    def _resolve_obs(obs, obs_ids):
        # Translate observation IDs into integer positions; ``None`` becomes
        # the ``False`` sentinel passed on to phi.contributions.
        if obs is None:
            return False  # kept as False: pyphi may use isinstance(..., bool) internally
        if isinstance(obs, str):
            return obs_ids.index(obs)
        if isinstance(obs, np.integer):
            return int(obs)
        if isinstance(obs, int):
            return obs
        if isinstance(obs, (list, tuple)):
            return [obs_ids.index(o) if isinstance(o, str) else o for o in obs]
        return obs  # unknown type: hand it over unchanged

    if isinstance(X, pd.DataFrame):
        obs_ids = X.values[:, 0].tolist()
        to_obs_ = _resolve_obs(to_obs, obs_ids)
        from_obs_ = _resolve_obs(from_obs, obs_ids)
    else:
        # No ID column to look names up in; only map ``None`` to the sentinel
        # so both input types reach phi.contributions with the same defaults.
        to_obs_ = False if to_obs is None else to_obs
        from_obs_ = False if from_obs is None else from_obs

    if to_obs_ is False:
        print("contributions_plot: to_obs is required.")
        return

    if cont_type == "scores":
        Y = False
    if isinstance(Y, bool):
        Xconts = phi.contributions(mvmobj, X, cont_type, Y=False,
                                   from_obs=from_obs_, to_obs=to_obs_, lv_space=lv_space)
        Yconts = None
    elif "Q" in mvmobj and cont_type == "spe":
        Xconts, Yconts = phi.contributions(mvmobj, X, cont_type, Y=Y,
                                           from_obs=from_obs_, to_obs=to_obs_, lv_space=lv_space)
    else:
        print("contributions_plot: unsupported combination of Y and cont_type.")
        return

    XVar = _get_xvar_labels(mvmobj)

    def _obs_txt(obs, prefix):
        if obs is None:
            return ""
        if isinstance(obs, list):
            return f"{prefix}{', '.join(map(str, obs))}"
        return f"{prefix}{obs}"

    # The title shows the observations as the caller supplied them.
    from_txt = _obs_txt(from_obs, " from obs: ")
    to_txt = _obs_txt(to_obs, ", to obs: ")

    _new_output_file("Contributions", "Contributions")
    TOOLTIPS = [("Variable", "@names")]

    def _bar_panel(var_names, values, title):
        # One bar chart of ``values`` over the categorical ``var_names`` axis.
        src = ColumnDataSource(dict(x_=var_names, y_=values, names=var_names))
        p = figure(x_range=var_names, height=plotheight, width=plotwidth,
                   title=title, tools="save,box_zoom,pan,reset")
        _add_hover(p, TOOLTIPS)
        p.vbar(x="x_", top="y_", source=src, width=0.5)
        p.ygrid.grid_line_color = None
        p.xgrid.grid_line_color = "lightgray" if xgrid else None
        p.yaxis.axis_label = f"Contributions to {cont_type}"
        _add_hline(p)
        p.xaxis.major_label_orientation = np.pi / 2
        return p

    # Only the first row of each contribution array is plotted.
    p_list = [_bar_panel(XVar, Xconts[0], f"Contributions Plot{from_txt}{to_txt}")]
    if Yconts is not None:
        YVar = _get_yvar_labels(mvmobj)
        p_list.append(_bar_panel(YVar, Yconts[0], "Contributions Plot"))
    show(column(p_list))
[docs]
def mb_weights(mvmobj: dict, *, plotwidth: int = 600, plotheight: int = 400) -> None:
    """Plot the block super-weights of a Multi-Block PLS model as bar charts.

    For every latent variable a bar chart with one bar per X block is drawn
    from the corresponding column of ``mvmobj['Wt']``. The charts are stacked
    vertically and written to a single HTML file.

    Args:
        mvmobj (dict): Fitted MBPLS model from :func:`pyphi.mbpls`.
            Must contain the ``'Wt'`` (super-weights) and ``'Xblocknames'`` keys.
        plotwidth (int): Width of each plot in pixels. Default ``600``.
        plotheight (int): Height of each plot in pixels. Default ``400``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    component_labels = _get_lv_labels(mvmobj)
    block_names = mvmobj["Xblocknames"]
    _new_output_file("blockweights", "Block Weights")

    def _panel(lv_index, lv_label):
        # One chart: the weights of all blocks on a single latent variable.
        data = ColumnDataSource(
            dict(x_=block_names, y_=mvmobj["Wt"][:, lv_index], names=block_names)
        )
        fig = figure(
            x_range=block_names,
            title=f"Block weights for MBPLS {lv_label}",
            tools="save,box_zoom,reset",
            width=plotwidth,
            height=plotheight,
        )
        _add_hover(fig, [("Var:", "@x_")])
        fig.vbar(x="x_", top="y_", source=data, width=0.5)
        fig.y_range.range_padding = 0.1
        fig.ygrid.grid_line_color = None
        fig.axis.minor_tick_line_color = None
        fig.outline_line_color = None
        fig.yaxis.axis_label = f"Wt [{lv_index+1}]"
        fig.xaxis.major_label_orientation = np.pi / 2
        _add_hline(fig)
        return fig

    panels = []
    for lv_index, lv_label in enumerate(component_labels):
        panels.append(_panel(lv_index, lv_label))
    show(column(panels))
[docs]
def mb_r2pb(mvmobj: dict, *, plotwidth: int = 600, plotheight: int = 400) -> None:
    """Plot R² per block and per LV of a Multi-Block PLS model as stacked bars.

    One bar is drawn per X block; the stacked colour segments are the
    columns of ``mvmobj['r2pbX']``, one per latent variable.

    Args:
        mvmobj (dict): Fitted MBPLS model from :func:`pyphi.mbpls`.
            Must contain the ``'r2pbX'`` and ``'Xblocknames'`` keys.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        plotheight (int): Height of the plot in pixels. Default ``400``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    n_components = mvmobj["T"].shape[1]
    stack_names = _get_lv_labels(mvmobj)
    block_names = mvmobj["Xblocknames"]
    colours = _make_bokeh_palette(n_components)

    # Column "XVar" holds the categories; every LV label maps to its R² column.
    stack_data: dict = {"XVar": block_names}
    stack_data.update(
        (label, mvmobj["r2pbX"][:, k]) for k, label in enumerate(stack_names)
    )

    _new_output_file("r2perblock", "R2 per Block")
    fig = figure(
        x_range=block_names,
        title="R2 per Block for MBPLS",
        tools="save,box_zoom,reset",
        width=plotwidth,
        height=plotheight,
    )
    _add_hover(fig, "$name @XVar: @$name")
    fig.vbar_stack(stack_names, x="XVar", width=0.9, color=colours, source=stack_data)

    fig.y_range.range_padding = 0.1
    fig.ygrid.grid_line_color = None
    fig.axis.minor_tick_line_color = None
    fig.outline_line_color = None
    fig.yaxis.axis_label = "R2 per Block per LV"
    fig.xaxis.major_label_orientation = np.pi / 2
    show(fig)
[docs]
def mb_vip(mvmobj: dict, *, plotwidth: int = 600, plotheight: int = 400) -> None:
    """Plot block-level VIP values of a Multi-Block PLS model as a bar chart.

    The value for each X block is the sum over latent variables of the
    block super-weight multiplied by the matching ``r2y`` entry. Blocks are
    shown in descending order of that value.

    Args:
        mvmobj (dict): Fitted MBPLS model from :func:`pyphi.mbpls`.
            Must contain the ``'Wt'``, ``'r2y'``, and ``'Xblocknames'`` keys.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        plotheight (int): Height of the plot in pixels. Default ``400``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    n_components = mvmobj["T"].shape[1]
    block_names = mvmobj["Xblocknames"]
    super_weights = mvmobj["Wt"]
    r2_per_lv = mvmobj["r2y"]

    if n_components == 1:
        # Single-LV models: ``r2y`` is multiplied as a whole, not indexed.
        weighted = super_weights * r2_per_lv
    else:
        weighted = np.column_stack(
            [super_weights[:, lv] * r2_per_lv[lv] for lv in range(n_components)]
        )
    block_vip = np.sum(weighted, axis=1)

    # Negating the values makes argsort return a descending ranking.
    ranking = np.argsort(-block_vip)
    ranked_names = [block_names[pos] for pos in ranking]
    ranked_vip = block_vip[ranking]

    _new_output_file("blockvip", "Block VIP")
    data = ColumnDataSource(dict(x_=ranked_names, y_=ranked_vip, names=ranked_names))
    fig = figure(
        x_range=ranked_names,
        title="Block VIP for MBPLS",
        tools="save,box_zoom,reset",
        width=plotwidth,
        height=plotheight,
    )
    _add_hover(fig, [("Block:", "@x_")])
    fig.vbar(x="x_", top="y_", source=data, width=0.5)
    fig.y_range.range_padding = 0.1
    fig.ygrid.grid_line_color = None
    fig.axis.minor_tick_line_color = None
    fig.outline_line_color = None
    fig.yaxis.axis_label = "Block VIP"
    fig.xaxis.major_label_orientation = np.pi / 2
    _add_hline(fig)
    show(fig)
[docs]
def barplot(
    yheights,
    xtick_labels: list[str],
    *,
    plotwidth: int = 600,
    plotheight: int = 600,
    addtitle: str = "",
    xlabel: str = "",
    ylabel: str = "",
    tabtitle: str = "Bar Plot",
) -> None:
    """Generic vertical bar plot.

    A simple utility for plotting any set of values as a bar chart with
    custom tick labels.

    Args:
        yheights (list or np.ndarray): Bar heights (one value per bar).
        xtick_labels (list[str]): Category labels for the x-axis. Length
            must match ``yheights``.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        plotheight (int): Height of the plot in pixels. Default ``600``.
        addtitle (str): Plot title. Default ``""``.
        xlabel (str): X-axis label. Default ``""``.
        ylabel (str): Y-axis label. Default ``""``.
        tabtitle (str): Browser tab / HTML file title. Default ``"Bar Plot"``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    _new_output_file("BarPlot", tabtitle)
    src = ColumnDataSource(dict(x_=xtick_labels, y_=yheights, names=xtick_labels))
    # ``plotheight`` was previously accepted but never forwarded to the figure.
    p = figure(x_range=xtick_labels, title=addtitle,
               tools="save,box_zoom,pan,reset",
               width=plotwidth, height=plotheight)
    _add_hover(p, [("Variable", "@names")])
    p.vbar(x="x_", top="y_", source=src, width=0.5)
    p.xgrid.grid_line_color = None
    p.yaxis.axis_label = ylabel
    p.xaxis.axis_label = xlabel
    p.xaxis.major_label_orientation = np.pi / 2
    show(p)
[docs]
def lineplot(
    X,
    *,
    ids_2_include=None,
    x_axis=None,
    plot_title: str = "Main Title",
    tab_title: str = "Tab Title",
    xaxis_label: str = "X-axis",
    yaxis_label: str = "",
    plotheight: int = 400,
    plotwidth: int = 600,
    legend_cols: int = 1,
    linecolor: str = "blue",
    linewidth: int = 2,
    add_marker: bool = False,
    individual_plots: bool = False,
    add_legend: bool = True,
    markercolor: str = "darkblue",
    markersize: int = 10,
    fill_alpha: float = 0.2,
    line_alpha: float = 0.4,
    ncx_x_col=None,
    ncx_y_col=None,
    ncx_id_col=None,
    CLASSID: Optional[pd.DataFrame] = None,
    colorby: Optional[str] = None,
    yaxis_log: bool = False,
) -> None:
    """Multi-series line plot from a DataFrame or a list of DataFrames.

    Supports two input modes:

    **Common x-axis** (``X`` is a ``pd.DataFrame``): Each selected column
    is plotted as a separate line against a shared x-axis (either the row
    index or a custom ``x_axis`` array).

    **Non-common x-axis** (``X`` is a list of DataFrames): Each DataFrame
    represents one series with its own x-values. Use ``ncx_x_col``,
    ``ncx_y_col``, and ``ncx_id_col`` to specify the column names for
    x-values, y-values, and series IDs respectively.

    Any other input (including a list without all three ``ncx_*`` names)
    produces no plot.

    Args:
        X (pd.DataFrame or list[pd.DataFrame]): Data to plot.
        ids_2_include (list[str] or str): Column names (common x-axis mode)
            or series ID values (non-common x-axis mode) to include. If
            ``None``, all columns / series are included. Not applied when
            colouring by ``CLASSID`` on a single plot, where the series come
            from the first ``CLASSID`` column. Default ``None``.
        x_axis (list or np.ndarray): Custom x-axis values for common x-axis
            mode. If ``None``, observation sequence numbers are used.
            Default ``None``.
        plot_title (str): Main plot title. Default ``"Main Title"``.
        tab_title (str): Browser tab / HTML file title. Default ``"Tab Title"``.
        xaxis_label (str): X-axis label. Default ``"X-axis"``.
        yaxis_label (str): Y-axis label. Default ``""``.
        plotheight (int): Height of the plot in pixels. Default ``400``.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        legend_cols (int): Number of side-by-side legend columns. Items are
            spread as evenly as possible; no empty column is created.
            Default ``1``.
        linecolor (str): Line colour used when ``individual_plots=True``;
            overlaid series take their colour from a palette.
            Default ``"blue"``.
        linewidth (int): Line width in pixels. Default ``2``.
        add_marker (bool): If ``True``, overlays scatter markers on each line.
            Default ``False``.
        individual_plots (bool): If ``True``, produces one separate plot per
            series stacked vertically. If ``False``, all series are overlaid
            on a single plot. Default ``False``.
        add_legend (bool): If ``True``, adds a click-to-hide legend.
            Default ``True``.
        markercolor (str): Marker colour when ``add_marker=True`` and
            ``individual_plots=True``; overlaid series use the palette colour.
            Default ``"darkblue"``.
        markersize (int): Marker size in pixels. Default ``10``.
        fill_alpha (float): Marker fill transparency (0–1). Default ``0.2``.
        line_alpha (float): Line transparency (0–1). Default ``0.4``.
        ncx_x_col (str): Column name for x-values in non-common x-axis mode.
            Default ``None``.
        ncx_y_col (str): Column name for y-values in non-common x-axis mode.
            Default ``None``.
        ncx_id_col (str): Column name used to identify each series in
            non-common x-axis mode. Default ``None``.
        CLASSID (pd.DataFrame): Optional class-membership DataFrame. First
            column is series IDs; ``colorby`` specifies the column to colour
            by. Only used in common x-axis mode. Default ``None``.
        colorby (str): Column in ``CLASSID`` to use for colouring. Required
            if ``CLASSID`` is provided. Default ``None``.
        yaxis_log (bool): If ``True``, uses a logarithmic y-axis scale.
            Default ``False``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    TOOLTIPS = [("Obs #", "@ObsNum"), ("(x,y)", "($x, $y)"), ("ID: ", "@ColID")]
    _new_output_file("LinePlot", tab_title)

    def _make_figure(title_):
        kw = dict(y_axis_type="log") if yaxis_log else {}
        p = figure(tools=TOOLS, width=plotwidth, height=plotheight, title=title_, **kw)
        _add_hover(p, TOOLTIPS)
        return p

    def _get_x(n):
        if x_axis is None:
            return list(range(1, n + 1))
        return list(x_axis) if isinstance(x_axis, np.ndarray) else x_axis

    def _finish_axes(p, x_label, y_label):
        _add_hline(p)
        p.xaxis.axis_label = x_label
        p.yaxis.axis_label = y_label

    def _draw_series(p, x_vals, y_vals, series_id, marker_color, **line_kw):
        # Draw one line (plus optional markers) and return its renderers.
        src = ColumnDataSource(dict(x=x_vals, y=y_vals,
                                    ColID=[series_id] * len(x_vals), ObsNum=x_vals))
        glyphs = [p.line("x", "y", source=src, line_width=linewidth,
                         line_alpha=line_alpha, **line_kw)]
        if add_marker:
            glyphs.append(p.scatter("x", "y", source=src, color=marker_color,
                                    size=markersize, fill_alpha=fill_alpha))
        return glyphs

    def _draw_individual(p, x_vals, y_vals, series_id):
        # Single-series panel using the user-chosen line and marker colours.
        line_kw = dict(line_color=linecolor, color=markercolor)
        if add_legend:
            # Bokeh legend labels must be strings.
            line_kw["legend_label"] = str(series_id)
        _draw_series(p, x_vals, y_vals, series_id, markercolor, **line_kw)

    def _draw_column(p, x_vals, y_vals, series_id, color_):
        # A label may select several columns (2-D values); each sub-column
        # then becomes its own series. Returns one glyph list per series.
        if y_vals.ndim == 1:
            return [_draw_series(p, x_vals, y_vals, series_id, color_,
                                 line_color=color_)]
        return [
            _draw_series(p, x_vals, y_vals[:, ci], series_id, color_,
                         line_color=color_)
            for ci in range(y_vals.shape[1])
        ]

    def _add_legend_layout(p, legend_it):
        # Spread the items over at most ``legend_cols`` legends. Using divmod
        # avoids the empty or negative-sized chunks that rounding produced.
        n_items = len(legend_it)
        if n_items == 0:
            return
        n_cols = max(1, min(int(legend_cols), n_items))
        base, extra = divmod(n_items, n_cols)
        offset = 0
        for col_idx in range(n_cols):
            chunk = base + (1 if col_idx < extra else 0)
            leg = Legend(items=legend_it[offset:offset + chunk])
            p.add_layout(leg, "right")
            leg.click_policy = "hide"
            offset += chunk

    # --- Common x-axis (X is a DataFrame) ---
    if isinstance(X, pd.DataFrame):
        if ids_2_include is None:
            ids_2_include = X.columns.tolist()
        elif isinstance(ids_2_include, str):
            ids_2_include = [ids_2_include]

        if individual_plots:
            p_list = []
            for k, col_name in enumerate(ids_2_include):
                # Only the first panel carries the main title.
                p = _make_figure(plot_title if k == 0 else "")
                _draw_individual(p, _get_x(X.shape[0]), X[col_name].values, col_name)
                _finish_axes(p, xaxis_label, yaxis_label)
                p_list.append(p)
            show(column(p_list))
        elif CLASSID is None:
            palette = _make_bokeh_palette(len(ids_2_include))
            p = _make_figure(plot_title)
            legend_it = []
            for col_name, color_ in zip(ids_2_include, palette):
                for glyphs in _draw_column(p, _get_x(X.shape[0]),
                                           X[col_name].values, col_name, color_):
                    legend_it.append((str(col_name), glyphs))
            _finish_axes(p, xaxis_label, yaxis_label)
            if add_legend:
                _add_legend_layout(p, legend_it)
            show(p)
        else:
            classes = phi.unique(CLASSID, colorby)
            palette = _make_bokeh_palette(len(classes))
            p = _make_figure(plot_title)
            legend_it = []
            id_column = CLASSID.columns[0]
            for cls_val, color_ in zip(classes, palette):
                # Series of this class, as listed in the first CLASSID column.
                members = CLASSID[id_column][CLASSID[colorby] == cls_val].values.tolist()
                leg_glyphs = []
                for col_name in members:
                    for glyphs in _draw_column(p, _get_x(X.shape[0]),
                                               X[col_name].values, col_name, color_):
                        leg_glyphs.extend(glyphs)
                # One legend entry per class toggles all of its series.
                legend_it.append((str(cls_val), leg_glyphs))
            _finish_axes(p, xaxis_label, yaxis_label)
            if add_legend:
                _add_legend_layout(p, legend_it)
            show(p)

    # --- Non-common x-axis (X is a list of DataFrames) ---
    elif (isinstance(X, list) and ncx_y_col is not None
          and ncx_x_col is not None and ncx_id_col is not None):
        if ids_2_include is None:
            ids_2_include = [df[ncx_id_col].values[0] for df in X]
        elif isinstance(ids_2_include, str):
            ids_2_include = [ids_2_include]
        # Each frame is identified by the first value of its ID column.
        id_to_df = {df[ncx_id_col].values[0]: df for df in X}

        if individual_plots:
            p_list = []
            for k, col_name in enumerate(ids_2_include):
                df = id_to_df[col_name]
                p = _make_figure(plot_title if k == 0 else "")
                _draw_individual(p, df[ncx_x_col].values, df[ncx_y_col].values, col_name)
                _finish_axes(p, ncx_x_col, ncx_y_col)
                p_list.append(p)
            show(column(p_list))
        else:
            palette = _make_bokeh_palette(len(ids_2_include))
            p = _make_figure(plot_title)
            legend_it = []
            for col_name, color_ in zip(ids_2_include, palette):
                df = id_to_df[col_name]
                glyphs = _draw_series(p, df[ncx_x_col].values, df[ncx_y_col].values,
                                      col_name, color_, line_color=color_)
                legend_it.append((str(col_name), glyphs))
            _finish_axes(p, xaxis_label, yaxis_label)
            if add_legend:
                _add_legend_layout(p, legend_it)
            show(p)
[docs]
def plot_spectra(
    X,
    *,
    xaxis=None,
    plot_title: str = "Main Title",
    tab_title: str = "Tab Title",
    xaxis_label: str = "X-axis",
    yaxis_label: str = "Y-axis",
    linecolor: str = "blue",
    linewidth: int = 2,
) -> None:
    """Plot spectral data: one line per sample (row).

    Draws all spectra as a multi-line plot where each row of ``X`` is a
    separate spectrum. The x-axis values are taken from the DataFrame column
    headers (if a DataFrame) or from ``xaxis`` (if an ndarray).

    Args:
        X (pd.DataFrame or np.ndarray): Spectral matrix (n_samples × n_wavelengths).
            If a DataFrame, the first column is skipped (it is expected to
            hold sample IDs) and the remaining column headers are used as
            the x-axis values.
        xaxis (list or np.ndarray): Custom x-axis values (e.g. wavelengths).
            Only used when ``X`` is an ndarray. If ``None``, sequential
            integers starting at 0 are used. Default ``None``.
        plot_title (str): Main plot title. Default ``"Main Title"``.
        tab_title (str): Browser tab / HTML file title. Default ``"Tab Title"``.
        xaxis_label (str): X-axis label (e.g. ``"Wavelength (nm)"``).
            Default ``"X-axis"``.
        yaxis_label (str): Y-axis label (e.g. ``"Absorbance"``).
            Default ``"Y-axis"``.
        linecolor (str): Colour applied to all spectral lines. Default ``"blue"``.
        linewidth (int): Line width in pixels. Default ``2``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.

    Raises:
        TypeError: If ``X`` is neither a ``pd.DataFrame`` nor an ``np.ndarray``.
    """
    if isinstance(X, pd.DataFrame):
        # Column 0 is skipped; the remaining headers become the shared x vector.
        x = np.array(X.columns[1:].tolist()).reshape(1, -1)
        y = X.values[:, 1:].astype(float)
    elif isinstance(X, np.ndarray):
        y = X.copy()
        x = (np.arange(X.shape[1]) if xaxis is None else np.array(xaxis)).reshape(1, -1)
    else:
        # Fail early and explicitly instead of hitting an unbound-variable
        # error further down, after an output file has already been created.
        raise TypeError(
            "X must be a pandas DataFrame or a numpy ndarray, "
            f"got {type(X).__name__}"
        )

    _new_output_file("Spectra", tab_title)
    p = figure(title=plot_title)
    p.xaxis.axis_label = xaxis_label
    p.yaxis.axis_label = yaxis_label
    # multi_line expects one x-list per y-list, so the single shared x vector
    # is repeated once per spectrum (row of y).
    p.multi_line(x.tolist() * y.shape[0], y.tolist(),
                 line_color=linecolor, line_width=linewidth)
    show(p)
[docs]
def scatter_with_labels(
    x,
    y,
    *,
    xlabel: str = "X var",
    ylabel: str = "Y var",
    labels=None,
    tabtitle: str = "Scatter Plot",
    plottitle: str = "Scatter",
    legend_cols: int = 1,
    CLASSID: Optional[pd.DataFrame] = None,
    colorby: Optional[str] = None,
    plotwidth: int = 600,
    plotheight: int = 600,
    markercolor: str = "darkblue",
    markersize: int = 10,
    fill_alpha: float = 0.2,
    line_alpha: float = 0.4,
) -> None:
    """Generic scatter plot with hover labels and optional class colouring.

    A flexible utility for plotting any two arrays against each other, with
    per-point labels shown on hover and optional class-based colouring.
    Origin lines are added through ``_add_origin_lines``.

    Args:
        x (list or np.ndarray): X coordinates of each point.
        y (list or np.ndarray): Y coordinates of each point. Must have the
            same length as ``x``.
        xlabel (str): X-axis label. Default ``"X var"``.
        ylabel (str): Y-axis label. Default ``"Y var"``.
        labels (list[str]): Per-point labels shown in the hover tooltip.
            If ``None``, points are labelled ``"Obs 0"``, ``"Obs 1"``, etc.
            Default ``None``.
        tabtitle (str): Browser tab / HTML file title. Default ``"Scatter Plot"``.
        plottitle (str): Main plot title. Default ``"Scatter"``.
        legend_cols (int): Number of side-by-side legend columns when
            ``CLASSID`` is used. Values below 1 are treated as 1.
            Default ``1``.
        CLASSID (pd.DataFrame): Optional class-membership DataFrame for
            colouring points. Rows are matched to the points **by position**
            (row ``i`` classifies ``x[i]``, ``y[i]``), so it must have one
            row per point in the same order. Default ``None``.
        colorby (str): Column in ``CLASSID`` to colour by. Class colouring
            is only applied when both ``CLASSID`` and ``colorby`` are given.
            Default ``None``.
        plotwidth (int): Width of the plot in pixels. Default ``600``.
        plotheight (int): Height of the plot in pixels. Default ``600``.
        markercolor (str): Marker colour when no class colouring is applied.
            Default ``"darkblue"``.
        markersize (int): Marker size in pixels. Default ``10``.
        fill_alpha (float): Marker fill transparency (0–1). Default ``0.2``.
        line_alpha (float): Marker edge transparency (0–1). Default ``0.4``.

    Returns:
        None: Saves and opens an HTML file via Bokeh's ``show()``.
    """
    if labels is None:
        labels = [f"Obs {i}" for i in range(len(x))]

    _new_output_file("Scatter_Plot", tabtitle)
    TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
    TOOLTIPS = [("index", "$index"), ("(x,y)", "($x, $y)"), ("Property", "@names")]
    p = figure(tools=TOOLS, width=plotwidth, height=plotheight, title=plottitle)
    _add_hover(p, TOOLTIPS)

    if CLASSID is None or colorby is None:
        # No class information: a single series in one colour.
        src = ColumnDataSource(dict(x=x, y=y, names=labels))
        p.scatter("x", "y", source=src, size=markersize, color=markercolor,
                  fill_alpha=fill_alpha, line_alpha=line_alpha)
    else:
        Classes_ = phi.unique(CLASSID, colorby)
        palette = _make_bokeh_palette(len(Classes_))
        x_arr = np.array(x)
        y_arr = np.array(y)
        labels_arr = np.array(labels)
        class_arr = CLASSID[colorby].values

        # One glyph (and one data source) per class so each class gets its
        # own colour and its own legend entry.
        legend_it = []
        for cls_val, color_ in zip(Classes_, palette):
            mask = class_arr == cls_val
            src = ColumnDataSource(dict(x=x_arr[mask], y=y_arr[mask],
                                        names=labels_arr[mask]))
            g = p.scatter("x", "y", source=src, color=color_, size=markersize,
                          fill_alpha=fill_alpha, line_alpha=line_alpha)
            legend_it.append((str(cls_val), [g]))

        # Split the legend entries over the requested number of columns.
        # Every column gets round(n / cols) entries and the last column takes
        # whatever remains. Rounding can produce empty (or negative-sized)
        # chunks, so those are skipped rather than added as empty legends.
        n_items = len(legend_it)
        n_cols = max(1, legend_cols)  # guard against division by zero
        per_col = int(np.round(n_items / n_cols))
        sizes = [per_col] * n_cols
        sizes[-1] = n_items - sum(sizes[:-1])
        offset = 0
        for size in sizes:
            items = legend_it[offset:offset + size]
            offset += size
            if not items:
                continue
            leg = Legend(items=items)
            p.add_layout(leg, "right")
            leg.click_policy = "hide"

    p.xaxis.axis_label = xlabel
    p.yaxis.axis_label = ylabel
    _add_origin_lines(p)
    show(p)