"""Single dose-response curve fitting via nonlinear least squares.
Uses ``scipy.optimize.least_squares`` with Trust Region Reflective (TRF)
algorithm for bounded optimisation. EC50 is constrained to be positive.
Includes data-driven self-starting estimates so the user never has to
guess initial parameter values.
Validates against: R drc::drm()
"""
from __future__ import annotations
import numpy as np
from numpy.typing import NDArray
from scipy.optimize import least_squares
from pystatsbio.doseresponse._common import CurveParams, DoseResponseResult
from pystatsbio.doseresponse._models import _MODEL_MAP, VALID_MODELS
# ---------------------------------------------------------------------------
# Self-starting parameter estimation
# ---------------------------------------------------------------------------
def _interpolate_ec50(
dose_sorted: NDArray,
resp_sorted: NDArray,
midpoint: float,
) -> float:
"""Find dose at which response crosses *midpoint* via linear interpolation
on the log-dose scale.
"""
for i in range(len(resp_sorted) - 1):
r1, r2 = resp_sorted[i], resp_sorted[i + 1]
if (r1 - midpoint) * (r2 - midpoint) <= 0:
d1 = np.log(dose_sorted[i])
d2 = np.log(dose_sorted[i + 1])
if abs(r2 - r1) < 1e-12:
return float(np.exp((d1 + d2) / 2.0))
frac = (midpoint - r1) / (r2 - r1)
return float(np.exp(d1 + frac * (d2 - d1)))
# No crossing — geometric mean of dose range
return float(np.exp(np.mean(np.log(dose_sorted))))
def _estimate_hill(
dose_sorted: NDArray,
resp_sorted: NDArray,
bottom: float,
top: float,
) -> float:
"""Estimate Hill coefficient via logit-linear regression."""
span = top - bottom
if abs(span) < 1e-12:
return 1.0
y_norm = np.clip((resp_sorted - bottom) / span, 0.01, 0.99)
logit_y = np.log(y_norm / (1.0 - y_norm))
log_dose = np.log(dose_sorted)
if len(log_dose) < 2:
return 1.0
# Simple linear regression: logit(y) ~ slope * log(dose) + intercept
coeffs = np.polyfit(log_dose, logit_y, 1)
hill = float(np.clip(coeffs[0], -20.0, 20.0))
if abs(hill) < 0.05:
hill = 1.0 if hill >= 0 else -1.0
return hill
def _initial_params(
dose: NDArray,
response: NDArray,
model: str,
) -> dict[str, float]:
"""Data-driven starting values for nonlinear fitting.
Algorithm
---------
1. Use only dose > 0 for log-scale estimation.
2. bottom/top from lowest/highest dose-group means.
3. Direction from correlation of response with dose rank.
4. EC50 via linear interpolation at midpoint on log-dose scale.
5. Hill via logit-linear regression.
"""
mask = dose > 0
dose_pos = dose[mask]
resp_pos = response[mask]
if len(dose_pos) < 2:
# Fallback — not enough positive-dose data
return {
"bottom": float(np.min(response)),
"top": float(np.max(response)),
"ec50": 1.0,
"hill": 1.0,
**({"asymmetry": 1.0} if model == "LL.5" else {}),
**({"hormesis": 0.0} if model == "BC.5" else {}),
}
order = np.argsort(dose_pos)
d_sorted = dose_pos[order]
r_sorted = resp_pos[order]
n_edge = max(1, len(d_sorted) // 4)
low_resp = float(np.mean(r_sorted[:n_edge]))
high_resp = float(np.mean(r_sorted[-n_edge:]))
# Include dose=0 data in direction detection
if np.any(dose == 0):
zero_resp = float(np.mean(response[dose == 0]))
low_resp = min(low_resp, zero_resp)
increasing = high_resp > zero_resp
else:
increasing = high_resp > low_resp
if increasing:
bottom_est = low_resp
top_est = high_resp
else:
bottom_est = high_resp
top_est = low_resp
# EC50 — dose at midpoint response
mid = (bottom_est + top_est) / 2.0
ec50_est = _interpolate_ec50(d_sorted, r_sorted, mid)
# Hill slope
hill_est = _estimate_hill(d_sorted, r_sorted, bottom_est, top_est)
start: dict[str, float] = {
"bottom": bottom_est,
"top": top_est,
"ec50": max(ec50_est, 1e-20), # ensure positive
"hill": hill_est,
}
if model == "LL.5":
start["asymmetry"] = 1.0
elif model == "BC.5":
start["hormesis"] = 0.0
return start
# ---------------------------------------------------------------------------
# Standard error computation
# ---------------------------------------------------------------------------
def _compute_se(
jac: NDArray,
rss: float,
n_obs: int,
n_params: int,
) -> NDArray[np.floating]:
"""Standard errors from Jacobian: ``se = sqrt(diag((J'J)^{-1} * s²))``.
Parameters
----------
jac : (n_obs, n_params)
Jacobian of the residual vector at the solution.
rss : float
Residual sum of squares.
n_obs, n_params : int
Number of observations and parameters.
"""
if n_obs <= n_params:
return np.full(n_params, np.nan)
s2 = rss / (n_obs - n_params)
JtJ = jac.T @ jac
try:
cov = np.linalg.inv(JtJ) * s2
se = np.sqrt(np.maximum(np.diag(cov), 0.0))
except np.linalg.LinAlgError:
se = np.full(n_params, np.nan)
return se
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
[docs]
def fit_drm(
dose: NDArray[np.floating],
response: NDArray[np.floating],
*,
model: str = "LL.4",
weights: NDArray[np.floating] | None = None,
start: dict[str, float] | None = None,
lower: dict[str, float] | None = None,
upper: dict[str, float] | None = None,
) -> DoseResponseResult:
"""Fit a dose-response model to a single curve.
Uses Trust Region Reflective nonlinear least squares
(``scipy.optimize.least_squares``).
Parameters
----------
dose : array
Dose (concentration) values.
response : array
Response values.
model : str
Model name: ``'LL.4'``, ``'LL.5'``, ``'W1.4'``, ``'W2.4'``, ``'BC.5'``.
weights : array or None
Optional observation weights.
start : dict or None
Starting values for parameters. If ``None``, uses self-starting
estimates derived from the data.
lower, upper : dict or None
Box constraints on parameters.
Returns
-------
DoseResponseResult
Examples
--------
>>> import numpy as np
>>> dose = np.array([0, 0.01, 0.1, 1, 10, 100])
>>> response = np.array([10, 12, 30, 55, 85, 92])
>>> result = fit_drm(dose, response, model='LL.4')
>>> round(result.params.ec50, 1)
1.0
Validates against: R drc::drm()
"""
# --- Validate ---
dose = np.asarray(dose, dtype=np.float64)
response = np.asarray(response, dtype=np.float64)
if dose.ndim != 1 or response.ndim != 1:
raise ValueError("dose and response must be 1-D arrays")
if dose.shape != response.shape:
raise ValueError(
f"dose and response must have same shape, got {dose.shape} and {response.shape}"
)
if model not in VALID_MODELS:
raise ValueError(f"model must be one of {VALID_MODELS}, got {model!r}")
model_func, param_names = _MODEL_MAP[model]
n_params = len(param_names)
n_obs = len(dose)
if n_obs < n_params + 1:
raise ValueError(
f"Need at least {n_params + 1} observations for model {model}, got {n_obs}"
)
if weights is not None:
weights = np.asarray(weights, dtype=np.float64)
if weights.shape != dose.shape:
raise ValueError("weights must have same shape as dose")
# --- Starting values ---
if start is None:
start = _initial_params(dose, response, model)
x0 = np.array([start[name] for name in param_names], dtype=np.float64)
# --- Bounds ---
lb = np.full(n_params, -np.inf)
ub = np.full(n_params, np.inf)
# EC50 must be positive
ec50_idx = param_names.index("ec50")
lb[ec50_idx] = 1e-20
if lower is not None:
for name, val in lower.items():
lb[param_names.index(name)] = val
if upper is not None:
for name, val in upper.items():
ub[param_names.index(name)] = val
# Ensure starting values are within bounds
x0 = np.clip(x0, lb + 1e-15, ub - 1e-15)
# --- Residual function ---
def residuals(p: NDArray) -> NDArray:
kwargs = dict(zip(param_names, p))
pred = model_func(dose, **kwargs)
r = response - pred
if weights is not None:
r = r * np.sqrt(weights)
return r
# --- Fit ---
result = least_squares(
residuals,
x0,
method="trf",
bounds=(lb, ub),
jac="2-point",
max_nfev=2000,
xtol=1e-12,
ftol=1e-12,
gtol=1e-12,
)
# --- Extract ---
popt = result.x
res_vec = result.fun
rss = float(np.sum(res_vec**2))
converged = result.success
n_iter = result.nfev
jac = result.jac
se = _compute_se(jac, rss, n_obs, n_params)
# AIC / BIC
aic = float(n_obs * np.log(rss / n_obs) + 2 * n_params)
bic = float(n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs))
curve_params = CurveParams.from_array(popt, model)
return DoseResponseResult(
params=curve_params,
se=se,
residuals=res_vec,
rss=rss,
aic=aic,
bic=bic,
converged=converged,
n_iter=n_iter,
model=model,
dose=dose,
response=response,
n_obs=n_obs,
jac=jac,
)