Source code for ggpubpy.qqplot

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from typing import Tuple, Optional


def _ppoints(n: int) -> np.ndarray:
    """Return plotting positions (i - 0.5) / n for i in 1..n."""
    if n <= 0:
        return np.array([])
    return (np.arange(1, n + 1) - 0.5) / float(n)


def _ensure_distribution(dist: "str | object"):
    return getattr(stats, dist) if isinstance(dist, str) else dist


def _clean_sample(x: np.ndarray) -> np.ndarray:
    x = np.asarray(x)
    return x[~np.isnan(x)]


def _validate_sparams(dist, sparams) -> Tuple[tuple, int]:
    if not isinstance(sparams, (tuple, list)):
        sparams = (sparams,)
    if len(sparams) < dist.numargs:
        raise ValueError(
            "Missing required shape parameters for distribution %s. See scipy.stats.%s." % (
                dist.shapes,
                dist.name,
            )
        )
    return tuple(sparams), dist.numargs


def _compute_quantiles(x: np.ndarray, dist, sparams: tuple) -> Tuple[np.ndarray, np.ndarray, Tuple]:
    theor, observed = stats.probplot(x, sparams=sparams, dist=dist, fit=False)
    fit_params = dist.fit(x)
    loc, scale = fit_params[-2], fit_params[-1]
    shape = fit_params[:-2] if len(fit_params) > 2 else None
    if loc != 0 and scale != 1:
        observed = (np.sort(observed) - loc) / scale
    return theor, observed, (shape, loc, scale)


def _fit_regression(x: np.ndarray, y: np.ndarray) -> Tuple[float, float, float]:
    slope, intercept, r, _, _ = stats.linregress(x, y)
    return slope, intercept, r**2


def _confidence_envelope(
    theor: np.ndarray,
    slope: float,
    dist,
    shape: Optional[tuple],
    n: int,
    confidence: float,
) -> Tuple[np.ndarray, np.ndarray]:
    P = _ppoints(n)
    crit = stats.norm.ppf(1 - (1 - confidence) / 2)
    pdf = dist.pdf(theor) if shape in (None, ()) else dist.pdf(theor, *shape)
    se = (slope / pdf) * np.sqrt(P * (1 - P) / n)
    return crit * se, -crit * se


[docs] def qqplot( x, dist: "str | object" = "norm", sparams=(), confidence: "float | bool" = 0.95, square: bool = True, ax=None, **kwargs, ): """Create a Q–Q plot with optional confidence envelope and regression line. Parameters are consistent with scipy-based Q–Q plots. Returns the Matplotlib Axes. """ scatter_kwargs = {"marker": "o", "color": "blue"} scatter_kwargs.update(kwargs) dist = _ensure_distribution(dist) x = _clean_sample(x) sparams, _ = _validate_sparams(dist, sparams) theor, observed, (shape, _, _) = _compute_quantiles(x, dist, sparams) slope, intercept, r2 = _fit_regression(theor, observed) if ax is None: ax = plt.gca() ax.scatter(theor, observed, **scatter_kwargs) ax.set_xlabel("Theoretical quantiles") ax.set_ylabel("Ordered quantiles") # 45-degree line bounds xlim, ylim = ax.get_xlim(), ax.get_ylim() low = min(xlim[0], ylim[0]) high = max(xlim[1], ylim[1]) ax.plot([low, high], [low, high], color="slategrey", lw=1.5) ax.set_xlim((low, high)) ax.set_ylim((low, high)) # Regression line and R^2 fit_val = slope * theor + intercept ax.plot(theor, fit_val, "r-", lw=2) posx = low + 0.60 * (high - low) posy = low + 0.10 * (high - low) ax.text(posx, posy, f"$R^2={r2:.3f}$") if confidence is not False: conf = float(confidence) delta_up, delta_low = _confidence_envelope(theor, slope, dist, shape, x.size, conf) upper = fit_val + delta_up lower = fit_val + delta_low ax.plot(theor, upper, "r--", lw=1.25) ax.plot(theor, lower, "r--", lw=1.25) if square: ax.set_aspect("equal") return ax