Source code for ggpubpy.qqplot

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from typing import Tuple, Optional


def _ppoints(n: int) -> np.ndarray:
    """Return plotting positions (i - 0.5) / n for i in 1..n."""
    if n <= 0:
        return np.array([])
    return (np.arange(1, n + 1) - 0.5) / float(n)


def _ensure_distribution(dist: "str | object"):
    return getattr(stats, dist) if isinstance(dist, str) else dist


def _clean_sample(x: np.ndarray) -> np.ndarray:
    x = np.asarray(x)
    return x[~np.isnan(x)]


def _validate_sparams(dist, sparams) -> Tuple[tuple, int]:
    if not isinstance(sparams, (tuple, list)):
        sparams = (sparams,)
    if len(sparams) < dist.numargs:
        raise ValueError(
            "Missing required shape parameters for distribution %s. See scipy.stats.%s." % (
                dist.shapes,
                dist.name,
            )
        )
    return tuple(sparams), dist.numargs


def _compute_quantiles(x: np.ndarray, dist, sparams: tuple) -> Tuple[np.ndarray, np.ndarray, Tuple]:
    theor, observed = stats.probplot(x, sparams=sparams, dist=dist, fit=False)
    fit_params = dist.fit(x)
    loc, scale = fit_params[-2], fit_params[-1]
    shape = fit_params[:-2] if len(fit_params) > 2 else None
    if loc != 0 and scale != 1:
        observed = (np.sort(observed) - loc) / scale
    return theor, observed, (shape, loc, scale)


def _fit_regression(x: np.ndarray, y: np.ndarray) -> Tuple[float, float, float]:
    slope, intercept, r, _, _ = stats.linregress(x, y)
    return slope, intercept, r**2


def _confidence_envelope(
    theor: np.ndarray,
    slope: float,
    dist,
    shape: Optional[tuple],
    n: int,
    confidence: float,
) -> Tuple[np.ndarray, np.ndarray]:
    P = _ppoints(n)
    crit = stats.norm.ppf(1 - (1 - confidence) / 2)
    pdf = dist.pdf(theor) if shape in (None, ()) else dist.pdf(theor, *shape)
    se = (slope / pdf) * np.sqrt(P * (1 - P) / n)
    return crit * se, -crit * se



[docs]
def qqplot(
    x,
    dist: "str | object" = "norm",
    sparams=(),
    confidence: "float | bool" = 0.95,
    square: bool = True,
    ax=None,
    **kwargs,
):
    """Create a Q–Q plot with optional confidence envelope and regression line.

    Parameters are consistent with scipy-based Q–Q plots. Returns the Matplotlib Axes.
    """
    scatter_kwargs = {"marker": "o", "color": "blue"}
    scatter_kwargs.update(kwargs)

    dist = _ensure_distribution(dist)
    x = _clean_sample(x)
    sparams, _ = _validate_sparams(dist, sparams)

    theor, observed, (shape, _, _) = _compute_quantiles(x, dist, sparams)
    slope, intercept, r2 = _fit_regression(theor, observed)

    if ax is None:
        ax = plt.gca()

    ax.scatter(theor, observed, **scatter_kwargs)
    ax.set_xlabel("Theoretical quantiles")
    ax.set_ylabel("Ordered quantiles")

    # 45-degree line bounds
    xlim, ylim = ax.get_xlim(), ax.get_ylim()
    low = min(xlim[0], ylim[0])
    high = max(xlim[1], ylim[1])
    ax.plot([low, high], [low, high], color="slategrey", lw=1.5)
    ax.set_xlim((low, high))
    ax.set_ylim((low, high))

    # Regression line and R^2
    fit_val = slope * theor + intercept
    ax.plot(theor, fit_val, "r-", lw=2)
    posx = low + 0.60 * (high - low)
    posy = low + 0.10 * (high - low)
    ax.text(posx, posy, f"$R^2={r2:.3f}$")

    if confidence is not False:
        conf = float(confidence)
        delta_up, delta_low = _confidence_envelope(theor, slope, dist, shape, x.size, conf)
        upper = fit_val + delta_up
        lower = fit_val + delta_low
        ax.plot(theor, upper, "r--", lw=1.25)
        ax.plot(theor, lower, "r--", lw=1.25)

    if square:
        ax.set_aspect("equal")

    return ax