Source code for nireports.reportlets.nuisance

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
#
# Copyright 2023 The NiPreps Developers <nipreps@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# We support and encourage derived works from this project, please read
# about our expectations at
#
#     https://www.nipreps.org/community/licensing/
#
# STATEMENT OF CHANGES: This file was ported carrying over full git history from
# other NiPreps projects licensed under the Apache-2.0 terms.
"""Plotting distributions."""

import math
import operator
import os.path as op

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_pdf import FigureCanvasPdf as FigureCanvas
from matplotlib.colors import Normalize
from matplotlib.gridspec import GridSpec, GridSpecFromSubplotSpec

from nireports.tools.ndimage import _get_values_inside_a_mask

DEFAULT_DPI = 300
DINA4_LANDSCAPE = (11.69, 8.27)
DINA4_PORTRAIT = (8.27, 11.69)



[docs]
def plot_fd(fd_file, fd_radius, mean_fd_dist=None, figsize=DINA4_LANDSCAPE):
    fd_power = _calc_fd(fd_file, fd_radius)

    fig = plt.Figure(figsize=figsize)
    FigureCanvas(fig)

    if mean_fd_dist is not None:
        grid = GridSpec(2, 4)
    else:
        grid = GridSpec(1, 2, width_ratios=[3, 1])
        grid.update(hspace=1.0, right=0.95, left=0.1, bottom=0.2)

    ax = fig.add_subplot(grid[0, :-1])
    ax.plot(fd_power)
    ax.set_xlim((0, len(fd_power)))
    ax.set_ylabel("Frame Displacement [mm]")
    ax.set_xlabel("Frame number")
    ylim = ax.get_ylim()

    ax = fig.add_subplot(grid[0, -1])
    sns.histplot(y=fd_power, kde=True, stat="density", kde_kws={"cut": 3}, edgecolor=None, ax=ax)
    ax.set_ylim(ylim)

    if mean_fd_dist is not None:
        ax = fig.add_subplot(grid[1, :])
        sns.histplot(
            mean_fd_dist, kde=True, stat="density", kde_kws={"cut": 3}, edgecolor=None, ax=ax
        )
        ax.set_xlabel("Mean Frame Displacement (over all subjects) [mm]")
        mean_fd = fd_power.mean()
        label = f"$\\overline{{\\mathrm{{FD}}}}$ = {mean_fd:g}"
        plot_vline(mean_fd, label, ax=ax)

    return fig




[docs]
def plot_dist(
    main_file,
    mask_file,
    xlabel,
    distribution=None,
    xlabel2=None,
    figsize=DINA4_LANDSCAPE,
):
    data = _get_values_inside_a_mask(main_file, mask_file)

    fig = plt.Figure(figsize=figsize)
    FigureCanvas(fig)

    gsp = GridSpec(2, 1)
    ax = fig.add_subplot(gsp[0, 0])
    sns.histplot(data.astype(np.double), kde=False, bins=100, edgecolor=None, ax=ax)
    ax.set_xlabel(xlabel)

    ax = fig.add_subplot(gsp[1, 0])
    sns.histplot(
        np.array(distribution, dtype="f8"),
        kde=True,
        stat="density",
        kde_kws={"cut": 3},
        edgecolor=None,
        ax=ax,
    )
    cur_val = np.median(data)
    label = f"{cur_val:g}"
    plot_vline(cur_val, label, ax=ax)
    ax.set_xlabel(xlabel2)

    return fig




[docs]
def plot_vline(cur_val, label, ax):
    ax.axvline(cur_val)
    ylim = ax.get_ylim()
    vloc = (ylim[0] + ylim[1]) / 2.0
    xlim = ax.get_xlim()
    pad = (xlim[0] + xlim[1]) / 100.0
    ax.text(
        cur_val - pad,
        vloc,
        label,
        color="blue",
        rotation=90,
        verticalalignment="center",
        horizontalalignment="right",
    )



def _calc_rows_columns(ratio, n_images):
    rows = 2
    for _ in range(100):
        columns = math.floor(ratio * rows)
        total = (rows - 1) * columns
        if total > n_images:
            rows = np.ceil(n_images / columns) + 1
            break
        rows += 1
    return int(rows), int(columns)


def _calc_fd(fd_file, fd_radius):
    from math import pi

    with open(fd_file) as f:
        lines = f.readlines()
    rows = [[float(x) for x in line.split()] for line in lines]
    cols = np.array([list(col) for col in zip(*rows)])

    translations = np.transpose(np.abs(np.diff(cols[0:3, :])))
    rotations = np.transpose(np.abs(np.diff(cols[3:6, :])))

    fd_power = np.sum(translations, axis=1) + (fd_radius * pi / 180) * np.sum(rotations, axis=1)

    # FD is zero for the first time point
    fd_power = np.insert(fd_power, 0, 0)

    return fd_power


def _get_mean_fd_distribution(fd_files, fd_radius):
    mean_fds = []
    max_fds = []
    for fd_file in fd_files:
        fd_power = _calc_fd(fd_file, fd_radius)
        mean_fds.append(fd_power.mean())
        max_fds.append(fd_power.max())

    return mean_fds, max_fds



[docs]
def plot_qi2(x_grid, ref_pdf, fit_pdf, ref_data, cutoff_idx, out_file=None):
    fig, ax = plt.subplots()

    ax.plot(
        x_grid,
        ref_pdf,
        linewidth=2,
        alpha=0.5,
        label="background",
        color="dodgerblue",
    )

    refmax = np.percentile(ref_data, 99.95)
    x_max = x_grid[-1]

    ax.hist(
        ref_data,
        40 * max(int(refmax / x_max), 1),
        fc="dodgerblue",
        histtype="stepfilled",
        alpha=0.2,
        density=True,
    )
    fit_pdf[fit_pdf > 1.0] = np.nan
    ax.plot(
        x_grid,
        fit_pdf,
        linewidth=2,
        alpha=0.5,
        label="chi2",
        color="darkorange",
    )

    ylims = ax.get_ylim()
    ax.axvline(
        x_grid[-cutoff_idx],
        ymax=ref_pdf[-cutoff_idx] / ylims[1],
        color="dodgerblue",
    )
    plt.xlabel('Intensity within "hat" mask')
    plt.ylabel("Frequency")
    ax.set_xlim([0, x_max])
    plt.legend()

    if out_file is None:
        out_file = op.abspath("qi2_plot.svg")

    fig.savefig(out_file, bbox_inches="tight", pad_inches=0, dpi=300)
    plt.close(fig=fig)
    return out_file




[docs]
def plot_carpet(
    data,
    segments=None,
    cmap=None,
    tr=None,
    detrend=True,
    subplot=None,
    title=None,
    output_file=None,
    size=(900, 1200),
    sort_rows="ward",
    drop_trs=0,
    legend=True,
    fontsize=None,
):
    """
    Plot an image representation of voxel intensities across time.

    This kind of plot is known as "carpet plot" or "Power plot".
    See Jonathan Power Neuroimage 2017 Jul 1; 154:150-158.

    Parameters
    ----------
    data : N x T :obj:`numpy.array`
        The functional data to be plotted (*N* sampling locations by *T* timepoints).
    segments: :obj:`dict`, optional
        A mapping between segment labels (e.g., `"Left Cortex"`) and list of indexes
        in the data array.
    cmap : colormap
        Overrides the generation of an automated colormap.
    tr : float , optional
        Specify the TR, if specified it uses this value. If left as None,
        # of frames is plotted instead of time.
    detrend : :obj:`bool`, optional
        Detrend and standardize the data prior to plotting.
    subplot : matplotlib subplot, optional
        Subplot to plot figure on.
    title : string, optional
        The title displayed on the figure.
    output_file : string, or None, optional
        The name of an image file to export the plot to. Valid extensions
        are .png, .pdf, .svg. If output_file is not None, the plot
        is saved to a file, and the display is closed.
    size : :obj:`tuple`
        Maximum number of samples to plot (voxels, timepoints)
    sort_rows : :obj:`str` or :obj:`False` or :obj:`None`
        Apply a clustering algorithm to reorganize the rows of the carpet.
        ``""``, ``False``, and ``None`` skip clustering sorting.
        ``"linkage"`` uses linkage hierarchical clustering
        :obj:`scipy.cluster.hierarchy.linkage`.
        Any other value that Python evaluates to ``True`` will use the
        default clustering, which is :obj:`sklearn.cluster.ward_tree`.

    """
    if segments is None:
        segments = {"whole brain (voxels)": list(range(data.shape[0]))}

    nsegments = len(segments)
    if nsegments == 1:
        legend = False

    if cmap is None:
        colors = mpl.colormaps["tab10"].colors
    elif cmap == "paired":
        colors = list(mpl.colormaps["Paired"].colors)
        colors[0], colors[1] = colors[1], colors[0]
        colors[2], colors[7] = colors[7], colors[2]

    if detrend:
        from nilearn.signal import clean

        data = clean(data.T, t_r=tr, filter=False).T

    # We want all subplots to have the same dynamic range
    vminmax = np.percentile(data[:, drop_trs:], [2, 98])

    # Decimate number of time-series before clustering
    n_dec = int((1.8 * data.shape[0]) // size[0])
    if n_dec > 1:
        segments = {
            lab: idx[::n_dec] for lab, idx in segments.items() if np.array(idx).shape >= (1,)
        }

    # Cluster segments (if argument enabled)
    if sort_rows:
        from scipy.cluster.hierarchy import dendrogram, linkage
        from sklearn.cluster import ward_tree

        for seg_label, seg_idx in segments.items():
            # In debugging cases, we might have ROIs too small to have enough rows to sort
            if len(seg_idx) < 2:
                continue
            roi_data = data[seg_idx]
            if isinstance(sort_rows, str) and sort_rows.lower() == "linkage":
                linkage_matrix = linkage(
                    roi_data, method="average", metric="euclidean", optimal_ordering=True
                )
            else:
                children, _, n_leaves, _, distances = ward_tree(roi_data, return_distance=True)
                linkage_matrix = _ward_to_linkage(children, n_leaves, distances)

            dn = dendrogram(linkage_matrix, no_plot=True)
            # Override the ordering of the indices in this segment
            segments[seg_label] = np.array(seg_idx)[np.array(dn["leaves"])]

    # If subplot is not defined
    if subplot is None:
        figure, allaxes = plt.subplots(figsize=(19.2, 10))
        subplot = allaxes.get_subplotspec()
    else:
        subplotax = plt.subplot(subplot)
        figure = subplotax.get_figure()

    # Remove spines and ticks in all figure's axes
    for ax in figure.axes:
        ax.spines[:].set_visible(False)
        ax.spines[:].set_color("none")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    fontsize = fontsize or 24

    # Length before decimation
    n_trs = data.shape[-1]

    # Calculate time decimation factor
    t_step = max(n_trs // size[1], 1)
    # Skip over dropped TRs if this doesn't reduce the number of time points
    t_start = min(t_step - 1, drop_trs)
    t_stop = size[1] * t_step + t_start
    data = data[:, t_start:t_stop:t_step]

    xticks = np.linspace(0, data.shape[-1] - 1, endpoint=True, num=7)
    idx_ticks = t_step * xticks + t_start
    if tr is not None:
        xticklabels = [f"{t // 60:02d}:{t % 60:02d}" for t in np.rint(tr * idx_ticks).astype(int)]
    else:
        xticklabels = np.rint(idx_ticks).astype(int)

    # Define nested GridSpec
    gs = GridSpecFromSubplotSpec(
        nsegments,
        1,
        subplot_spec=subplot,
        hspace=0.05,
        height_ratios=[len(v) for v in segments.values()],
    )

    for i, indices in enumerate(segments.values()):
        # Carpet plot
        ax = plt.subplot(gs[i])

        ax.imshow(
            data[indices, :],
            interpolation="nearest",
            aspect="auto",
            cmap="gray",
            vmin=vminmax[0],
            vmax=vminmax[1],
        )

        # Toggle the spine objects
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["bottom"].set_visible(False)

        # Make colored left axis
        ax.spines["left"].set_linewidth(3)
        ax.spines["left"].set_color(colors[i])
        ax.spines["left"].set_capstyle("butt")
        ax.spines["left"].set_position(("outward", 2))

        ax.set_yticks([])
        ax.grid(False)

        # Make all subplots have same xticks
        ax.set_xticks(xticks, labels=xticklabels)
        # Remove ticks except for final axis
        if i < nsegments - 1:
            ax.set_xticks([])

        if title and i == 0:
            ax.set_title(title)

    ax.set_xlabel("time (mm:ss)" if tr else "time-points (index)")
    ax.spines["bottom"].set_position(("outward", 5))
    ax.spines["bottom"].set_color("k")
    ax.spines["bottom"].set_linewidth(0.8)
    ax.spines["bottom"].set_visible(True)

    if nsegments == 1:
        ax.set_ylabel(next(iter(segments)))

    if legend:
        from matplotlib.patches import Patch
        from mpl_toolkits.axes_grid1.inset_locator import inset_axes

        axlegend = inset_axes(
            ax,
            width="100%",
            height=0.01,
            loc="lower center",
            borderpad=-4.1,
        )
        axlegend.grid(False)
        axlegend.set_xticks([])
        axlegend.set_yticks([])
        axlegend.patch.set_alpha(0.0)
        for loc in ("top", "bottom", "left", "right"):
            axlegend.spines[loc].set_color("none")
            axlegend.spines[loc].set_visible(False)

        axlegend.legend(
            handles=[
                Patch(color=colors[i], label=_label) for i, _label in enumerate(segments.keys())
            ],
            loc="upper center",
            bbox_to_anchor=(0.5, 0),
            shadow=False,
            fancybox=False,
            ncol=min(len(segments.keys()), 5),
            frameon=False,
            prop={"size": fontsize} if fontsize else {},
        )

    if fontsize is not None:
        ax.xaxis.label.set_fontsize(max(8, fontsize * 0.75))
        # Change font size according to figure size
        for item in [ax.title, ax.yaxis.label] + ax.get_xticklabels() + ax.get_yticklabels():
            item.set_fontsize(fontsize)

    if output_file is not None:
        figure.savefig(output_file, bbox_inches="tight")
        plt.close(figure)
        figure = None
        return output_file

    return gs




[docs]
def spikesplot(
    ts_z,
    outer_gs=None,
    tr=None,
    zscored=True,
    spike_thresh=6.0,
    title="Spike plot",
    ax=None,
    cmap="viridis",
    hide_x=True,
    nskip=0,
):
    """
    A spikes plot. Thanks to Bob Dogherty (this docstring needs be improved with proper ack)
    """

    if ax is None:
        ax = plt.gca()

    if outer_gs is not None:
        gs = GridSpecFromSubplotSpec(
            1, 2, subplot_spec=outer_gs, width_ratios=[1, 100], wspace=0.0
        )
        ax = plt.subplot(gs[1])

    # Define TR and number of frames
    if tr is None:
        tr = 1.0

    # Load timeseries, zscored slice-wise
    nslices = ts_z.shape[0]
    ntsteps = ts_z.shape[1]

    # Load a colormap
    my_cmap = mpl.colormaps[cmap]
    norm = Normalize(vmin=0, vmax=float(nslices - 1))
    colors = [my_cmap(norm(sl)) for sl in range(nslices)]

    stem = len(np.unique(ts_z).tolist()) == 2
    # Plot one line per axial slice timeseries
    for sl in range(nslices):
        if not stem:
            ax.plot(ts_z[sl, :], color=colors[sl], lw=0.5)
        else:
            markerline, stemlines, baseline = ax.stem(ts_z[sl, :])
            plt.setp(markerline, "markerfacecolor", colors[sl])
            plt.setp(baseline, "color", colors[sl], "linewidth", 1)
            plt.setp(stemlines, "color", colors[sl], "linewidth", 1)

    # Handle X, Y axes
    ax.grid(False)

    # Handle X axis
    last = ntsteps - 1
    ax.set_xlim(0, last)
    xticks = list(range(0, last)[::20]) + [last] if not hide_x else []
    ax.set_xticks(xticks)

    if not hide_x:
        if tr is None:
            ax.set_xlabel("time (frame #)")
        else:
            ax.set_xlabel("time (s)")
            ax.set_xticklabels([f"{t:.2f}" for t in (tr * np.array(xticks)).tolist()])

    # Handle Y axis
    ylabel = "slice-wise noise average on background"
    if zscored:
        ylabel += " (z-scored)"
        zs_max = np.abs(ts_z).max()
        ax.set_ylim(
            (
                -(np.abs(ts_z[:, nskip:]).max()) * 1.05,
                (np.abs(ts_z[:, nskip:]).max()) * 1.05,
            )
        )

        ytick_vals = np.arange(0.0, zs_max, float(np.floor(zs_max / 2.0)))
        yticks = list(reversed((-1.0 * ytick_vals[ytick_vals > 0]).tolist())) + ytick_vals.tolist()

        # TODO plot min/max or mark spikes
        # yticks.insert(0, ts_z.min())
        # yticks += [ts_z.max()]
        for val in ytick_vals:
            ax.plot((0, ntsteps - 1), (-val, -val), "k:", alpha=0.2)
            ax.plot((0, ntsteps - 1), (val, val), "k:", alpha=0.2)

        # Plot spike threshold
        if zs_max < spike_thresh:
            ax.plot((0, ntsteps - 1), (-spike_thresh, -spike_thresh), "k:")
            ax.plot((0, ntsteps - 1), (spike_thresh, spike_thresh), "k:")
    else:
        yticks = [
            ts_z[:, nskip:].min(),
            np.median(ts_z[:, nskip:]),
            ts_z[:, nskip:].max(),
        ]
        ax.set_ylim(0, max(yticks[-1] * 1.05, (yticks[-1] - yticks[0]) * 2.0 + yticks[-1]))
        # ax.set_ylim(ts_z[:, nskip:].min() * 0.95,
        #             ts_z[:, nskip:].max() * 1.05)

    ax.annotate(
        ylabel,
        xy=(0.0, 0.7),
        xycoords="axes fraction",
        xytext=(0, 0),
        textcoords="offset points",
        va="center",
        ha="left",
        color="gray",
        size=plt.rcParams["font.size"] * 0.5,
        bbox={
            "boxstyle": "round",
            "fc": "w",
            "ec": "none",
            "color": "none",
            "lw": 0,
            "alpha": 0.8,
        },
    )
    ax.set_yticks([])
    ax.set_yticklabels([])

    # if yticks:
    #     # ax.set_yticks(yticks)
    #     # ax.set_yticklabels(['%.02f' % y for y in yticks])
    #     # Plot maximum and minimum horizontal lines
    #     ax.plot((0, ntsteps - 1), (yticks[0], yticks[0]), 'k:')
    #     ax.plot((0, ntsteps - 1), (yticks[-1], yticks[-1]), 'k:')

    for side in ["top", "right"]:
        ax.spines[side].set_color("none")
        ax.spines[side].set_visible(False)

    if not hide_x:
        ax.spines["bottom"].set_position(("outward", 10))
        ax.xaxis.set_ticks_position("bottom")
    else:
        ax.spines["bottom"].set_color("none")
        ax.spines["bottom"].set_visible(False)

    # ax.spines["left"].set_position(('outward', 30))
    # ax.yaxis.set_ticks_position('left')
    ax.spines["left"].set_visible(False)
    ax.spines["left"].set_color(None)

    # labels = [label for label in ax.yaxis.get_ticklabels()]
    # labels[0].set_weight('bold')
    # labels[-1].set_weight('bold')
    if title:
        ax.set_title(title)
    return ax




[docs]
def confoundplot(
    tseries,
    gs_ts,
    gs_dist=None,
    name=None,
    units=None,
    tr=None,
    hide_x=True,
    color="b",
    nskip=0,
    cutoff=None,
    ylims=None,
):
    # Define TR and number of frames
    notr = False
    if tr is None:
        notr = True
        tr = 1.0
    ntsteps = len(tseries)
    tseries = np.array(tseries)

    # Define nested GridSpec
    gs = GridSpecFromSubplotSpec(1, 2, subplot_spec=gs_ts, width_ratios=[1, 100], wspace=0.0)

    ax_ts = plt.subplot(gs[1])
    ax_ts.grid(False)

    # Set 10 frame markers in X axis
    interval = max((ntsteps // 10, ntsteps // 5, 1))
    xticks = list(np.arange(0, ntsteps)[::interval])
    ax_ts.set_xticks(xticks)

    if not hide_x:
        if notr:
            ax_ts.set_xlabel("time (frame #)")
        else:
            ax_ts.set_xlabel("time (s)")
            labels = tr * np.array(xticks)
            ax_ts.set_xticklabels([f"{t:.02f}" for t in labels.tolist()])
    else:
        ax_ts.set_xticklabels([])

    fontsize = plt.rcParams["font.size"]
    if name is not None:
        if units is not None:
            name += f" [{units}]"

        ax_ts.annotate(
            name,
            xy=(0.0, 0.2),
            xytext=(0, 0),
            xycoords="axes fraction",
            textcoords="offset points",
            va="top",
            ha="left",
            color=color,
            size=fontsize * 0.45,
            bbox={
                "boxstyle": "round",
                "fc": "w",
                "ec": "none",
                "color": "none",
                "lw": 0,
                "alpha": 0.8,
            },
        )

    for side in ["top", "right"]:
        ax_ts.spines[side].set_color("none")
        ax_ts.spines[side].set_visible(False)

    if not hide_x:
        ax_ts.spines["bottom"].set_position(("outward", 20))
        ax_ts.xaxis.set_ticks_position("bottom")
    else:
        ax_ts.spines["bottom"].set_color("none")
        ax_ts.spines["bottom"].set_visible(False)

    # ax_ts.spines["left"].set_position(('outward', 30))
    ax_ts.spines["left"].set_color("none")
    ax_ts.spines["left"].set_visible(False)
    # ax_ts.yaxis.set_ticks_position('left')

    ax_ts.set_yticks([])
    ax_ts.set_yticklabels([])

    # Skip non-steady-state volumes for statistics and Y limits
    valseries = tseries[nskip:]
    nonnan = valseries[~np.isnan(valseries)]
    if nonnan.size > 0:
        # Calculate Y limits
        valrange = nonnan.max() - nonnan.min()
        def_ylims = [nonnan.min() - 0.1 * valrange, nonnan.max() + 0.1 * valrange]
        if ylims is not None:
            if ylims[0] is not None:
                def_ylims[0] = min([def_ylims[0], ylims[0]])
            if ylims[1] is not None:
                def_ylims[1] = max([def_ylims[1], ylims[1]])

        # Add space for plot title and mean/SD annotation
        def_ylims[0] -= 0.1 * (def_ylims[1] - def_ylims[0])

        ax_ts.set_ylim(def_ylims)

        # Annotate stats
        maxv = nonnan.max()
        mean = nonnan.mean()
        stdv = nonnan.std()
        p95 = np.percentile(nonnan, 95.0)
    else:
        maxv = 0
        mean = 0
        stdv = 0
        p95 = 0

    units = units or ""
    stats_label = (
        rf"max: {maxv:.3f}{units} $\bullet$ mean: {mean:.3f}{units} "
        rf"$\bullet$ $\sigma$: {stdv:.3f}"
    )
    ax_ts.annotate(
        stats_label,
        xy=(0.98, 0.1),
        xycoords="axes fraction",
        xytext=(0, 0),
        textcoords="offset points",
        va="top",
        ha="right",
        color=color,
        size=fontsize * 0.5,
        bbox={
            "boxstyle": "round",
            "fc": "w",
            "ec": "none",
            "color": "none",
            "lw": 0,
            "alpha": 0.8,
        },
    )

    # Annotate percentile 95
    ax_ts.plot((0, ntsteps - 1), [p95] * 2, linewidth=0.1, color="lightgray")
    ax_ts.annotate(
        f"{p95:.2f}",
        xy=(0, p95),
        xytext=(-1, 0),
        textcoords="offset points",
        va="center",
        ha="right",
        color="lightgray",
        size=fontsize * 0.4,
    )

    if cutoff is None:
        cutoff = []

    for thr in cutoff:
        ax_ts.plot((0, ntsteps - 1), [thr] * 2, linewidth=0.2, color="dimgray")

        ax_ts.annotate(
            f"{thr:.2f}",
            xy=(0, thr),
            xytext=(-1, 0),
            textcoords="offset points",
            va="center",
            ha="right",
            color="dimgray",
            size=fontsize * 0.4,
        )

    ax_ts.plot(tseries, color=color, linewidth=0.8)
    ax_ts.set_xlim((0, ntsteps - 1))

    if gs_dist is not None:
        ax_dist = plt.subplot(gs_dist)
        sns.displot(tseries, vertical=True, ax=ax_dist)
        ax_dist.set_xlabel("Timesteps")
        ax_dist.set_ylim(ax_ts.get_ylim())
        ax_dist.set_yticklabels([])

        return [ax_ts, ax_dist], gs
    return ax_ts, gs



def _ward_to_linkage(children, n_leaves, distances):
    """Create linkage matrix from the output of Ward clustering."""
    # create the counts of samples under each node
    counts = np.zeros(children.shape[0])
    n_samples = n_leaves
    for i, merge in enumerate(children):
        current_count = 0
        for child_idx in merge:
            current_count += 1 if child_idx < n_samples else counts[child_idx - n_samples]
        counts[i] = current_count

    return np.column_stack([children, distances, counts]).astype(float)



[docs]
def confounds_correlation_plot(
    confounds_file,
    columns=None,
    figure=None,
    max_dim=20,
    output_file=None,
    reference="global_signal",
    ignore_initial_volumes=0,
):
    """
    Generate a bar plot with the correlation of confounds.

    Parameters
    ----------
    confounds_file: :obj:`str`
        File containing all confound regressors to be included in the
        correlation plot.
    figure: figure or None
        Existing figure on which to plot.
    columns: :obj:`list` or :obj:`None`.
        Select a list of columns from the dataset.
    max_dim: :obj:`int`
        The maximum number of regressors to be included in the output plot.
        Reductions (e.g., CompCor) of high-dimensional data can yield so many
        regressors that the correlation structure becomes obfuscated. This
        criterion selects the ``max_dim`` regressors that have the largest
        correlation magnitude with ``reference`` for inclusion in the plot.
    output_file: :obj:`str` or :obj:`None`
        Path where the output figure should be saved. If this is not defined,
        then the plotting axes will be returned instead of the saved figure
        path.
    reference: :obj:`str`
        ``confounds_correlation_plot`` prepares a bar plot of the correlations
        of each confound regressor with a reference column. By default, this
        is the global signal (so that collinearities with the global signal
        can readily be assessed).
    ignore_initial_volumes : :obj:`int`
        Number of non-steady-state volumes at the beginning of the scan to ignore.

    Returns
    -------
    axes and gridspec
        Plotting axes and gridspec. Returned only if ``output_file`` is ``None``.
    output_file: :obj:`str`
        The file where the figure is saved.
    """

    confounds_data = pd.read_table(confounds_file)

    if columns:
        columns = dict.fromkeys(columns)  # Drop duplicates
        columns[reference] = None  # Make sure the reference is included
        confounds_data = confounds_data[list(columns)]

    confounds_data = confounds_data.loc[
        ignore_initial_volumes:,
        np.logical_not(np.isclose(confounds_data.var(skipna=True), 0)),
    ]
    corr = confounds_data.corr()

    gscorr = corr.copy()
    gscorr["index"] = gscorr.index
    gscorr[reference] = np.abs(gscorr[reference])
    gs_descending = gscorr.sort_values(by=reference, ascending=False)["index"]
    n_vars = corr.shape[0]
    max_dim = min(n_vars, max_dim)

    gs_descending = gs_descending[:max_dim]
    features = [p for p in corr.columns if p in gs_descending]
    corr = corr.loc[features, features]
    np.fill_diagonal(corr.values, 0)

    figure = figure if figure is not None else plt.figure(figsize=(15, 5))
    gs = GridSpec(1, 21)
    ax0 = plt.subplot(gs[0, :10])
    ax1 = plt.subplot(gs[0, 11:])

    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    sns.heatmap(corr, linewidths=0.5, cmap="coolwarm", center=0, square=True, ax=ax0)
    ax0.tick_params(axis="both", which="both", width=0)

    for label in ax0.xaxis.get_majorticklabels():
        label.set_fontsize("small")
    for label in ax0.yaxis.get_majorticklabels():
        label.set_fontsize("small")
    sns.barplot(
        data=gscorr,
        x="index",
        y=reference,
        hue="index",
        ax=ax1,
        order=gs_descending,
        palette="Reds_d",
        saturation=0.5,
        legend=False,
    )

    ax1.set_xlabel("Confound time series")
    ax1.set_ylabel(f"Magnitude of correlation with {reference}")
    ax1.tick_params(axis="x", which="both", width=0)
    ax1.tick_params(axis="y", which="both", width=5, length=5)

    for label in ax1.xaxis.get_majorticklabels():
        label.set_fontsize("small")
        label.set_rotation("vertical")
    for label in ax1.yaxis.get_majorticklabels():
        label.set_fontsize("small")
    for side in ["top", "right", "left"]:
        ax1.spines[side].set_color("none")
        ax1.spines[side].set_visible(False)

    if output_file is not None:
        figure.savefig(output_file, bbox_inches="tight")
        plt.close(figure)
        figure = None
        return output_file
    return [ax0, ax1], gs



def _plot_density(x, y, df, group_name, palette, orient):
    ax = sns.violinplot(
        x=x,
        y=y,
        data=df,
        hue=group_name,
        dodge=False,
        palette=palette,
        density_norm="width",
        inner=None,
        orient=orient,
    )

    # Cut half of the violins
    for violin in ax.collections:
        bbox = violin.get_paths()[0].get_extents()
        x0, y0, width, height = bbox.bounds
        width_denom = 2
        height_denom = 1
        if orient == "h":
            width_denom = 1
            height_denom = 2
        violin.set_clip_path(
            plt.Rectangle(
                (x0, y0), width / width_denom, height / height_denom, transform=ax.transData
            )
        )

    return ax


def _jitter_data_points(old_len_collections, orient, width, ax):
    offset = np.array([width, 0])
    if orient == "h":
        offset = np.array([0, width])
    for dots in ax.collections[old_len_collections:]:
        dots.set_offsets(dots.get_offsets() + offset)


def _plot_nans(df, x, y, color, orient, ax):
    df_nans = df[df.isna().any(axis=1)]
    sns.stripplot(
        x=x,
        y=y,
        data=df_nans,
        color=color,
        orient=orient,
        ax=ax,
    )


def _plot_out_of_range(
    df,
    x,
    feature,
    orient,
    limit_offset,
    limit_value,
    limit_color,
    limit_name,
    color_vble_name,
    _op,
    ax,
):
    if limit_color is None:
        raise ValueError(
            f"``{color_vble_name}`` must be provided if ``{limit_name}`` is provided."
        )
    if limit_offset is None:
        raise ValueError(f"``limit_offset`` must be provided if ``{limit_name}`` is provided.")
    if _op == operator.gt:
        arithm = operator.add
    elif _op == operator.lt:
        arithm = operator.sub
    else:
        raise ValueError(f"``{_op}`` must be either ``gt`` or ``lt``.")

    df_overflow = df[_op(df[feature], limit_value)]
    sns.stripplot(
        x=x,
        y=arithm(limit_value, limit_offset),
        data=df_overflow,
        color=limit_color,
        orient=orient,
        ax=ax,
    )



[docs]
def plot_raincloud(
    data_file,
    group_name,
    feature,
    palette="Set2",
    orient="v",
    density=True,
    upper_limit_value=None,
    upper_limit_color="gray",
    lower_limit_value=None,
    lower_limit_color="gray",
    limit_offset=None,
    mark_nans=True,
    nans_value=None,
    nans_color="black",
    figure=None,
    output_file=None,
):
    """
    Generate a raincloud plot with the data points corresponding to the
    ``feature`` value contained in the data file. If ``upper_limit_value`` or
    ``lower_limit_value`` is provided, the values outside that range are
    clipped. Thus, a large density around those values, together with the values
    plot with the distinctive ``upper_limit_color`` and ``lower_limit_color``
    styles may be indicative of unexpected values in the data. Similarly, NaN
    values, if present, will be marked with the distinctive ``nans_color``
    style, and may again be indicative of unexpected values in the data.

    Parameters
    ----------
    data_file : :obj:`str`
        File containing the data of interest.
    figure : :obj:`matplotlib.pyplot.figure` or None
        Existing figure on which to plot.
    group_name : :obj:`str`
        The group name of interest to be plot.
    feature : :obj:`str`
        The feature of interest to be plot.
    palette : :obj:`str`, optional
        Color palette name provided to :func:`sns.stripplot`.
    orient : :obj:`str`, optional
        Plot orientation (``v`` or ``h``).
    density : :obj:`bool`, optional
        ``True`` to plot the density of the data points.
    upper_limit_value : :obj:`float`, optional
        Upper limit value over which any value in the data will be styled with a
        different style.
    upper_limit_color : :obj:`str`, optional
        Color name to represent values over ``upper_limit_value``.
    lower_limit_value : :obj:`float`, optional
        Lower limit value under which any value in the data will be styled with
        a different style.
    lower_limit_color : :obj:`str`, optional
        Color name to represent values under ``lower_limit_value``.
    limit_offset : :obj:`float`, optional
        Offset to plot the values over/under the upper/lower limit values.
    mark_nans : :obj:`bool`, optional
        ``True`` to plot NaNs as dots. ``nans_values`` must be provided if True.
    nans_value : :obj:`float`, optional
        Value to use for NaN values.
    nans_color : :obj:`str`, optional
        Color name to represent NaN values.
    output_file : :obj:`str` or :obj:`None`
        Path where the output figure should be saved. If this is not defined,
        then the plotting axes will be returned instead of the saved figure
        path.

    Returns
    -------
    axes and gridspec
        Plotting axes and gridspec. Returned only if ``output_file`` is ``None``.
    output_file : :obj:`str`
        The file where the figure is saved.
    """

    df = pd.read_csv(data_file, sep=r"[\t\s]+", engine="python")

    df_clip = df.copy(deep=True)
    df_clip[feature] = df[feature].clip(lower=lower_limit_value, upper=upper_limit_value)

    figure = figure if figure is not None else plt.figure(figsize=(7, 5))
    gs = GridSpec(1, 1)
    ax = plt.subplot(gs[0, 0])

    sns.set(style="white", font_scale=2)

    x = feature
    y = group_name
    # Swap x/y if the requested orientation is vertical
    if orient == "v":
        x = group_name
        y = feature

    # Plot the density
    if density:
        ax = _plot_density(x, y, df_clip, group_name, palette, orient)

    # Add boxplots
    width = 0.15
    sns.boxplot(
        x=x,
        y=y,
        data=df_clip,
        color="black",
        width=width,
        zorder=10,
        showcaps=True,
        boxprops={"facecolor": "none", "zorder": 10},
        showfliers=True,
        whiskerprops={"linewidth": 2, "zorder": 10},
        saturation=1,
        orient=orient,
        ax=ax,
    )

    old_len_collections = len(ax.collections)

    # Plot the data points as dots
    sns.stripplot(
        x=x,
        y=y,
        hue=group_name,
        data=df_clip,
        palette=palette,
        edgecolor="white",
        size=3,
        jitter=0.1,
        zorder=0,
        orient=orient,
        ax=ax,
    )

    # Offset the dots that would be otherwise shadowed by the violins
    if density:
        _jitter_data_points(old_len_collections, orient, width, ax)

    # Draw nans if any
    if mark_nans:
        if nans_value is None:
            raise ValueError("``nans_value`` must be provided if ``mark_nans`` is True.")
        _plot_nans(df, x, nans_value, nans_color, orient, ax)

    # If upper/lower limits are provided, draw the points with a different color
    if upper_limit_value is not None:
        _plot_out_of_range(
            df,
            x,
            feature,
            orient,
            limit_offset,
            upper_limit_value,
            upper_limit_color,
            "upper_limit_value",
            "upper_limit_color",
            operator.gt,
            ax,
        )

    if lower_limit_value is not None:
        _plot_out_of_range(
            df,
            x,
            feature,
            orient,
            limit_offset,
            lower_limit_value,
            lower_limit_color,
            "lower_limit_value",
            "lower_limit_color",
            operator.lt,
            ax,
        )

    if output_file is not None:
        plt.tight_layout()
        figure.savefig(output_file, bbox_inches="tight")
        plt.close(figure)
        figure = None
        return output_file

    return ax, gs