Source code for nireports.reportlets.nuisance

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
#
# Copyright 2023 The NiPreps Developers <nipreps@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# We support and encourage derived works from this project, please read
# about our expectations at
#
#     https://www.nipreps.org/community/licensing/
#
# STATEMENT OF CHANGES: This file was ported carrying over full git history from
# other NiPreps projects licensed under the Apache-2.0 terms.
"""Plotting distributions."""

import math
import operator
import os.path as op

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_pdf import FigureCanvasPdf as FigureCanvas
from matplotlib.colors import Normalize
from matplotlib.gridspec import GridSpec, GridSpecFromSubplotSpec

from nireports.tools.ndimage import _get_values_inside_a_mask

DEFAULT_DPI = 300
DINA4_LANDSCAPE = (11.69, 8.27)
DINA4_PORTRAIT = (8.27, 11.69)


[docs] def plot_fd(fd_file, fd_radius, mean_fd_dist=None, figsize=DINA4_LANDSCAPE): fd_power = _calc_fd(fd_file, fd_radius) fig = plt.Figure(figsize=figsize) FigureCanvas(fig) if mean_fd_dist is not None: grid = GridSpec(2, 4) else: grid = GridSpec(1, 2, width_ratios=[3, 1]) grid.update(hspace=1.0, right=0.95, left=0.1, bottom=0.2) ax = fig.add_subplot(grid[0, :-1]) ax.plot(fd_power) ax.set_xlim((0, len(fd_power))) ax.set_ylabel("Frame Displacement [mm]") ax.set_xlabel("Frame number") ylim = ax.get_ylim() ax = fig.add_subplot(grid[0, -1]) sns.histplot(y=fd_power, kde=True, stat="density", kde_kws={"cut": 3}, edgecolor=None, ax=ax) ax.set_ylim(ylim) if mean_fd_dist is not None: ax = fig.add_subplot(grid[1, :]) sns.histplot( mean_fd_dist, kde=True, stat="density", kde_kws={"cut": 3}, edgecolor=None, ax=ax ) ax.set_xlabel("Mean Frame Displacement (over all subjects) [mm]") mean_fd = fd_power.mean() label = f"$\\overline{{\\mathrm{{FD}}}}$ = {mean_fd:g}" plot_vline(mean_fd, label, ax=ax) return fig
[docs] def plot_dist( main_file, mask_file, xlabel, distribution=None, xlabel2=None, figsize=DINA4_LANDSCAPE, ): data = _get_values_inside_a_mask(main_file, mask_file) fig = plt.Figure(figsize=figsize) FigureCanvas(fig) gsp = GridSpec(2, 1) ax = fig.add_subplot(gsp[0, 0]) sns.histplot(data.astype(np.double), kde=False, bins=100, edgecolor=None, ax=ax) ax.set_xlabel(xlabel) ax = fig.add_subplot(gsp[1, 0]) sns.histplot( np.array(distribution, dtype="f8"), kde=True, stat="density", kde_kws={"cut": 3}, edgecolor=None, ax=ax, ) cur_val = np.median(data) label = f"{cur_val:g}" plot_vline(cur_val, label, ax=ax) ax.set_xlabel(xlabel2) return fig
[docs] def plot_vline(cur_val, label, ax): ax.axvline(cur_val) ylim = ax.get_ylim() vloc = (ylim[0] + ylim[1]) / 2.0 xlim = ax.get_xlim() pad = (xlim[0] + xlim[1]) / 100.0 ax.text( cur_val - pad, vloc, label, color="blue", rotation=90, verticalalignment="center", horizontalalignment="right", )
def _calc_rows_columns(ratio, n_images): rows = 2 for _ in range(100): columns = math.floor(ratio * rows) total = (rows - 1) * columns if total > n_images: rows = np.ceil(n_images / columns) + 1 break rows += 1 return int(rows), int(columns) def _calc_fd(fd_file, fd_radius): from math import pi with open(fd_file) as f: lines = f.readlines() rows = [[float(x) for x in line.split()] for line in lines] cols = np.array([list(col) for col in zip(*rows)]) translations = np.transpose(np.abs(np.diff(cols[0:3, :]))) rotations = np.transpose(np.abs(np.diff(cols[3:6, :]))) fd_power = np.sum(translations, axis=1) + (fd_radius * pi / 180) * np.sum(rotations, axis=1) # FD is zero for the first time point fd_power = np.insert(fd_power, 0, 0) return fd_power def _get_mean_fd_distribution(fd_files, fd_radius): mean_fds = [] max_fds = [] for fd_file in fd_files: fd_power = _calc_fd(fd_file, fd_radius) mean_fds.append(fd_power.mean()) max_fds.append(fd_power.max()) return mean_fds, max_fds
[docs] def plot_qi2(x_grid, ref_pdf, fit_pdf, ref_data, cutoff_idx, out_file=None): fig, ax = plt.subplots() ax.plot( x_grid, ref_pdf, linewidth=2, alpha=0.5, label="background", color="dodgerblue", ) refmax = np.percentile(ref_data, 99.95) x_max = x_grid[-1] ax.hist( ref_data, 40 * max(int(refmax / x_max), 1), fc="dodgerblue", histtype="stepfilled", alpha=0.2, density=True, ) fit_pdf[fit_pdf > 1.0] = np.nan ax.plot( x_grid, fit_pdf, linewidth=2, alpha=0.5, label="chi2", color="darkorange", ) ylims = ax.get_ylim() ax.axvline( x_grid[-cutoff_idx], ymax=ref_pdf[-cutoff_idx] / ylims[1], color="dodgerblue", ) plt.xlabel('Intensity within "hat" mask') plt.ylabel("Frequency") ax.set_xlim([0, x_max]) plt.legend() if out_file is None: out_file = op.abspath("qi2_plot.svg") fig.savefig(out_file, bbox_inches="tight", pad_inches=0, dpi=300) plt.close(fig=fig) return out_file
[docs] def plot_carpet( data, segments=None, cmap=None, tr=None, detrend=True, subplot=None, title=None, output_file=None, size=(900, 1200), sort_rows="ward", drop_trs=0, legend=True, fontsize=None, ): """ Plot an image representation of voxel intensities across time. This kind of plot is known as "carpet plot" or "Power plot". See Jonathan Power Neuroimage 2017 Jul 1; 154:150-158. Parameters ---------- data : N x T :obj:`numpy.array` The functional data to be plotted (*N* sampling locations by *T* timepoints). segments: :obj:`dict`, optional A mapping between segment labels (e.g., `"Left Cortex"`) and list of indexes in the data array. cmap : colormap Overrides the generation of an automated colormap. tr : float , optional Specify the TR, if specified it uses this value. If left as None, # of frames is plotted instead of time. detrend : :obj:`bool`, optional Detrend and standardize the data prior to plotting. subplot : matplotlib subplot, optional Subplot to plot figure on. title : string, optional The title displayed on the figure. output_file : string, or None, optional The name of an image file to export the plot to. Valid extensions are .png, .pdf, .svg. If output_file is not None, the plot is saved to a file, and the display is closed. size : :obj:`tuple` Maximum number of samples to plot (voxels, timepoints) sort_rows : :obj:`str` or :obj:`False` or :obj:`None` Apply a clustering algorithm to reorganize the rows of the carpet. ``""``, ``False``, and ``None`` skip clustering sorting. ``"linkage"`` uses linkage hierarchical clustering :obj:`scipy.cluster.hierarchy.linkage`. Any other value that Python evaluates to ``True`` will use the default clustering, which is :obj:`sklearn.cluster.ward_tree`. """ if segments is None: segments = {"whole brain (voxels)": list(range(data.shape[0]))} nsegments = len(segments) if nsegments == 1: legend = False if cmap is None: colors = mpl.colormaps["tab10"].colors elif cmap == "paired": colors = list(mpl.colormaps["Paired"].colors) colors[0], colors[1] = colors[1], colors[0] colors[2], colors[7] = colors[7], colors[2] if detrend: from nilearn.signal import clean data = clean(data.T, t_r=tr, filter=False).T # We want all subplots to have the same dynamic range vminmax = np.percentile(data[:, drop_trs:], [2, 98]) # Decimate number of time-series before clustering n_dec = int((1.8 * data.shape[0]) // size[0]) if n_dec > 1: segments = { lab: idx[::n_dec] for lab, idx in segments.items() if np.array(idx).shape >= (1,) } # Cluster segments (if argument enabled) if sort_rows: from scipy.cluster.hierarchy import dendrogram, linkage from sklearn.cluster import ward_tree for seg_label, seg_idx in segments.items(): # In debugging cases, we might have ROIs too small to have enough rows to sort if len(seg_idx) < 2: continue roi_data = data[seg_idx] if isinstance(sort_rows, str) and sort_rows.lower() == "linkage": linkage_matrix = linkage( roi_data, method="average", metric="euclidean", optimal_ordering=True ) else: children, _, n_leaves, _, distances = ward_tree(roi_data, return_distance=True) linkage_matrix = _ward_to_linkage(children, n_leaves, distances) dn = dendrogram(linkage_matrix, no_plot=True) # Override the ordering of the indices in this segment segments[seg_label] = np.array(seg_idx)[np.array(dn["leaves"])] # If subplot is not defined if subplot is None: figure, allaxes = plt.subplots(figsize=(19.2, 10)) subplot = allaxes.get_subplotspec() else: subplotax = plt.subplot(subplot) figure = subplotax.get_figure() # Remove spines and ticks in all figure's axes for ax in figure.axes: ax.spines[:].set_visible(False) ax.spines[:].set_color("none") ax.get_xaxis().set_visible(False) ax.get_yaxis().set_visible(False) fontsize = fontsize or 24 # Length before decimation n_trs = data.shape[-1] # Calculate time decimation factor t_step = max(n_trs // size[1], 1) # Skip over dropped TRs if this doesn't reduce the number of time points t_start = min(t_step - 1, drop_trs) t_stop = size[1] * t_step + t_start data = data[:, t_start:t_stop:t_step] xticks = np.linspace(0, data.shape[-1] - 1, endpoint=True, num=7) idx_ticks = t_step * xticks + t_start if tr is not None: xticklabels = [f"{t // 60:02d}:{t % 60:02d}" for t in np.rint(tr * idx_ticks).astype(int)] else: xticklabels = np.rint(idx_ticks).astype(int) # Define nested GridSpec gs = GridSpecFromSubplotSpec( nsegments, 1, subplot_spec=subplot, hspace=0.05, height_ratios=[len(v) for v in segments.values()], ) for i, indices in enumerate(segments.values()): # Carpet plot ax = plt.subplot(gs[i]) ax.imshow( data[indices, :], interpolation="nearest", aspect="auto", cmap="gray", vmin=vminmax[0], vmax=vminmax[1], ) # Toggle the spine objects ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) ax.spines["bottom"].set_visible(False) # Make colored left axis ax.spines["left"].set_linewidth(3) ax.spines["left"].set_color(colors[i]) ax.spines["left"].set_capstyle("butt") ax.spines["left"].set_position(("outward", 2)) ax.set_yticks([]) ax.grid(False) # Make all subplots have same xticks ax.set_xticks(xticks, labels=xticklabels) # Remove ticks except for final axis if i < nsegments - 1: ax.set_xticks([]) if title and i == 0: ax.set_title(title) ax.set_xlabel("time (mm:ss)" if tr else "time-points (index)") ax.spines["bottom"].set_position(("outward", 5)) ax.spines["bottom"].set_color("k") ax.spines["bottom"].set_linewidth(0.8) ax.spines["bottom"].set_visible(True) if nsegments == 1: ax.set_ylabel(next(iter(segments))) if legend: from matplotlib.patches import Patch from mpl_toolkits.axes_grid1.inset_locator import inset_axes axlegend = inset_axes( ax, width="100%", height=0.01, loc="lower center", borderpad=-4.1, ) axlegend.grid(False) axlegend.set_xticks([]) axlegend.set_yticks([]) axlegend.patch.set_alpha(0.0) for loc in ("top", "bottom", "left", "right"): axlegend.spines[loc].set_color("none") axlegend.spines[loc].set_visible(False) axlegend.legend( handles=[ Patch(color=colors[i], label=_label) for i, _label in enumerate(segments.keys()) ], loc="upper center", bbox_to_anchor=(0.5, 0), shadow=False, fancybox=False, ncol=min(len(segments.keys()), 5), frameon=False, prop={"size": fontsize} if fontsize else {}, ) if fontsize is not None: ax.xaxis.label.set_fontsize(max(8, fontsize * 0.75)) # Change font size according to figure size for item in [ax.title, ax.yaxis.label] + ax.get_xticklabels() + ax.get_yticklabels(): item.set_fontsize(fontsize) if output_file is not None: figure.savefig(output_file, bbox_inches="tight") plt.close(figure) figure = None return output_file return gs
[docs] def spikesplot( ts_z, outer_gs=None, tr=None, zscored=True, spike_thresh=6.0, title="Spike plot", ax=None, cmap="viridis", hide_x=True, nskip=0, ): """ A spikes plot. Thanks to Bob Dogherty (this docstring needs be improved with proper ack) """ if ax is None: ax = plt.gca() if outer_gs is not None: gs = GridSpecFromSubplotSpec( 1, 2, subplot_spec=outer_gs, width_ratios=[1, 100], wspace=0.0 ) ax = plt.subplot(gs[1]) # Define TR and number of frames if tr is None: tr = 1.0 # Load timeseries, zscored slice-wise nslices = ts_z.shape[0] ntsteps = ts_z.shape[1] # Load a colormap my_cmap = mpl.colormaps[cmap] norm = Normalize(vmin=0, vmax=float(nslices - 1)) colors = [my_cmap(norm(sl)) for sl in range(nslices)] stem = len(np.unique(ts_z).tolist()) == 2 # Plot one line per axial slice timeseries for sl in range(nslices): if not stem: ax.plot(ts_z[sl, :], color=colors[sl], lw=0.5) else: markerline, stemlines, baseline = ax.stem(ts_z[sl, :]) plt.setp(markerline, "markerfacecolor", colors[sl]) plt.setp(baseline, "color", colors[sl], "linewidth", 1) plt.setp(stemlines, "color", colors[sl], "linewidth", 1) # Handle X, Y axes ax.grid(False) # Handle X axis last = ntsteps - 1 ax.set_xlim(0, last) xticks = list(range(0, last)[::20]) + [last] if not hide_x else [] ax.set_xticks(xticks) if not hide_x: if tr is None: ax.set_xlabel("time (frame #)") else: ax.set_xlabel("time (s)") ax.set_xticklabels([f"{t:.2f}" for t in (tr * np.array(xticks)).tolist()]) # Handle Y axis ylabel = "slice-wise noise average on background" if zscored: ylabel += " (z-scored)" zs_max = np.abs(ts_z).max() ax.set_ylim( ( -(np.abs(ts_z[:, nskip:]).max()) * 1.05, (np.abs(ts_z[:, nskip:]).max()) * 1.05, ) ) ytick_vals = np.arange(0.0, zs_max, float(np.floor(zs_max / 2.0))) yticks = list(reversed((-1.0 * ytick_vals[ytick_vals > 0]).tolist())) + ytick_vals.tolist() # TODO plot min/max or mark spikes # yticks.insert(0, ts_z.min()) # yticks += [ts_z.max()] for val in ytick_vals: ax.plot((0, ntsteps - 1), (-val, -val), "k:", alpha=0.2) ax.plot((0, ntsteps - 1), (val, val), "k:", alpha=0.2) # Plot spike threshold if zs_max < spike_thresh: ax.plot((0, ntsteps - 1), (-spike_thresh, -spike_thresh), "k:") ax.plot((0, ntsteps - 1), (spike_thresh, spike_thresh), "k:") else: yticks = [ ts_z[:, nskip:].min(), np.median(ts_z[:, nskip:]), ts_z[:, nskip:].max(), ] ax.set_ylim(0, max(yticks[-1] * 1.05, (yticks[-1] - yticks[0]) * 2.0 + yticks[-1])) # ax.set_ylim(ts_z[:, nskip:].min() * 0.95, # ts_z[:, nskip:].max() * 1.05) ax.annotate( ylabel, xy=(0.0, 0.7), xycoords="axes fraction", xytext=(0, 0), textcoords="offset points", va="center", ha="left", color="gray", size=plt.rcParams["font.size"] * 0.5, bbox={ "boxstyle": "round", "fc": "w", "ec": "none", "color": "none", "lw": 0, "alpha": 0.8, }, ) ax.set_yticks([]) ax.set_yticklabels([]) # if yticks: # # ax.set_yticks(yticks) # # ax.set_yticklabels(['%.02f' % y for y in yticks]) # # Plot maximum and minimum horizontal lines # ax.plot((0, ntsteps - 1), (yticks[0], yticks[0]), 'k:') # ax.plot((0, ntsteps - 1), (yticks[-1], yticks[-1]), 'k:') for side in ["top", "right"]: ax.spines[side].set_color("none") ax.spines[side].set_visible(False) if not hide_x: ax.spines["bottom"].set_position(("outward", 10)) ax.xaxis.set_ticks_position("bottom") else: ax.spines["bottom"].set_color("none") ax.spines["bottom"].set_visible(False) # ax.spines["left"].set_position(('outward', 30)) # ax.yaxis.set_ticks_position('left') ax.spines["left"].set_visible(False) ax.spines["left"].set_color(None) # labels = [label for label in ax.yaxis.get_ticklabels()] # labels[0].set_weight('bold') # labels[-1].set_weight('bold') if title: ax.set_title(title) return ax
[docs] def confoundplot( tseries, gs_ts, gs_dist=None, name=None, units=None, tr=None, hide_x=True, color="b", nskip=0, cutoff=None, ylims=None, ): # Define TR and number of frames notr = False if tr is None: notr = True tr = 1.0 ntsteps = len(tseries) tseries = np.array(tseries) # Define nested GridSpec gs = GridSpecFromSubplotSpec(1, 2, subplot_spec=gs_ts, width_ratios=[1, 100], wspace=0.0) ax_ts = plt.subplot(gs[1]) ax_ts.grid(False) # Set 10 frame markers in X axis interval = max((ntsteps // 10, ntsteps // 5, 1)) xticks = list(np.arange(0, ntsteps)[::interval]) ax_ts.set_xticks(xticks) if not hide_x: if notr: ax_ts.set_xlabel("time (frame #)") else: ax_ts.set_xlabel("time (s)") labels = tr * np.array(xticks) ax_ts.set_xticklabels([f"{t:.02f}" for t in labels.tolist()]) else: ax_ts.set_xticklabels([]) fontsize = plt.rcParams["font.size"] if name is not None: if units is not None: name += f" [{units}]" ax_ts.annotate( name, xy=(0.0, 0.2), xytext=(0, 0), xycoords="axes fraction", textcoords="offset points", va="top", ha="left", color=color, size=fontsize * 0.45, bbox={ "boxstyle": "round", "fc": "w", "ec": "none", "color": "none", "lw": 0, "alpha": 0.8, }, ) for side in ["top", "right"]: ax_ts.spines[side].set_color("none") ax_ts.spines[side].set_visible(False) if not hide_x: ax_ts.spines["bottom"].set_position(("outward", 20)) ax_ts.xaxis.set_ticks_position("bottom") else: ax_ts.spines["bottom"].set_color("none") ax_ts.spines["bottom"].set_visible(False) # ax_ts.spines["left"].set_position(('outward', 30)) ax_ts.spines["left"].set_color("none") ax_ts.spines["left"].set_visible(False) # ax_ts.yaxis.set_ticks_position('left') ax_ts.set_yticks([]) ax_ts.set_yticklabels([]) # Skip non-steady-state volumes for statistics and Y limits valseries = tseries[nskip:] nonnan = valseries[~np.isnan(valseries)] if nonnan.size > 0: # Calculate Y limits valrange = nonnan.max() - nonnan.min() def_ylims = [nonnan.min() - 0.1 * valrange, nonnan.max() + 0.1 * valrange] if ylims is not None: if ylims[0] is not None: def_ylims[0] = min([def_ylims[0], ylims[0]]) if ylims[1] is not None: def_ylims[1] = max([def_ylims[1], ylims[1]]) # Add space for plot title and mean/SD annotation def_ylims[0] -= 0.1 * (def_ylims[1] - def_ylims[0]) ax_ts.set_ylim(def_ylims) # Annotate stats maxv = nonnan.max() mean = nonnan.mean() stdv = nonnan.std() p95 = np.percentile(nonnan, 95.0) else: maxv = 0 mean = 0 stdv = 0 p95 = 0 units = units or "" stats_label = ( rf"max: {maxv:.3f}{units} $\bullet$ mean: {mean:.3f}{units} " rf"$\bullet$ $\sigma$: {stdv:.3f}" ) ax_ts.annotate( stats_label, xy=(0.98, 0.1), xycoords="axes fraction", xytext=(0, 0), textcoords="offset points", va="top", ha="right", color=color, size=fontsize * 0.5, bbox={ "boxstyle": "round", "fc": "w", "ec": "none", "color": "none", "lw": 0, "alpha": 0.8, }, ) # Annotate percentile 95 ax_ts.plot((0, ntsteps - 1), [p95] * 2, linewidth=0.1, color="lightgray") ax_ts.annotate( f"{p95:.2f}", xy=(0, p95), xytext=(-1, 0), textcoords="offset points", va="center", ha="right", color="lightgray", size=fontsize * 0.4, ) if cutoff is None: cutoff = [] for thr in cutoff: ax_ts.plot((0, ntsteps - 1), [thr] * 2, linewidth=0.2, color="dimgray") ax_ts.annotate( f"{thr:.2f}", xy=(0, thr), xytext=(-1, 0), textcoords="offset points", va="center", ha="right", color="dimgray", size=fontsize * 0.4, ) ax_ts.plot(tseries, color=color, linewidth=0.8) ax_ts.set_xlim((0, ntsteps - 1)) if gs_dist is not None: ax_dist = plt.subplot(gs_dist) sns.displot(tseries, vertical=True, ax=ax_dist) ax_dist.set_xlabel("Timesteps") ax_dist.set_ylim(ax_ts.get_ylim()) ax_dist.set_yticklabels([]) return [ax_ts, ax_dist], gs return ax_ts, gs
def _ward_to_linkage(children, n_leaves, distances): """Create linkage matrix from the output of Ward clustering.""" # create the counts of samples under each node counts = np.zeros(children.shape[0]) n_samples = n_leaves for i, merge in enumerate(children): current_count = 0 for child_idx in merge: current_count += 1 if child_idx < n_samples else counts[child_idx - n_samples] counts[i] = current_count return np.column_stack([children, distances, counts]).astype(float)
[docs] def confounds_correlation_plot( confounds_file, columns=None, figure=None, max_dim=20, output_file=None, reference="global_signal", ignore_initial_volumes=0, ): """ Generate a bar plot with the correlation of confounds. Parameters ---------- confounds_file: :obj:`str` File containing all confound regressors to be included in the correlation plot. figure: figure or None Existing figure on which to plot. columns: :obj:`list` or :obj:`None`. Select a list of columns from the dataset. max_dim: :obj:`int` The maximum number of regressors to be included in the output plot. Reductions (e.g., CompCor) of high-dimensional data can yield so many regressors that the correlation structure becomes obfuscated. This criterion selects the ``max_dim`` regressors that have the largest correlation magnitude with ``reference`` for inclusion in the plot. output_file: :obj:`str` or :obj:`None` Path where the output figure should be saved. If this is not defined, then the plotting axes will be returned instead of the saved figure path. reference: :obj:`str` ``confounds_correlation_plot`` prepares a bar plot of the correlations of each confound regressor with a reference column. By default, this is the global signal (so that collinearities with the global signal can readily be assessed). ignore_initial_volumes : :obj:`int` Number of non-steady-state volumes at the beginning of the scan to ignore. Returns ------- axes and gridspec Plotting axes and gridspec. Returned only if ``output_file`` is ``None``. output_file: :obj:`str` The file where the figure is saved. """ confounds_data = pd.read_table(confounds_file) if columns: columns = dict.fromkeys(columns) # Drop duplicates columns[reference] = None # Make sure the reference is included confounds_data = confounds_data[list(columns)] confounds_data = confounds_data.loc[ ignore_initial_volumes:, np.logical_not(np.isclose(confounds_data.var(skipna=True), 0)), ] corr = confounds_data.corr() gscorr = corr.copy() gscorr["index"] = gscorr.index gscorr[reference] = np.abs(gscorr[reference]) gs_descending = gscorr.sort_values(by=reference, ascending=False)["index"] n_vars = corr.shape[0] max_dim = min(n_vars, max_dim) gs_descending = gs_descending[:max_dim] features = [p for p in corr.columns if p in gs_descending] corr = corr.loc[features, features] np.fill_diagonal(corr.values, 0) figure = figure if figure is not None else plt.figure(figsize=(15, 5)) gs = GridSpec(1, 21) ax0 = plt.subplot(gs[0, :10]) ax1 = plt.subplot(gs[0, 11:]) mask = np.zeros_like(corr, dtype=bool) mask[np.triu_indices_from(mask)] = True sns.heatmap(corr, linewidths=0.5, cmap="coolwarm", center=0, square=True, ax=ax0) ax0.tick_params(axis="both", which="both", width=0) for label in ax0.xaxis.get_majorticklabels(): label.set_fontsize("small") for label in ax0.yaxis.get_majorticklabels(): label.set_fontsize("small") sns.barplot( data=gscorr, x="index", y=reference, hue="index", ax=ax1, order=gs_descending, palette="Reds_d", saturation=0.5, legend=False, ) ax1.set_xlabel("Confound time series") ax1.set_ylabel(f"Magnitude of correlation with {reference}") ax1.tick_params(axis="x", which="both", width=0) ax1.tick_params(axis="y", which="both", width=5, length=5) for label in ax1.xaxis.get_majorticklabels(): label.set_fontsize("small") label.set_rotation("vertical") for label in ax1.yaxis.get_majorticklabels(): label.set_fontsize("small") for side in ["top", "right", "left"]: ax1.spines[side].set_color("none") ax1.spines[side].set_visible(False) if output_file is not None: figure.savefig(output_file, bbox_inches="tight") plt.close(figure) figure = None return output_file return [ax0, ax1], gs
def _plot_density(x, y, df, group_name, palette, orient): ax = sns.violinplot( x=x, y=y, data=df, hue=group_name, dodge=False, palette=palette, density_norm="width", inner=None, orient=orient, ) # Cut half of the violins for violin in ax.collections: bbox = violin.get_paths()[0].get_extents() x0, y0, width, height = bbox.bounds width_denom = 2 height_denom = 1 if orient == "h": width_denom = 1 height_denom = 2 violin.set_clip_path( plt.Rectangle( (x0, y0), width / width_denom, height / height_denom, transform=ax.transData ) ) return ax def _jitter_data_points(old_len_collections, orient, width, ax): offset = np.array([width, 0]) if orient == "h": offset = np.array([0, width]) for dots in ax.collections[old_len_collections:]: dots.set_offsets(dots.get_offsets() + offset) def _plot_nans(df, x, y, color, orient, ax): df_nans = df[df.isna().any(axis=1)] sns.stripplot( x=x, y=y, data=df_nans, color=color, orient=orient, ax=ax, ) def _plot_out_of_range( df, x, feature, orient, limit_offset, limit_value, limit_color, limit_name, color_vble_name, _op, ax, ): if limit_color is None: raise ValueError( f"``{color_vble_name}`` must be provided if ``{limit_name}`` is provided." ) if limit_offset is None: raise ValueError(f"``limit_offset`` must be provided if ``{limit_name}`` is provided.") if _op == operator.gt: arithm = operator.add elif _op == operator.lt: arithm = operator.sub else: raise ValueError(f"``{_op}`` must be either ``gt`` or ``lt``.") df_overflow = df[_op(df[feature], limit_value)] sns.stripplot( x=x, y=arithm(limit_value, limit_offset), data=df_overflow, color=limit_color, orient=orient, ax=ax, )
[docs] def plot_raincloud( data_file, group_name, feature, palette="Set2", orient="v", density=True, upper_limit_value=None, upper_limit_color="gray", lower_limit_value=None, lower_limit_color="gray", limit_offset=None, mark_nans=True, nans_value=None, nans_color="black", figure=None, output_file=None, ): """ Generate a raincloud plot with the data points corresponding to the ``feature`` value contained in the data file. If ``upper_limit_value`` or ``lower_limit_value`` is provided, the values outside that range are clipped. Thus, a large density around those values, together with the values plot with the distinctive ``upper_limit_color`` and ``lower_limit_color`` styles may be indicative of unexpected values in the data. Similarly, NaN values, if present, will be marked with the distinctive ``nans_color`` style, and may again be indicative of unexpected values in the data. Parameters ---------- data_file : :obj:`str` File containing the data of interest. figure : :obj:`matplotlib.pyplot.figure` or None Existing figure on which to plot. group_name : :obj:`str` The group name of interest to be plot. feature : :obj:`str` The feature of interest to be plot. palette : :obj:`str`, optional Color palette name provided to :func:`sns.stripplot`. orient : :obj:`str`, optional Plot orientation (``v`` or ``h``). density : :obj:`bool`, optional ``True`` to plot the density of the data points. upper_limit_value : :obj:`float`, optional Upper limit value over which any value in the data will be styled with a different style. upper_limit_color : :obj:`str`, optional Color name to represent values over ``upper_limit_value``. lower_limit_value : :obj:`float`, optional Lower limit value under which any value in the data will be styled with a different style. lower_limit_color : :obj:`str`, optional Color name to represent values under ``lower_limit_value``. limit_offset : :obj:`float`, optional Offset to plot the values over/under the upper/lower limit values. mark_nans : :obj:`bool`, optional ``True`` to plot NaNs as dots. ``nans_values`` must be provided if True. nans_value : :obj:`float`, optional Value to use for NaN values. nans_color : :obj:`str`, optional Color name to represent NaN values. output_file : :obj:`str` or :obj:`None` Path where the output figure should be saved. If this is not defined, then the plotting axes will be returned instead of the saved figure path. Returns ------- axes and gridspec Plotting axes and gridspec. Returned only if ``output_file`` is ``None``. output_file : :obj:`str` The file where the figure is saved. """ df = pd.read_csv(data_file, sep=r"[\t\s]+", engine="python") df_clip = df.copy(deep=True) df_clip[feature] = df[feature].clip(lower=lower_limit_value, upper=upper_limit_value) figure = figure if figure is not None else plt.figure(figsize=(7, 5)) gs = GridSpec(1, 1) ax = plt.subplot(gs[0, 0]) sns.set(style="white", font_scale=2) x = feature y = group_name # Swap x/y if the requested orientation is vertical if orient == "v": x = group_name y = feature # Plot the density if density: ax = _plot_density(x, y, df_clip, group_name, palette, orient) # Add boxplots width = 0.15 sns.boxplot( x=x, y=y, data=df_clip, color="black", width=width, zorder=10, showcaps=True, boxprops={"facecolor": "none", "zorder": 10}, showfliers=True, whiskerprops={"linewidth": 2, "zorder": 10}, saturation=1, orient=orient, ax=ax, ) old_len_collections = len(ax.collections) # Plot the data points as dots sns.stripplot( x=x, y=y, hue=group_name, data=df_clip, palette=palette, edgecolor="white", size=3, jitter=0.1, zorder=0, orient=orient, ax=ax, ) # Offset the dots that would be otherwise shadowed by the violins if density: _jitter_data_points(old_len_collections, orient, width, ax) # Draw nans if any if mark_nans: if nans_value is None: raise ValueError("``nans_value`` must be provided if ``mark_nans`` is True.") _plot_nans(df, x, nans_value, nans_color, orient, ax) # If upper/lower limits are provided, draw the points with a different color if upper_limit_value is not None: _plot_out_of_range( df, x, feature, orient, limit_offset, upper_limit_value, upper_limit_color, "upper_limit_value", "upper_limit_color", operator.gt, ax, ) if lower_limit_value is not None: _plot_out_of_range( df, x, feature, orient, limit_offset, lower_limit_value, lower_limit_color, "lower_limit_value", "lower_limit_color", operator.lt, ax, ) if output_file is not None: plt.tight_layout() figure.savefig(output_file, bbox_inches="tight") plt.close(figure) figure = None return output_file return ax, gs