Source code for omicscope.General.GeneralVisualization

""" Module for OmicScope Object Visualization

This module allows the user to extract and visualize information from OmicScope object.
Here, it is possible to evaluate data normalization (MA-Plot, Volcano Plot, Dynamic range plot),
individual protein abundance (barplot, boxplot), search for protein-protein interactions,
and perform Principal Component Analysis (PCA), Hierarchical clustering analysis (heatmap, pearson
correlation plot) and K-means clustering (k-trend).

Some functions below allow user to choose protein to be highlighted and/or plotted.
For that, user must write protein 'gene_name' (See examples in OmicScope Object tab).
Additionally, colors and color palettes follows the matplotlib and seaborn libraries options.

"""

import copy
import itertools
import random

import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import networkx as nx
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from kneed import KneeLocator
from scipy.stats import zscore
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA


[docs]def bar_ident(self, logscale=False, col='darkcyan', save=None, dpi=300,
              vector=True):
    """Show the amount of entities identified and differentially regulated
    in the study.

    Args:
        logscale (bool, optional): Y-axis log-scaled. Defaults to True.
        col (str, optional): Color. Defaults to 'darkcyan'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): Resolution to save figure. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to True.

    Returns:
        ax [matplotlib object]: Barplot
    """
    # Define plt parameters
    plt.style.use('default')
    sns.set(rc={'figure.figsize': (11.7, 8.27)})
    sns.set_style("ticks")
    plt.rcParams["figure.dpi"] = dpi
    OmicScope = copy.copy(self)
    df = OmicScope.quant_data
    # Get number of identified proteins
    identified = df.Accession.count()
    # Get number of quantified proteins
    quantified = df.dropna(axis=0, subset=[OmicScope.pvalue]).Accession.count()
    # Get number of differentially regulated proteins
    deps = OmicScope.deps['Accession'].count()
    if identified != quantified:
        df = ['Identified', 'Quantified', 'Differentially\nregulated']
        protein_number = [identified, quantified, deps]
    else:
        df = ['Quantified', 'Differentially\nregulated']
        protein_number = [quantified, deps]
    df = pd.DataFrame(protein_number, df)
    # Log scaling y-axis
    if logscale is True:
        ax = df.plot(kind='bar', color=col, rot=0, edgecolor="black", log=True,
                     figsize=(2.4, 3.5))
    else:
        ax = df.plot(kind='bar', color=col, rot=0, edgecolor="black",
                     figsize=(2.4, 3.5))
    # Add label upside bar

    def autolabel(rects, ax):
        for rect in rects:
            x = rect.get_x() + rect.get_width() / 2.
            y = rect.get_height()
            ax.annotate("{}".format(y), (x, y), xytext=(0, 5), textcoords="offset points",
                        ha='center', va='bottom')
    autolabel(ax.patches, ax)
    ax.margins(y=0.1)
    ax.legend().remove()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    plt.ylabel('#Proteins')
    plt.title(label=OmicScope.ctrl + ' vs ' + '-'.join(OmicScope.experimental), loc='left')
    plt.grid(b=False)
    if save is not None:
        if vector is True:
            plt.savefig(save + 'barplot.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'barplot.png', dpi=dpi, bbox_inches='tight')
    plt.show()
    return ax

[docs]def normalization_boxplot(self, palette='Dark2', dpi=300,
                          save=None, vector=True):
        """
        Generates a boxplot of log2-transformed expression data for each sample to 
        evaluate normalization.

        Parameters:
        - palette (str): The color palette to use for the boxplot. Default is 'Dark2'.
        - dpi (int): resolution of the saved figure. Default is 300.
        - save (str): Filepath to save the generated plot. If None, the plot is not saved. Default is None.
        - vector (bool): If True, save the plot in vector format (SVG). If False, save in raster format (PNG). Default is True.

        """
        plt.rcParams['figure.dpi']=dpi
        df = self.expression.copy()
        if self.logTransform is True:
            df = np.log2(df)
        df = df.replace([-np.inf,np.inf],0)
        conditions = self.pdata.Condition
        colors = sns.color_palette(palette=palette,
                                as_cmap=False, n_colors=len(conditions.drop_duplicates()))
        color_dict = {i:j for i,j in zip(conditions.drop_duplicates(),colors)}
        bplot = plt.boxplot(df, vert=True,  # vertical box alignment
                            patch_artist=True, labels=df.columns,
                            notch=True, medianprops=dict(color='black'),
                            boxprops=dict(linewidth=0.8))
        plt.xticks(rotation=90)
        plt.title('Normalization Boxplot')
        for patch, condition in zip(bplot['boxes'], conditions):
            patch.set_facecolor(color_dict[condition])
        # Create legend for colors
        legend_labels = [mpatches.Patch(color=color_dict[i], label=i) for i in color_dict]
        plt.legend(handles=legend_labels, loc='center', bbox_to_anchor=(1.1, .5))
        sns.despine()
        plt.tight_layout()
        if save is not None:
            if vector is True:
                plt.savefig(save + 'boxplot_normalization.svg', bbox_inches='tight')
            else:
                plt.savefig(save + 'boxplot_normalization.png', dpi=dpi, bbox_inches='tight')
        plt.show()

[docs]def volcano_Multicond(self, *Proteins, pvalue=0.05, palette='viridis',
                      bcol='#962558', non_regulated='#606060',
                      save=None, dpi=300, vector=True):
    """Creates a volcano plot for multiple conditions.

    In general, volcano plots are designed to compare 2 conditions.
    Here, we aim to see the distribution of quantified proteins' p-value and
    fold changes among multiple conditions.

    Args:
        pvalue (float, optional): p-value threshold. Defaults to 0.05.
        palette (str, optional): Color palette to differentiate dots. Defaults to 'viridis'.
        bcol (str, optional): color for density plot. Defaults to '#962558'.
        non_regulated (str, optional): Proteins not differentially regulated. Defaults to '#606060'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to True.
    """
    plt.style.use('default')
    plt.rcParams["figure.dpi"] = dpi
    OmicScope = copy.copy(self)
    FoldChange_cutoff = OmicScope.FoldChange_cutoff
    # Definitions for the axes
    df_initial = OmicScope.quant_data
    dropped_inf = copy.copy(df_initial)
    dropped_inf = dropped_inf.replace([np.inf, -np.inf], np.nan)
    dropped_inf = dropped_inf.dropna()
    fc = df_initial['log2(fc)']
    fc = fc.replace(np.inf, dropped_inf['log2(fc)'].max() + 1)
    fc = fc.replace(-np.inf, dropped_inf['log2(fc)'].min() - 1)
    pval = df_initial[f'-log10({OmicScope.pvalue})']
    pval = pval.replace(
        np.inf, dropped_inf[f'-log10({OmicScope.pvalue})'].max() + 1)
    df_initial[f'-log10({OmicScope.pvalue})'] = pval
    df_initial['log2(fc)'] = fc

    # colors per condition
    comparisons = df_initial.Comparison
    comparisons = comparisons.str[-1] + '-' + comparisons.str[0]
    number_of_comparison = len(comparisons.drop_duplicates())
    color_per_comparison = sns.color_palette(palette=palette,
                                             n_colors=number_of_comparison).as_hex()
    color_comp_dict = dict(zip(comparisons, color_per_comparison))
    print(color_comp_dict)
    col = []
    comparison = []
    for pv, comp in zip(pval, comparisons):
        if pv >= -np.log10(pvalue):
            comparison.append(comp)
            col.append(comp)
        else:
            col.append(non_regulated)
            comparison.append('Non-regulated')

    col = pd.Series(col)
    comparison = pd.Series(comparison)
    df = pd.DataFrame(data=zip(pval, fc, col, comparison))
    df.columns = [OmicScope.pvalue, 'fc', 'col', 'comparison']
    df[OmicScope.pvalue] = df[OmicScope.pvalue].astype(float)
    df['fc'] = df['fc'].astype(float)
    df['col'] = df.col.replace(color_comp_dict)
    # annotation if it is on args

    if len(Proteins) > 0:
        ls = pd.DataFrame(Proteins)
        ls.columns = ['gene_name']
        ls_final = ls.merge(df_initial)
        ls_final = ls_final[['gene_name', f'-log10({OmicScope.pvalue})', 'log2(fc)']]
        ls_final = ls_final.sort_values('log2(fc)', ascending=False)

    # dimensions
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    spacing = 0.02
    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom + height + spacing, width, 0.2]
    rect_histy = [left + width + spacing, bottom, 0.2, height]
    # plot
    plt.style.use('bmh')
    plt.figure(figsize=(8, 8))
    # Scatter plot
    ax_scatter = plt.axes(rect_scatter)
    sns.scatterplot(x=df.fc, y=df[OmicScope.pvalue], alpha=0.7,
                    hue=df.comparison)
    sns.despine(left=False)
    plt.xlabel("log2( FC)")
    plt.ylabel(f'-log10({OmicScope.pvalue})')
    ax_scatter.tick_params(direction='in', top=True, right=True)
    plt.grid(b=None)
    ax_scatter.plot(alpha=0.5)
    # Density plot on x-axis (upper)
    ax_histx = plt.axes(rect_histx)
    plt.ylabel("Density")
    plt.title(label=OmicScope.ctrl + ' vs ' + '-'.join(OmicScope.experimental), loc='left')
    ax_histx.tick_params(direction='in', labelbottom=False)
    ax_histx.set_facecolor('white')
    ax_histx.grid(b=None)
    sns.kdeplot(fc, fill=True, color=bcol, edgecolor='black')
    sns.despine(bottom=True, top=True)
    # Density plot on y-axis (right)
    ax_histy = plt.axes(rect_histy)
    ax_histy.tick_params(direction='in', labelleft=False)
    ax_histy.set_facecolor('white')
    ax_histy.grid(b=None)
    sns.kdeplot(y=pval, fill=True, color=bcol, edgecolor='black')
    sns.despine(top=True, left=True)
    plt.xlabel("Density")
    ax_scatter.set_facecolor('white')
    # Annotation positions
    if len(Proteins) > 0:
        from adjustText import adjust_text
        texts = []
        for a, b, c in zip(ls_final['log2(fc)'], ls_final[f'-log10({OmicScope.pvalue})'],
                           ls_final['gene_name']):
            texts.append(ax_scatter.text(a, b, c, size=8))
        adjust_text(texts, ax=ax_scatter, force_points=0.25, force_text=0.25,
                    expand_points=(1.5, 1.5), expand_text=(1.5, 1.5),
                    arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
    ax_scatter.axhline(y=-np.log10(0.05), color='gray', linestyle=':')
    ax_scatter.legend()
    if FoldChange_cutoff == 0:
        ax_scatter.axvline(x=0, color='gray', linestyle=':')
    else:
        ax_scatter.axvline(x=FoldChange_cutoff, color='gray', linestyle=':')
    limh = round(fc.max(), 2)
    liml = round(fc.min(), 2)
    limp = round(pval.max() + 0.5, 0)
    ax_scatter.set_xlim((liml - (limh - liml) * 0.10, limh + ((limh - liml) * 0.10)))
    ax_scatter.set_ylim((pval.min(), limp + limp * .1))
    ax_histx.set_xlim(ax_scatter.get_xlim())
    ax_histy.set_ylim(ax_scatter.get_ylim())
    if save is not None:
        if vector is True:
            plt.savefig(save + 'volcano.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'volcano.png', dpi=dpi, bbox_inches='tight')
    plt.show()
    plt.show(block=True)


[docs]def volcano_2cond(self, *Proteins, pvalue=0.05, bcol='darkcyan',
                  non_regulated='#606060', up_regulated='#E4001B', down_regulated='#6194BC',
                  save=None, dpi=300, vector=True):
    """Plot a conventional volcano plot

    Args:
        pvalue (float, optional): p-value threshold. Defaults to 0.05.
        bcol (str, optional): color for density plot. Defaults to 'darkcyan'.
        non_regulated (str, optional): Proteins not differentially regulated.
         Defaults to '#606060'.
        up_regulated (str, optional): Proteins up-regulated in relation
         to Control group. Defaults to '#E4001B'.
        down_regulated (str, optional): Proteins down-regulated in relation
         to Control group. Defaults to '#6194BC'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """
    plt.style.use('default')
    plt.rcParams["figure.dpi"] = dpi
    OmicScope = copy.copy(self)
    FoldChange_cutoff = OmicScope.FoldChange_cutoff
    # Definitions for the axes
    df_initial = OmicScope.quant_data
    dropped_inf = copy.copy(df_initial)
    dropped_inf = dropped_inf.replace([np.inf, -np.inf], np.nan)
    dropped_inf = dropped_inf.dropna()
    fc = df_initial['log2(fc)']
    fc = fc.replace(np.inf, dropped_inf['log2(fc)'].max() + 1)
    fc = fc.replace(-np.inf, dropped_inf['log2(fc)'].min() - 1)
    pval = df_initial[f'-log10({OmicScope.pvalue})']
    pval = pval.replace(
        np.inf, dropped_inf[f'-log10({OmicScope.pvalue})'].max() + 1)
    df_initial[f'-log10({OmicScope.pvalue})'] = pval
    df_initial['log2(fc)'] = fc
    # Defining colors for dots
    col = []
    comparison = []
    for i, j in zip(fc, pval):
        if j <= -np.log10(pvalue):
            col.append(non_regulated)
            comparison.append('non-regulated')
        else:
            if i >= FoldChange_cutoff:
                col.append(up_regulated)
                comparison.append('up-regulated')
            elif i <= -FoldChange_cutoff:
                col.append(down_regulated)
                comparison.append('down-regulated')
            else:
                col.append(non_regulated)
                comparison.append('non-regulated')
    col, comparison = pd.Series(col), pd.Series(comparison)
    df = pd.DataFrame(zip(pval, fc, col, comparison))
    df.columns = ['pval', 'fc', 'col', 'Regulation']
    # annotation
    if len(Proteins) > 0:
        ls = pd.DataFrame(Proteins)
        ls.columns = ['gene_name']
        ls_final = ls.merge(df_initial)
        ls_final = ls_final[['gene_name', f'-log10({OmicScope.pvalue})', 'log2(fc)']]
        ls_final = ls_final.sort_values('log2(fc)', ascending=False)
    # plot dimensions
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    spacing = 0.02
    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom + height + spacing, width, 0.2]
    rect_histy = [left + width + spacing, bottom, 0.2, height]
    # Plot
    plt.style.use('bmh')
    plt.figure(figsize=(8, 8))
    ax_scatter = plt.axes(rect_scatter)
    # Scatter plot
    sns.scatterplot(x=df.fc, y=df.pval, alpha=0.5, linewidth=0.5,
                    hue=df.Regulation,
                    palette=list(df.col.drop_duplicates().dropna()))
    plt.xlabel("log2( FC)")
    plt.ylabel(f'-log10({OmicScope.pvalue})')
    ax_scatter.tick_params(direction='in', top=True, right=True)
    plt.grid(b=None)
    ax_scatter.plot(alpha=0.5)
    # Density Plot in x-axis (upper)
    ax_histx = plt.axes(rect_histx)
    plt.ylabel("Density")
    plt.title(label=OmicScope.experimental[0] + ' - ' + OmicScope.ctrl, loc='left')
    ax_histx.tick_params(direction='in', labelbottom=False)
    ax_histx.set_facecolor('white')
    ax_histx.grid(b=None)
    sns.kdeplot(fc, fill=True, color=bcol, alpha=0.8, edgecolor='black')
    sns.despine(bottom=True, top=True)
    # Density plot in y axis (right)
    ax_histy = plt.axes(rect_histy)
    ax_histy.tick_params(direction='in', labelleft=False)
    ax_histy.set_facecolor('white')
    ax_histy.grid(b=None)
    sns.kdeplot(y=pval, fill=True, color=bcol, alpha=0.8, edgecolor='black')
    sns.despine(top=True, left=True)
    plt.xlabel("Density")
    ax_scatter.set_facecolor('white')
    # Annotation positions
    if len(Proteins) > 0:
        from adjustText import adjust_text
        texts = []
        for a, b, c in zip(ls_final['log2(fc)'], ls_final[f'-log10({OmicScope.pvalue})'],
                           ls_final['gene_name']):
            texts.append(ax_scatter.text(a, b, c, size=8))
        adjust_text(texts, ax=ax_scatter,
                    arrowprops=dict(arrowstyle="-", color='k', lw=0.5))
    ax_scatter.axhline(y=-np.log10(0.05), color='gray', linestyle=':')

    # Fold change cutoff
    if FoldChange_cutoff != 0:
        ax_scatter.axvline(x=-FoldChange_cutoff,
                           color='gray', linestyle=':')
        ax_scatter.axvline(x=FoldChange_cutoff,
                           color='gray', linestyle=':')
    else:
        ax_scatter.axvline(x=FoldChange_cutoff,
                           color='gray', linestyle=':')
    # Figure limits
    limh = round(fc.max(), 2)
    liml = round(fc.min(), 2)
    limp = round(pval.max() + 0.5, 0)
    ax_scatter.set_xlim((liml - ((limh - liml) * 0.10),
                         limh + ((limh - liml) * 0.10)))
    ax_scatter.set_ylim((pval.min(), limp))
    ax_histx.set_xlim(ax_scatter.get_xlim())
    ax_histy.set_ylim(ax_scatter.get_ylim())
    # save figure and how to save
    if save is not None:
        if vector is True:
            plt.savefig(save + 'volcano.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'volcano.png', dpi=dpi, bbox_inches='tight')
    plt.show()
    plt.show(block=True)


[docs]def volcano(self, *Proteins,
            pvalue=0.05, bcol='#962558', palette='viridis',
            non_regulated='#606060', up_regulated='#E4001B', down_regulated='#6194BC',
            save=None, dpi=300, vector=True):
    """Plot volcano plot.

    Args:
        pvalue (float, optional): p-value threshold. Defaults to 0.05.
        bcol (str, optional): Density plot color. Defaults to '#962558'.
        palette (str, optional): Palette for Multiconditions volcano plot.
         Defaults to 'viridis'.
        non_regulated (str, optional): Color of non-differentially regulated
         proteins. Defaults to '#606060'.
        up_regulated (str, optional): Color of up-regulated proteins for volcano
         with 2 conditions. Defaults to '#E4001B'.
        down_regulated (str, optional): Color of down-regulated proteins for volcano
         with 2 conditions. Defaults to '#6194BC'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): . Resolution to save figure. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """
    OmicScope = copy.copy(self)
    if len(OmicScope.Conditions) == 2:
        volcano_2cond(OmicScope, *Proteins, pvalue=pvalue, bcol=bcol,
                      non_regulated=non_regulated, up_regulated=up_regulated,
                      down_regulated=down_regulated,
                      save=save, dpi=dpi, vector=vector)
    if len(OmicScope.Conditions) > 2:
        volcano_Multicond(OmicScope, *Proteins,
                          pvalue=pvalue, palette=palette, bcol=bcol,
                          save=None, dpi=dpi, vector=vector)


[docs]def heatmap(self, *Proteins, pvalue=0.05, c_cluster=True,
            clust_metric="euclidean", clust_method='average',
            palette='RdYlBu_r', linewidth=0.01, color_groups='tab20',
            sample_label=False,
            save=None, dpi=300, vector=True):
    """ Heatmap with hierarchical clustering

    Args:
        pvalue (float, optional): p-value threshold. Defaults to 0.05.
        c_cluster (bool, optional): Applies Hierarchical clustering for
         columns. Defaults to True.
        clust_metric (str, optional): The distance metric to use. Optionally: braycurtis, canberra,
         chebyshev, cityblock, correlation, cosine, dice, euclidean, hamming, jaccard,
         jensenshannon, kulczynski1, mahalanobis, matching, minkowski, rogerstanimoto, russellrao,
         seuclidean, sokalmichener, sokalsneath, sqeuclidean, yule.
        clust_method (str, optional): Linkage method to use for calculating clusters.
         Optionally: single, complete, average, weighted, centroid, median, or ward.
        color_groups (str, optional): Palette for group colors.
         Defaults to 'tab20'.
        palette (str, optional): Palette for protein abundance. Defaults to 'RdYlBu_r'.
        linewidth (float, optional): Line width. Defaults to 0.01.
        sample_label (bool, optional): insert biological sample label and condition.
        Defaults to False.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """
    plt.style.use('default')
    plt.rcParams["figure.dpi"] = dpi
    OmicScope = copy.copy(self)
    FoldChange_cutoff = OmicScope.FoldChange_cutoff
    df = OmicScope.quant_data
    pdata = OmicScope.pdata
    sample_condition = dict(zip(pdata.Sample, pdata.Condition))
    sort_columns = list(pdata.sort_values(['Condition'])['Sample'])
    if 'TimeCourse' in pdata.columns:
        sort_columns = list(pdata.sort_values(
            ['Condition', 'TimeCourse'])['Sample'])
        times = pdata.TimeCourse.drop_duplicates()
        ntimes = len(times)
        pal = sns.color_palette(palette='BuPu', as_cmap=False, n_colors=ntimes)
        dic = {}
        for C, c in zip(times, pal):
            dic.update({C: c})
        replacer = dic.get
        time_colors = [replacer(n, n) for n in list(
            pdata.sort_values(['Condition', 'TimeCourse']).TimeCourse)]
    else:
        time_colors = []
    Conditions = OmicScope.Conditions
    # Creating Heatmap Matrix
    heatmap = df[df.loc[:, OmicScope.pvalue] <= pvalue]
    heatmap = heatmap.loc[(heatmap['log2(fc)'] <= -FoldChange_cutoff)
                          | (heatmap['log2(fc)'] >= FoldChange_cutoff)]
    heatmap = heatmap.dropna()
    # Filtering for specific proteins
    if len(Proteins) > 0:
        heatmap = heatmap[heatmap['gene_name'].isin(Proteins)]
    heatmap = heatmap.set_index('gene_name')
    heatmap = heatmap.loc[:, heatmap.columns.str.contains('.', regex=False)]
    # Log2 transform
    if self.logTransform is True:
        heatmap = np.log2(heatmap)
    heatmap = heatmap.replace([-np.inf], int(0))
    heatmap = heatmap[sort_columns]
    heatmap_original_label = heatmap.copy()
    heatmap = heatmap.rename(columns=sample_condition)
    Col = list(heatmap.columns)
    # Creating matrix for group colors
    ngroup = len(Conditions)
    pal = sns.color_palette(palette=color_groups,
                            as_cmap=False, n_colors=ngroup)
    dic = {}
    for C, c in zip(Conditions, pal):
        dic.update({C: c})
    replacer = dic.get
    colcolors = [replacer(n, n) for n in Col]
    colors = [colcolors] + [time_colors]
    if colors[-1] == []:
        colors = colcolors
    # Title
    title = 'Heatmap - ' + OmicScope.ctrl + \
        ' vs ' + '-'.join(OmicScope.experimental)
    # Plot
    heatmap.columns.name = 'Samples'
    heatmap.index.name = 'Gene name'
    if sample_label is True:
        heatmap = heatmap_original_label
    sns.clustermap(heatmap,
                   cmap=palette, z_score=0, linewidths=linewidth, linecolor='black',
                   col_colors=colors, metric=clust_metric, method=clust_method,
                   col_cluster=c_cluster, center=0).fig.suptitle(title, y=1.02)
    
    # Create legend for colors
    if type(colors[0]) is list:
        col_dict_Time = {i:j for i,j in zip(pdata.TimeCourse, colors[1])}
        legend_labels_time = [mpatches.Patch(color=col_dict_Time[i], label=i) for i in col_dict_Time]
        legend1 = plt.legend(handles=legend_labels_time, bbox_to_anchor=(1, 1),
                              bbox_transform=plt.gcf().transFigure, loc='upper right',
                              title='Time Point')

        col_dict = {i:j for i,j in zip(pdata.Condition, colors[0])}
        legend_labels = [mpatches.Patch(color=col_dict[i], label=i) for i in col_dict]
        legend2 = plt.legend(handles=legend_labels, bbox_to_anchor=(1, 0.88),
                              bbox_transform=plt.gcf().transFigure, loc='upper right',
                              title='Condition')
        plt.gca().add_artist(legend1)

    else:
        col_dict = {i:j for i,j in zip(heatmap.columns, colors)}
        legend_labels = [mpatches.Patch(color=col_dict[i], label=i) for i in col_dict]
        plt.legend(handles=legend_labels, bbox_to_anchor=(1, 1),
                    bbox_transform=plt.gcf().transFigure, loc='upper right',
                    title='Condition')

    # Save
    if save is not None:
        if vector is True:
            plt.savefig(save + 'heatmap.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'heatmap.png', dpi=dpi, bbox_inches='tight')
    plt.show()

[docs]def correlation(self, *Proteins, pvalue=1.0,
                sample_method='pearson',
                clust_metric='euclidean',
                clust_method='average', palette='RdYlBu_r', linewidth=0.005,
                color_groups='tab20', sample_label=False,
                save=None, dpi=300, vector=True):
    """Pairwise correlation plot for samples.

    Args:
        pvalue (float, optional): p-value threshold. Defaults to 1.0.
        sample_method (str, optional): Method to compute pair-wise correlation.
         Options: pearson, kendal, spearman. Defaults to 'pearson'.
        clust_metric (str, optional): The distance metric to use. Optionally: braycurtis, canberra,
         chebyshev, cityblock, correlation, cosine, dice, euclidean, hamming, jaccard,
         jensenshannon, kulczynski1, mahalanobis, matching, minkowski, rogerstanimoto, russellrao,
         seuclidean, sokalmichener, sokalsneath, sqeuclidean, yule.
        clust_method (str, optional): Linkage method to use for calculating clusters.
         Optionally: single, complete, average, weighted, centroid, median, or ward.
        palette (str, optional): Palette for R-distribution. Defaults to 'RdYlBu_r'.
        linewidth (float, optional): Line width. Defaults to 0.005.
        sample_label (bool, optional): insert biological sample label and condition.
        Defaults to False.
        color_groups (str, optional): Color of each group. Defaults to 'tab20'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """
    plt.style.use('default')
    plt.rcParams["figure.dpi"] = dpi
    OmicScope = copy.copy(self)
    FoldChange_cutoff = OmicScope.FoldChange_cutoff
    df = OmicScope.quant_data
    pdata = OmicScope.pdata
    sample_condition = dict(zip(pdata.Sample, pdata.Condition))
    sort_columns = list(pdata.sort_values(['Condition'])['Sample'])
    if 'TimeCourse' in pdata.columns:
        sort_columns = list(pdata.sort_values(
            ['Condition', 'TimeCourse'])['Sample'])
        times = pdata.TimeCourse.drop_duplicates()
        ntimes = len(times)
        pal = sns.color_palette(palette='BuPu', as_cmap=False, n_colors=ntimes)
        dic = {}
        for C, c in zip(times, pal):
            dic.update({C: c})
        replacer = dic.get
        time_colors = [replacer(n, n) for n in list(
            pdata.sort_values(['Condition', 'TimeCourse']).TimeCourse)]
    else:
        time_colors = []
    Conditions = OmicScope.Conditions
    # Selecting Matrix for correlation
    pearson = df[df.loc[:, OmicScope.pvalue] <= pvalue]
    pearson = pearson.loc[(pearson['log2(fc)'] <= -FoldChange_cutoff)
                          | (pearson['log2(fc)'] >= FoldChange_cutoff)]
    pearson = pearson.dropna()
    # Filtering for specific proteins
    if len(Proteins) > 0:
        pearson = pearson[pearson['gene_name'].isin(Proteins)]
    pearson = pearson.set_index('gene_name')
    pearson = pearson.loc[:, pearson.columns.str.contains('.', regex=False)]
    # log2 transform
    if self.logTransform is True:
        pearson = np.log2(pearson)
    pearson = pearson.replace([-np.inf], int(0))
    # Performing Pearson's Correlation
    corr_matrix_raw = pearson.corr(method=sample_method)
    # Creating matrix for group colors
    corr_matrix_raw = corr_matrix_raw[sort_columns]
    corr_matrix = corr_matrix_raw.rename(columns=sample_condition)
    Col = list(corr_matrix.columns)
    # Creating matrix for group colors
    ngroup = len(Conditions)
    pal = sns.color_palette(palette=color_groups, as_cmap=False, n_colors=ngroup)
    dic = {}
    for C, c in zip(Conditions, pal):
        dic.update({C: c})
    replacer = dic.get
    colcolors = [replacer(n, n) for n in Col]
    colors = [colcolors] + [time_colors]
    if colors[-1] == []:
        colors = colcolors
    # Title
    title = 'Heatmap - ' + OmicScope.ctrl + ' vs ' + '-'.join(OmicScope.experimental)
    # Plot
    corr_matrix.columns.name = 'Samples'
    corr_matrix.index.name = 'Samples'
    if sample_label is False:
        sns.clustermap(corr_matrix, metric=clust_metric, method=clust_method,
                       xticklabels=corr_matrix.columns, row_colors=colors,
                       yticklabels=corr_matrix.columns, col_colors=colors,
                       cmap=palette, linewidths=linewidth, linecolor='black').fig.suptitle(title, y=1.02)
    else:
        sns.clustermap(corr_matrix_raw, metric=clust_metric, method=clust_method,
                       xticklabels=corr_matrix_raw.columns, row_colors=colors,
                       yticklabels=corr_matrix_raw.columns, col_colors=colors,
                       cmap=palette, linewidths=linewidth, linecolor='black').fig.suptitle(title, y=1.02)
        # Create legend for colors
    if type(colors[0]) is list:
        col_dict_Time = {i:j for i,j in zip(pdata.TimeCourse, colors[1])}
        legend_labels_time = [mpatches.Patch(color=col_dict_Time[i], label=i) for i in col_dict_Time]
        legend1 = plt.legend(handles=legend_labels_time, bbox_to_anchor=(1, 1),
                              bbox_transform=plt.gcf().transFigure, loc='upper right',
                              title='Time Point')

        col_dict = {i:j for i,j in zip(pdata.Condition, colors[0])}
        legend_labels = [mpatches.Patch(color=col_dict[i], label=i) for i in col_dict]
        legend2 = plt.legend(handles=legend_labels, bbox_to_anchor=(1, 0.88),
                              bbox_transform=plt.gcf().transFigure, loc='upper right',
                              title='Condition')
        plt.gca().add_artist(legend1)
    else:
        col_dict = {i:j for i,j in zip(pdata.Condition, colors)}
        legend_labels = [mpatches.Patch(color=col_dict[i], label=i) for i in col_dict]
        plt.legend(handles=legend_labels, bbox_to_anchor=(1, 1),
                    bbox_transform=plt.gcf().transFigure, loc='upper right',
                    title='Condition')

    if save is not None:
        if vector is True:
            plt.savefig(save + 'pearson.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'pearson.png', dpi=dpi, bbox_inches='tight')
    plt.xlabel('')
    plt.ylabel('')
    plt.show()
    plt.show(block=True)


[docs]def DynamicRange(self, *Proteins, color='#565059',
                 protein_color='orange', max_min=False,
                 min_color='#18ab75', max_color='#ab4e18', dpi=300, save=None,
                 vector=True):
    """Dynamic range plot

    Args:
        mean_color (str, optional): default color for dots (mean abundance).
         Defaults to '#565059'.
        protein_color (str, optional): Color for specific proteins (Args).
         Defaults to 'orange'.
        max_min (bool, optional): Plot the maximum and minimum abundance
         value for each protein. Defaults to False.
        min_color (str, optional): Color of minimum values. Defaults to '#1daec7'.
        max_color (str, optional): Color of maximum values. Defaults to '#f7463d'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """

    plt.style.use('default')
    plt.rcParams["figure.dpi"] = dpi
    OmicScope = copy.copy(self)
    df = OmicScope.quant_data
    # Dictionary for Accessions
    accession = dict(zip(df.Accession, df.gene_name))
    df = df.set_index('Accession')
    df = df.loc[:, df.columns.str.contains('.', regex=False)]
    df = np.log10(df).transpose()
    # Getting Mean, Max and Min values for each protein
    df = df.describe().transpose()
    df = df.dropna()
    df = df.reset_index()
    # Applying Dictionary to get gene name
    df['gene_name'] = df.Accession.replace(accession)
    # Ranking entities
    df['rank'] = df['mean'].rank(method='first')
    # Return variation for each protein
    ax_scatter = plt.axes()
    if max_min is True:
        plt.scatter(y=df['rank'], x=df['min'],
                    alpha=0.7, s=2, c=min_color)
        plt.scatter(y=df['rank'], x=df['max'],
                    alpha=0.7, s=2, c=max_color)
    # Plot mean
    plt.scatter(y=df['rank'], x=df['mean'],
                c=color, s=10, alpha=0.5,
                linewidths=0.5)
    # Highlight specific proteins
    if len(Proteins) > 0:
        df = df[df.gene_name.isin(Proteins)]
        plt.scatter(y=df['rank'], x=df['mean'],
                    c=protein_color, s=10, alpha=1, edgecolors='black',
                    linewidths=0.5)
        ls_final = df
        ls_final = ls_final[['gene_name', 'rank', 'mean']]
        from adjustText import adjust_text
        texts = []
        for a, b, c in zip(ls_final['mean'], ls_final['rank'],
                           ls_final['gene_name']):
            texts.append(ax_scatter.text(a, b, c, size=8))
        adjust_text(texts, ax=ax_scatter, force_points=0.25, force_text=0.25,
                    expand_points=(1.5, 1.5), expand_text=(1.5, 1.5),
                    arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
    plt.grid(b=False)
    plt.xlabel('log10(Abundance)')
    plt.ylabel('Rank')
    plt.title('Dynamic Range')
    if save is not None:
        if vector is True:
            plt.savefig(save + 'DynamicRange.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'DynamicRange.png', dpi=dpi, bbox_inches='tight')
    sns.despine()
    plt.show()


[docs]def pca(self, pvalue=1.00, scree_color='#900C3F',
        sample_label=False, palette='tab20', FoldChange_cutoff=0,
        save=None, dpi=300, vector=True):
    """ Perform Principal Component Analysis.

    Args:
        pvalue (float, optional): p-value threshold. Defaults to 1.00.
        scree_color (str, optional): Color of Scree plot. Defaults to '#900C3F'.
        sample_label (bool, optional): Insert biological sample label. Defaults to False.
        palette (str, optional): Palette for groups. Defaults to 'tab20'.
         FoldChange_cutoff (int, optional): Fold change threshold. Defaults to 0.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """
    plt.style.use('default')
    plt.rcParams["figure.dpi"] = dpi
    OmicScope = copy.copy(self)
    df = OmicScope.quant_data
    FoldChange_cutoff = OmicScope.FoldChange_cutoff
    # Filtering P-value and Fold Change
    df = df[df[OmicScope.pvalue] <= pvalue]
    if len(OmicScope.Conditions) == 2:
        df = df.loc[(df['log2(fc)'] <= -FoldChange_cutoff) | (df['log2(fc)'] >= FoldChange_cutoff)]
    df = df.loc[:, df.columns.str.contains('.', regex=False)]
    samples = df.columns
    # Getting Conditions
    Conditions = OmicScope.Conditions
    group = []
    for i in Conditions:
        ncond = df.columns.str.contains(i).sum()
        nlist = list(itertools.repeat(i, ncond))
        group.extend(nlist)
    # Center and Scale data
    scaled_data = preprocessing.scale(df.T)
    pca = PCA()  # PCA object
    pca.fit(scaled_data)
    # get PCA coordinates for scaled_data
    pca_data = pca.transform(scaled_data)
    # Scree plot
    per_var = np.round(pca.explained_variance_ratio_ * 100, decimals=1)
    labels = ['PC' + str(x) for x in range(1, len(per_var) + 1)]
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_figwidth(16)
    fig.set_figheight(7)
    ax1.bar(x=range(1, len(per_var) + 1), height=per_var, tick_label=labels,
            color=scree_color, edgecolor="black", linewidth=1)
    ax1.set(ylabel='Percentage of Explained Variance',
            xlabel='Principal Component', title='Scree plot')
    ax1.tick_params(labelrotation=45)
    sns.despine()
    ax1.grid(b=False)
    # PC1 vs PC2 plot
    pca_df = pd.DataFrame(pca_data, index=samples, columns=labels)
    pdata = OmicScope.pdata
    conditions = pdata.set_index('Sample')[['Condition']]
    if 'TimeCourse' in pdata.columns:
        conditions = pdata.set_index('Sample')[['Condition', 'TimeCourse']]
        pca_df = pca_df.merge(conditions, right_index=True, left_index=True)
        pca_df = pca_df.set_index(['Condition', 'TimeCourse'])
    else:
        pca_df = pca_df.merge(conditions, right_index=True, left_index=True)
        pca_df = pca_df.set_index(['Condition'])

    ax_scatter = sns.scatterplot(x="PC1", y="PC2", data=pca_df,
                                 hue=list(pca_df.index), palette=palette,
                                 edgecolor='black', linewidth=1, s=100, alpha=0.7)
    plt.title('Principal Component Analysis ' + '(pvalue=' + str(pvalue) + ')')
    plt.xlabel('PC1 - {0}%'.format(per_var[0]))
    plt.ylabel('PC2 - {0}%'.format(per_var[1]))
    plt.grid(b=False)
    sns.despine()
    # Annotate samples in PCA plot
    if sample_label is True:
        from adjustText import adjust_text
        texts = []
        for a, b, c in zip(pca_df['PC1'], pca_df['PC2'],
                           conditions.index):
            texts.append(ax_scatter.text(a, b, c.split('.')[0], size=8))
        adjust_text(texts, ax=ax_scatter, force_points=0.25, force_text=0.25,
                    expand_points=(1.5, 1.5), expand_text=(1.5, 1.5),
                    arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
    if save is not None:
        if vector is True:
            plt.savefig(save + 'pca.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'pca.png', dpi=dpi, bbox_inches='tight')
    plt.show()


[docs]def color_scheme(df, palette):
    """Generate colors to barplot and

    Args:
        df (DataFrame): dataframe
        palette (str): palette

    Returns:
        color (dict): dictionary assign each condition with respective color.
    """
    color = df.index.to_frame().reset_index(drop=True)
    color['variable'] = color.astype(str).apply(lambda x: '_'.join(x), axis=1)
    color = color.drop_duplicates().sort_values('variable')
    colors_scheme = ['red', 'blue', 'orange', 'green', 'darkcyan', 'magenta']
    if 'TimeCourse' in color.columns:
        timecourse = len(color.TimeCourse.drop_duplicates())
    else:
        timecourse = 1
    random.seed(10)
    colors_random = random.sample(colors_scheme, color.Condition.nunique())
    palettes = []
    if timecourse > 1:
        for i in colors_random:
            palette = sns.light_palette(i, n_colors=timecourse).as_hex()
            palettes.append(palette)
    else:
        ncolors = color.Condition.nunique()
        palettes = sns.color_palette(palette=palette, n_colors=ncolors).as_hex()
    color['colors'] = list(pd.Series(palettes).explode())
    color = dict(zip(color.variable, color.colors))
    return color


[docs]def bar_protein(self, *Proteins, logscale=True,
                palette='Spectral', save=None, dpi=300,
                vector=True):
    """Bar plot to show protein abundance in each condition

    Args:
        logscale (bool, optional): Apply log-transformed abundance.
        Defaults to True.
        palette (str, optional): Palette for groups. Defaults to 'Spectral'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """
    plt.rcParams['figure.dpi'] = dpi
    OmicScope = copy.copy(self)
    df = copy.copy(OmicScope.whole_data)
    # Proteins to plot
    df = df[df['gene_name'].isin(Proteins)]
    # Get conditions
    ctrl = [copy.copy(OmicScope.ctrl)]
    conditions = copy.copy(OmicScope.Conditions)
    conditions.remove(ctrl[0])
    # Get protein abundance for each condition
    df = df.set_index('gene_name')
    df = df.iloc[:, df.columns.str.contains('.', regex=False)]
    df = df.melt(var_name='variable', ignore_index=False)
    df = df.reset_index()
    pdata = copy.copy(OmicScope.pdata)
    pdata = pdata.set_index('Sample')
    df = df.set_index('variable')
    df = pdata.merge(df, left_index=True, right_index=True)
    if 'TimeCourse' in df.columns:
        df = df[['Condition',  'TimeCourse', 'gene_name', 'value']]
        df = df.set_index(['Condition', 'TimeCourse'])
    else:
        df = df[['Condition', 'gene_name', 'value']]
        df = df.set_index('Condition')
    # Apply log transformation
    if logscale is True:
        df['value'] = np.log2(df['value'])
        df['value'] = df['value'].replace(-np.inf, np.nan)

    color = color_scheme(df, palette=palette)
    df_index = df.index.to_frame().reset_index(drop=True)
    df_index = df_index.astype(str).apply(lambda x: '_'.join(x), axis=1)
    df.index = df_index
    # Size of figure
    sns.set(rc={'figure.figsize': (1.5*len(Proteins), 5)})
    custom_params = {"axes.spines.right": False, "axes.spines.top": False}
    sns.set_theme(style="ticks", rc=custom_params)
    # Plot
    if len(Proteins) == 1:
        sns.barplot(x=df.index, y='value',
                    data=df, errwidth=1, capsize=0.07,
                    palette=color,
                    errorbar='se',
                    edgecolor='black', linewidth=1, dodge=False)
    else:
        sns.barplot(x='gene_name', y='value', hue=list(df.index),
                    data=df, errwidth=1,
                    capsize=0.07, edgecolor='black',
                    errorbar='se',
                    palette=color,
                    linewidth=1)
    plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0.)
    sns.despine()
    plt.title('Abundance - ' + ' and '.join(Proteins))
    plt.xlabel('')
    plt.xticks(rotation=70)
    plt.ylabel('log2(Abundance)')
    if save is not None:
        if vector is True:
            plt.savefig(save + 'barplot_' + '_'.join(Proteins) + '.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'barplot_' + '_'.join(Proteins) + '.png', dpi=dpi, bbox_inches='tight')
    plt.show()


[docs]def boxplot_protein(self, *Proteins, logscale=True,
                    palette='Spectral', save=None, dpi=300,
                    vector=True):
    """Boxplot to show protein abundance in each condition

    Args:
        logscale (bool, optional): Apply abundance log-transformed.
        Defaults to True.
        palette (str, optional): Palette for groups. Defaults to 'Spectral'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
        True.
    """
    plt.rcParams['figure.dpi'] = dpi
    OmicScope = copy.copy(self)
    df = copy.copy(OmicScope.whole_data)
    # Proteins to plot
    df = df[df['gene_name'].isin(Proteins)]
    # Get conditions
    ctrl = [copy.copy(OmicScope.ctrl)]
    conditions = copy.copy(OmicScope.Conditions)
    conditions.remove(ctrl[0])
    # Get protein abundance for each condition
    df = df.set_index('gene_name')
    df = df.iloc[:, df.columns.str.contains('.', regex=False)]
    df = df.melt(var_name='variable', ignore_index=False)
    df = df.reset_index()
    pdata = copy.copy(OmicScope.pdata)
    pdata = pdata.set_index('Sample')
    df = df.set_index('variable')
    df = pdata.merge(df, left_index=True, right_index=True)
    if 'TimeCourse' in df.columns:
        df = df[['Condition',  'TimeCourse', 'gene_name', 'value']]
        df = df.set_index(['Condition', 'TimeCourse'])
    else:
        df = df[['Condition', 'gene_name', 'value']]
        df = df.set_index('Condition')
    # Apply log transformation
    if logscale is True:
        df['value'] = np.log2(df['value'])
        df['value'] = df['value'].replace(-np.inf, np.nan)

    color = color_scheme(df, palette=palette)
    df_index = df.index.to_frame().reset_index(drop=True)
    df_index = df_index.astype(str).apply(lambda x: '_'.join(x), axis=1)
    df.index = df_index
    # Size of figure
    sns.set(rc={'figure.figsize': (1.5*len(Proteins), 5)})
    custom_params = {"axes.spines.right": False, "axes.spines.top": False}
    sns.set_theme(style="ticks", rc=custom_params)
    # Plot
    if len(Proteins) == 1:
        sns.boxplot(x=df.index, y='value',
                    data=df,
                    palette=color,
                    linewidth=1, dodge=False)
    else:
        sns.boxplot(x='gene_name', y='value', hue=list(df.index),
                    data=df,
                    palette=color,
                    linewidth=1)
    plt.legend(bbox_to_anchor=(1.0, 1), loc=2, borderaxespad=0.)
    sns.despine()
    plt.title('Abundance - ' + ' and '.join(Proteins))
    plt.xlabel('')
    plt.xticks(rotation=70)
    plt.ylabel('log2(Abundance)')
    if save is not None:
        if vector is True:
            plt.savefig(save + 'barplot_' + '_'.join(Proteins) + '.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'barplot_' + '_'.join(Proteins) + '.png', dpi=dpi, bbox_inches='tight')
    plt.show()


[docs]def MAplot(self, *Proteins,
           pvalue=0.05, non_regulated='#606060', up_regulated='#E4001B',
           down_regulated='#6194BC', FoldChange_cutoff=0,
           save=None, dpi=300, vector=True):
    """MA plot

    Args:
        pvalue (float, optional): p-value threshold. Defaults to 0.05.
        non_regulated (str, optional): color for non-regulated proteins.
         Defaults to '#606060'.
        up_regulated (str, optional): color for up-regulated proteins.
         Defaults to '#E4001B'.
        down_regulated (str, optional): color for down-regulated proteins.
         Defaults to '#6194BC'.
        FoldChange_cutoff (int, optional): Foldchange threshold. Defaults to 0.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.
    """
    plt.rcParams['figure.dpi'] = dpi
    OmicScope = copy.copy(self)
    df = copy.copy(OmicScope.quant_data)
    if self.logTransform is True:
                df['TotalMean'] = np.log2(df['TotalMean'])
    df['TotalMean'] = df['TotalMean'].replace(-np.inf, 0.01)
    # Defining axis
    y = df[f'-log10({OmicScope.pvalue})']
    x = df['log2(fc)']
    # Defining colors
    col = np.where(y >= -np.log10(pvalue), np.where(x >= -FoldChange_cutoff,
                                                   np.where(x <= FoldChange_cutoff, non_regulated, up_regulated),
                                                   down_regulated), non_regulated)
    regulation = np.where(y >= -np.log10(pvalue), np.where(x >= -FoldChange_cutoff,
                                                          np.where(x <= FoldChange_cutoff, 'non-regulated',
                                                                   'up-regulated'), 'down-regulated'), 'non-regulated')
    df['col'] = col
    df['Regulation'] = regulation
    # Plot
    sns.set_style("whitegrid")
    ax_scatter = sns.scatterplot(x='TotalMean', y='log2(fc)', data=df, hue='Regulation',
                                 palette=list(df.col.drop_duplicates()))
    if len(Proteins) > 0:
        df = df[df.gene_name.isin(Proteins)]
        ls_final = df
        ls_final = ls_final[['gene_name', 'log2(fc)', 'TotalMean']]
        from adjustText import adjust_text
        texts = []
        for a, b, c in zip(ls_final['TotalMean'], ls_final['log2(fc)'],
                           ls_final['gene_name']):
            texts.append(ax_scatter.text(a, b, c, size=8))
        adjust_text(texts, ax=ax_scatter, force_points=0.25, force_text=0.25,
                    expand_points=(1.5, 1.5), expand_text=(1.5, 1.5),
                    arrowprops=dict(arrowstyle="-", color='black', lw=0.5))
    sns.despine()
    plt.axhline(y=0, color='gray', linestyle=':')
    plt.grid(b=False)
    plt.xlabel('log2(Mean)')
    plt.title('MA plot')
    if save is not None:
        if vector is True:
            plt.savefig(save + 'MAPlot.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'MAPlot.png', dpi=dpi, bbox_inches='tight')
    plt.show(block=True)


[docs]def find_k(df):
    """Find the optimum k clusters"""

    df_norm = df
    sse = {}

    for k in range(1, 21):
        kmeans = KMeans(n_clusters=k, random_state=1)
        kmeans.fit(df_norm)
        sse[k] = kmeans.inertia_

    kn = KneeLocator(x=list(sse.keys()),
                     y=list(sse.values()),
                     curve='convex',
                     direction='decreasing')
    k = kn.knee
    print('KneeLocator identifies: ' + str(k) + ' clusters')
    return k


[docs]def k_trend(self, pvalue=0.05, k_cluster=None,
            save=None, dpi=300, vector=True):
    """Perform a K-mean algorithm

    BigTrend apply k-mean algorithm to identify co-expressed
    proteins/genes. For longitudinal analysis, k-means can help users to
    visualize the trends of proteins in the evaluated timecourse.

    Optionally, the user can define the number of clusters that k-means will cluster proteins and samples.
    By default, OmicScope run KneeLocator algorithm to suggest an optimal number of clusters.

    Args:
        OmicScope (_type_): OmicScope object.
        pvalue (float, optional): _description_. Defaults to 0.05.
        k_cluster (int, optional): Number of cluster to perform k-means.
         If None, OmicScope defines k-clusters based on KneeLocator algorithm.
         Defaults to None.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.

    Returns:
        _type_: _description_
    """
    plt.rcParams['figure.dpi'] = dpi
    omics = copy.copy(self)
    pdata = omics.pdata
    try:
        pdata['sample'] = pdata[['Condition', 'TimeCourse']].astype(str).apply(lambda x: '-'.join(x), axis=1)
    except KeyError:
        pdata['sample'] = pdata[['Condition', 'Biological']].astype(str).apply(lambda x: '-'.join(x), axis=1)
    data = omics.quant_data
    protein_dictionary = dict(zip(data['Accession'], data['gene_name']))
    data = data[data[omics.pvalue] <= pvalue]
    data = data.set_index('Accession')
    data = data.iloc[:, data.columns.str.contains('.', regex=False)]
    data = data.replace(0, 0.01)
    if self.logTransform is True:
        data = np.log2(data)
    # Scale protein abundance according to their mean (z-score)
    zscored_data = zscore(data, axis=1)
    if k_cluster is None:
        n_clusters = find_k(zscored_data)
    else:
        n_clusters = k_cluster
    kmeans = KMeans(n_clusters=n_clusters, init="k-means++",
                    max_iter=500)
    protein_cluster_assig = kmeans.fit(zscored_data)
    k_data = zscored_data
    k_data.columns = pdata['sample']
    k_data['cluster'] = protein_cluster_assig.labels_
    k_data_protein = k_data.reset_index().melt(id_vars=['Accession', 'cluster'],
                                               var_name='sample')
    dictionary = dict(zip(pdata['sample'], pdata['Condition']))
    k_data_protein['Condition'] = k_data_protein[['sample']].replace(dictionary)
    k_data_protein['gene_name'] = k_data_protein[['Accession']].replace(protein_dictionary)
    g = sns.catplot(
        data=k_data_protein, x="sample", y="value", hue="Condition", col="cluster",
        capsize=.2, palette="viridis",
        kind="point", col_wrap=2
    )
    g.despine()
    (g.map(plt.axhline, y=0, color="black", dashes=(2, 1), zorder=0)
     .set_ylabels('z-score', clear_inner=True)
     .set_xlabels('Conditions', clear_inner=True)
     .set(xticklabels=[]))
    if save is not None:
        if vector is True:
            plt.savefig(save + 'MAPlot.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'MAPlot.png', dpi=dpi, bbox_inches='tight')
    plt.show()
    k_data_protein = k_data_protein.groupby('cluster')['gene_name'].apply(set).reset_index()
    return k_data_protein


[docs]def PPInteractions(self, *Proteins,
                   network_k=None,
                   network_iterations=50,
                   score_threshold=0.6, labels=False, modules=False,
                   module_palette='Paired', species=9606,
                   pvalue=0.05, network_type='functional', save=None, dpi=300, vector=True):
    """Protein-Protein interaction

    Using String API, OmicScope search or protein-protein interactions among
    the proteins from users's dataset.

    Args:
        network_k (int, optional): Optimal distance between nodes. If None the distance is set to
          1/sqrt(n) where n is the number of nodes. Defauts to None.
        network_iterations (int, optional): Maximum number of iterations taken. Defaults to 50.
        score_threshold (float, optional): String-score threshold. Defaults to 0.6.
        labels (bool, optional): Show protein names. Defaults to False.
        modules (bool, optional): Perform Louvain Method to find communities/modules.
         Defaults to False.
        module_palette (str, optional): Color palette to assign modules. Defaults to 'Paired'.
        species (int, optional): String requires the definition of organism to search
         for proteins name, according to NCBI identifier. Defaults to 9606 (Human).
         Mus musculus = 10090; Rattus norvegicus = 10116.
        pvalue (float, optional): p-value threshold to proteins to be accepted.
         Defaults to 0.05 (differentially regulated).
        network_type (str, optional): Interactions can be defined as 'functional' or
         'physical'. Defaults to 'functional'.
        save (str, optional): Path to save figure. Defaults to None.
        dpi (int, optional): figure resolution. Defaults to 300.
        vector (bool, optional): Save figure in as vector (.svg). Defaults to
         True.

    Returns:
        Networkx object
    """
    # Set parameters to import data
    plt.rcParams['figure.dpi'] = dpi
    string_api_url = "https://version12.string-db.org/api"
    output_format = "json"
    method = "network"
    data = copy.copy(self)
    conds = data.Conditions
    data = data.quant_data
    data = data[data[self.pvalue] <= pvalue]
    data = data.sort_values(self.pvalue)
    data = data.reset_index(drop=True)
    if len(data) > 2000:
        data = data.sort_values(self.pvalue)
        data = data.iloc[0:1999, :]
    data['TopTerms'] = data.index+1
    foldchange_range = data['log2(fc)']
    genes = data['gene_name'].dropna().drop_duplicates()
    genes = list(genes.astype(str))

    request_url = "/".join([string_api_url, output_format, method])

    params = {

        "identifiers": "%0d".join(genes),
        "species": species,  # species NCBI identifier
        "caller_identity": "Omicscope",
        'show_query_node_labels': 1,
        'network_type': network_type

    }
    response = requests.post(request_url, data=params)
    string = pd.read_json(response.text)
    # Filter string results based on score
    string = string[string['score'] >= score_threshold]
    if len(conds)==2:
        norm = mpl.colors.TwoSlopeNorm(vmin=min(foldchange_range),
                                    vmax=max(foldchange_range),
                                    vcenter=0)
    else:
        norm = mpl.colors.TwoSlopeNorm(vmin=min(foldchange_range),
                                    vmax=max(foldchange_range),
                                    vcenter=np.mean(foldchange_range))
    cmap = cm.RdBu_r
    m = cm.ScalarMappable(norm=norm, cmap=cmap)
    color_hex = [mcolors.to_hex(m.to_rgba(x)) for x in data['log2(fc)']]
    source = pd.DataFrame({'ID': data['gene_name'],
                           'Size': 10,
                           'type': 'protein',
                           'color': color_hex,
                           'regulation': data['log2(fc)']})
    source = source.drop_duplicates()
    source = source.dropna()
    string = string.rename(columns={'score': 'weight'})
    G = nx.from_pandas_edgelist(string,
                                source='preferredName_A',
                                target='preferredName_B',
                                edge_attr='weight',
                                create_using=nx.Graph)

    carac = source.set_index('ID')
    carac['color_edge'] = 'black'
    linewidths = 0.8
    nx.set_node_attributes(G, dict(zip(carac.index, carac.color)), name="color")
    nx.set_node_attributes(G, dict(zip(carac.index, carac.Size)), name="Size")
    # widths = nx.get_edge_attributes(G, 'score')
    G.edges(data=True)
    # Find Communities
    if modules is True:
        import networkx.algorithms.community as nx_comm

        # Find communities based on label propagation
        communities = nx_comm.louvain_communities(G)
        module = []
        terms = []
        color = []
        degree = []
        # Linking Terms, module, colors
        cmap = sns.color_palette(module_palette, len(communities), desat=.9)
        for i, g in enumerate(communities):
            subgraph = G.subgraph(g)
            degree_subgraph = subgraph.degree
            degree.extend(degree_subgraph)
            module.extend([i]*len(g))
            terms.extend(list(g))
            if len(g) > len(source)*0.05:
                color.extend([mcolors.to_hex(cmap[i])]*len(g))
            else:
                color.extend(['#666666']*len(g))
            # DataFrame
        modularity = pd.DataFrame(zip(module, terms, color, degree), columns=[
                                  'Module', 'ID', 'color_edge', 'Degree'])
        # Merging carac with modularity data
        carac = carac.merge(modularity, on='ID', suffixes=('_x', ''))
        carac = carac.set_index('ID')
        nx.set_node_attributes(G, dict(zip(carac.index, carac.Module)), name="Module")
        carac = carac.reindex(G.nodes())
        linewidths = 1.5
    pos = nx.spring_layout(G,
                           k=network_k,
                           iterations=network_iterations,
                           threshold=0.0001, weight='weight',
                           center=None,)
    carac = carac.reindex(G.nodes())
    weights = nx.get_edge_attributes(G, 'weight')
    nx.draw(G,
            pos=pos,
            node_color=carac['color'],
            node_size=carac['Size']*10,
            edgecolors=carac['color_edge'],
            linewidths=linewidths,
            alpha=0.9,
            width=[i**4 for i in list(weights.values())],
            edge_color='#666666')
    if labels is True:
        nx.draw_networkx_labels(G, pos, font_size=6)
    if save is not None:
        nx.write_graphml(G, save + 'PPinteraction.graphml', named_key_ids=True)
        if vector is True:
            plt.savefig(save + 'PPinteractions.svg', bbox_inches='tight')
        else:
            plt.savefig(save + 'PPinteractions.dpi', dpi=dpi, bbox_inches='tight')
        plt.show(block=True)
    return G