pySCENIC下游可视化(python版本)

## Regulon specificity scores (RSS) across predicted cell types
from math import sqrt
import numpy as np
import pandas as pd
from scipy.spatial.distance import jensenshannon
def regulon_specificity_scores(auc_mtx, cell_type_series):
    """
    Calculates the Regulon Specificty Scores (RSS). [doi: 10.1016/j.celrep.2018.10.045]
    :param auc_mtx: The dataframe with the AUC values for all cells and regulons (n_cells x n_regulons).
    :param cell_type_series: A pandas Series object with cell identifiers as index and cell type labels as values.
    :return: A pandas dataframe with the RSS values (cell type x regulon).
    """
    cell_types = list(cell_type_series.unique())
    n_types = len(cell_types)
    regulons = list(auc_mtx.columns)
    n_regulons = len(regulons)
    rss_values = np.empty(shape=(n_types, n_regulons), dtype=float)
    def rss(aucs, labels):
        # jensenshannon function provides distance which is the sqrt of the JS divergence.
        return 1.0 - jensenshannon(aucs / aucs.sum(), labels / labels.sum())
    for cidx, regulon_name in enumerate(regulons):
        for ridx, cell_type in enumerate(cell_types):
            rss_values[ridx, cidx] = rss(auc_mtx[regulon_name], (cell_type_series == cell_type).astype(int))
    return pd.DataFrame(data=rss_values, index=cell_types, columns=regulons)

# Calculate the regulon specificity scores
rss = regulon_specificity_scores(df_aucell, myeloid_cells_har.obs['cell_type'])

# RSS plot
from math import ceil, floor
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

def plot_rss(rss, cell_type, top_n=5, max_n=None, ax=None):
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(4, 4))
    if max_n is None:
        max_n = rss.shape[1]
    data = rss.T[cell_type].sort_values(ascending=False)[0:max_n]
    ax.plot(np.arange(len(data)), data, ".")
    ax.set_ylim([floor(data.min() * 100.0) / 100.0, ceil(data.max() * 100.0) / 100.0])
    ax.set_ylabel("RSS")
    ax.set_xlabel("Regulon")
    ax.set_title(cell_type)
    ax.set_xticklabels([])
    font = {"color": "red", "weight": "normal", "size": 4}
    for idx, (regulon_name, rss_val) in enumerate(
        zip(data[0:top_n].index, data[0:top_n].values)
    ):
        ax.plot([idx, idx], [rss_val, rss_val], "r.")
        ax.text(idx + (max_n / 25), rss_val, regulon_name, fontdict=font, horizontalalignment="left", verticalalignment="center")

## Select the top 5 regulons from each cell type
for cell_type in rss.index:
    plot_rss(rss, cell_type=cell_type, top_n=5)
    plt.savefig(cell_type+"-RSS-top5.pdf", dpi=300, bbox_inches = "tight")
    plt.close()

# Visualize the top regulons for each cell type
for cell_type in rss.index:
    print(rss.T[cell_type].sort_values(ascending=False).head(5))
# Plot a heatmap of the regulon specificity scores
import seaborn as sns
sns.heatmap(rss, cmap="viridis", annot=False)
plt.title("Regulon Specificity Scores")
plt.xlabel("Cell Type")
plt.ylabel("Regulon")
plt.savefig("RSS_heatmap.pdf")
plt.close()

Python版SCENIC转录因子分析(四)一文就够了-腾讯云开发者社区-腾讯云 (tencent.com)
单细胞转录组实战06: pySCENIC转录因子分析(可视化)-腾讯云开发者社区-腾讯云 (tencent.com)

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容