分享一套用Pyecharts绘制RNA Seq火山图的代码

一直以来都比较习惯使用matplotlib绘制各种图像,但是在RNA Seq数据的展示中,静态图像实在无法将全部信息有效地展示出来。之前偶然间遇到了pyecharts,实在惊艳于ECharts图表的漂亮,因此这段时间再次需要绘制火山图的时候,就整理了一份绘制动态(可交互)火山图的代码。

/image/Snipaste_2023-09-27_17-17-12.png

1、首先是引包

1
2
3
4
5
import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Scatter
import numpy as np
from pyecharts.commons.utils import JsCode

2、随后读入数据(DESeq2的输出文件),并做一些初始化计算(对p值做-log10转换)

1
2
3
4
5
# Load the differential-expression table. The columns 'padj' and 'pval' are
# read below, so the CSV must provide both.
# NOTE(review): stock DESeq2 results name the raw p-value column 'pvalue' --
# confirm that this file really uses 'pval'.
data = pd.read_csv('DESeq2_output_file.csv')

# -log10 transform of the p-values, computed on whole columns at once
# (np.log10 is vectorised, so no per-element apply/lambda is needed).
data['-log10padj'] = -np.log10(data['padj'])
data['-log10pval'] = -np.log10(data['pval'])

# Names of the columns plotted on the x and y axes of the volcano plot.
x_axis_name = 'log2FoldChange'
y_axis_name = '-log10pval'

3、上调、下调、非显著的点需要标记不同的颜色,需要写一个函数提前计算好标记点的颜色

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
def set_color(data, y_threshold=-np.log10(0.05), x_left_threshold=-1, x_right_threshold=1,
              *, x_col=None, y_col=None):
    """Return the marker colour for one row of the results table.

    Args:
        data: A single row (anything indexable by column name, e.g. a pandas
            Series or a dict).
        y_threshold: Minimum y value for a point to count as significant.
        x_left_threshold: Points with x strictly below this are "blue".
        x_right_threshold: Points with x strictly above this are "red".
        x_col: Column holding the x value; defaults to the module-level
            ``x_axis_name``.
        y_col: Column holding the y value; defaults to the module-level
            ``y_axis_name``.

    Returns:
        "blue" for significant points left of ``x_left_threshold``, "red" for
        significant points right of ``x_right_threshold``, otherwise "gray".
    """
    x = data[x_axis_name if x_col is None else x_col]
    y = data[y_axis_name if y_col is None else y_col]

    # Written as "not >=" rather than "<" on purpose: NaN compares False with
    # everything, so a row whose y value is missing must land in "gray"
    # instead of slipping through to the blue/red branches.
    if not (y >= y_threshold):
        return "gray"
    if x < x_left_threshold:
        return "blue"
    if x > x_right_threshold:
        return "red"
    return "gray"

并且需要调用该函数计算好color

1
# Classify every row (axis=1 hands each row to set_color) and keep the
# resulting colour name in a new 'color' column.
data['color'] = data.apply(set_color, axis=1)

4、准备一个数据转换的接口,把pandas DataFrame转换为pyecharts可以读取的数据

1
2
3
4
5
6
7
8
def get_ydata(data, color):
    """Build the per-point value rows for all rows whose 'color' equals *color*.

    Each returned row is a list ``[y, x, Name, Description, UniProtAC]``, where
    y and x are read from the columns named by the module-level
    ``y_axis_name`` and ``x_axis_name``.
    """
    # Filter once, then pull each needed column out of the filtered frame.
    selected = data[data['color'] == color]
    wanted_columns = (y_axis_name, x_axis_name, 'Name', 'Description', 'UniProtAC')
    column_values = [selected[column].tolist() for column in wanted_columns]
    # Transpose the column lists into one list per point.
    return [list(point) for point in zip(*column_values)]

5、最后一步,调用pyecharts绘制火山图

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Build the interactive volcano plot and write it to an HTML file.
#
# Each plotted point carries the value array
#     [x, y, log2FC, Name, Description, UniProtAC]
# i.e. the x coordinate followed by one row from get_ydata (this is the
# layout the value[...] indices in the formatters below rely on).

# Label shown next to highlighted points: the gene name (value[3]).
gene_label_js = "function(params){return params.value[3]}"

# Tooltip text. value[1] is the plotted y value, so it is labelled with
# y_axis_name instead of a fixed "FDR" (the y axis is not the adjusted
# p-value unless y_axis_name says so).
# y_axis_name is spliced into a single-quoted JavaScript string, so it must
# not contain a single quote.
tooltip_js = (
    "function(params){return params.value[3] + '-'+ params.value[5] +'<br/> "
    + y_axis_name
    + ": ' + params.value[1]+'<br/> log2FC: ' + params.value[2]+'<br/>' + params.value[4];}"
)

# (colour / series name, marker size, label options) for each point class.
# Non-significant points are small and unlabelled; up- and down-regulated
# points are larger and labelled on the outer side.
series_specs = [
    ("gray", 2, opts.LabelOpts(is_show=False)),
    (
        "red",
        5,
        opts.LabelOpts(is_show=True, formatter=JsCode(gene_label_js), position="right"),
    ),
    (
        "blue",
        5,
        opts.LabelOpts(is_show=True, formatter=JsCode(gene_label_js), position="left"),
    ),
]

volcano = Scatter(init_opts=opts.InitOpts(width="100%", height="720px"))

for series_color, point_size, point_label_opts in series_specs:
    # The x data is set immediately before each add_yaxis call so that every
    # series is paired with the x values of its own colour class.
    volcano.add_xaxis(
        xaxis_data=data[data['color'] == series_color][x_axis_name].tolist(),
    )
    volcano.add_yaxis(
        series_name=series_color,
        color=series_color,
        y_axis=get_ydata(data, series_color),
        symbol_size=point_size,
        label_opts=point_label_opts,
    )

volcano.set_global_opts(
    tooltip_opts=opts.TooltipOpts(
        is_show=True,
        formatter=JsCode(tooltip_js),
    ),
    title_opts=opts.TitleOpts(
        title="Volcano Plot Title",
        pos_left="center",
    ),
    legend_opts=opts.LegendOpts(is_show=False),
    yaxis_opts=opts.AxisOpts(
        name=y_axis_name,
        type_='value',
    ),
    # The x axis is fixed to [-8, 8]; points beyond that range are outside
    # the visible area.
    xaxis_opts=opts.AxisOpts(
        name=x_axis_name,
        name_location='middle',
        type_='value',
        name_gap=20,
        max_=8,
        min_=-8,
    ),
    # One vertical and one horizontal zoom slider, both starting fully open.
    datazoom_opts=[
        opts.DataZoomOpts(
            pos_left='5px',
            orient='vertical',
            range_start=0,
            range_end=100,
        ),
        opts.DataZoomOpts(
            pos_bottom='5px',
            range_start=0,
            range_end=100,
        ),
    ],
    toolbox_opts=opts.ToolboxOpts(is_show=True),
)

# Dashed guide lines at the cut-offs: |log2FC| = log2(2) = 1 and
# -log10(p) = -log10(0.05).
volcano.set_series_opts(
    markline_opts=opts.MarkLineOpts(
        data=[
            {"xAxis": -np.log2(2)},
            {"xAxis": np.log2(2)},
            {"yAxis": -np.log10(0.05)},
        ],
        is_silent=True,
        symbol_size=0,
        linestyle_opts=opts.LineStyleOpts(color="gray", type_='dashed'),
    )
)

volcano.render("volcano_plot_output.html")