oncoPrint: visualizing TCGA Lung Adenocarcinoma Carcinoma Variants Dataset
[2]:
import os,sys import pandas as pd %matplotlib inline import matplotlib.pylab as plt import pickle plt.rcParams['figure.dpi'] = 100 plt.rcParams['savefig.dpi']=300 plt.rcParams['font.family']='sans serif' plt.rcParams['font.sans-serif']='Arial' plt.rcParams['pdf.fonttype']=42 # sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap/")) from PyComplexHeatmap import * use_pch_style() # or plt.style.use('default') to restore default style
Here, we use the same dataset as the R package ComplexHeatmap (https://jokergoo.github.io/ComplexHeatmap-reference/book/oncoprint.html#apply-to-cbioportal-dataset).
[3]:
# Imported explicitly so this cell does not depend on `np` leaking in through
# a star import elsewhere in the notebook.
import numpy as np

# Read the tab-separated table, using its first column as the index.
data = pd.read_csv(
    "../data/tcga_lung_adenocarcinoma_provisional_ras_raf_mek_jnk_signalling.txt",
    sep='\t',
    index_col=0,
)
# Drop the last column of the file.
data = data.iloc[:, :-1]

# Reshape wide -> long: one row per (index value, column name) pair.
data = data.stack().reset_index()
data.columns = ['SampleID', 'Genes', 'Variants']

# Cells holding a single space are treated as missing. Assign the result back
# instead of calling `replace(..., inplace=True)` on the column: a chained
# in-place call does not update `data` under pandas Copy-on-Write.
data['Variants'] = data['Variants'].replace({' ': np.nan})
[4]:
# Show the distinct non-missing variant strings, then preview the first rows.
distinct_variant_strings = data['Variants'].dropna().unique()
print(distinct_variant_strings)
data.head()
['MUT;' 'AMP;' 'HOMDEL;' 'MUT;AMP;']
[4]:
 | SampleID | Genes | Variants |
---|---|---|---|
0 | TCGA-05-4384-01 | KRAS | NaN |
1 | TCGA-05-4384-01 | HRAS | NaN |
2 | TCGA-05-4384-01 | BRAF | NaN |
3 | TCGA-05-4384-01 | RAF1 | NaN |
4 | TCGA-05-4384-01 | MAP3K1 | NaN |
[5]:
def _split_variants(value):
    """Return the stripped, non-empty tokens of a ';'-separated variant string."""
    return [token.strip() for token in value.split(';') if token.strip()]


# Distinct variant names in order of first appearance; dict.fromkeys
# de-duplicates while preserving that order.
unique_variants = list(dict.fromkeys(
    token
    for var in data.Variants.dropna().unique()
    for token in _split_variants(var)
))
print(unique_variants)

# Tokenise every row once (missing values become an empty list) and add one
# 0/1 indicator column per variant name. Membership is tested on whole tokens
# rather than substrings of the raw string, so a variant whose name is
# contained in another variant's name cannot produce a false positive.
variant_tokens = data.Variants.fillna('').apply(_split_variants)
for var in unique_variants:
    data[var] = variant_tokens.apply(lambda tokens, name=var: int(name in tokens))
['MUT', 'AMP', 'HOMDEL']
[6]:
# Preview the first five rows of the table (5 is the default for head()).
data.head(n=5)
[6]:
 | SampleID | Genes | Variants | MUT | AMP | HOMDEL |
---|---|---|---|---|---|---|
0 | TCGA-05-4384-01 | KRAS | NaN | 0 | 0 | 0 |
1 | TCGA-05-4384-01 | HRAS | NaN | 0 | 0 | 0 |
2 | TCGA-05-4384-01 | BRAF | NaN | 0 | 0 | 0 |
3 | TCGA-05-4384-01 | RAF1 | NaN | 0 | 0 | 0 |
4 | TCGA-05-4384-01 | MAP3K1 | NaN | 0 | 0 | 0 |
Plot oncoPrint with columns split
[7]:
# Variant indicator columns to draw and the colour used for each, in matching order.
cols = ['AMP', 'HOMDEL', 'MUT']
colors = ["red", "blue", "#008000"]

# Per-gene (row) variant counts. Selecting the columns on the groupby and
# summing gives the same table as apply(lambda x: x.loc[:, cols].sum()), but
# avoids a Python-level apply over the full frame, which also triggers a
# DeprecationWarning on recent pandas.
row_vc = data.groupby('Genes')[cols].sum()
# Per-sample (column) variant counts.
col_vc = data.groupby('SampleID')[cols].sum()

# Samples with at least one variant at KRAS.
has_variant = data.loc[:, cols].sum(axis=1) > 0
kras_samples = data.loc[(data.Genes == 'KRAS') & has_variant].SampleID.unique().tolist()

# One label per sample: 'KRAS Var' if the sample is in kras_samples,
# otherwise 'No KRAS Var'. Used both as an annotation and to split columns.
df_col_split = pd.DataFrame(
    index=data.SampleID.unique(),
    data={'KRAS': ['No KRAS Var'] * data.SampleID.nunique()},
)
df_col_split.loc[kras_samples, 'KRAS'] = 'KRAS Var'

# Top annotation: the KRAS group label and a bar plot of per-sample counts.
top_annotation = HeatmapAnnotation(
    axis=1,
    KRAS=anno_simple(df_col_split.KRAS, add_text=True, height=6),
    Col=anno_barplot(col_vc, colors=colors, legend=False, height=10, linewidth=0.1),
    verbose=0,
    label_side='left',
    label_kws={'horizontalalignment': 'right', 'visible': False},
)
# Right annotation: a bar plot of per-gene counts.
right_annotation = HeatmapAnnotation(
    axis=0,
    orientation='right',
    Row=anno_barplot(row_vc, colors=colors, legend=False, height=10, linewidth=0.1),
    verbose=0,
    label_side='top',
    label_kws={'horizontalalignment': 'left', 'rotation': 45, 'visible': False},
)

plt.figure(figsize=(12, 8))
op = oncoPrintPlotter(
    data=data, y='Genes', x='SampleID',
    values=cols, colors=colors, subplot_gap=3, label='Alteration',
    top_annotation=top_annotation, right_annotation=right_annotation,
    col_split=df_col_split.KRAS, col_split_order=['KRAS Var', 'No KRAS Var'], col_split_gap=3,
    legend_hpad=0, show_rownames=True, show_colnames=False,
)  # xticklabels_kws={'labelsize':3}
plt.savefig("oncoPrint.pdf", bbox_inches='tight')
plt.show()
Starting plotting.. Starting calculating row orders.. Reordering rows.. Starting calculating col orders.. Reordering cols.. Plotting matrix.. Collecting legends.. Plotting legends.. Estimated legend width: 28.22361111111111 mm

Adding more annotations to the oncoPrint heatmap
[8]:
# Variant indicator columns to draw and the colour used for each, in matching order.
cols = ['AMP', 'HOMDEL', 'MUT']
colors = ["red", "blue", "#008000"]

# Per-gene and per-sample variant counts. Selecting the columns on the groupby
# and summing gives the same tables as apply(lambda x: x.loc[:, cols].sum()),
# but avoids a Python-level apply over the full frame, which also triggers a
# DeprecationWarning on recent pandas.
row_vc = data.groupby('Genes')[cols].sum()
col_vc = data.groupby('SampleID')[cols].sum()

# For each gene: the number of rows with at least one variant, as a
# percentage of the number of distinct samples.
is_variant = data.loc[:, cols].sum(axis=1) > 0
row_var_freq = (
    data.assign(IsVar=is_variant).groupby('Genes').IsVar.sum()
    * 100 / data.SampleID.nunique()
)

# Top annotation: a bar plot of per-sample counts.
top_annotation = HeatmapAnnotation(
    axis=1,
    orientation='up',
    Col=anno_barplot(col_vc, colors=colors, legend=False, height=15, linewidth=0.1),
    verbose=0,
    label_side='left',
    label_kws={'horizontalalignment': 'right', 'visible': False},
)
# Right annotation: a bar plot of per-gene counts plus a text label showing
# each gene's frequency rounded to one decimal place.
right_annotation = HeatmapAnnotation(
    axis=0,
    orientation='right',
    Row=anno_barplot(row_vc, colors=colors, legend=False, height=15, linewidth=0.1),
    label=anno_label(
        row_var_freq.apply(lambda x: str(round(x, 1)) + " %"),
        height=1,
        relpos=(0, 0.5),
    ),
    verbose=0,
    label_side='top',
    label_kws={'horizontalalignment': 'left', 'rotation': 45, 'visible': False},
)

plt.figure(figsize=(12, 8))
op = oncoPrintPlotter(
    data=data, y='Genes', x='SampleID',
    values=cols, colors=colors, subplot_gap=3, label='Alteration',
    top_annotation=top_annotation, right_annotation=right_annotation,
    show_rownames=True, show_colnames=False, width=0.9,
)

# Remove the grid from the first top annotation's axes.
op.top_annotation.annotations[0].ax.grid(False)
# Remove spines for the top annotation and the right annotation.
despine(ax=op.top_annotation.annotations[0].ax, left=False, bottom=True, right=False, top=True)
despine(ax=op.right_annotation.annotations[0].ax, left=True, bottom=False, right=True, top=False)

plt.savefig("oncoPrint2.pdf", bbox_inches='tight')
plt.show()
Starting plotting.. Starting calculating row orders.. Reordering rows.. Starting calculating col orders.. Reordering cols.. Plotting matrix.. Collecting legends.. Plotting legends.. Estimated legend width: 25.930555555555557 mm
