oncoPrint: visualizing TCGA Lung Adenocarcinoma Carcinoma Variants DatasetΒΆ

[2]: 
import os,sys import pandas as pd %matplotlib inline import matplotlib.pylab as plt import pickle plt.rcParams['figure.dpi'] = 100 plt.rcParams['savefig.dpi']=300 plt.rcParams['font.family']='sans serif' plt.rcParams['font.sans-serif']='Arial' plt.rcParams['pdf.fonttype']=42 # sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap/")) from PyComplexHeatmap import * use_pch_style() # or plt.style.use('default') to restore default style 

Here, we use the same dataset from R package ComplexHeatmap (https://jokergoo.github.io/ComplexHeatmap-reference/book/oncoprint.html#apply-to-cbioportal-dataset).

[3]: 
data=pd.read_csv("../data/tcga_lung_adenocarcinoma_provisional_ras_raf_mek_jnk_signalling.txt",sep='\t',index_col=0) data=data.iloc[:,:-1] data=data.stack().reset_index() data.columns=['SampleID','Genes','Variants'] data.Variants.replace({' ':np.nan},inplace=True) 
[4]: 
print(data.Variants.dropna().unique()) data.head() 
 ['MUT;' 'AMP;' 'HOMDEL;' 'MUT;AMP;'] 
[4]: 
SampleID Genes Variants
0 TCGA-05-4384-01 KRAS NaN
1 TCGA-05-4384-01 HRAS NaN
2 TCGA-05-4384-01 BRAF NaN
3 TCGA-05-4384-01 RAF1 NaN
4 TCGA-05-4384-01 MAP3K1 NaN
[5]: 
unique_variants=[] for var in data.Variants.dropna().unique(): for v1 in var.split(';'): v1=v1.strip() if v1=='': continue if v1 not in unique_variants: unique_variants.append(v1) print(unique_variants) for var in unique_variants: data[var]=data.Variants.fillna('').apply(lambda x:1 if var in x else 0) 
 ['MUT', 'AMP', 'HOMDEL'] 
[6]: 
data.head() 
[6]: 
SampleID Genes Variants MUT AMP HOMDEL
0 TCGA-05-4384-01 KRAS NaN 0 0 0
1 TCGA-05-4384-01 HRAS NaN 0 0 0
2 TCGA-05-4384-01 BRAF NaN 0 0 0
3 TCGA-05-4384-01 RAF1 NaN 0 0 0
4 TCGA-05-4384-01 MAP3K1 NaN 0 0 0

Plot oncoPrint with columns split

[7]: 
cols=['AMP','HOMDEL','MUT'] colors=["red","blue","#008000"] # calculate genes (row) mutation frequencies. row_vc=data.groupby('Genes').apply(lambda x:x.loc[:,cols].sum()) # calculate samples (cols) mutation frequencies. col_vc=data.groupby('SampleID').apply(lambda x:x.loc[:,cols].sum()) #Samples with variants at KRAS kras_samples=data.loc[(data.Genes=='KRAS') & (data.loc[:,cols].sum(axis=1)>0)].SampleID.unique().tolist() df_col_split=pd.DataFrame(index=data.SampleID.unique(),data={'KRAS':['No KRAS Var']*data.SampleID.nunique()}) df_col_split.loc[kras_samples,'KRAS']='KRAS Var' top_annotation=HeatmapAnnotation(axis=1, KRAS=anno_simple(df_col_split.KRAS,add_text=True,height=6), Col=anno_barplot(col_vc,colors=colors,legend=False,height=10,linewidth=0.1), verbose=0, label_side='left', label_kws={'horizontalalignment': 'right','visible':False}) right_annotation = HeatmapAnnotation(axis=0,orientation='right', Row=anno_barplot(row_vc,colors=colors,legend=False,height=10,linewidth=0.1), verbose=0, label_side='top', label_kws={'horizontalalignment': 'left','rotation':45,'visible':False}) plt.figure(figsize=(12,8)) op=oncoPrintPlotter(data=data,y='Genes',x='SampleID', values=cols,colors=colors,subplot_gap=3,label='Alteration', top_annotation=top_annotation,right_annotation=right_annotation, col_split=df_col_split.KRAS,col_split_order=['KRAS Var','No KRAS Var'],col_split_gap=3, legend_hpad=0,show_rownames=True,show_colnames=False) #xticklabels_kws={'labelsize':3} plt.savefig("oncoPrint.pdf",bbox_inches='tight') plt.show() 
 Starting plotting.. Starting calculating row orders.. Reordering rows.. Starting calculating col orders.. Reordering cols.. Plotting matrix.. Collecting legends.. Plotting legends.. Estimated legend width: 28.22361111111111 mm 
../_images/notebooks_oncoPrint_8_1.png

Adding more annotations to the oncoPrint heatmap

[8]: 
cols=['AMP','HOMDEL','MUT'] colors=["red","blue","#008000"] row_vc=data.groupby('Genes').apply(lambda x:x.loc[:,cols].sum()) col_vc=data.groupby('SampleID').apply(lambda x:x.loc[:,cols].sum()) row_var_freq=data.assign(IsVar=(data.loc[:,cols].sum(axis=1)>0)).groupby('Genes').IsVar.sum() * 100 / data.SampleID.nunique() top_annotation=HeatmapAnnotation(axis=1,orientation='up', Col=anno_barplot(col_vc,colors=colors,legend=False,height=15,linewidth=0.1), verbose=0, label_side='left', label_kws={'horizontalalignment': 'right','visible':False}) right_annotation = HeatmapAnnotation(axis=0,orientation='right', Row=anno_barplot(row_vc,colors=colors,legend=False,height=15,linewidth=0.1), label=anno_label(row_var_freq.apply(lambda x:str(round(x,1))+" %"), height=1,relpos=(0,0.5)), verbose=0, label_side='top', label_kws={'horizontalalignment': 'left','rotation':45,'visible':False}) plt.figure(figsize=(12,8)) op=oncoPrintPlotter(data=data,y='Genes',x='SampleID', values=cols,colors=colors,subplot_gap=3,label='Alteration', top_annotation=top_annotation,right_annotation=right_annotation, show_rownames=True,show_colnames=False,width=0.9) # remove the grid op.top_annotation.annotations[0].ax.grid(False) #remove spines for top annotation and right annotation despine(ax=op.top_annotation.annotations[0].ax,left=False, bottom=True, right=False, top=True) despine(ax=op.right_annotation.annotations[0].ax,left=True, bottom=False, right=True, top=False) plt.savefig("oncoPrint2.pdf",bbox_inches='tight') plt.show() 
 Starting plotting.. Starting calculating row orders.. Reordering rows.. Starting calculating col orders.. Reordering cols.. Plotting matrix.. Collecting legends.. Plotting legends.. Estimated legend width: 25.930555555555557 mm 
../_images/notebooks_oncoPrint_10_1.png