Visualizing categorical variables using oncoPrint

[1]: 
import os,sys # sys.path.append(os.path.expanduser("~/Projects/Github/PyComplexHeatmap/")) from PyComplexHeatmap import * use_pch_style() # or plt.style.use('default') to restore default style import pandas as pd import random 
[2]: 
# Create toy dataset
# Seed the RNG so the toy data (and therefore the figure below) is identical
# after every "Restart Kernel -> Run All".
random.seed(42)

samples = [f'Sample_{i}' for i in range(1, 11)]
genes = [f'Gene_{i}' for i in range(1, 11)]

# One record per (sample, gene) pair. The three indicator columns are
# mutually exclusive: exactly one of amp / neut / del is 1 in every row.
alts_lol = list()
for sample in samples:
    for gene in genes:
        amp_value = random.randint(0, 1)
        # A deletion is only drawn when the gene was not amplified.
        del_value = random.randint(0, 1) if amp_value == 0 else 0
        # Neutral means neither amplified nor deleted.
        neut_value = 1 if (amp_value == 0 and del_value == 0) else 0
        alts_lol.append([sample, gene, amp_value, neut_value, del_value])
alts_df = pd.DataFrame(alts_lol, columns=['sample', 'gene', 'amp', 'neut', 'del'])

# Sanity checks on the generated table.
assert len(alts_df) == len(samples) * len(genes)
assert (alts_df[['amp', 'neut', 'del']].sum(axis=1) == 1).all()


def _make_annotation(sample_ids, column, draw):
    """Build a per-sample annotation frame.

    Parameters
    ----------
    sample_ids : list of str
        Sample names; one row is produced per entry, in this order.
    column : str
        Name of the annotation column.
    draw : callable
        Zero-argument callable invoked once per sample to produce its value.

    Returns
    -------
    pandas.DataFrame
        Columns ``['sample', column]``, indexed by sample name. The 'sample'
        column is kept alongside the index (``drop=False``).
    """
    frame = pd.DataFrame([[sid, draw()] for sid in sample_ids], columns=['sample', column])
    return frame.set_index('sample', drop=False)


# Prepare column annotations
annot_1_df = _make_annotation(samples, 'annot1', lambda: random.randint(0, 100))
annot_2_df = _make_annotation(samples, 'annot2', lambda: random.randint(500, 5000))
annot_3_df = _make_annotation(samples, 'patient', lambda: 'patient' + str(random.randint(1, 5)))
[3]: 
# Long-format alteration table: one row per (sample, gene) pair with
# 0/1 indicator columns amp, neut and del.
alts_df
[3]: 
sample gene amp neut del
0 Sample_1 Gene_1 1 0 0
1 Sample_1 Gene_2 1 0 0
2 Sample_1 Gene_3 0 1 0
3 Sample_1 Gene_4 0 0 1
4 Sample_1 Gene_5 1 0 0
... ... ... ... ... ...
95 Sample_10 Gene_6 1 0 0
96 Sample_10 Gene_7 1 0 0
97 Sample_10 Gene_8 1 0 0
98 Sample_10 Gene_9 0 1 0
99 Sample_10 Gene_10 1 0 0

100 rows × 5 columns

[4]: 
# Patient label per sample ('patient1' .. 'patient5'), indexed by sample name.
annot_3_df
[4]: 
sample patient
sample
Sample_1 Sample_1 patient4
Sample_2 Sample_2 patient3
Sample_3 Sample_3 patient5
Sample_4 Sample_4 patient1
Sample_5 Sample_5 patient1
Sample_6 Sample_6 patient3
Sample_7 Sample_7 patient3
Sample_8 Sample_8 patient4
Sample_9 Sample_9 patient3
Sample_10 Sample_10 patient2
[5]: 
# Random integer annotation per sample (drawn from 0..100), indexed by sample name.
annot_1_df
[5]: 
sample annot1
sample
Sample_1 Sample_1 31
Sample_2 Sample_2 5
Sample_3 Sample_3 26
Sample_4 Sample_4 96
Sample_5 Sample_5 17
Sample_6 Sample_6 32
Sample_7 Sample_7 4
Sample_8 Sample_8 96
Sample_9 Sample_9 3
Sample_10 Sample_10 92
[6]: 
# Random integer annotation per sample (drawn from 500..5000), indexed by sample name.
annot_2_df
[6]: 
sample annot2
sample
Sample_1 Sample_1 4870
Sample_2 Sample_2 4572
Sample_3 Sample_3 2378
Sample_4 Sample_4 4970
Sample_5 Sample_5 4662
Sample_6 Sample_6 1196
Sample_7 Sample_7 3065
Sample_8 Sample_8 2553
Sample_9 Sample_9 4390
Sample_10 Sample_10 4068
[7]: 
# Per-sample patient labels; reused for the text label, the colour strip and
# the column split below.
patient_labels = annot_3_df['patient']

# Column annotations stacked above the oncoPrint (keyword order is kept as-is).
top_annotation = HeatmapAnnotation(
    label=anno_label(patient_labels, merge=True, rotation=45),
    A3=anno_simple(patient_labels),
    A1=anno_barplot(annot_1_df['annot1'], height=5),
    A2=anno_barplot(annot_2_df['annot2']),
)

# Plot oncoprint
plt.figure(figsize=(4, 6))
op = oncoPrintPlotter(
    data=alts_df,
    y='gene',
    x='sample',
    values=['amp', 'neut', 'del'],
    show_rownames=True,
    show_colnames=True,
    colors=['red', 'white', 'green'],  # one colour per entry in `values`
    top_annotation=top_annotation,
    col_split=patient_labels,
    col_split_gap=0.2,
    width=0.8,  # width controls the width of the bar in each cell
)
# there are other plot_kws, such as 'align'

# Clean up every annotation axis: switch the grid off, then adjust the spines.
for ax in op.top_annotation.axes.flatten():
    ax.grid(False)
    # remove spines for top annotation and right annotation
    # NOTE(review): both calls are kept in their original order; whether the
    # second call fully overrides the first depends on despine's implementation.
    despine(ax=ax, left=False, bottom=True, right=False, top=True)
    despine(ax=ax, left=True, bottom=False, right=True, top=False)
 Starting plotting.. Starting calculating row orders.. Reordering rows.. Starting calculating col orders.. Reordering cols.. Plotting matrix.. Starting plotting HeatmapAnnotations Collecting legends.. Collecting annotation legends.. Plotting legends.. Estimated legend width: 21.344444444444445 mm 
../_images/notebooks_oncoPrint2_7_1.png