1313Exploratory Analysis
1414"""
1515import helper_functions as hf
16- import matplotlib .pyplot as plt
17- import seaborn as sns
18-
19- breast_cancer = hf .breast_cancer
16+ from data_extraction import breast_cancer
17+ import matplotlib .pyplot as plt
18+ import seaborn as sns
2019
2120print ('''
2221########################################
2322## DATA FRAME SHAPE AND DTYPES ##
2423########################################
2524''' )
2625
27- print ("Here's the dimensions of our data frame:\n " ,
26+ print ("Here's the dimensions of our data frame:\n " ,
2827breast_cancer .shape )
2928
30- print ("Here's the data types of our columns:\n " ,
31- breast_cancer .dtypes )
29+ print ("Here's the data types of our columns:\n " ,
30+ breast_cancer .dtypes )
3231
33- print ("Some more statistics for our data frame: \n " ,
32+ print ("Some more statistics for our data frame: \n " ,
3433breast_cancer .describe ())
3534
3635print ('''
5049# Scatterplot Matrix
5150# Variables chosen from Random Forest modeling.
5251
53- cols = ['concave_points_worst' , 'concavity_mean' ,
54- 'perimeter_worst' , 'radius_worst' ,
52+ cols = ['concave_points_worst' , 'concavity_mean' ,
53+ 'perimeter_worst' , 'radius_worst' ,
5554'area_worst' , 'diagnosis' ]
5655
5756sns .pairplot (breast_cancer ,
5857x_vars = cols ,
5958y_vars = cols ,
60- hue = 'diagnosis' ,
61- palette = ('Red' , '#875FDB' ),
59+ hue = 'diagnosis' ,
60+ palette = ('Red' , '#875FDB' ),
6261markers = ["o" , "D" ])
6362
6463plt .title ('Scatterplot Matrix' )
6564plt .show ()
6665plt .close ()
6766
6867# Pearson Correlation Matrix
69- corr = breast_cancer .corr (method = 'pearson' ) # Correlation Matrix
68+ corr = breast_cancer .corr (method = 'pearson' ) # Correlation Matrix
7069f , ax = plt .subplots (figsize = (11 , 9 ))
7170
7271# Generate a custom diverging colormap
73- cmap = sns .diverging_palette (10 , 275 , as_cmap = True )
72+ cmap = sns .diverging_palette (10 , 275 , as_cmap = True )
7473
7574# Draw the heatmap with square cells (no mask is applied, so the full matrix is shown)
7675sns .heatmap (corr ,
7776cmap = cmap ,
78- square = True ,
79- xticklabels = True ,
77+ square = True ,
78+ xticklabels = True ,
8079yticklabels = True ,
81- linewidths = .5 ,
82- cbar_kws = {"shrink" : .5 },
80+ linewidths = .5 ,
81+ cbar_kws = {"shrink" : .5 },
8382ax = ax )
8483
8584plt .title ("Pearson Correlation Matrix" )
8685plt .yticks (rotation = 0 )
87- plt .xticks (rotation = 270 )
86+ plt .xticks (rotation = 270 )
8887plt .show ()
8988plt .close ()
9089
9190# BoxPlot
9291hf .plot_box_plot (breast_cancer , 'Pre-Processed' , (- .05 , 50 ))
9392
94- # Normalizing data
93+ # Normalizing data
9594breast_cancer_norm = hf .normalize_data_frame (breast_cancer )
9695
9796# Visuals relating to normalized data to show significant difference
103102
104103print (breast_cancer_norm .describe ())
105104
106- hf .plot_box_plot (breast_cancer_norm , 'Transformed' , (- .05 , 1.05 ))
105+ hf .plot_box_plot (breast_cancer_norm , 'Transformed' , (- .05 , 1.05 ))
0 commit comments