Skip to content

Commit cb8f088

Browse files
committed
cleaning functions and function docs
1 parent a7e3820 commit cb8f088

File tree

8 files changed

+326
-254
lines changed

8 files changed

+326
-254
lines changed

src/python/data_extraction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@
5353

5454
# Create Training and Test Set ----------------------------------
5555
feature_space = breast_cancer.iloc[:,
56-
breast_cancer.columns != 'diagnosis']
56+
breast_cancer.columns != 'diagnosis']
5757
feature_class = breast_cancer.iloc[:,
58-
breast_cancer.columns == 'diagnosis']
58+
breast_cancer.columns == 'diagnosis']
5959

6060

6161
training_set, test_set, class_set, test_class_set = train_test_split(feature_space,

src/python/exploratory_analysis.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,23 @@
1313
Exploratory Analysis
1414
"""
1515
import helper_functions as hf
16-
import matplotlib.pyplot as plt
17-
import seaborn as sns
18-
19-
breast_cancer = hf.breast_cancer
16+
from data_extraction import breast_cancer
17+
import matplotlib.pyplot as plt
18+
import seaborn as sns
2019

2120
print('''
2221
########################################
2322
## DATA FRAME SHAPE AND DTYPES ##
2423
########################################
2524
''')
2625

27-
print("Here's the dimensions of our data frame:\n",
26+
print("Here's the dimensions of our data frame:\n",
2827
breast_cancer.shape)
2928

30-
print("Here's the data types of our columns:\n",
31-
breast_cancer.dtypes)
29+
print("Here's the data types of our columns:\n",
30+
breast_cancer.dtypes)
3231

33-
print("Some more statistics for our data frame: \n",
32+
print("Some more statistics for our data frame: \n",
3433
breast_cancer.describe())
3534

3635
print('''
@@ -50,48 +49,48 @@
5049
# Scatterplot Matrix
5150
# Variables chosen from Random Forest modeling.
5251

53-
cols = ['concave_points_worst', 'concavity_mean',
54-
'perimeter_worst', 'radius_worst',
52+
cols = ['concave_points_worst', 'concavity_mean',
53+
'perimeter_worst', 'radius_worst',
5554
'area_worst', 'diagnosis']
5655

5756
sns.pairplot(breast_cancer,
5857
x_vars = cols,
5958
y_vars = cols,
60-
hue = 'diagnosis',
61-
palette = ('Red', '#875FDB'),
59+
hue = 'diagnosis',
60+
palette = ('Red', '#875FDB'),
6261
markers=["o", "D"])
6362

6463
plt.title('Scatterplot Matrix')
6564
plt.show()
6665
plt.close()
6766

6867
# Pearson Correlation Matrix
69-
corr = breast_cancer.corr(method = 'pearson') # Correlation Matrix
68+
corr = breast_cancer.corr(method = 'pearson') # Correlation Matrix
7069
f, ax = plt.subplots(figsize=(11, 9))
7170

7271
# Generate a custom diverging colormap
73-
cmap = sns.diverging_palette(10, 275, as_cmap=True)
72+
cmap = sns.diverging_palette(10, 275, as_cmap=True)
7473

7574
# Draw the heatmap with square cells (no mask is applied)
7675
sns.heatmap(corr,
7776
cmap=cmap,
78-
square=True,
79-
xticklabels=True,
77+
square=True,
78+
xticklabels=True,
8079
yticklabels=True,
81-
linewidths=.5,
82-
cbar_kws={"shrink": .5},
80+
linewidths=.5,
81+
cbar_kws={"shrink": .5},
8382
ax=ax)
8483

8584
plt.title("Pearson Correlation Matrix")
8685
plt.yticks(rotation = 0)
87-
plt.xticks(rotation = 270)
86+
plt.xticks(rotation = 270)
8887
plt.show()
8988
plt.close()
9089

9190
# BoxPlot
9291
hf.plot_box_plot(breast_cancer, 'Pre-Processed', (-.05, 50))
9392

94-
# Normalizing data
93+
# Normalizing data
9594
breast_cancer_norm = hf.normalize_data_frame(breast_cancer)
9695

9796
# Visuals for the normalized data, to show how it differs from the pre-processed data
@@ -103,4 +102,4 @@
103102

104103
print(breast_cancer_norm.describe())
105104

106-
hf.plot_box_plot(breast_cancer_norm, 'Transformed', (-.05, 1.05))
105+
hf.plot_box_plot(breast_cancer_norm, 'Transformed', (-.05, 1.05))

0 commit comments

Comments
 (0)