Skip to content

Commit 77bf35c

Browse files
committed
Update CDC_ML_v1.py
1 parent 1541186 commit 77bf35c

File tree

1 file changed

+13
-15
lines changed

1 file changed

+13
-15
lines changed

code/CDC_ML_v1.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ def univariate_numerical_statistic(dataframe, num_features):
160160
Args:
161161
dataframe (dataframe): cdc dataset
162162
"""
163-
# call encode_age_category function to encode the values
164163
cat_col = list(num_features)
165164
# create a list of the univariate numerical features (held in cat_col)
166165
print(dataframe.describe()[1:][cat_col])
@@ -172,7 +171,6 @@ def bivariate_categorical_graph(dataframe, cat_features):
172171
dataframe (dataframe): cdc dataset
173172
cat_features (list): list of all categorical features
174173
"""
175-
# encode_age_category(dataframe)
176174
i = 1
177175
# create an empty canvas of size 25*15
178176
plt.figure(figsize = (25,15))
@@ -422,20 +420,20 @@ def main():
422420
"""
423421
df_cdc = load_dataset('G:\\My Drive\\CDC_ML\\2-Working\\dataset\\heart_2020_cleaned.csv')
424422
# unique_values(df_cdc)
425-
# print(numerical_features(df_cdc))
426-
# print(categorical_features(df_cdc))
427-
# univariate_categorical_graph(df_cdc, categorical_features(df_cdc))
428-
# univariate_numerical_graph(df_cdc, numerical_features(df_cdc))
429-
# univariate_numerical_statistic(df_cdc, numerical_features(df_cdc))
430-
# bivariate_categorical_graph(df_cdc, categorical_features(df_cdc))
431-
# bivariate_numerical_graph(df_cdc, numerical_features(df_cdc), 'DiffWalking')
432-
433-
df_cdc_encode = preprocessing_encode_columns(df_cdc)
434-
X_train, y_train, X_test, y_test = preprocessing_splitting(df_cdc_encode, 'HeartDisease')
435-
X_train_scale, X_test_scale = preprocessing_scaling(X_train, X_test)
423+
424+
# print(numerical_features (df_cdc))
425+
# print(categorical_features (df_cdc))
426+
# univariate_categorical_graph (df_cdc, categorical_features(df_cdc))
427+
# univariate_numerical_graph (df_cdc, numerical_features(df_cdc))
428+
# univariate_numerical_statistic (df_cdc, numerical_features(df_cdc))
429+
# bivariate_categorical_graph (df_cdc, categorical_features(df_cdc))
430+
# bivariate_numerical_graph (df_cdc, numerical_features(df_cdc), 'DiffWalking')
436431

437-
X_train_blnc, y_train_blnc = balance_data('smote', X_train_scale, y_train)
438-
cv = k_fold_cross_validation(10)
432+
df_cdc_encode = preprocessing_encode_columns (df_cdc)
433+
X_train, y_train, X_test, y_test = preprocessing_splitting (df_cdc_encode, 'HeartDisease')
434+
X_train_scale, X_test_scale = preprocessing_scaling (X_train, X_test)
435+
X_train_blnc, y_train_blnc = balance_data ('smote', X_train_scale, y_train)
436+
cv = k_fold_cross_validation (10)
439437

440438
params= {
441439
"criterion" : ["gini", "entropy", "log_loss"],

0 commit comments

Comments
 (0)