@@ -160,7 +160,6 @@ def univariate_numerical_statistic(dataframe, num_features):
160160 Args:
161161 dataframe (dataframe): cdc dataset
162162 """
163- # call encode_age_category function to encode the values
164163 cat_col = list (num_features )
165164 # create a list of univariate numerical features as num_col
166165 print (dataframe .describe ()[1 :][cat_col ])
@@ -172,7 +171,6 @@ def bivariate_categorical_graph(dataframe, cat_features):
172171 dataframe (dataframe): cdc dataset
173172 cat_features (list): list of all categorical features
174173 """
175- # encode_age_category(dataframe)
176174 i = 1
177175 # create an empty canvas of size 25*15
178176 plt .figure (figsize = (25 ,15 ))
@@ -422,20 +420,20 @@ def main():
422420 """
423421 df_cdc = load_dataset ('G:\\ My Drive\\ CDC_ML\\ 2-Working\\ dataset\\ heart_2020_cleaned.csv' )
424422 # unique_values(df_cdc)
425- # print(numerical_features(df_cdc))
426- # print(categorical_features(df_cdc))
427- # univariate_categorical_graph(df_cdc, categorical_features(df_cdc))
428- # univariate_numerical_graph(df_cdc, numerical_features(df_cdc))
429- # univariate_numerical_statistic(df_cdc, numerical_features(df_cdc))
430- # bivariate_categorical_graph(df_cdc, categorical_features(df_cdc))
431- # bivariate_numerical_graph(df_cdc, numerical_features(df_cdc), 'DiffWalking')
432-
433- df_cdc_encode = preprocessing_encode_columns (df_cdc )
434- X_train , y_train , X_test , y_test = preprocessing_splitting (df_cdc_encode , 'HeartDisease' )
435- X_train_scale , X_test_scale = preprocessing_scaling (X_train , X_test )
423+
424+ # print(numerical_features (df_cdc))
425+ # print(categorical_features (df_cdc))
426+ # univariate_categorical_graph (df_cdc, categorical_features(df_cdc))
427+ # univariate_numerical_graph (df_cdc, numerical_features(df_cdc))
428+ # univariate_numerical_statistic (df_cdc, numerical_features(df_cdc))
429+ # bivariate_categorical_graph (df_cdc, categorical_features(df_cdc))
430+ # bivariate_numerical_graph (df_cdc, numerical_features(df_cdc), 'DiffWalking')
436431
437- X_train_blnc , y_train_blnc = balance_data ('smote' , X_train_scale , y_train )
438- cv = k_fold_cross_validation (10 )
432+ df_cdc_encode = preprocessing_encode_columns (df_cdc )
433+ X_train , y_train , X_test , y_test = preprocessing_splitting (df_cdc_encode , 'HeartDisease' )
434+ X_train_scale , X_test_scale = preprocessing_scaling (X_train , X_test )
435+ X_train_blnc , y_train_blnc = balance_data ('smote' , X_train_scale , y_train )
436+ cv = k_fold_cross_validation (10 )
439437
440438 params = {
441439 "criterion" : ["gini" , "entropy" , "log_loss" ],
0 commit comments