11import sys
2- sys .path .append ("E :/" )
2+ sys .path .append ("G :/" )
33
44import pandas as pd
55import os
88import classification_utils as cutils
99import seaborn as sns
1010
11- dir = 'E :/'
11+ dir = 'G :/'
1212titanic_train = pd .read_csv (os .path .join (dir , 'train.csv' ))
1313print (titanic_train .shape )
1414print (titanic_train .info ())
@@ -64,6 +64,11 @@ def convert_familysize(size):
6464# one-hot encode the categorical features
6565titanic = utils .ohe (titanic , cat_features )
6666
67+ # standardize every column (zero mean, unit variance) before the SVM is fit
68+ scaler = preprocessing .StandardScaler ()
69+ tmp = scaler .fit_transform (titanic )
70+ titanic = pd .DataFrame (tmp , columns = titanic .columns )
71+
6772titanic_train1 = titanic [:titanic_train .shape [0 ]]
6873y_train = titanic_train ['Survived' ]
6974
@@ -74,11 +79,11 @@ def convert_familysize(size):
7479X_train = utils .select_features (rf_final_estimator , titanic_train1 , threshold = 'median' )
7580
7681kernel_svm_estimator = svm .SVC (kernel = 'rbf' )
77- kernel_svm_grid = {'gamma' :[0.001 , 0.01 ], 'C' :[100 ] }
82+ kernel_svm_grid = {'gamma' :[0.001 , 0.01 , 0.05 , 0.1 , 1 ], 'C' :[10 , 100 ] }
7883svm_final_estimator = cutils .grid_search_best_model (kernel_svm_estimator , kernel_svm_grid , X_train , y_train )
7984
8085titanic_test1 = titanic [titanic_train .shape [0 ]:]
81- X_test = utils .select_features (rf_final_estimator , titanic_test1 , threshold = 'mean ' )
86+ X_test = utils .select_features (rf_final_estimator , titanic_test1 , threshold = 'median ' )
8287
8388titanic_test ['Survived' ] = svm_final_estimator .predict (X_test )
84- titanic_test .to_csv (os .path .join (dir , 'submission.csv' ), columns = ['PassengerId' , 'Survived' ], index = False )
89+ titanic_test .to_csv (os .path .join (dir , 'submission.csv' ), columns = ['PassengerId' , 'Survived' ], index = False )
0 commit comments