Skip to content

Commit f75d93c

Browse files
author
Algorithmica
authored
Add files via upload
1 parent 806feb4 commit f75d93c

File tree

1 file changed

+10 −5 lines changed

1 file changed

+10
-5
lines changed

2019-may/7.classification(kaggle)/titanic_solution4(kernel svm with feature engg).py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import sys
2-
sys.path.append("E:/")
2+
sys.path.append("G:/")
33

44
import pandas as pd
55
import os
@@ -8,7 +8,7 @@
88
import classification_utils as cutils
99
import seaborn as sns
1010

11-
dir = 'E:/'
11+
dir = 'G:/'
1212
titanic_train = pd.read_csv(os.path.join(dir, 'train.csv'))
1313
print(titanic_train.shape)
1414
print(titanic_train.info())
@@ -64,6 +64,11 @@ def convert_familysize(size):
6464
#one hot encoding
6565
titanic = utils.ohe(titanic, cat_features)
6666

67+
#scale the data
68+
scaler = preprocessing.StandardScaler()
69+
tmp = scaler.fit_transform(titanic)
70+
titanic = pd.DataFrame(tmp, columns=titanic.columns)
71+
6772
titanic_train1 = titanic[:titanic_train.shape[0]]
6873
y_train = titanic_train['Survived']
6974

@@ -74,11 +79,11 @@ def convert_familysize(size):
7479
X_train = utils.select_features(rf_final_estimator, titanic_train1, threshold='median')
7580

7681
kernel_svm_estimator = svm.SVC(kernel='rbf')
77-
kernel_svm_grid = {'gamma':[0.001, 0.01], 'C':[100] }
82+
kernel_svm_grid = {'gamma':[0.001, 0.01, 0.05, 0.1, 1], 'C':[10, 100] }
7883
svm_final_estimator = cutils.grid_search_best_model(kernel_svm_estimator, kernel_svm_grid, X_train, y_train)
7984

8085
titanic_test1 = titanic[titanic_train.shape[0]:]
81-
X_test = utils.select_features(rf_final_estimator, titanic_test1, threshold='mean')
86+
X_test = utils.select_features(rf_final_estimator, titanic_test1, threshold='median')
8287

8388
titanic_test['Survived'] = svm_final_estimator.predict(X_test)
84-
titanic_test.to_csv(os.path.join(dir, 'submission.csv'), columns=['PassengerId', 'Survived'], index=False)
89+
titanic_test.to_csv(os.path.join(dir, 'submission.csv'), columns=['PassengerId', 'Survived'], index=False)

0 commit comments

Comments
 (0)