|
"""Fit a decision tree on the Titanic training data and write predictions.

The script
  1. loads ``train.csv`` from ``DATA_DIR``,
  2. fits a ``DecisionTreeClassifier`` on the ``SibSp`` and ``Parch`` columns
     to predict ``Survived``,
  3. reports the mean 5-fold cross-validated accuracy,
  4. renders the fitted tree to ``tree.pdf``,
  5. predicts ``Survived`` for ``test.csv`` and writes ``submission.csv``
     containing the ``PassengerId`` and ``Survived`` columns.
"""
import io
import os

import pandas as pd
import pydot
from sklearn import model_selection, tree

# Directory that holds train.csv / test.csv and receives every output file.
# Named DATA_DIR rather than `dir` so the builtin dir() is not shadowed.
DATA_DIR = 'E:/'

# Feature columns, defined once so training and prediction cannot drift apart.
FEATURES = ['SibSp', 'Parch']

titanic_train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# print() would only add a stray "None" line to the output.
titanic_train.info()
print(titanic_train.columns)

X_train = titanic_train[FEATURES]
y_train = titanic_train['Survived']
dt_estimator = tree.DecisionTreeClassifier()
dt_estimator.fit(X_train, y_train)
print(dt_estimator.tree_)

# Report the cross-validated accuracy; the bare expression previously computed
# this value and silently discarded it.
cv_accuracy = model_selection.cross_val_score(
    dt_estimator, X_train, y_train, scoring="accuracy", cv=5
).mean()
print(cv_accuracy)

# Visualize the decision tree: export it as DOT text into an in-memory buffer,
# parse that with pydot, and render the first (only) graph to a PDF.
dot_data = io.StringIO()
tree.export_graphviz(
    dt_estimator,
    out_file=dot_data,
    # Pass a plain list of names instead of a pandas Index.
    feature_names=list(X_train.columns),
)
graph = pydot.graph_from_dot_data(dot_data.getvalue())[0]
# NOTE(review): PDF rendering presumably needs the Graphviz binaries on PATH.
graph.write_pdf(os.path.join(DATA_DIR, "tree.pdf"))

titanic_test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))
titanic_test.info()
X_test = titanic_test[FEATURES]
titanic_test['Survived'] = dt_estimator.predict(X_test)
# Only the identifier and the predicted label go into the submission file.
titanic_test.to_csv(
    os.path.join(DATA_DIR, 'submission.csv'),
    columns=['PassengerId', 'Survived'],
    index=False,
)
0 commit comments