Skip to content

Commit ae53655

Browse files
author
Algorithmica
authored
Add files via upload
1 parent 8e83928 commit ae53655

File tree

3 files changed

+83
-0
lines changed

3 files changed

+83
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pandas as pd
2+
import os
3+
import seaborn as sns
4+
5+
# Folder that holds the Titanic training data; adjust for the local machine.
# Named data_dir (not dir) so the builtin dir() stays usable in the session.
data_dir = 'E:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would only append a stray "None" line.
titanic_train.info()
print(titanic_train.columns)

# Explore bivariate relationships: categorical vs categorical.
# seaborn renamed factorplot -> catplot and the `size` keyword -> `height`
# (seaborn 0.9); the old spellings no longer work on current releases.
sns.catplot(x="Sex", hue="Survived", data=titanic_train, kind="count", height=6)
# Swapping the roles (x="Survived", hue="Sex") shows the same counts
# grouped by outcome instead of by sex.
sns.catplot(x="Pclass", hue="Survived", data=titanic_train, kind="count", height=6)
sns.catplot(x="Embarked", hue="Survived", data=titanic_train, kind="count", height=6)

# Explore bivariate relationships: continuous vs categorical.
# hue= overlays one density curve per Survived value on a single axes,
# while col= draws one panel per Survived value.
sns.FacetGrid(titanic_train, hue="Survived", height=8).map(sns.kdeplot, "Fare").add_legend()
sns.FacetGrid(titanic_train, col="Survived", height=8).map(sns.kdeplot, "Fare").add_legend()
sns.FacetGrid(titanic_train, hue="Survived", height=8).map(sns.kdeplot, "Age").add_legend()

# Explore bivariate relationships: continuous vs continuous.
sns.jointplot(x="Age", y="Fare", data=titanic_train)

# Pairwise view over the selected feature columns plus the target.
features = ['Sex', 'Pclass', 'Embarked', 'SibSp', 'Parch', 'Fare', 'Age', 'Survived']
sns.pairplot(titanic_train[features])
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import pandas as pd
2+
import os
3+
import seaborn as sns
4+
5+
# Folder that holds the Titanic training data; adjust for the local machine.
# Named data_dir (not dir) so the builtin dir() stays usable in the session.
data_dir = 'E:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would only append a stray "None" line.
titanic_train.info()
print(titanic_train.columns)

# Understand grid cell formation: col= lays panels out side by side,
# row= stacks them, hue= overlays groups inside a single panel.
g = sns.FacetGrid(titanic_train, col="Sex")
g.map(sns.kdeplot, "Age")
# NOTE(review): this draws the box plot onto the same grid as the KDE above;
# confirm the overlay is intended rather than a fresh FacetGrid.
g.map(sns.boxplot, "Age")
g = sns.FacetGrid(titanic_train, row="Sex")
g.map(sns.kdeplot, "Fare")
g = sns.FacetGrid(titanic_train, hue="Survived")
g.map(sns.kdeplot, "Fare").add_legend()

# Does age have an impact on survival within each sex group?
g = sns.FacetGrid(titanic_train, col="Sex", hue="Survived")
g.map(sns.kdeplot, "Age").add_legend()

# Does age have an impact on survival within each Pclass and sex group?
g = sns.FacetGrid(titanic_train, row="Pclass", col="Sex", hue="Survived")
g.map(sns.kdeplot, "Age").add_legend()

# Same grid as above, drawn without a legend.
g = sns.FacetGrid(titanic_train, row="Pclass", col="Sex", hue="Survived")
g.map(sns.kdeplot, "Age")

# Correlation heatmap over the selected numeric columns.
tmp = titanic_train[['Fare', 'Age', 'Parch', 'SibSp']]
sns.heatmap(tmp.corr())
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import pandas as pd
2+
import os
3+
import seaborn as sns
4+
5+
# Folder that holds the Titanic training data; adjust for the local machine.
# Named data_dir (not dir) so the builtin dir() stays usable in the session.
data_dir = 'E:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would only append a stray "None" line.
titanic_train.info()
print(titanic_train.columns)

# Categorical columns: numerical EDA.
# Printed explicitly: a bare expression shows nothing when run as a script.
print(pd.crosstab(index=titanic_train["Survived"], columns="count"))

# Categorical columns: visual EDA.
sns.countplot(x='Survived', data=titanic_train)
sns.countplot(x='Pclass', data=titanic_train)
sns.countplot(x='Sex', data=titanic_train)

# Continuous features: visual EDA.
print(titanic_train['Fare'].describe())
sns.boxplot(x='Fare', data=titanic_train)
# distplot is deprecated in seaborn; histplot(kde=True, stat="density")
# is the replacement for its default histogram-plus-density view, and
# kdeplot is the replacement for distplot(..., hist=False).
sns.histplot(titanic_train['Fare'], kde=True, stat="density")
sns.kdeplot(titanic_train['Fare'])
sns.kdeplot(titanic_train['Age'])
sns.boxplot(x='Age', data=titanic_train)

sns.kdeplot(titanic_train['SibSp'])
sns.boxplot(x='SibSp', data=titanic_train)

0 commit comments

Comments
 (0)