1+ import pandas as pd
2+ import os
3+
# Directory that holds train.csv. Named `data_dir` rather than `dir` so the
# builtin dir() stays usable for the rest of the script.
data_dir = 'E:/'
titanic_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
6+
# Explore the structure of the DataFrame: dimensions, column labels,
# per-column dtypes, the row index and the underlying array of values.
print(titanic_train.shape)
print(titanic_train.columns)
print(titanic_train.dtypes)
print(titanic_train.index)
print(titanic_train.values)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
titanic_train.info()
14+
# Preview the data: the first and the last four rows, then two random draws
# (a fixed number of rows, followed by a fraction of all rows).
for preview in (titanic_train.head(n=4), titanic_train.tail(n=4)):
    print(preview)
for sample_args in ({'n': 4}, {'frac': 0.1}):
    print(titanic_train.sample(**sample_args))
20+
# Rows can be picked with a positional slice or with a boolean mask.
leading_rows = titanic_train[:3]
print(leading_rows)
is_male = titanic_train['Sex'] == 'male'
print(titanic_train[is_male])
24+
# Columns can be picked with a list of labels or with a single label.
selected_columns = ['Name', 'Age', 'Sex']
print(titanic_train[selected_columns])
name_by_key = titanic_train['Name']   # dictionary-style access
name_by_attr = titanic_train.Name     # attribute-style access
print(name_by_key)
print(name_by_attr)
29+
# Row and column access by integer position (.iloc).
# Results are printed, like the rest of the script; a bare expression is
# silently discarded when the file is run non-interactively.
n_rows, n_cols = titanic_train.shape
print(titanic_train.iloc[1:3, :])
print(titanic_train.iloc[1:3, 2:4])
# A boolean indexer has to be exactly as long as the axis it selects from
# (current pandas raises IndexError for a shorter list), so the masks are
# built to full length instead of being written as two- or three-item lists.
first_two_rows = [pos < 2 for pos in range(n_rows)]
first_two_cols = [pos < 2 for pos in range(n_cols)]
first_and_third_col = [pos in (0, 2) for pos in range(n_cols)]
print(titanic_train.iloc[1:3, first_two_cols])
print(titanic_train.iloc[first_two_rows, first_and_third_col])

# Row and column access by label (.loc); label slices include both end points.
print(titanic_train.loc[1:3, ['Sex', 'Fare']])
print(titanic_train.loc[titanic_train.Sex == 'male', :])
print(titanic_train.loc[1:3, :'Fare'])
39+
# Add new columns: a constant column, and a family size derived from the
# Parch and SibSp counts plus one.
titanic_train['dummy'] = 1
relatives_aboard = titanic_train['Parch'] + titanic_train['SibSp']
titanic_train['FamilySize'] = relatives_aboard + 1
0 commit comments