100 DAYS OF DATA
WORKING WITH CSV
DATA ANALYSIS OF TITANIC DATA SET INCLUDED
Data analysis from a CSV file in Python
Learn to read and write CSV files in Python
•
COPY
name,age,height(cm),weight(kg)
Lenin,30,188,90
Phil,42,178,76
Claire,40,165,54
Alex,18,140,46
\t
csv
pandas
csv.reader
COPY
import csv
with open('my_family.csv') as input:
csv_reader = csv.reader(input, delimiter=',')
line_count = 0
for row in csv_reader:
if line_count == 0:
print(f'Header row - {", ".join(row)}')
line_count += 1
else:
print(f'{row[0]} is {row[1]} years old, {row[2]} cm tal
line_count += 1
print(f'Total: {line_count} lines')
COPY
Header row - name, age, height(cm), weight(kg)
Lenin is 30 years old, 188 cm tall and 90 kg heavy
Phil is 42 years old, 178 cm tall and 76 kg heavy
Claire is 40 years old, 165 cm tall and 54 kg heavy
Alex is 18 years old, 140 cm tall and 46 kg heavy
Total: 5 lines
line_count
csv.reader next(reader object,
None)
COPY
import csv
with open('my_family.csv') as input:
csv_reader = csv.reader(input, delimiter=',')
line_count = 0
next(csv_reader, None) #ignore the header
for row in csv_reader:
print(f'{row[0]} is {row[1]} years old, {row[2]} cm tall an
line_count += 1
print(f'Total: {line_count} lines')
COPY
Lenin is 30 years old, 188 cm tall and 90 kg heavy
Phil is 42 years old, 178 cm tall and 76 kg heavy
Claire is 40 years old, 165 cm tall and 54 kg heavy
Alex is 18 years old, 140 cm tall and 46 kg heavy
Total: 4 lines
Total: 4 lines
csv.DictReader
DictReader
COPY
import csv
with open('my_family.csv') as input:
csv_reader = csv.DictReader(input, delimiter=',')
for row in csv_reader:
print(f'{row["name"]} is {row["age"]} years old, {row["heig
print(f'Total: {csv_reader.line_num} lines')
csv_reader.line_num
csv.DictReader
csv.DictReader
csv.writer
COPY
import csv
header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']
data = [ ['Phil', 42, 178, 76],
['Alex', 18, 140, 46],
['Claire', 40, 165, 54] ]
filename = "my_family.csv"
with open(filename, 'w') as output:
csvwriter = csv.writer(output)
# Write a single list
csvwriter.writerow(header)
# Writing a list of lists
csvwriter.writerows(data)
COPY
Name,Age,Height(cm),Weight(kg)
Phil,42,178,76
Alex,18,140,46
Claire,40,165,54
writerow
writerows
csv.writer ,
delimiter
COPY
import csv
header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']
data = [ ['Phil', 42, 178, 76],
['Alex', 18, 140, 46],
['Claire', 40, 165, 54] ]
filename = "my_family.csv"
with open(filename, 'w') as output:
csvwriter = csv.writer(output, delimiter = '|')
# Write a single list
csvwriter.writerow(header)
# Writing a list of lists
csvwriter.writerows(data)
COPY
Name|Age|Height(cm)|Weight(kg)
Phil|42|178|76
Alex|18|140|46
Claire|40|165|54
DictWriter
fieldnames
COPY
import csv
header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']
data = [
data = [
{"Name":"Phil", "Age": 42, "Height(cm)":178, "Weight(kg)":76},
{"Name":"Claire", "Age": 40, "Height(cm)":165, "Weight(kg)":54}
{"Name":"Alex", "Age": 18, "Height(cm)":140, "Weight(kg)":46}
]
filename = "my_family.csv"
with open(filename, 'w') as output:
csvwriter = csv.DictWriter(output, fieldnames=header)
csvwriter.writeheader()
for row in data:
csvwriter.writerow(row)
COPY
Name,Age,Height(cm),Weight(kg)
Phil,42,178,76
Claire,40,165,54
Alex,18,140,46
writerows
COPY
import csv
header = ['Name', 'Age', 'Height(cm)', 'Weight(kg)']
data = [
{"Name":"Phil", "Age": 42, "Height(cm)":178, "Weight(kg)":76},
{"Name":"Claire", "Age": 40, "Height(cm)":165, "Weight(kg)":54}
{"Name":"Alex", "Age": 18, "Height(cm)":140, "Weight(kg)":46}
]
filename = "my_family.csv"
with open(filename, 'w') as output:
csvwriter = csv.DictWriter(output, fieldnames=header)
csvwriter.writeheader()
csvwriter.writerows(data)
COPY
Name,Age,Height(cm),Weight(kg)
Phil,42,178,76
Claire,40,165,54
Alex,18,140,46
"
"
COPY
Name,Age,Height(cm),Weight(kg),Address
Phil,42,178,76,'Gryffindor room, Hogwarts'
Claire,40,165,54,'Snapes room, Hogwarts'
Alex,18,140,46,'4 Private Drive, Little Whinging'
quotechar
COPY
import csv
filename = "my_family.csv"
with open(filename, 'r') as output:
csvreader = csv.reader(output, quotechar="'")
for row in csvreader:
print(row)
COPY
['Name', 'Age', 'Height(cm)', 'Weight(kg)', 'Address']
['Phil', '42', '178', '76', 'Gryffindor room, Hogwarts']
['Claire', '40', '165', '54', 'Snapes room, Hogwarts']
['Alex', '18', '140', '46', '4 Private Drive, Little Whinging']
quoting
csv.QUOTE_MINIMAL
csv.QUOTE_ALL
csv.QUOTE_NONNUMERIC
csv.QUOTE_NONE
COPY
import csv
filename = "my_family.csv"
header = ['Name','Age','Height(cm)','Weight(kg)','Address']
data = [
['Phil',42,178,76,'Gryffindor room, Hogwarts'],
['Claire',40,165,54,'Snapes room, Hogwarts'],
[ , , , , p , g ],
['Alex',18,140,46,'4 Private Drive, Little Whinging']
]
with open(filename, 'w') as output:
csvwriter = csv.writer(output, quotechar="'", quoting=csv.QUOTE_A
csvwriter.writerow(header)
csvwriter.writerows(data)
csv.QUOTE_ALL
COPY
'Name','Age','Height(cm)','Weight(kg)','Address'
'Phil','42','178','76','Gryffindor room, Hogwarts'
'Claire','40','165','54','Snapes room, Hogwarts'
'Alex','18','140','46','4 Private Drive, Little Whinging'
csv.QUOTE_NONE
COPY
import csv
filename = "my_family.csv"
header = ['Name','Age','Height(cm)','Weight(kg)','Address']
data = [
['Phil',42,178,76,'Gryffindor room, Hogwarts'],
['Claire',40,165,54,'Snapes room, Hogwarts'],
['Alex',18,140,46,'4 Private Drive, Little Whinging']
]
with open(filename, 'w') as output:
csvwriter = csv.writer(output, quotechar="'", quoting=csv.QUOTE_N
csvwriter.writerow(header)
csvwriter.writerows(data)
COPY
Traceback (most recent call last):
File "main.py", line 16, in <module>
csvwriter.writerows(data)
_csv.Error: need to escape, but no escapechar set
csv.QUOTE_NONE csv
escapechar
\
COPY
import csv
filename = "my_family.csv"
header = ['Name','Age','Height(cm)','Weight(kg)','Address']
data = [
['Phil',42,178,76,'Gryffindor room, Hogwarts'],
['Claire',40,165,54,'Snapes room, Hogwarts'],
['Alex',18,140,46,'4 Private Drive, Little Whinging']
]
with open(filename, 'w') as output:
csvwriter = csv.writer(output, quotechar="'", quoting=csv.QUOTE_N
csvwriter.writerow(header)
csvwriter.writerows(data)
COPY
Name,Age,Height(cm),Weight(kg),Address
Phil,42,178,76,Gryffindor room\, Hogwarts
Claire,40,165,54,Snapes room\, Hogwarts
Alex,18,140,46,4 Private Drive\, Little Whinging
\
COPY
Name, Age, Height(cm), Weight(kg), Address
Phil, 42, 178, 76, 'Gryffindor room, Hogwarts'
Claire, 40, 165, 54, 'Snapes room, Hogwarts'
Alex, 18, 140, 46, '4 Private Drive, Little Whinging'
skipinitialspace
COPY
import csv
with open('my_family.csv', 'r') as f:
csv_reader = csv.reader(f, quotechar="'")
for line in csv_reader:
print(line)
p ( )
COPY
['Name', ' Age', ' Height(cm)', ' Weight(kg)', ' Address']
['Phil', ' 42', ' 178', ' 76', " 'Gryffindor room", " Hogwarts'"]
['Claire', ' 40', ' 165', ' 54', " 'Snapes room", " Hogwarts'"]
['Alex', ' 18', ' 140', ' 46', " '4 Private Drive", " Little Whingi
skipinitialspace
True
COPY
import csv
with open('my_family.csv', 'r') as f:
csv_reader = csv.reader(f, quotechar="'", skipinitialspace=True
for line in csv_reader:
print(line)
COPY
['Name', 'Age', 'Height(cm)', 'Weight(kg)', 'Address']
['Phil', '42', '178', '76', 'Gryffindor room, Hogwarts']
['Claire', '40', '165', '54', 'Snapes room, Hogwarts']
[ , , , , p , g ]
['Alex', '18', '140', '46', '4 Private Drive, Little Whinging']
COPY
import pandas as pd
df = pd.read_csv('my_family.csv')
print(df)
COPY
Name Age Height(cm) Weight(kg)
0 Phil 42 178 76
1 Claire 40 165 54
2 Alex 18 140 46
COPY
import pandas as pd
df = pd.read_csv('my_family.csv')
print(type(df['Age'][0]))
print(type(df['Height(cm)'][0]))
print(type(df['Weight(kg)'][0]))
COPY
<class 'numpy.int64'>
<class 'numpy.int64'>
<class 'numpy.int64'>
names
pd.read_csv()
COPY
Phil,42,178,76
Claire,40,165,54
Alex,18,140,46
COPY
import pandas as pd
df = pd.read_csv('my_family.csv',
index_col='Name',
names=['Name', 'Age', 'Height(cm)', 'Weight(kg)']
)
print(df)
COPY
Age Height(cm) Weight(kg)
Name
Phil 42 178 76
Claire 40 165 54
Alex 18 140 46
df.to_csv
COPY
import pandas as pd
df = pd.read_csv('my_family.csv',
index_col='Name',
names=['Name', 'Age', 'Height(cm)', 'Weight(kg)']
)
df.to_csv('my_new_family.csv')
COPY
Age Height(cm) Weight(kg)
Name
Phil 42 178 76
Claire 40 165 54
Alex 18 140 46
COPY
import pandas as pd
#load the csv file
df = pd.read_csv('train.csv')
# Column Names
print(df.columns)
# Count unique values in Sex column
print(df['Sex'].value_counts())
# Percentage of male and female passengers
print(df['Sex'].value_counts(normalize=True))
COPY
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age',
'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
male 577
female 314
Name: Sex, dtype: int64
male 0.647587
female 0.352413
Name: Sex, dtype: float64
COPY
import pandas as pd
#load the csv file
df = pd.read_csv('train.csv')
# Column Names
print(df.columns)
# Count unique values in Sex column
print(df[df["Survived"] == 1]['Sex'].value_counts())
# Percentage of surviving male and female passengers
print(df[df["Survived"] == 1]['Sex'].value_counts(normalize=True))
COPY
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age',
'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
female 233
male 109
Name: Sex, dtype: int64
female 0.681287
male 0.318713
Name: Sex, dtype: float64
value_counts()
COPY
import pandas as pd
#load the csv file
df = pd.read_csv('train.csv')
# median age of each sex
median_age_men=df[df['Sex']=='male']['Age'].median()
median_age_women=df[df['Sex']=='female']['Age'].median()
print(f"The median age of men is {median_age_men}")
print(f"The median age of women is {median_age_women}")
COPY
The median age of men is 29.0
The median age of women is 27.0