Skip to content

Commit 4766e4d

Browse files
Created using Colaboratory - Demographic Data Analyzer
1 parent 5d6d991 commit 4766e4d

File tree

1 file changed

+128
-0
lines changed

1 file changed

+128
-0
lines changed

Demographic-Data-Analyzer.ipynb

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"name": "first-colab-project.ipynb",
7+
"provenance": [],
8+
"authorship_tag": "ABX9TyPyuORnXGkHTUvBUQQDWHym",
9+
"include_colab_link": true
10+
},
11+
"kernelspec": {
12+
"name": "python3",
13+
"display_name": "Python 3"
14+
},
15+
"language_info": {
16+
"name": "python"
17+
}
18+
},
19+
"cells": [
20+
{
21+
"cell_type": "markdown",
22+
"metadata": {
23+
"id": "view-in-github",
24+
"colab_type": "text"
25+
},
26+
"source": [
27+
"<a href=\"https://colab.research.google.com/github/Mukeshvudayagiri/keep-coding-python/blob/main/Demographic-Data-Analyzer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"metadata": {
33+
"colab": {
34+
"base_uri": "https://localhost:8080/"
35+
},
36+
"id": "2RZGHQo7tOpE",
37+
"outputId": "6d0f4edb-4510-4b55-94a6-12feaac65c3f"
38+
},
39+
"source": [
40+
"import numpy as np\n",
41+
"import pandas as pd\n",
42+
"\n",
43+
"df = pd.read_csv('/adult.data.csv')\n",
44+
"#df.head()\n",
45+
"#1.\n",
46+
"#race_count = df['race'].value_counts()\n",
47+
"#race_count\n",
48+
"#2.\n",
49+
"#avg_men = round(df[df[\"sex\"]=='Male']['age'].mean(),1)\n",
50+
"#avg_men\n",
51+
"\n",
52+
"#3.\n",
53+
"\n",
54+
"# bachelors_degree = len(df[df['education']== 'Bachelors'])\n",
55+
"# print(bachelors_degree)\n",
56+
"# total_degree = len(df)\n",
57+
"# #print(total_degree)\n",
58+
"# percentage_bachelors = round(bachelors_degree/total_degree * 100 , 1)\n",
59+
"# print(percentage_bachelors)\n",
60+
"\n",
61+
"#4.\n",
62+
"\n",
63+
"# more_than_one = df[df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])]\n",
64+
"# print(more_than_one) #7078 rows when 'Doctorate' was missing from the list; re-run to refresh\n",
65+
"# not_more_than_one = df[~df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])]\n",
66+
"# print(not_more_than_one) #25483 rows when 'Doctorate' was missing from the list; re-run to refresh\n",
67+
"\n",
68+
"# num_of_higher = len(more_than_one[more_than_one.salary == '>50K'])\n",
69+
"# print(num_of_higher)\n",
70+
"# num_of_lower = len(not_more_than_one[not_more_than_one.salary == '>50K'])\n",
71+
"# print(num_of_lower)\n",
72+
"\n",
73+
"# high_education_rich = round(num_of_higher / len(more_than_one) * 100 , 1)\n",
74+
"# lower_education_rich = round(num_of_lower / len(not_more_than_one) * 100 , 1)\n",
75+
"# print(high_education_rich)\n",
76+
"# print(lower_education_rich)\n",
77+
"\n",
78+
"#5. \n",
79+
"\n",
80+
"# min_work = df['hours-per-week'].min()\n",
81+
"# print(min_work)\n",
82+
"\n",
83+
"#6. \n",
84+
"\n",
85+
"# min_num_workers = (df[df['hours-per-week'] == min_work])\n",
86+
"# # print(min_num_workers)\n",
87+
"# rich_min_work_percentage = len(min_num_workers[min_num_workers['salary']== '>50K']) / len(min_num_workers) * 100 \n",
88+
"# print(rich_min_work_percentage)\n",
89+
"\n",
90+
"#7.\n",
91+
"\n",
92+
"country_count = df['native-country'].value_counts()\n",
93+
"# print(country_count)\n",
94+
"country_rich_count = df[df['salary'] == '>50K']['native-country'].value_counts()\n",
95+
"# print(country_rich_count)\n",
96+
"\n",
97+
"# country_count = len(df['native-country'])\n",
98+
"# print(country_count)\n",
99+
"# country_rich_count = len(df[df['salary'] == '>50K']['native-country'])\n",
100+
"# print(country_rich_count)\n",
101+
"\n",
102+
"\n",
103+
"highest_earning_country = (country_rich_count / country_count * 100 ).max() #sort_values(ascending = False)\n",
104+
"print(highest_earning_country)\n",
105+
"\n",
106+
"#8. \n",
107+
"\n",
108+
"occupation_values = df[(df['native-country'] == 'India') & (df['salary'] == '>50K')]\n",
109+
"occupation_counts = occupation_values.occupation.value_counts()\n",
110+
"top_IN_occupation = occupation_counts.idxmax()\n",
111+
"print(top_IN_occupation)\n",
112+
"\n",
113+
"\n"
114+
],
115+
"execution_count": 51,
116+
"outputs": [
117+
{
118+
"output_type": "stream",
119+
"text": [
120+
"41.86046511627907\n",
121+
"Prof-specialty\n"
122+
],
123+
"name": "stdout"
124+
}
125+
]
126+
}
127+
]
128+
}

0 commit comments

Comments
 (0)