1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "name" : " first-colab-project.ipynb" ,
7+ "provenance" : [],
8+ "authorship_tag" : " ABX9TyPyuORnXGkHTUvBUQQDWHym" ,
9+ "include_colab_link" : true
10+ },
11+ "kernelspec" : {
12+ "name" : " python3" ,
13+ "display_name" : " Python 3"
14+ },
15+ "language_info" : {
16+ "name" : " python"
17+ }
18+ },
19+ "cells" : [
20+ {
21+ "cell_type" : " markdown" ,
22+ "metadata" : {
23+ "id" : " view-in-github" ,
24+ "colab_type" : " text"
25+ },
26+ "source" : [
27+ " <a href=\" https://colab.research.google.com/github/Mukeshvudayagiri/keep-coding-python/blob/main/Demographic-Data-Analyzer.ipynb\" target=\" _parent\" ><img src=\" https://colab.research.google.com/assets/colab-badge.svg\" alt=\" Open In Colab\" /></a>"
28+ ]
29+ },
30+ {
31+ "cell_type" : " code" ,
32+ "metadata" : {
33+ "colab" : {
34+ "base_uri" : " https://localhost:8080/"
35+ },
36+ "id" : " 2RZGHQo7tOpE" ,
37+ "outputId" : " 6d0f4edb-4510-4b55-94a6-12feaac65c3f"
38+ },
39+ "source" : [
40+ " import numpy as np\n " ,
41+ " import pandas as pd\n " ,
42+ " \n " ,
43+ " df = pd.read_csv('/adult.data.csv')\n " ,
44+ " #df.head()\n " ,
45+ " #1.\n " ,
46+ " #race_count = df['race'].value_counts()\n " ,
47+ " #race_count\n " ,
48+ " #2.\n " ,
49+ " #avg_men = round(df[df[\" sex\" ]=='Male']['age'].mean(),1)\n " ,
50+ " #avg_men\n " ,
51+ " \n " ,
52+ " #3.\n " ,
53+ " \n " ,
54+ " # bachelors_degree = len(df[df['education']== 'Bachelors'])\n " ,
55+ " # print(bachelors_degree)\n " ,
56+ " # total_degree = len(df)\n " ,
57+ " # #print(total_degree)\n " ,
58+ " # percentage_bachelors = round(bachelors_degree/total_degree * 100 , 1)\n " ,
59+ " # print(percentage_bachelors)\n " ,
60+ " \n " ,
61+ " #4.\n " ,
62+ " # NOTE(review): 'Masters' appears twice in both isin() lists below; the third entry was presumably meant to be 'Doctorate' -- confirm, then re-check the row counts.\n " ,
63+ " # more_than_one = df[df['education'].isin(['Bachelors', 'Masters', 'Masters'])]\n " ,
64+ " # print(more_than_one) #7078 rows\n " ,
65+ " # not_more_than_one = df[~df['education'].isin(['Bachelors', 'Masters', 'Masters'])]\n " ,
66+ " # print(not_more_than_one) #25483 rows\n " ,
67+ " \n " ,
68+ " # num_of_higher = len(more_than_one[more_than_one.salary == '>50K'])\n " ,
69+ " # print(num_of_higher)\n " ,
70+ " # num_of_lower = len(not_more_than_one[not_more_than_one.salary == '>50K'])\n " ,
71+ " # print(num_of_lower)\n " ,
72+ " \n " ,
73+ " # high_education_rich = round(num_of_higher / len(more_than_one) * 100 , 1)\n " ,
74+ " # lower_education_rich = round(num_of_lower / len(not_more_than_one) * 100 , 1)\n " ,
75+ " # print(high_education_rich)\n " ,
76+ " # print(lower_education_rich)\n " ,
77+ " \n " ,
78+ " #5. \n " ,
79+ " \n " ,
80+ " # min_work = df['hours-per-week'].min()\n " ,
81+ " # print(min_work)\n " ,
82+ " \n " ,
83+ " #6. \n " ,
84+ " \n " ,
85+ " # min_num_workers = (df[df['hours-per-week'] == min_work])\n " ,
86+ " # # print(min_num_workers)\n " ,
87+ " # rich_min_work_percentage = len(min_num_workers[min_num_workers['salary']== '>50K']) / len(min_num_workers) * 100 \n " ,
88+ " # print(rich_min_work_percentage)\n " ,
89+ " \n " ,
90+ " #7.\n " ,
91+ " \n " ,
92+ " # Row count per 'native-country' value over the whole frame.\n " ,
93+ " country_count = df['native-country'].value_counts()\n " ,
94+ " # Row count per 'native-country' value, restricted to rows whose salary is '>50K'.\n " ,
95+ " country_rich_count = df[df['salary'] == '>50K']['native-country'].value_counts()\n " ,
96+ " \n " ,
97+ " # Share of '>50K' rows per country, in percent. Countries missing from country_rich_count become NaN, which idxmax()/max() skip by default.\n " ,
98+ " rich_percentage_by_country = country_rich_count / country_count * 100\n " ,
99+ " # idxmax() yields the country label; max() alone only yields the percentage.\n " ,
100+ " highest_earning_country = rich_percentage_by_country.idxmax()\n " ,
101+ " highest_earning_country_percentage = rich_percentage_by_country.max()\n " ,
102+ " print(highest_earning_country_percentage)\n " ,
103+ " \n " ,
104+ " #8. \n " ,
105+ " \n " ,
106+ " # Rows with native-country 'India' and salary '>50K'.\n " ,
107+ " occupation_values = df[(df['native-country'] == 'India') & (df['salary'] == '>50K')]\n " ,
108+ " occupation_counts = occupation_values.occupation.value_counts()\n " ,
109+ " # Most frequent occupation among those rows.\n " ,
110+ " top_IN_occupation = occupation_counts.idxmax()\n " ,
111+ " print(top_IN_occupation)\n " ,
112+ " \n " ,
113+ " \n "
114+ ],
115+ "execution_count" : 51 ,
116+ "outputs" : [
117+ {
118+ "output_type" : " stream" ,
119+ "text" : [
120+ " 41.86046511627907\n " ,
121+ " Prof-specialty\n "
122+ ],
123+ "name" : " stdout"
124+ }
125+ ]
126+ }
127+ ]
128+ }
0 commit comments