Mukeshvudayagiri
diff --git a/‎Demographic-Data-Analyzer.ipynb‎
Lines changed: 128 additions & 0 deletions b/‎Demographic-Data-Analyzer.ipynb‎
Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,128 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "first-colab-project.ipynb",
+ "provenance": [],
+ "authorship_tag": "ABX9TyPyuORnXGkHTUvBUQQDWHym",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "<a href=\"https://colab.research.google.com/github/Mukeshvudayagiri/keep-coding-python/blob/main/Demographic-Data-Analyzer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2RZGHQo7tOpE",
+ "outputId": "6d0f4edb-4510-4b55-94a6-12feaac65c3f"
+ },
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "# Load the dataset; every step below reads from df.\n",
+ "# NOTE(review): the path is absolute at the filesystem root, so the CSV must\n",
+ "# exist at exactly that location for this cell to run.\n",
+ "df = pd.read_csv('/adult.data.csv')\n",
+ "#df.head()\n",
+ "\n",
+ "# Steps 1-6 are kept commented out; only steps 7 and 8 execute and print.\n",
+ "\n",
+ "#1. Number of rows for each value of the race column.\n",
+ "#race_count = df['race'].value_counts()\n",
+ "#race_count\n",
+ "\n",
+ "#2. Mean age of the rows whose sex is 'Male', rounded to one decimal.\n",
+ "#avg_men = round(df[df[\"sex\"]=='Male']['age'].mean(),1)\n",
+ "#avg_men\n",
+ "\n",
+ "#3. Percentage of rows whose education is 'Bachelors', rounded to one decimal.\n",
+ "\n",
+ "# bachelors_degree = len(df[df['education']== 'Bachelors'])\n",
+ "# print(bachelors_degree)\n",
+ "# total_degree = len(df)\n",
+ "# #print(total_degree)\n",
+ "# percentage_bachelors = round(bachelors_degree/total_degree * 100 , 1)\n",
+ "# print(percentage_bachelors)\n",
+ "\n",
+ "#4. Percentage of '>50K' earners inside and outside the listed education levels.\n",
+ "\n",
+ "# NOTE(review): 'Masters' appears twice in both isin() lists below; the third\n",
+ "# entry was presumably meant to be 'Doctorate' - confirm before re-enabling.\n",
+ "# The row counts noted next to the prints belong to the list as written.\n",
+ "# more_than_one = df[df['education'].isin(['Bachelors', 'Masters', 'Masters'])]\n",
+ "# print(more_than_one) #7078 rows\n",
+ "# not_more_than_one = df[~df['education'].isin(['Bachelors', 'Masters', 'Masters'])]\n",
+ "# print(not_more_than_one) #25483 rows\n",
+ "\n",
+ "# num_of_higher = len(more_than_one[more_than_one.salary == '>50K'])\n",
+ "# print(num_of_higher)\n",
+ "# num_of_lower = len(not_more_than_one[not_more_than_one.salary == '>50K'])\n",
+ "# print(num_of_lower)\n",
+ "\n",
+ "# high_education_rich = round(num_of_higher / len(more_than_one) * 100 , 1)\n",
+ "# lower_education_rich = round(num_of_lower / len(not_more_than_one) * 100 , 1)\n",
+ "# print(high_education_rich)\n",
+ "# print(lower_education_rich)\n",
+ "\n",
+ "#5. Smallest value in the hours-per-week column.\n",
+ "\n",
+ "# min_work = df['hours-per-week'].min()\n",
+ "# print(min_work)\n",
+ "\n",
+ "#6. Percentage of the minimum-hours rows whose salary is '>50K' (needs min_work from step 5).\n",
+ "\n",
+ "# min_num_workers = (df[df['hours-per-week'] == min_work])\n",
+ "# # print(min_num_workers)\n",
+ "# rich_min_work_percentage = len(min_num_workers[min_num_workers['salary']== '>50K']) / len(min_num_workers) * 100\n",
+ "# print(rich_min_work_percentage)\n",
+ "\n",
+ "#7. Country with the highest percentage of rows whose salary is '>50K'.\n",
+ "\n",
+ "# Rows per country, over all rows and over the '>50K' rows only.\n",
+ "country_count = df['native-country'].value_counts()\n",
+ "# print(country_count)\n",
+ "country_rich_count = df[df['salary'] == '>50K']['native-country'].value_counts()\n",
+ "# print(country_rich_count)\n",
+ "\n",
+ "# Earlier attempt that used overall totals instead of per-country counts:\n",
+ "# country_count = len(df['native-country'])\n",
+ "# print(country_count)\n",
+ "# country_rich_count = len(df[df['salary'] == '>50K']['native-country'])\n",
+ "# print(country_rich_count)\n",
+ "\n",
+ "# The division aligns both Series on the country label. A country missing from\n",
+ "# country_rich_count yields NaN, which idxmax() and max() skip by default.\n",
+ "country_rich_percentage = country_rich_count / country_count * 100\n",
+ "# idxmax() returns the index label (the country); max() returns the value itself.\n",
+ "highest_earning_country = country_rich_percentage.idxmax()\n",
+ "highest_earning_country_percentage = country_rich_percentage.max()\n",
+ "# Only the percentage is printed; highest_earning_country holds the matching country.\n",
+ "print(highest_earning_country_percentage)\n",
+ "\n",
+ "#8. Most frequent occupation among rows with native-country 'India' and salary '>50K'.\n",
+ "\n",
+ "occupation_values = df[(df['native-country'] == 'India') & (df['salary'] == '>50K')]\n",
+ "occupation_counts = occupation_values.occupation.value_counts()\n",
+ "# value_counts() is indexed by occupation, so idxmax() gives the occupation name.\n",
+ "top_IN_occupation = occupation_counts.idxmax()\n",
+ "print(top_IN_occupation)"
+ ],
+ "execution_count": 51,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "41.86046511627907\n",
+ "Prof-specialty\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ }
+ ]
+}