|
72 | 72 | }, |
73 | 73 | { |
74 | 74 | "cell_type": "code", |
75 | | - "execution_count": 30, |
| 75 | + "execution_count": 37, |
76 | 76 | "metadata": {}, |
77 | 77 | "outputs": [], |
78 | 78 | "source": [ |
|
87 | 87 | }, |
88 | 88 | { |
89 | 89 | "cell_type": "code", |
90 | | - "execution_count": 31, |
| 90 | + "execution_count": 38, |
91 | 91 | "metadata": {}, |
92 | 92 | "outputs": [], |
93 | 93 | "source": [ |
|
128 | 128 | }, |
129 | 129 | { |
130 | 130 | "cell_type": "code", |
131 | | - "execution_count": 50, |
| 131 | + "execution_count": 39, |
132 | 132 | "metadata": {}, |
133 | 133 | "outputs": [ |
134 | 134 | { |
|
145 | 145 | }, |
146 | 146 | { |
147 | 147 | "cell_type": "code", |
148 | | - "execution_count": 51, |
| 148 | + "execution_count": 40, |
149 | 149 | "metadata": { |
150 | 150 | "scrolled": true |
151 | 151 | }, |
|
546 | 546 | "[143 rows x 21 columns]>" |
547 | 547 | ] |
548 | 548 | }, |
549 | | - "execution_count": 51, |
| 549 | + "execution_count": 40, |
550 | 550 | "metadata": {}, |
551 | 551 | "output_type": "execute_result" |
552 | 552 | } |
|
561 | 561 | }, |
562 | 562 | { |
563 | 563 | "cell_type": "code", |
564 | | - "execution_count": 55, |
| 564 | + "execution_count": 41, |
565 | 565 | "metadata": {}, |
566 | 566 | "outputs": [ |
567 | 567 | { |
|
579 | 579 | }, |
580 | 580 | { |
581 | 581 | "cell_type": "code", |
582 | | - "execution_count": 56, |
| 582 | + "execution_count": 42, |
583 | 583 | "metadata": {}, |
584 | 584 | "outputs": [ |
585 | 585 | { |
|
622 | 622 | }, |
623 | 623 | { |
624 | 624 | "cell_type": "code", |
625 | | - "execution_count": 60, |
| 625 | + "execution_count": 43, |
626 | 626 | "metadata": {}, |
627 | 627 | "outputs": [ |
628 | 628 | { |
|
631 | 631 | "dict_keys(['METTS MARK', 'BAXTER JOHN C', 'ELLIOTT STEVEN', 'CORDES WILLIAM R', 'HANNON KEVIN P', 'MORDAUNT KRISTINA M', 'MEYER ROCKFORD G', 'MCMAHON JEFFREY', 'HAEDICKE MARK E', 'PIPER GREGORY F', 'HUMPHREY GENE E', 'NOLES JAMES L', 'BLACHMAN JEREMY M', 'SUNDE MARTIN', 'GIBBS DANA R', 'LOWRY CHARLES P', 'COLWELL WESLEY', 'MULLER MARK S', 'JACKSON CHARLENE R', 'WESTFAHL RICHARD K', 'WALTERS GARETH W', 'WALLS JR ROBERT H', 'KITCHEN LOUISE', 'CHAN RONNIE', 'BELFER ROBERT', 'SHANKMAN JEFFREY A', 'WODRASKA JOHN', 'BERGSIEKER RICHARD P', 'URQUHART JOHN A', 'BIBI PHILIPPE A', 'RIEKER PAULA H', 'WHALEY DAVID A', 'BECK SALLY W', 'HAUG DAVID L', 'ECHOLS JOHN B', 'MENDELSOHN JOHN', 'HICKERSON GARY J', 'CLINE KENNETH W', 'LEWIS RICHARD', 'HAYES ROBERT E', 'KOPPER MICHAEL J', 'LEFF DANIEL P', 'LAVORATO JOHN J', 'BERBERIAN DAVID', 'DETMERING TIMOTHY J', 'WAKEHAM JOHN', 'POWERS WILLIAM', 'GOLD JOSEPH', 'BANNANTINE JAMES M', 'DUNCAN JOHN H', 'SHAPIRO RICHARD S', 'SHERRIFF JOHN R', 'SHELBY REX', 'LEMAISTRE CHARLES', 'DEFFNER JOSEPH M', 'KISHKILL JOSEPH G', 'WHALLEY LAWRENCE G', 'MCCONNELL MICHAEL S', 'PIRO JIM', 'DELAINEY DAVID W', 'SULLIVAN-SHAKLOVITZ COLLEEN', 'WROBEL BRUCE', 'LINDHOLM TOD A', 'MEYER JEROME J', 'BUTTS ROBERT H', 'OLSON CINDY K', 'MCDONALD REBECCA', 'CUMBERLAND MICHAEL S', 'GAHN ROBERT S', 'BADUM JAMES P', 'HERMANN ROBERT J', 'FALLON JAMES B', 'GATHMANN WILLIAM D', 'HORTON STANLEY C', 'BOWEN JR RAYMOND M', 'GILLIS JOHN', 'FITZGERALD JAY L', 'MORAN MICHAEL P', 'REDMOND BRIAN L', 'BAZELIDES PHILIP J', 'BELDEN TIMOTHY N', 'DIMICHELE RICHARD G', 'DURAN WILLIAM D', 'THORN TERENCE H', 'FASTOW ANDREW S', 'FOY JOE', 'CALGER CHRISTOPHER F', 'RICE KENNETH D', 'KAMINSKI WINCENTY J', 'LOCKHART EUGENE E', 'COX DAVID', 'OVERDYKE JR JERE C', 'PEREIRA PAULO V. FERRAZ', 'STABLER FRANK', 'BLAKE JR. 
NORMAN P', 'SHERRICK JEFFREY B', 'PRENTICE JAMES', 'GRAY RODNEY', 'THE TRAVEL AGENCY IN THE PARK', 'UMANOFF ADAM S', 'KEAN STEVEN J', 'FOWLER PEGGY', 'WASAFF GEORGE', 'WHITE JR THOMAS E', 'CHRISTODOULOU DIOMEDES', 'ALLEN PHILLIP K', 'SHARP VICTORIA T', 'JAEDICKE ROBERT', 'WINOKUR JR. HERBERT S', 'BROWN MICHAEL', 'MCCLELLAN GEORGE', 'HUGHES JAMES A', 'REYNOLDS LAWRENCE', 'PICKERING MARK R', 'BHATNAGAR SANJAY', 'CARTER REBECCA C', 'BUCHANAN HAROLD G', 'YEAP SOON', 'MURRAY JULIA H', 'GARLAND C KEVIN', 'DODSON KEITH', 'YEAGER F SCOTT', 'HIRKO JOSEPH', 'DIETRICH JANET R', 'DERRICK JR. JAMES V', 'FREVERT MARK A', 'PAI LOU L', 'HAYSLETT RODERICK J', 'BAY FRANKLIN R', 'MCCARTY DANNY J', 'FUGH JOHN L', 'SCRIMSHAW MATTHEW', 'KOENIG MARK E', 'SAVAGE FRANK', 'IZZO LAWRENCE L', 'TILNEY ELIZABETH A', 'MARTIN AMANDA K', 'BUY RICHARD B', 'GRAMM WENDY L', 'CAUSEY RICHARD A', 'TAYLOR MITCHELL S', 'DONAHUE JR JEFFREY M', 'GLISAN JR BEN F'])" |
632 | 632 | ] |
633 | 633 | }, |
634 | | - "execution_count": 60, |
| 634 | + "execution_count": 43, |
635 | 635 | "metadata": {}, |
636 | 636 | "output_type": "execute_result" |
637 | 637 | } |
|
643 | 643 | }, |
644 | 644 | { |
645 | 645 | "cell_type": "code", |
646 | | - "execution_count": 61, |
647 | | - "metadata": {}, |
648 | | - "outputs": [], |
649 | | - "source": [ |
650 | | - "### Queries of the Dataset ###" |
651 | | - ] |
652 | | - }, |
653 | | - { |
654 | | - "cell_type": "code", |
655 | | - "execution_count": 66, |
| 646 | + "execution_count": 44, |
656 | 647 | "metadata": {}, |
657 | 648 | "outputs": [ |
658 | 649 | { |
|
681 | 672 | " 'total_stock_value': 1095040}" |
682 | 673 | ] |
683 | 674 | }, |
684 | | - "execution_count": 66, |
| 675 | + "execution_count": 44, |
685 | 676 | "metadata": {}, |
686 | 677 | "output_type": "execute_result" |
687 | 678 | } |
|
692 | 683 | }, |
693 | 684 | { |
694 | 685 | "cell_type": "code", |
695 | | - "execution_count": 67, |
| 686 | + "execution_count": 45, |
696 | 687 | "metadata": {}, |
697 | 688 | "outputs": [ |
698 | 689 | { |
|
701 | 692 | "1095040" |
702 | 693 | ] |
703 | 694 | }, |
704 | | - "execution_count": 67, |
| 695 | + "execution_count": 45, |
705 | 696 | "metadata": {}, |
706 | 697 | "output_type": "execute_result" |
707 | 698 | } |
|
712 | 703 | }, |
713 | 704 | { |
714 | 705 | "cell_type": "code", |
715 | | - "execution_count": 68, |
| 706 | + "execution_count": 46, |
716 | 707 | "metadata": {}, |
717 | 708 | "outputs": [ |
718 | 709 | { |
|
721 | 712 | "11" |
722 | 713 | ] |
723 | 714 | }, |
724 | | - "execution_count": 68, |
| 715 | + "execution_count": 46, |
725 | 716 | "metadata": {}, |
726 | 717 | "output_type": "execute_result" |
727 | 718 | } |
|
732 | 723 | }, |
733 | 724 | { |
734 | 725 | "cell_type": "code", |
735 | | - "execution_count": 75, |
| 726 | + "execution_count": 47, |
736 | 727 | "metadata": {}, |
737 | 728 | "outputs": [], |
738 | 729 | "source": [ |
|
747 | 738 | }, |
748 | 739 | { |
749 | 740 | "cell_type": "code", |
750 | | - "execution_count": 83, |
| 741 | + "execution_count": 48, |
751 | 742 | "metadata": {}, |
752 | 743 | "outputs": [ |
753 | 744 | { |
|
770 | 761 | }, |
771 | 762 | { |
772 | 763 | "cell_type": "code", |
773 | | - "execution_count": 86, |
| 764 | + "execution_count": 49, |
774 | 765 | "metadata": {}, |
775 | 766 | "outputs": [ |
776 | 767 | { |
|
861 | 852 | }, |
862 | 853 | { |
863 | 854 | "cell_type": "code", |
864 | | - "execution_count": 93, |
| 855 | + "execution_count": 50, |
865 | 856 | "metadata": {}, |
866 | 857 | "outputs": [], |
867 | 858 | "source": [ |
|
877 | 868 | }, |
878 | 869 | { |
879 | 870 | "cell_type": "code", |
880 | | - "execution_count": 94, |
| 871 | + "execution_count": 51, |
881 | 872 | "metadata": {}, |
882 | 873 | "outputs": [ |
883 | 874 | { |
|
912 | 903 | }, |
913 | 904 | { |
914 | 905 | "cell_type": "code", |
915 | | - "execution_count": 97, |
| 906 | + "execution_count": 52, |
916 | 907 | "metadata": {}, |
917 | 908 | "outputs": [ |
918 | 909 | { |
|
1174 | 1165 | "max 1.725253e+07 3.076606e+07 " |
1175 | 1166 | ] |
1176 | 1167 | }, |
1177 | | - "execution_count": 97, |
| 1168 | + "execution_count": 52, |
1178 | 1169 | "metadata": {}, |
1179 | 1170 | "output_type": "execute_result" |
1180 | 1171 | } |
|
1196 | 1187 | "df_imp.describe()" |
1197 | 1188 | ] |
1198 | 1189 | }, |
| 1190 | + { |
| 1191 | + "cell_type": "code", |
| 1192 | + "execution_count": 53, |
| 1193 | + "metadata": {}, |
| 1194 | + "outputs": [ |
| 1195 | + { |
| 1196 | + "data": { |
| 1197 | + "text/plain": [ |
| 1198 | + "{'bonus': 'NaN',\n", |
| 1199 | + " 'deferral_payments': 'NaN',\n", |
| 1200 | + " 'deferred_income': 'NaN',\n", |
| 1201 | + " 'director_fees': 'NaN',\n", |
| 1202 | + " 'email_address': 'NaN',\n", |
| 1203 | + " 'exercised_stock_options': 'NaN',\n", |
| 1204 | + " 'expenses': 'NaN',\n", |
| 1205 | + " 'from_messages': 'NaN',\n", |
| 1206 | + " 'from_poi_to_this_person': 'NaN',\n", |
| 1207 | + " 'from_this_person_to_poi': 'NaN',\n", |
| 1208 | + " 'loan_advances': 'NaN',\n", |
| 1209 | + " 'long_term_incentive': 'NaN',\n", |
| 1210 | + " 'other': 362096,\n", |
| 1211 | + " 'poi': False,\n", |
| 1212 | + " 'restricted_stock': 'NaN',\n", |
| 1213 | + " 'restricted_stock_deferred': 'NaN',\n", |
| 1214 | + " 'salary': 'NaN',\n", |
| 1215 | + " 'shared_receipt_with_poi': 'NaN',\n", |
| 1216 | + " 'to_messages': 'NaN',\n", |
| 1217 | + " 'total_payments': 362096,\n", |
| 1218 | + " 'total_stock_value': 'NaN'}" |
| 1219 | + ] |
| 1220 | + }, |
| 1221 | + "execution_count": 53, |
| 1222 | + "metadata": {}, |
| 1223 | + "output_type": "execute_result" |
| 1224 | + } |
| 1225 | + ], |
| 1226 | + "source": [ |
| 1227 | + "### Queries of the Dataset ###\n", |
| 1228 | + "enron_data['THE TRAVEL AGENCY IN THE PARK']" |
| 1229 | + ] |
| 1230 | + }, |
| 1231 | + { |
| 1232 | + "cell_type": "code", |
| 1233 | + "execution_count": 58, |
| 1234 | + "metadata": {}, |
| 1235 | + "outputs": [ |
| 1236 | + { |
| 1237 | + "name": "stdout", |
| 1238 | + "output_type": "stream", |
| 1239 | + "text": [ |
| 1240 | + "98\n" |
| 1241 | + ] |
| 1242 | + } |
| 1243 | + ], |
| 1244 | + "source": [ |
| 1245 | + "# dropping row for 'THE TRAVEL AGENCY IN THE PARK'\n", |
| 1246 | + "\n", |
| 1247 | + "park_index = list(enron_data.keys()).index('THE TRAVEL AGENCY IN THE PARK')\n", |
| 1248 | + "print(park_index)\n", |
| 1249 | + "df_imp_sub = df_imp.drop(df_imp.index[[park_index]])" |
| 1250 | + ] |
| 1251 | + }, |
| 1252 | + { |
| 1253 | + "cell_type": "code", |
| 1254 | + "execution_count": 55, |
| 1255 | + "metadata": {}, |
| 1256 | + "outputs": [ |
| 1257 | + { |
| 1258 | + "data": { |
| 1259 | + "text/plain": [ |
| 1260 | + "{'bonus': 'NaN',\n", |
| 1261 | + " 'deferral_payments': 'NaN',\n", |
| 1262 | + " 'deferred_income': 'NaN',\n", |
| 1263 | + " 'director_fees': 'NaN',\n", |
| 1264 | + " 'email_address': 'NaN',\n", |
| 1265 | + " 'exercised_stock_options': 'NaN',\n", |
| 1266 | + " 'expenses': 'NaN',\n", |
| 1267 | + " 'from_messages': 'NaN',\n", |
| 1268 | + " 'from_poi_to_this_person': 'NaN',\n", |
| 1269 | + " 'from_this_person_to_poi': 'NaN',\n", |
| 1270 | + " 'loan_advances': 'NaN',\n", |
| 1271 | + " 'long_term_incentive': 'NaN',\n", |
| 1272 | + " 'other': 'NaN',\n", |
| 1273 | + " 'poi': False,\n", |
| 1274 | + " 'restricted_stock': 'NaN',\n", |
| 1275 | + " 'restricted_stock_deferred': 'NaN',\n", |
| 1276 | + " 'salary': 'NaN',\n", |
| 1277 | + " 'shared_receipt_with_poi': 'NaN',\n", |
| 1278 | + " 'to_messages': 'NaN',\n", |
| 1279 | + " 'total_payments': 'NaN',\n", |
| 1280 | + " 'total_stock_value': 'NaN'}" |
| 1281 | + ] |
| 1282 | + }, |
| 1283 | + "execution_count": 55, |
| 1284 | + "metadata": {}, |
| 1285 | + "output_type": "execute_result" |
| 1286 | + } |
| 1287 | + ], |
| 1288 | + "source": [ |
| 1289 | + "enron_data['LOCKHART EUGENE E']" |
| 1290 | + ] |
| 1291 | + }, |
| 1292 | + { |
| 1293 | + "cell_type": "code", |
| 1294 | + "execution_count": 59, |
| 1295 | + "metadata": {}, |
| 1296 | + "outputs": [ |
| 1297 | + { |
| 1298 | + "name": "stdout", |
| 1299 | + "output_type": "stream", |
| 1300 | + "text": [ |
| 1301 | + "89\n" |
| 1302 | + ] |
| 1303 | + } |
| 1304 | + ], |
| 1305 | + "source": [ |
| 1306 | + "# dropping row for 'LOCKHART EUGENE E'\n", |
| 1307 | + "lockhart_index = list(enron_data.keys()).index('LOCKHART EUGENE E')\n", |
| 1308 | + "print(lockhart_index)\n", |
| 1309 | + "df_imp_sub = df_imp.drop(df_imp.index[[park_index, lockhart_index]])" |
| 1310 | + ] |
| 1311 | + }, |
| 1312 | + { |
| 1313 | + "cell_type": "markdown", |
| 1314 | + "metadata": {}, |
| 1315 | + "source": [ |
| 1316 | + "- 'THE TRAVEL AGENCY IN THE PARK' was removed since it is not a person. \n", |
| 1317 | + "- 'LOCKHART EUGENE E' was removed during data processing since this row had no entries for any feature." |
| 1318 | + ] |
| 1319 | + }, |
1199 | 1320 | { |
1200 | 1321 | "cell_type": "code", |
1201 | 1322 | "execution_count": null, |
|
1209 | 1330 | "display_name": "Python 3", |
1210 | 1331 | "language": "python", |
1211 | 1332 | "name": "python3" |
1212 | | - }, |
1213 | | - "language_info": { |
1214 | | - "codemirror_mode": { |
1215 | | - "name": "ipython", |
1216 | | - "version": 3 |
1217 | | - }, |
1218 | | - "file_extension": ".py", |
1219 | | - "mimetype": "text/x-python", |
1220 | | - "name": "python", |
1221 | | - "nbconvert_exporter": "python", |
1222 | | - "pygments_lexer": "ipython3", |
1223 | | - "version": "3.6.4" |
1224 | 1333 | } |
1225 | 1334 | }, |
1226 | 1335 | "nbformat": 4, |
|
0 commit comments