|
413 | 413 | { |
414 | 414 | "data": { |
415 | 415 | "text/plain": [ |
416 | | - "<seaborn.axisgrid.PairGrid at 0x254702cd250>" |
| 416 | + "<seaborn.axisgrid.PairGrid at 0x17f4039bca0>" |
417 | 417 | ] |
418 | 418 | }, |
419 | 419 | "execution_count": 10, |
|
686 | 686 | } |
687 | 687 | ], |
688 | 688 | "source": [ |
689 | | - "# Data-preprocessing: Standardizing the data\n", |
| 689 | + "# Data-preprocessing of X_train: Standardizing the data\n", |
690 | 690 | "\n", |
691 | 691 | "from sklearn.preprocessing import StandardScaler\n", |
692 | 692 | "scaler = StandardScaler()\n", |
|
717 | 717 | "print(\"Std of each column:\", np.sqrt(scaler.var_))" |
718 | 718 | ] |
719 | 719 | }, |
| 720 | + { |
| 721 | + "cell_type": "code", |
| 722 | + "execution_count": 17, |
| 723 | + "metadata": {}, |
| 724 | + "outputs": [], |
| 725 | + "source": [ |
| 726 | + "# Data-preprocessing of y_train: Encoding the data\n", |
| 727 | + "\n", |
| 728 | + "from sklearn.preprocessing import LabelEncoder\n", |
| 729 | + "\n", |
| 730 | + "# Apply Label Encoding to convert target classes into numerical labels\n", |
| 731 | + "label_encoder = LabelEncoder()\n", |
| 732 | + "\n", |
| 733 | + "y_train_transformed = label_encoder.fit_transform(y_train)" |
| 734 | + ] |
| 735 | + }, |
| 736 | + { |
| 737 | + "cell_type": "code", |
| 738 | + "execution_count": 18, |
| 739 | + "metadata": {}, |
| 740 | + "outputs": [ |
| 741 | + { |
| 742 | + "data": { |
| 743 | + "text/plain": [ |
| 744 | + "array([1, 1, 2, 0, 2])" |
| 745 | + ] |
| 746 | + }, |
| 747 | + "execution_count": 18, |
| 748 | + "metadata": {}, |
| 749 | + "output_type": "execute_result" |
| 750 | + } |
| 751 | + ], |
| 752 | + "source": [ |
| 753 | + "y_train_transformed[:5]" |
| 754 | + ] |
| 755 | + }, |
| 756 | + { |
| 757 | + "cell_type": "code", |
| 758 | + "execution_count": 19, |
| 759 | + "metadata": {}, |
| 760 | + "outputs": [ |
| 761 | + { |
| 762 | + "name": "stdout", |
| 763 | + "output_type": "stream", |
| 764 | + "text": [ |
| 765 | + "['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']\n" |
| 766 | + ] |
| 767 | + } |
| 768 | + ], |
| 769 | + "source": [ |
| 770 | + "# Let's check the mapping\n", |
| 771 | + "\n", |
| 772 | + "print(label_encoder.classes_)" |
| 773 | + ] |
| 774 | + }, |
| 775 | + { |
| 776 | + "cell_type": "code", |
| 777 | + "execution_count": 20, |
| 778 | + "metadata": {}, |
| 779 | + "outputs": [ |
| 780 | + { |
| 781 | + "data": { |
| 782 | + "text/plain": [ |
| 783 | + "61 Iris-versicolor\n", |
| 784 | + "92 Iris-versicolor\n", |
| 785 | + "112 Iris-virginica\n", |
| 786 | + "2 Iris-setosa\n", |
| 787 | + "141 Iris-virginica\n", |
| 788 | + "Name: Species, dtype: object" |
| 789 | + ] |
| 790 | + }, |
| 791 | + "execution_count": 20, |
| 792 | + "metadata": {}, |
| 793 | + "output_type": "execute_result" |
| 794 | + } |
| 795 | + ], |
| 796 | + "source": [ |
| 797 | + "y_train.head()" |
| 798 | + ] |
| 799 | + }, |
720 | 800 | { |
721 | 801 | "cell_type": "markdown", |
722 | 802 | "metadata": {}, |
|
726 | 806 | }, |
727 | 807 | { |
728 | 808 | "cell_type": "code", |
729 | | - "execution_count": 17, |
| 809 | + "execution_count": 21, |
730 | 810 | "metadata": {}, |
731 | 811 | "outputs": [ |
732 | 812 | { |
|
738 | 818 | "LogisticRegression()" |
739 | 819 | ] |
740 | 820 | }, |
741 | | - "execution_count": 17, |
| 821 | + "execution_count": 21, |
742 | 822 | "metadata": {}, |
743 | 823 | "output_type": "execute_result" |
744 | 824 | } |
|
751 | 831 | "classifier = LogisticRegression()\n", |
752 | 832 | "\n", |
753 | 833 | "# Train a model on training data\n", |
754 | | - "classifier.fit(X_train_transformed, y_train)" |
| 834 | + "classifier.fit(X_train_transformed, y_train_transformed)" |
755 | 835 | ] |
756 | 836 | }, |
757 | 837 | { |
|
763 | 843 | }, |
764 | 844 | { |
765 | 845 | "cell_type": "code", |
766 | | - "execution_count": 18, |
| 846 | + "execution_count": 22, |
767 | 847 | "metadata": {}, |
768 | 848 | "outputs": [ |
769 | 849 | { |
|
781 | 861 | "print(X_test_transformed.shape)" |
782 | 862 | ] |
783 | 863 | }, |
| 864 | + { |
| 865 | + "cell_type": "code", |
| 866 | + "execution_count": 23, |
| 867 | + "metadata": {}, |
| 868 | + "outputs": [ |
| 869 | + { |
| 870 | + "name": "stdout", |
| 871 | + "output_type": "stream", |
| 872 | + "text": [ |
| 873 | + "(38,)\n" |
| 874 | + ] |
| 875 | + } |
| 876 | + ], |
| 877 | + "source": [ |
| 878 | + "y_test_transformed = label_encoder.transform(y_test)\n", |
| 879 | + "\n", |
| 880 | + "print(y_test_transformed.shape)" |
| 881 | + ] |
| 882 | + }, |
784 | 883 | { |
785 | 884 | "cell_type": "markdown", |
786 | 885 | "metadata": {}, |
|
790 | 889 | }, |
791 | 890 | { |
792 | 891 | "cell_type": "code", |
793 | | - "execution_count": 19, |
| 892 | + "execution_count": 24, |
794 | 893 | "metadata": {}, |
795 | 894 | "outputs": [], |
796 | 895 | "source": [ |
|
807 | 906 | }, |
808 | 907 | { |
809 | 908 | "cell_type": "code", |
810 | | - "execution_count": 20, |
| 909 | + "execution_count": 25, |
811 | 910 | "metadata": {}, |
812 | 911 | "outputs": [ |
813 | 912 | { |
|
816 | 915 | "0.9736842105263158" |
817 | 916 | ] |
818 | 917 | }, |
819 | | - "execution_count": 20, |
| 918 | + "execution_count": 25, |
820 | 919 | "metadata": {}, |
821 | 920 | "output_type": "execute_result" |
822 | 921 | } |
|
826 | 925 | "from sklearn import metrics\n", |
827 | 926 | "\n", |
828 | 927 | "# Calculate accuracy score\n", |
829 | | - "metrics.accuracy_score(y_test, y_test_pred)" |
| 928 | + "metrics.accuracy_score(y_test_transformed, y_test_pred)" |
830 | 929 | ] |
831 | 930 | }, |
832 | 931 | { |
|
838 | 937 | }, |
839 | 938 | { |
840 | 939 | "cell_type": "code", |
841 | | - "execution_count": 21, |
| 940 | + "execution_count": 26, |
842 | 941 | "metadata": {}, |
843 | 942 | "outputs": [ |
844 | 943 | { |
|
847 | 946 | "0.9736842105263158" |
848 | 947 | ] |
849 | 948 | }, |
850 | | - "execution_count": 21, |
| 949 | + "execution_count": 26, |
851 | 950 | "metadata": {}, |
852 | 951 | "output_type": "execute_result" |
853 | 952 | } |
|
860 | 959 | "classifier = KNeighborsClassifier()\n", |
861 | 960 | "\n", |
862 | 961 | "# Training the model\n", |
863 | | - "classifier.fit(X_train_transformed, y_train)\n", |
| 962 | + "classifier.fit(X_train_transformed, y_train_transformed)\n", |
864 | 963 | "\n", |
865 | 964 | "# Prediction on unseen data\n", |
866 | 965 | "y_test_pred = classifier.predict(X_test_transformed)\n", |
867 | 966 | "\n", |
868 | 967 | "# Evaluation\n", |
869 | | - "metrics.accuracy_score(y_test, y_test_pred)" |
| 968 | + "metrics.accuracy_score(y_test_transformed, y_test_pred)" |
870 | 969 | ] |
871 | 970 | }, |
872 | 971 | { |
|
878 | 977 | }, |
879 | 978 | { |
880 | 979 | "cell_type": "code", |
881 | | - "execution_count": 22, |
| 980 | + "execution_count": 27, |
882 | 981 | "metadata": {}, |
883 | 982 | "outputs": [ |
884 | 983 | { |
|
887 | 986 | "0.9736842105263158" |
888 | 987 | ] |
889 | 988 | }, |
890 | | - "execution_count": 22, |
| 989 | + "execution_count": 27, |
891 | 990 | "metadata": {}, |
892 | 991 | "output_type": "execute_result" |
893 | 992 | } |
|
900 | 999 | "classifier = DecisionTreeClassifier()\n", |
901 | 1000 | "\n", |
902 | 1001 | "# Training the model\n", |
903 | | - "classifier.fit(X_train_transformed, y_train)\n", |
| 1002 | + "classifier.fit(X_train_transformed, y_train_transformed)\n", |
904 | 1003 | "\n", |
905 | 1004 | "# Prediction on unseen data\n", |
906 | 1005 | "y_test_pred = classifier.predict(X_test_transformed)\n", |
907 | 1006 | "\n", |
908 | 1007 | "# Evaluation\n", |
909 | | - "metrics.accuracy_score(y_test, y_test_pred)" |
| 1008 | + "metrics.accuracy_score(y_test_transformed, y_test_pred)" |
910 | 1009 | ] |
911 | 1010 | }, |
912 | 1011 | { |
|
918 | 1017 | }, |
919 | 1018 | { |
920 | 1019 | "cell_type": "code", |
921 | | - "execution_count": 23, |
| 1020 | + "execution_count": 28, |
922 | 1021 | "metadata": {}, |
923 | 1022 | "outputs": [ |
924 | 1023 | { |
|
927 | 1026 | "0.9736842105263158" |
928 | 1027 | ] |
929 | 1028 | }, |
930 | | - "execution_count": 23, |
| 1029 | + "execution_count": 28, |
931 | 1030 | "metadata": {}, |
932 | 1031 | "output_type": "execute_result" |
933 | 1032 | } |
|
940 | 1039 | "classifier = RandomForestClassifier()\n", |
941 | 1040 | "\n", |
942 | 1041 | "# Training the model\n", |
943 | | - "classifier.fit(X_train_transformed, y_train)\n", |
| 1042 | + "classifier.fit(X_train_transformed, y_train_transformed)\n", |
944 | 1043 | "\n", |
945 | 1044 | "# Prediction on unseen data\n", |
946 | 1045 | "y_test_pred = classifier.predict(X_test_transformed)\n", |
947 | 1046 | "\n", |
948 | 1047 | "# Evaluation\n", |
949 | | - "metrics.accuracy_score(y_test, y_test_pred)" |
| 1048 | + "metrics.accuracy_score(y_test_transformed, y_test_pred)" |
950 | 1049 | ] |
951 | 1050 | } |
952 | 1051 | ], |
|
0 commit comments