deepteshrout
diff --git a/‎Module 4 - Machine Learning/01. Data Preparation and Modelling with sklearn/1. Hands-on with sklearn/code/intro_data_modelling.ipynb‎
Lines changed: 121 additions & 22 deletions b/‎Module 4 - Machine Learning/01. Data Preparation and Modelling with sklearn/1. Hands-on with sklearn/code/intro_data_modelling.ipynb‎
Lines changed: 121 additions & 22 deletions
@@ -413,7 +413,7 @@
  {
  "data": {
  "text/plain": [
- "<seaborn.axisgrid.PairGrid at 0x254702cd250>"
+ "<seaborn.axisgrid.PairGrid at 0x17f4039bca0>"
  ]
  },
  "execution_count": 10,
@@ -686,7 +686,7 @@
  }
  ],
  "source": [
- "# Data-preprocessing: Standardizing the data\n",
+ "# Data-preprocessing of X_train: Standardizing the data\n",
  "\n",
  "from sklearn.preprocessing import StandardScaler\n",
  "scaler = StandardScaler()\n",
@@ -717,6 +717,86 @@
  "print(\"Std of each column:\", np.sqrt(scaler.var_))"
  ]
  },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Data-preprocessing of y_train: turn the string class labels into integer codes\n",
+ "\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "\n",
+ "# Learn the class -> integer mapping from the training labels\n",
+ "label_encoder = LabelEncoder().fit(y_train)\n",
+ "\n",
+ "# Replace every training label with its integer code\n",
+ "y_train_transformed = label_encoder.transform(y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 1, 2, 0, 2])"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Peek at the first five encoded training labels\n",
+ "y_train_transformed[:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Let's check the mapping: the label at position i of classes_ is encoded as the integer i\n",
+ "\n",
+ "print(label_encoder.classes_)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "61 Iris-versicolor\n",
+ "92 Iris-versicolor\n",
+ "112 Iris-virginica\n",
+ "2 Iris-setosa\n",
+ "141 Iris-virginica\n",
+ "Name: Species, dtype: object"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Original string labels of the first five training rows, to compare against their encoded values\n",
+ "y_train.head()"
+ ]
+ },
  {
  "cell_type": "markdown",
  "metadata": {},
@@ -726,7 +806,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 17,
+ "execution_count": 21,
  "metadata": {},
  "outputs": [
  {
@@ -738,7 +818,7 @@
  "LogisticRegression()"
  ]
  },
- "execution_count": 17,
+ "execution_count": 21,
  "metadata": {},
  "output_type": "execute_result"
  }
@@ -751,7 +831,7 @@
  "classifier = LogisticRegression()\n",
  "\n",
  "# Train a model on training data\n",
- "classifier.fit(X_train_transformed, y_train)"
+ "classifier.fit(X_train_transformed, y_train_transformed)"
  ]
  },
  {
@@ -763,7 +843,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 18,
+ "execution_count": 22,
  "metadata": {},
  "outputs": [
  {
@@ -781,6 +861,25 @@
  "print(X_test_transformed.shape)"
  ]
  },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(38,)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Reuse the encoder already fitted on y_train (transform only, no refit)\n",
+ "# so the test labels are encoded with the same class -> integer mapping\n",
+ "y_test_transformed = label_encoder.transform(y_test)\n",
+ "\n",
+ "print(y_test_transformed.shape)"
+ ]
+ },
  {
  "cell_type": "markdown",
  "metadata": {},
@@ -790,7 +889,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 19,
+ "execution_count": 24,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -807,7 +906,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 20,
+ "execution_count": 25,
  "metadata": {},
  "outputs": [
  {
@@ -816,7 +915,7 @@
  "0.9736842105263158"
  ]
  },
- "execution_count": 20,
+ "execution_count": 25,
  "metadata": {},
  "output_type": "execute_result"
  }
@@ -826,7 +925,7 @@
  "from sklearn import metrics\n",
  "\n",
  "# Calculate accuracy score\n",
- "metrics.accuracy_score(y_test, y_test_pred)"
+ "metrics.accuracy_score(y_test_transformed, y_test_pred)"
  ]
  },
  {
@@ -838,7 +937,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 21,
+ "execution_count": 26,
  "metadata": {},
  "outputs": [
  {
@@ -847,7 +946,7 @@
  "0.9736842105263158"
  ]
  },
- "execution_count": 21,
+ "execution_count": 26,
  "metadata": {},
  "output_type": "execute_result"
  }
@@ -860,13 +959,13 @@
  "classifier = KNeighborsClassifier()\n",
  "\n",
  "# Training the model\n",
- "classifier.fit(X_train_transformed, y_train)\n",
+ "classifier.fit(X_train_transformed, y_train_transformed)\n",
  "\n",
  "# Prediction on unseen data\n",
  "y_test_pred = classifier.predict(X_test_transformed)\n",
  "\n",
  "# Evaluation\n",
- "metrics.accuracy_score(y_test, y_test_pred)"
+ "metrics.accuracy_score(y_test_transformed, y_test_pred)"
  ]
  },
  {
@@ -878,7 +977,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 22,
+ "execution_count": 27,
  "metadata": {},
  "outputs": [
  {
@@ -887,7 +986,7 @@
  "0.9736842105263158"
  ]
  },
- "execution_count": 22,
+ "execution_count": 27,
  "metadata": {},
  "output_type": "execute_result"
  }
@@ -900,13 +999,13 @@
  "classifier = DecisionTreeClassifier()\n",
  "\n",
  "# Training the model\n",
- "classifier.fit(X_train_transformed, y_train)\n",
+ "classifier.fit(X_train_transformed, y_train_transformed)\n",
  "\n",
  "# Prediction on unseen data\n",
  "y_test_pred = classifier.predict(X_test_transformed)\n",
  "\n",
  "# Evaluation\n",
- "metrics.accuracy_score(y_test, y_test_pred)"
+ "metrics.accuracy_score(y_test_transformed, y_test_pred)"
  ]
  },
  {
@@ -918,7 +1017,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 23,
+ "execution_count": 28,
  "metadata": {},
  "outputs": [
  {
@@ -927,7 +1026,7 @@
  "0.9736842105263158"
  ]
  },
- "execution_count": 23,
+ "execution_count": 28,
  "metadata": {},
  "output_type": "execute_result"
  }
@@ -940,13 +1039,13 @@
  "classifier = RandomForestClassifier()\n",
  "\n",
  "# Training the model\n",
- "classifier.fit(X_train_transformed, y_train)\n",
+ "classifier.fit(X_train_transformed, y_train_transformed)\n",
  "\n",
  "# Prediction on unseen data\n",
  "y_test_pred = classifier.predict(X_test_transformed)\n",
  "\n",
  "# Evaluation\n",
- "metrics.accuracy_score(y_test, y_test_pred)"
+ "metrics.accuracy_score(y_test_transformed, y_test_pred)"
  ]
  }
  ],