Skip to content
This repository was archived by the owner on Jul 10, 2024. It is now read-only.

Commit 8c447e4

Browse files
committed
Update fine_tuning.ipynb
1 parent 840f40b commit 8c447e4

File tree

1 file changed

+17
-112
lines changed
  • Module 9 - GenAI (LLMs and Prompt Engineering)/3. HuggingFace Tutorial Series (Handson)/4. Fine Tuning

1 file changed

+17
-112
lines changed

Module 9 - GenAI (LLMs and Prompt Engineering)/3. HuggingFace Tutorial Series (Handson)/4. Fine Tuning/fine_tuning.ipynb

Lines changed: 17 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,8 @@
99
"\n",
1010
"Reference: \n",
1111
"https://huggingface.co/docs/transformers/training#train-a-tensorflow-model-with-keras \n",
12-
"https://huggingface.co/learn/nlp-course/chapter3/3?fw=tf\n",
1312
"\n",
1413
"\n",
15-
"https://learnopencv.com/fine-tuning-bert/ \n",
16-
"\n",
17-
"\n",
18-
"\n",
19-
"\n",
20-
"https://www.youtube.com/watch?v=RgpANRh44ao \n",
21-
"Notebook Link: https://www.kaggle.com/code/preatcher/emotion-detection-by-using-bert\n",
22-
"Github App Link: https://github.com/Abhishekjl/Sentiment-analyis-BERT-Trained-\n",
23-
"\n",
24-
"There are significant benefits to using a pretrained model. It reduces computation costs, your carbon footprint, and allows you to use state-of-the-art models without having to train one from scratch. 🤗 Transformers provides access to thousands of pretrained models for a wide range of tasks. When you use a pretrained model, you train it on a dataset specific to your task. This is known as fine-tuning, an incredibly powerful training technique. \n",
25-
"\n",
2614
"**Steps**\n",
2715
"1. Load the data\n",
2816
"2. Apply tokenization to the dataset\n",
@@ -31,7 +19,7 @@
3119
},
3220
{
3321
"cell_type": "code",
34-
"execution_count": 2,
22+
"execution_count": 1,
3523
"id": "0c5291d1-83ad-4e36-a3b9-0aeb161118fc",
3624
"metadata": {},
3725
"outputs": [
@@ -51,7 +39,7 @@
5139
},
5240
{
5341
"cell_type": "code",
54-
"execution_count": 1,
42+
"execution_count": 2,
5543
"id": "aba7b9e3-d558-45a1-9b03-ef697e243cba",
5644
"metadata": {},
5745
"outputs": [
@@ -61,34 +49,17 @@
6149
"text": [
6250
"2.16.1\n"
6351
]
64-
},
65-
{
66-
"ename": "AttributeError",
67-
"evalue": "module 'keras._tf_keras.keras' has no attribute '__version__'",
68-
"output_type": "error",
69-
"traceback": [
70-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
71-
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
72-
"Cell \u001b[1;32mIn[1], line 8\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtensorflow\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m keras\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(tf\u001b[38;5;241m.\u001b[39m__version__)\n\u001b[1;32m----> 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mkeras\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__version__\u001b[49m)\n",
73-
"File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\tensorflow\\python\\util\\lazy_loader.py:211\u001b[0m, in \u001b[0;36mKerasLazyLoader.__getattr__\u001b[1;34m(self, item)\u001b[0m\n\u001b[0;32m 207\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[0;32m 208\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` is not available with Keras 3.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 209\u001b[0m )\n\u001b[0;32m 210\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_load()\n\u001b[1;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mitem\u001b[49m\u001b[43m)\u001b[49m\n",
74-
"\u001b[1;31mAttributeError\u001b[0m: module 'keras._tf_keras.keras' has no attribute '__version__'"
75-
]
7652
}
7753
],
7854
"source": [
79-
"# TensorFlow and tf.keras\n",
80-
"\n",
8155
"import tensorflow as tf\n",
82-
"from tensorflow import keras\n",
83-
"\n",
84-
"print(tf.__version__)\n",
8556
"\n",
86-
"print(keras.__version__)"
57+
"print(tf.__version__)"
8758
]
8859
},
8960
{
9061
"cell_type": "code",
91-
"execution_count": 1,
62+
"execution_count": 3,
9263
"id": "a9c3a86f-dd64-48a6-b6dd-6013d2ae9412",
9364
"metadata": {},
9465
"outputs": [],
@@ -98,7 +69,7 @@
9869
},
9970
{
10071
"cell_type": "code",
101-
"execution_count": 2,
72+
"execution_count": 4,
10273
"id": "a8733911-db8b-40bf-82e3-2a052515d1f5",
10374
"metadata": {},
10475
"outputs": [
@@ -198,7 +169,7 @@
198169
"4 NEW YORK -- Indecision marked the dollar's ton... "
199170
]
200171
},
201-
"execution_count": 2,
172+
"execution_count": 4,
202173
"metadata": {},
203174
"output_type": "execute_result"
204175
}
@@ -211,7 +182,7 @@
211182
},
212183
{
213184
"cell_type": "code",
214-
"execution_count": 3,
185+
"execution_count": 5,
215186
"id": "67609b5a-6160-4c9b-9ff4-3e2306372e19",
216187
"metadata": {},
217188
"outputs": [
@@ -221,7 +192,7 @@
221192
"(8000, 5)"
222193
]
223194
},
224-
"execution_count": 3,
195+
"execution_count": 5,
225196
"metadata": {},
226197
"output_type": "execute_result"
227198
}
@@ -232,7 +203,7 @@
232203
},
233204
{
234205
"cell_type": "code",
235-
"execution_count": 4,
206+
"execution_count": 6,
236207
"id": "4c109e2e-d967-43f7-812e-18acbe145acb",
237208
"metadata": {},
238209
"outputs": [
@@ -246,7 +217,7 @@
246217
"Name: proportion, dtype: float64"
247218
]
248219
},
249-
"execution_count": 4,
220+
"execution_count": 6,
250221
"metadata": {},
251222
"output_type": "execute_result"
252223
}
@@ -257,7 +228,7 @@
257228
},
258229
{
259230
"cell_type": "code",
260-
"execution_count": 5,
231+
"execution_count": 7,
261232
"id": "f924c3d2-75bd-44d4-825f-8a37d1108f84",
262233
"metadata": {},
263234
"outputs": [
@@ -270,7 +241,7 @@
270241
"Name: proportion, dtype: float64"
271242
]
272243
},
273-
"execution_count": 5,
244+
"execution_count": 7,
274245
"metadata": {},
275246
"output_type": "execute_result"
276247
}
@@ -285,7 +256,7 @@
285256
},
286257
{
287258
"cell_type": "code",
288-
"execution_count": 9,
259+
"execution_count": 8,
289260
"id": "efa3c67b-8201-44eb-8506-e094e7dc07db",
290261
"metadata": {},
291262
"outputs": [],
@@ -298,7 +269,7 @@
298269
},
299270
{
300271
"cell_type": "code",
301-
"execution_count": 10,
272+
"execution_count": 9,
302273
"id": "ab2914ad-44ca-47cb-98ee-4227b6a31097",
303274
"metadata": {},
304275
"outputs": [],
@@ -311,7 +282,7 @@
311282
},
312283
{
313284
"cell_type": "code",
314-
"execution_count": 12,
285+
"execution_count": 10,
315286
"id": "d9e3a194-b9c9-4041-90f9-cfd43fff9f4b",
316287
"metadata": {},
317288
"outputs": [
@@ -337,76 +308,10 @@
337308
},
338309
{
339310
"cell_type": "code",
340-
"execution_count": 13,
311+
"execution_count": 11,
341312
"id": "a9272140-12e4-4c8a-9307-cbd135bfc15b",
342313
"metadata": {},
343-
"outputs": [
344-
{
345-
"data": {
346-
"application/vnd.jupyter.widget-view+json": {
347-
"model_id": "e3fb92a246514e888ec7379e1da18a62",
348-
"version_major": 2,
349-
"version_minor": 0
350-
},
351-
"text/plain": [
352-
"tokenizer_config.json: 0%| | 0.00/28.0 [00:00<?, ?B/s]"
353-
]
354-
},
355-
"metadata": {},
356-
"output_type": "display_data"
357-
},
358-
{
359-
"name": "stderr",
360-
"output_type": "stream",
361-
"text": [
362-
"C:\\Users\\DELL\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\huggingface_hub\\file_download.py:149: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\DELL\\.cache\\huggingface\\hub\\models--distilbert-base-uncased. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
363-
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
364-
" warnings.warn(message)\n"
365-
]
366-
},
367-
{
368-
"data": {
369-
"application/vnd.jupyter.widget-view+json": {
370-
"model_id": "70dfac598d3f48e0a494c914c6150693",
371-
"version_major": 2,
372-
"version_minor": 0
373-
},
374-
"text/plain": [
375-
"config.json: 0%| | 0.00/483 [00:00<?, ?B/s]"
376-
]
377-
},
378-
"metadata": {},
379-
"output_type": "display_data"
380-
},
381-
{
382-
"data": {
383-
"application/vnd.jupyter.widget-view+json": {
384-
"model_id": "b861ceae22df4de399324abee8cee86a",
385-
"version_major": 2,
386-
"version_minor": 0
387-
},
388-
"text/plain": [
389-
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
390-
]
391-
},
392-
"metadata": {},
393-
"output_type": "display_data"
394-
},
395-
{
396-
"data": {
397-
"application/vnd.jupyter.widget-view+json": {
398-
"model_id": "0e910174400d4409a950e6069f2f724b",
399-
"version_major": 2,
400-
"version_minor": 0
401-
},
402-
"text/plain": [
403-
"tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]"
404-
]
405-
},
406-
"metadata": {},
407-
"output_type": "display_data"
408-
}
409-
],
314+
"outputs": [],
410315
"source": [
411316
"from transformers import AutoTokenizer\n",
412317
"\n",

0 commit comments

Comments
 (0)