0% found this document useful (0 votes)

40 views7 pages

Lab 5

The document details a Python implementation for training a Recurrent Neural Network (RNN) using the IMDB dataset for sentiment analysis. It includes steps for data preprocessing, vocabulary creation, batching, padding, and model training/testing. The model's performance is evaluated based on accuracy after training.

Uploaded by

Thành Đạt D1

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

40 views7 pages

Lab 5

Uploaded by

Thành Đạt D1

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

Artificial Intelligence Lab Work (5)

レポート解答用紙 (Report Answer Sheet)

学生証番号 (Student ID): 22520205

名前(Name): Cao Thành Đạt (Cao Thanh Dat/カオ・タイン・ダット)

問題 1.
(プログラム)
!pip install torchtext==0.17.0

!pip install portalocker

import torch
import torch.nn.functional as F
import torchtext

# Load the IMDB train/test splits and the tokenizer applied to every review.
train_iter, test_iter = torchtext.datasets.IMDB(split=('train', 'test'))
tokenizer = torchtext.data.utils.get_tokenizer('basic_english')

# Path the trained weights are written to and read back from.
# NOTE(review): the original filename was lost when this listing was
# extracted; the placeholder text is kept verbatim -- replace as needed.
MODELNAME = "[Link]"

# Training hyper-parameters.
EPOCH = 10
BATCHSIZE = 64
LR = 1e-5

# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

# Tokenize every review and order each split by token count, shortest first.
train_data = [(label, tokenizer(line)) for label, line in train_iter]
train_data.sort(key=lambda x: len(x[1]))
test_data = [(label, tokenizer(line)) for label, line in test_iter]
test_data.sort(key=lambda x: len(x[1]))

# Preview the first (up to) ten tokenized training examples.
for sample in train_data[:10]:
    print(sample)

def make_vocab(train_data, min_freq):
    """Build the vocabulary from tokenized examples.

    Args:
        train_data: iterable of ``(label, tokenlist)`` pairs; the labels
            are ignored.
        min_freq: a token is kept only if it occurs at least this many
            times across all token lists.

    Returns:
        A pair ``(vocablist, vocabidx)``. ``vocablist`` is a list of
        ``(token, freq)`` tuples that starts with the four special tokens
        ``<unk>``, ``<pad>``, ``<cls>``, ``<eos>`` (frequency 0).
        ``vocabidx`` maps each token to its position in ``vocablist``.
    """
    # Count how often each token occurs.
    vocab = {}
    for _label, tokenlist in train_data:
        for token in tokenlist:
            vocab[token] = vocab.get(token, 0) + 1

    vocablist = [('<unk>', 0), ('<pad>', 0), ('<cls>', 0), ('<eos>', 0)]
    vocabidx = {}
    for token, freq in vocab.items():
        if freq >= min_freq:
            # Number regular tokens after the specials so that ids match
            # positions in vocablist and never collide with ids 0-3.
            vocabidx[token] = len(vocablist)
            vocablist.append((token, freq))

    # The special tokens always own the first four ids.
    vocabidx['<unk>'] = 0
    vocabidx['<pad>'] = 1
    vocabidx['<cls>'] = 2
    vocabidx['<eos>'] = 3
    return vocablist, vocabidx

# Build the vocabulary from the training split only; tokens that occur
# fewer than 10 times are left out of it.
vocablist, vocabidx = make_vocab(train_data, 10)

def preprocess(data, vocabidx):
    """Add sentence markers and replace out-of-vocabulary tokens.

    Args:
        data: iterable of ``(label, tokenlist)`` pairs.
        vocabidx: mapping whose keys are the known tokens.

    Returns:
        A new list of ``(label, tokens)`` pairs where ``tokens`` starts
        with ``'<cls>'``, ends with ``'<eos>'``, and every token that is
        not a key of ``vocabidx`` has been replaced by ``'<unk>'``.
    """
    rr = []
    for label, tokenlist in data:
        tkl = ['<cls>']
        tkl.extend(
            token if token in vocabidx else '<unk>' for token in tokenlist
        )
        tkl.append('<eos>')
        rr.append((label, tkl))
    return rr

# Add the <cls>/<eos> markers and map unknown tokens to <unk> in both splits.
train_data = preprocess(train_data, vocabidx)
test_data = preprocess(test_data, vocabidx)

# Preview the first (up to) ten processed training examples.
for sample in train_data[:10]:
    print(sample)

def make_batch(data, batchsize):
    """Group consecutive examples into batches.

    Args:
        data: iterable of ``(label, tokenlist)`` pairs, consumed in order.
        batchsize: number of examples after which a batch is closed.

    Returns:
        A list of ``(tokenlists, labels)`` tuples. Every batch holds
        ``batchsize`` examples except possibly the last, which holds the
        remainder. Note the order inside each tuple is token lists first,
        labels second.
    """
    bb = []
    blabel = []
    btokenlist = []
    for label, tokenlist in data:
        blabel.append(label)
        btokenlist.append(tokenlist)
        if len(blabel) >= batchsize:
            bb.append((btokenlist, blabel))
            # Start fresh lists; the ones just stored must not be reused.
            blabel = []
            btokenlist = []
    # Keep the final, partially filled batch.
    if blabel:
        bb.append((btokenlist, blabel))
    return bb

# Split both datasets into batches of BATCHSIZE examples.
train_data = make_batch(train_data, BATCHSIZE)
test_data = make_batch(test_data, BATCHSIZE)

# Preview the first (up to) ten training batches.
for batch in train_data[:10]:
    print(batch)

def padding(bb):
    """Pad every token list in a batch to the batch's longest length.

    Args:
        bb: list of ``(tokenlists, labels)`` batches.

    Returns:
        The same ``bb`` object. The token lists are extended in place
        with ``'<pad>'`` until all lists of a batch have equal length.
    """
    for tokenlists, _labels in bb:
        # default=0 keeps a batch without token lists from raising.
        maxlen = max((len(x) for x in tokenlists), default=0)
        for tkl in tokenlists:
            tkl.extend(['<pad>'] * (maxlen - len(tkl)))
    return bb

# Pad the token lists of every batch to a common length.
train_data = padding(train_data)
test_data = padding(test_data)

# Preview the first (up to) ten padded training batches.
for batch in train_data[:10]:
    print(batch)

def word2id(bb, vocabidx):
    """Convert batches of tokens and labels into integer ids.

    Args:
        bb: list of ``(tokenlists, labels)`` batches.
        vocabidx: mapping from token to integer id; every token in ``bb``
            must be a key (a missing token raises ``KeyError``).

    Returns:
        A new list with one ``[id_tokenlists, id_labels]`` entry per
        batch. Tokens are looked up in ``vocabidx`` and each label is
        reduced by one.
    """
    rr = []
    for tokenlists, labels in bb:
        # Shift labels down by one so they can serve as class indices
        # (presumably the dataset labels start at 1 -- confirm).
        id_labels = [label - 1 for label in labels]
        id_tokenlists = [
            [vocabidx[token] for token in tokenlist]
            for tokenlist in tokenlists
        ]
        rr.append([id_tokenlists, id_labels])
    return rr

# Replace tokens by vocabulary ids and shift the labels in both splits.
train_data = word2id(train_data, vocabidx)
test_data = word2id(test_data, vocabidx)

# Preview the first (up to) ten id-encoded training batches.
for batch in train_data[:10]:
    print(batch)

class MyRNN(torch.nn.Module):
    """Simple recurrent classifier over token-id sequences.

    Each token is embedded into 300 dimensions, the hidden state is
    updated once per sequence position, and the final hidden state is
    projected to 2 output scores.

    The vocabulary size and the padding id are read from the
    module-level ``vocablist`` and ``vocabidx``.
    """

    def __init__(self):
        super(MyRNN, self).__init__()
        vocabsize = len(vocablist)
        # NOTE(review): the attribute name was lost in extraction; "emb"
        # is a reconstruction -- it only has to match its use in forward().
        self.emb = torch.nn.Embedding(
            vocabsize, 300, padding_idx=vocabidx['<pad>']
        )
        self.l1 = torch.nn.Linear(300, 300)  # hidden -> hidden
        self.l2 = torch.nn.Linear(300, 2)    # hidden -> class scores

    def forward(self, x):
        """Return class scores for a batch of sequences.

        Args:
            x: integer tensor of token ids whose first dimension is the
                sequence position (callers transpose the batch so that
                it is laid out as sequence x batch).

        Returns:
            Tensor with one row of 2 scores per sequence in the batch.
        """
        e = self.emb(x)
        # Initial hidden state: zeros, created on the same device as the
        # embeddings so the module does not depend on a global device.
        h = torch.zeros(e[0].size(), dtype=torch.float32, device=e.device)
        for e_t in e:
            h = F.relu(e_t + self.l1(h))
        return self.l2(h)

def train():
    """Train a fresh ``MyRNN`` on ``train_data`` and save its weights.

    Runs ``EPOCH`` passes over the pre-batched, id-encoded ``train_data``,
    prints the summed batch loss after every epoch, and finally writes
    the model's ``state_dict`` to ``MODELNAME``.
    """
    model = MyRNN().to(DEVICE)
    # NOTE(review): the optimizer class name was lost in extraction;
    # Adam is assumed here -- confirm against the original notebook.
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    for epoch in range(EPOCH):
        loss = 0
        for tokenlists, labels in train_data:
            # Batches are stored batch-major; the model expects the
            # sequence position as the first dimension.
            tokenlists = torch.tensor(
                tokenlists, dtype=torch.int64
            ).transpose(0, 1).to(DEVICE)
            labels = torch.tensor(labels, dtype=torch.int64).to(DEVICE)
            optimizer.zero_grad()
            y = model(tokenlists)
            batchloss = F.cross_entropy(y, labels)
            # Both calls are required: backward() computes the gradients
            # and step() applies them to the parameters.
            batchloss.backward()
            optimizer.step()
            loss = loss + batchloss.item()
        print("epoch: ", epoch, "loss: ", loss)
    torch.save(model.state_dict(), MODELNAME)

def test():
    """Evaluate the saved model on ``test_data`` and print its accuracy.

    Loads the weights stored at ``MODELNAME`` into a new ``MyRNN``,
    predicts the highest-scoring class for every test example, and prints
    the number of correct predictions, the number of examples, and their
    ratio.
    """
    total = 0
    correct = 0
    model = MyRNN().to(DEVICE)
    # map_location lets weights saved on one device load on another.
    model.load_state_dict(torch.load(MODELNAME, map_location=DEVICE))
    model.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for tokenlists, labels in test_data:
            total += len(labels)
            tokenlists = torch.tensor(
                tokenlists, dtype=torch.int64
            ).transpose(0, 1).to(DEVICE)
            labels = torch.tensor(labels, dtype=torch.int64).to(DEVICE)
            y = model(tokenlists)
            # max(dim=1) returns (values, indices); the indices are the
            # predicted class ids.
            pred_labels = y.max(dim=1)[1]
            correct += (pred_labels == labels).sum().item()
    print("correct:", correct)
    print("total:", total)
    print("accuracy: ", (correct / float(total)))

# Train the model and save its weights, then evaluate the saved weights
# on the test batches.
train()

test()
(実行結果)

EncoderDecoderSeq2Seq DeepLSTM
100% (1)
EncoderDecoderSeq2Seq DeepLSTM
7 pages
Attention Mechanism
No ratings yet
Attention Mechanism
11 pages
German to English Translation with Transformer
No ratings yet
German to English Translation with Transformer
8 pages
3-Sentiment Analysis BERT
No ratings yet
3-Sentiment Analysis BERT
5 pages
Transformer
No ratings yet
Transformer
10 pages
Practical No10
No ratings yet
Practical No10
4 pages
NLP 8
No ratings yet
NLP 8
42 pages
Self-Attention Mechanism Explained
No ratings yet
Self-Attention Mechanism Explained
20 pages
Data Science Chapitre 0
No ratings yet
Data Science Chapitre 0
25 pages
Machine Learning: Linear Models For Classification 1
No ratings yet
Machine Learning: Linear Models For Classification 1
30 pages
Datascience Lab Manual
No ratings yet
Datascience Lab Manual
46 pages
And Lists: Jason Myers
No ratings yet
And Lists: Jason Myers
114 pages
Attention Mechanism
No ratings yet
Attention Mechanism
2 pages
Neural Networks & Gradient Descent
No ratings yet
Neural Networks & Gradient Descent
77 pages
Repetition Structures Python
No ratings yet
Repetition Structures Python
12 pages
Chapter 3: Control Structures: 1. Higher Order Organization of Python Instructions
No ratings yet
Chapter 3: Control Structures: 1. Higher Order Organization of Python Instructions
7 pages
KNIME Workflow Guide for Beginners
No ratings yet
KNIME Workflow Guide for Beginners
2 pages
6 Different Ways To Compensate For Missing Values in A Dataset (Data Imputation With Examples)
No ratings yet
6 Different Ways To Compensate For Missing Values in A Dataset (Data Imputation With Examples)
10 pages
GCLUTO - An Interactive Clustering, Visualization, and Analysis System
No ratings yet
GCLUTO - An Interactive Clustering, Visualization, and Analysis System
10 pages
Python Data Types Overview
No ratings yet
Python Data Types Overview
12 pages
Advanced LLM Course for Developers
No ratings yet
Advanced LLM Course for Developers
3 pages
ARIMA Modeling:: B-J Procedure
No ratings yet
ARIMA Modeling:: B-J Procedure
26 pages
Ethical Consideration in Artificial Intelligence Development and Deployment
No ratings yet
Ethical Consideration in Artificial Intelligence Development and Deployment
6 pages
Python Unit-3 Question Bank
No ratings yet
Python Unit-3 Question Bank
88 pages
Class Object
No ratings yet
Class Object
26 pages
IA Ethique 15-04
No ratings yet
IA Ethique 15-04
22 pages
Time Series
No ratings yet
Time Series
44 pages
Time Series and Forecasting
No ratings yet
Time Series and Forecasting
75 pages
Time Series 1
No ratings yet
Time Series 1
23 pages
Python (Anaconda) - Installation Kit
No ratings yet
Python (Anaconda) - Installation Kit
7 pages
Setting Up Anaconda for Python 3.9
No ratings yet
Setting Up Anaconda for Python 3.9
6 pages
Python Data Structures - Home - Coursera
No ratings yet
Python Data Structures - Home - Coursera
1 page
Python Worksheet 5 While Loops
No ratings yet
Python Worksheet 5 While Loops
2 pages
Pyspark RDD and DataFrame Examples
No ratings yet
Pyspark RDD and DataFrame Examples
3 pages
Level of Testing: Engr. Anees Ur Rahman
No ratings yet
Level of Testing: Engr. Anees Ur Rahman
31 pages
Transform Raw Texts Into Training and Development Data: Instructor: Nikos Aletras
No ratings yet
Transform Raw Texts Into Training and Development Data: Instructor: Nikos Aletras
2 pages
Applying CSS To HTML Documents: IP Lab Manual Three
No ratings yet
Applying CSS To HTML Documents: IP Lab Manual Three
3 pages
Anaconda Installation and Creating Environment - Lecture - 03
No ratings yet
Anaconda Installation and Creating Environment - Lecture - 03
40 pages
What Is HTML?: Content
No ratings yet
What Is HTML?: Content
5 pages
Version Control for Developers
No ratings yet
Version Control for Developers
6 pages
Understanding Python Tuples Explained
No ratings yet
Understanding Python Tuples Explained
7 pages
AI Agents
No ratings yet
AI Agents
17 pages
Benefits of Early Test Design
No ratings yet
Benefits of Early Test Design
8 pages
Gradient Descent for Deep Learning
No ratings yet
Gradient Descent for Deep Learning
21 pages
Css
No ratings yet
Css
22 pages
Viralheat Inc. Tech Stack Overview
No ratings yet
Viralheat Inc. Tech Stack Overview
68 pages
Single Layer Perceptron Experiment
No ratings yet
Single Layer Perceptron Experiment
11 pages
Debugging Techniques: Troubleshooting Computer Problems
No ratings yet
Debugging Techniques: Troubleshooting Computer Problems
18 pages
The Multilayer Perceptron
No ratings yet
The Multilayer Perceptron
11 pages
Sequence Models - Week3-Sequence Model and Attention Mechanism-Quiz 1
100% (1)
Sequence Models - Week3-Sequence Model and Attention Mechanism-Quiz 1
5 pages
Introduction to Python Programming
No ratings yet
Introduction to Python Programming
79 pages
Python Tuple Methods
No ratings yet
Python Tuple Methods
1 page
AI-Enhanced QA: EmbeddingAlign RAG
No ratings yet
AI-Enhanced QA: EmbeddingAlign RAG
7 pages
Variables and Data Types in Python
No ratings yet
Variables and Data Types in Python
20 pages
Whitepaper - Embeddings & Vector Stores
No ratings yet
Whitepaper - Embeddings & Vector Stores
52 pages
Oop Assignment 1 Fa19-Bee-012
0% (1)
Oop Assignment 1 Fa19-Bee-012
11 pages
Understanding Artificial Neural Networks
No ratings yet
Understanding Artificial Neural Networks
19 pages
AI Lab6
No ratings yet
AI Lab6
22 pages
Sample
No ratings yet
Sample
6 pages
Deep Learning Program Printout
No ratings yet
Deep Learning Program Printout
32 pages
7 Essential Innovation Habits
No ratings yet
7 Essential Innovation Habits
2 pages
ICC UK-Primatrade-Cash Against Data-Paper-V1-2
No ratings yet
ICC UK-Primatrade-Cash Against Data-Paper-V1-2
38 pages
Military Pain Relief Acupuncture
100% (7)
Military Pain Relief Acupuncture
44 pages
LKPD Chapter 1
No ratings yet
LKPD Chapter 1
7 pages
Securitisation Risk Transfer Guide
No ratings yet
Securitisation Risk Transfer Guide
155 pages
Guidelines for Mobile Tower Installation
No ratings yet
Guidelines for Mobile Tower Installation
2 pages
Class 11 CS Mid-Term
No ratings yet
Class 11 CS Mid-Term
3 pages
Grant Guidelines: Background Information
No ratings yet
Grant Guidelines: Background Information
10 pages
Naomi Saito Resume 7
No ratings yet
Naomi Saito Resume 7
1 page
2.1 Induction Motors
No ratings yet
2.1 Induction Motors
105 pages
B47 - 10008 STD 6 Ins 012 R00
No ratings yet
B47 - 10008 STD 6 Ins 012 R00
70 pages
Dew Point Compressed Air Application Note B210991EN B LOW v1
100% (1)
Dew Point Compressed Air Application Note B210991EN B LOW v1
4 pages
Training Report PDF
No ratings yet
Training Report PDF
5 pages
ARD Company Introduction 6.3.2025
No ratings yet
ARD Company Introduction 6.3.2025
31 pages
Garibaldi
No ratings yet
Garibaldi
6 pages
Stn3Nf06L: N-Channel 60 V, 0.07 Ω Typ., 4 A Stripfet™ Ii Power Mosfet In A Sot-223 Package
No ratings yet
Stn3Nf06L: N-Channel 60 V, 0.07 Ω Typ., 4 A Stripfet™ Ii Power Mosfet In A Sot-223 Package
12 pages
IFM - Coca Cola (Ha&Dat)
No ratings yet
IFM - Coca Cola (Ha&Dat)
5 pages
Pruning & Plucking in Tea Garden
100% (13)
Pruning & Plucking in Tea Garden
18 pages
Music's Impact on Running Performance
No ratings yet
Music's Impact on Running Performance
6 pages
Electronic System Design Lab Guide
No ratings yet
Electronic System Design Lab Guide
30 pages
Cognitive Skills via Block Play in PAUD
No ratings yet
Cognitive Skills via Block Play in PAUD
10 pages
DRBD Primary/Unknown Issue Resolution
No ratings yet
DRBD Primary/Unknown Issue Resolution
13 pages
RheoTool User Guide v3.0
No ratings yet
RheoTool User Guide v3.0
183 pages
Solaris, VXVM, Cluster L1 - L2 and L3 Also Interview Questions - Exploring Solaris and Veritas
No ratings yet
Solaris, VXVM, Cluster L1 - L2 and L3 Also Interview Questions - Exploring Solaris and Veritas
4 pages
(1915) Harmuth, Louis - Dictionary of Textiles
No ratings yet
(1915) Harmuth, Louis - Dictionary of Textiles
186 pages
Mathematics: Quarter 1
No ratings yet
Mathematics: Quarter 1
14 pages
Lab Request APE
No ratings yet
Lab Request APE
2 pages
01 Resource Material
No ratings yet
01 Resource Material
67 pages
ERP Lecture Notes: Key Concepts & Benefits
No ratings yet
ERP Lecture Notes: Key Concepts & Benefits
3 pages
ARTMAN - Volume 71 - Issue 71 - Pages 1-1 - 2
No ratings yet
ARTMAN - Volume 71 - Issue 71 - Pages 1-1 - 2
31 pages

Lab 5

Uploaded by

Lab 5

Uploaded by

Artificial Intelligence Lab Work (5)

レポート解答用紙 (Report Answer Sheet)

学生証番号 (Student ID): 22520205

!pip install portalocker

train_iter, test_iter = [Link](split=('train', 'test'))

DEVICE = "cuda" if [Link].is_available() else "cpu"

train_data = [(label, tokenizer(line)) for label, line in train_iter]

train_data.sort(key = lambda x: len(x[1]))

test_data = [(label, tokenizer(line)) for label, line in test_iter]

test_data.sort(key = lambda x: len(x[1]))

def make_vocab(train_data, min_freq):

for token in tokenlist:

if token not in vocab:

vocablist = [('<unk>', 0), ('<pad>', 0), ('<cls>', 0), ('<eos>', 0)]

for token, freq in [Link]():

if freq >= min_freq:

return vocablist, vocabidx

vocablist, vocabidx = make_vocab(train_data, 10)

def preprocess(data, vocabidx):

for label, tokenlist in data:

for token in tokenlist:

[Link](token if token in vocabidx else '<unk>')

train_data = preprocess(train_data, vocabidx)

test_data = preprocess(test_data, vocabidx)

def make_batch(data, batchsize):

for label, tokenlist in data:

if len(blabel) >= batchsize:

train_data = make_batch(train_data, BATCHSIZE)

test_data = make_batch(test_data, BATCHSIZE)

for tokenlists, labels in bb:

maxlen = max([len(x) for x in tokenlists])

for tkl in tokenlists:

for i in range(maxlen - len(tkl)):

def word2id(bb, vocabidx):

for tokenlists, labels in bb:

id_labels = [label - 1 for label in labels]

for tokenlist in tokenlists:

id_tokenlists.append([vocabidx[token] for token in tokenlist])

train_data = word2id(train_data, vocabidx)

test_data = word2id(test_data, vocabidx)

[Link] = [Link](vocabsize, 300, padding_idx =

self.l1 = [Link](300, 300)

def forward(self, x):

h = [Link](e[0].size(), dtype = torch.float32).to(DEVICE)

optimizer = [Link]([Link](), lr = LR)

for epoch in range(EPOCH):

for tokenlists, labels in train_data:

tokenlists = [Link](tokenlists, dtype = torch.int64).transpose(0,

labels = [Link](labels, dtype = torch.int64).to(DEVICE)

batchloss = F.cross_entropy(y, labels)

loss = loss + [Link]()

print("epoch: ", epoch, "loss: ", loss)

for tokenlists, labels in test_data:

tokenlists = [Link](tokenlists, dtype = torch.int64).transpose(0,

labels = [Link](labels, dtype = torch.int64).to(DEVICE)

correct += (pred_labels == labels).sum()

print("accuracy: ", ([Link]() / float(total)))

You might also like