Implementation and Simulation of an RNN
A character-level language model trained on the Tiny Shakespeare corpus
Step 1: Import Libraries & Load Dataset
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams.update({
    "figure.figsize": (8, 5),
    "axes.grid": True,
    "lines.linewidth": 2,
    "font.size": 11
})

# Download the Tiny Shakespeare dataset
!curl -L -o tiny_shakespeare.txt https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt

with open("tiny_shakespeare.txt", "r", encoding="utf-8") as f:
    text = f.read()

print("Dataset loaded successfully")
Step 2: Preprocess the Dataset
# Split the corpus into train / validation / test (90% / 5% / 5%)
n = len(text)
train_text = text[:int(0.9*n)]
val_text = text[int(0.9*n):int(0.95*n)]
test_text = text[int(0.95*n):]

print("Train:", len(train_text))
print("Val  :", len(val_text))
print("Test :", len(test_text))

# Create character vocabulary and mappings
chars = sorted(set(text))
vocab_size = len(chars)
char2idx = {ch: i for i, ch in enumerate(chars)}
idx2char = {i: ch for i, ch in enumerate(chars)}

def text_to_ints(s):
    return np.array([char2idx[c] for c in s], dtype=np.int32)

train_ids = text_to_ints(train_text)
val_ids = text_to_ints(val_text)
test_ids = text_to_ints(test_text)

print("Vocabulary size:", vocab_size)
Step 3: Set Up Hyperparameters
# Training hyperparameters
epochs = 15
learning_rate = 0.01
batch_size = 64
seq_length = 100

# Model hyperparameters
embed_size = 128
hidden_size = 256
num_layers = 2

def get_batches(data, batch_size, seq_length):
    # Trim the data so it divides evenly into batch_size rows
    n_batches = len(data) // (batch_size * seq_length)
    data = data[:n_batches * batch_size * seq_length]
    x = data.reshape(batch_size, -1)
    # Targets are the inputs shifted one step left; the final target of each
    # row wraps around to that row's first character (a common simplification
    # that affects only one position per row)
    y = np.roll(x, -1, axis=1)
    return x, y
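A shape check (illustrative, not part of the original code) makes the batch layout concrete: each of the 64 rows of x is one contiguous slice of the corpus, and y is x shifted left by one character:

# Optional: verify the batch layout
bx, by = get_batches(train_ids, batch_size, seq_length)
print(bx.shape)                               # (64, n_batches * 100)
print(bool((bx[:, 1:] == by[:, :-1]).all()))  # True: y is x shifted by one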
Step 4: Define the RNN Model
class CharRNN(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.RNN(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embed(x)                  # (batch, seq) -> (batch, seq, embed)
        out, hidden = self.rnn(x, hidden)  # out: (batch, seq, hidden)
        out = self.fc(out)                 # logits: (batch, seq, vocab)
        return out, hidden

model = CharRNN(vocab_size, embed_size, hidden_size, num_layers)
print(model)
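A dummy forward pass (an optional check, not in the original walkthrough) confirms the tensor shapes the model produces before any training is run:

# Optional: one forward pass on dummy input to verify shapes
dummy = torch.zeros(batch_size, seq_length, dtype=torch.long)
logits, h = model(dummy, None)
print(logits.shape)  # torch.Size([64, 100, vocab_size])
print(h.shape)       # torch.Size([2, 64, 256]) -> (num_layers, batch, hidden)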
Step 5: Define the Training Function
def train_model(model, train_ids, val_ids):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    train_losses, val_losses = [], []

    for epoch in range(epochs):
        # -------- Training --------
        model.train()
        x, y = get_batches(train_ids, batch_size, seq_length)
        hidden = None
        total_loss = 0
        num_batches = 0
        for i in range(0, x.shape[1], seq_length):
            inputs = torch.tensor(x[:, i:i+seq_length], dtype=torch.long)
            targets = torch.tensor(y[:, i:i+seq_length], dtype=torch.long)

            optimizer.zero_grad()
            # Detach the hidden state so gradients do not flow across batch
            # boundaries (truncated backpropagation through time)
            if hidden is not None:
                hidden = hidden.detach()
            outputs, hidden = model(inputs, hidden)
            loss = criterion(outputs.view(-1, vocab_size), targets.view(-1))
            loss.backward()
            # Clip gradients to mitigate exploding gradients in the RNN
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1
        train_losses.append(total_loss / num_batches)

        # -------- Validation --------
        model.eval()
        with torch.no_grad():
            vx, vy = get_batches(val_ids, batch_size, seq_length)
            vhidden = None
            vloss = 0
            v_batches = 0
            for i in range(0, vx.shape[1], seq_length):
                vin = torch.tensor(vx[:, i:i+seq_length], dtype=torch.long)
                vtar = torch.tensor(vy[:, i:i+seq_length], dtype=torch.long)
                out, vhidden = model(vin, vhidden)
                vloss += criterion(out.view(-1, vocab_size), vtar.view(-1)).item()
                v_batches += 1
        val_losses.append(vloss / v_batches)

        print(
            f"Epoch {epoch+1}/{epochs} | "
            f"Train Loss: {train_losses[-1]:.4f} | "
            f"Val Loss: {val_losses[-1]:.4f}"
        )
    return train_losses, val_losses
Step 6: Run Training
train_losses, val_losses = train_model(model, train_ids, val_ids)
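The test split created in Step 2 is never used above. A minimal evaluation sketch, mirroring the validation loop and assuming the same globals (model, test_ids, get_batches), reports the test loss together with its perplexity (the exponential of the mean cross-entropy):

import math

# Sketch: evaluate the trained model on the held-out test split
model.eval()
criterion = nn.CrossEntropyLoss()
tx, ty = get_batches(test_ids, batch_size, seq_length)
thidden = None
tloss, t_batches = 0.0, 0
with torch.no_grad():
    for i in range(0, tx.shape[1], seq_length):
        tin = torch.tensor(tx[:, i:i+seq_length], dtype=torch.long)
        ttar = torch.tensor(ty[:, i:i+seq_length], dtype=torch.long)
        out, thidden = model(tin, thidden)
        tloss += criterion(out.view(-1, vocab_size), ttar.view(-1)).item()
        t_batches += 1
test_loss = tloss / t_batches
print(f"Test loss: {test_loss:.4f} | Perplexity: {math.exp(test_loss):.2f}")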
Step 7: Plot Training Loss
epochs_range = range(1, epochs + 1)
plt.figure()
plt.plot(epochs_range, train_losses, label="Train Loss", marker="o")
plt.plot(epochs_range, val_losses, label="Validation Loss", marker="o")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training vs Validation Loss")
plt.legend()
plt.show()
Step 8: Generate Text
def generate_text(model, start_text, length=500):
    model.eval()
    hidden = None
    generated = start_text
    with torch.no_grad():
        # Prime the hidden state with all but the last start character
        for ch in start_text[:-1]:
            x = torch.tensor([[char2idx[ch]]])
            _, hidden = model(x, hidden)
        # Generate one character at a time, feeding each sample back in
        inp = torch.tensor([[char2idx[start_text[-1]]]])
        for _ in range(length):
            out, hidden = model(inp, hidden)
            # Sample the next character from the softmax distribution
            probs = torch.softmax(out[:, -1, :], dim=-1)
            char_idx = torch.multinomial(probs, 1).item()
            generated += idx2char[char_idx]
            inp = torch.tensor([[char_idx]])
    return generated

print(generate_text(model, "ROMEO: HELLO! "))
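A common refinement (not part of the original code) is a temperature parameter that scales the logits before the softmax: values below 1 sharpen the distribution and make sampling more conservative, values above 1 flatten it and make output more varied. A minimal sketch of the sampling step, where temperature is a hypothetical extension:

# Sketch: temperature-scaled sampling; `temperature` is a hypothetical
# extension, not part of the original generate_text
def sample_next(logits, temperature=0.8):
    probs = torch.softmax(logits / temperature, dim=-1)
    return torch.multinomial(probs, 1).item()

# Inside the generation loop, this would replace the softmax/multinomial pair:
#   char_idx = sample_next(out[:, -1, :], temperature=0.8)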