import torch
from xv.util import listAttr
torch.__version__
use_cuda = torch.cuda.is_available()
use_cuda
This machine has no GPU, so the check returns False.
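A common follow-up is to resolve a torch.device once and create tensors (or move models) on it, so the same code runs with or without a GPU. A minimal sketch of that pattern; the rest of this notebook sticks with the use_cuda flag and .cuda() calls instead:

import torch

# Pick the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Example: create a tensor directly on the chosen device.
x = torch.randn(3, 3, device=device)
print(x.device)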
PyTorch provides two data primitives: torch.utils.data.DataLoader and torch.utils.data.Dataset that allow you to use pre-loaded datasets as well as your own data.
Dataset stores the samples and their corresponding labels, and DataLoader wraps an iterable around the Dataset to enable easy access to the samples.
PyTorch domain libraries provide a number of pre-loaded datasets (such as FashionMNIST) that subclass torch.utils.data.Dataset.
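For reference, a custom Dataset only has to implement __len__ and __getitem__; a minimal sketch (the class name and the in-memory tensors are made up for illustration):

import torch
from torch.utils.data import Dataset

class InMemoryDataset(Dataset):
    # Toy Dataset wrapping a tensor of features and a tensor of labels.
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Usage: 100 random "images" of shape 1x28x28 with integer labels 0-9.
ds = InMemoryDataset(torch.randn(100, 1, 28, 28), torch.randint(0, 10, (100,)))
print(len(ds), ds[0][0].shape)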
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
listAttr(datasets)
We load the MNIST dataset with the following parameters:
root is the path where the train/test data is stored.
train selects the training split or the test split.
download=True downloads the data from the internet if it is not available at root.
transform (callable, optional) – a function/transform that takes in a PIL image and returns a transformed version, e.g. transforms.RandomCrop.
target_transform (callable, optional) – a function/transform that takes in the target and transforms it (an example is sketched after the datasets are created below).
training_data = datasets.MNIST(
root="data",
train=True,
download=True,
transform=ToTensor()
)
test_data = datasets.MNIST(
root="data",
train=False,
download=True,
transform=ToTensor()
)
training_data
test_data
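The target_transform hook can, for example, turn the integer label into a one-hot vector. A minimal sketch, reusing the same MNIST download as above (the variable name one_hot_data is just for illustration):

import torch
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

one_hot_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
    # Scatter a 1 into a zero vector of length 10 at the index given by the label.
    target_transform=Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1))
)
img, target = one_hot_data[0]
print(target)  # e.g. tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]) for a label of 5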
The Dataset retrieves our dataset’s features and labels one sample at a time. While training a model, we typically want to pass samples in “minibatches”, reshuffle the data at every epoch to reduce model overfitting, and use Python’s multiprocessing to speed up data retrieval.
DataLoader is an iterable that abstracts this complexity for us in an easy API.
from torch.utils.data import DataLoader
loaders = {
'train' : torch.utils.data.DataLoader(training_data,
batch_size=100,
shuffle=True,
num_workers=1),
'test' : torch.utils.data.DataLoader(test_data,
batch_size=100,
shuffle=True,
num_workers=1),
}
loaders
We have loaded the dataset into the DataLoader and can iterate through it as needed.
Each iteration below returns a batch of images and labels (batch_size=100 of each, as configured in the loaders above). Because we specified shuffle=True, the data is reshuffled after we have iterated over all batches.
sample = next(iter(loaders['train']))
imgs, lbls = sample
lbls
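Since each batch is a pair of tensors, the shapes can be checked directly; with the loaders defined above this should show 100 images of size 1x28x28 and 100 labels:

print(f"Image batch shape: {imgs.size()}")  # torch.Size([100, 1, 28, 28])
print(f"Label batch shape: {lbls.size()}")  # torch.Size([100])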
import matplotlib.pyplot as plt
figure = plt.figure(figsize=(10, 8))
cols, rows = 5, 5
for i in range(1, cols * rows + 1):
sample_idx = torch.randint(len(training_data), size=(1,)).item()
img, label = training_data[sample_idx]
figure.add_subplot(rows, cols, i)
plt.title(label)
plt.axis("off")
plt.imshow(img.squeeze(), cmap="gray")
plt.show()
Neural networks are composed of layers/modules that perform operations on data. The torch.nn namespace provides all the building blocks you need to build your own neural network.
Every module in PyTorch subclasses the nn.Module. A neural network is a module itself that consists of other modules (layers). This nested structure allows for building and managing complex architectures easily.
from torch import nn
import torch.nn.functional as F
class CustomPytorchModel(nn.Module):
def __init__(self, input_size = 784):
super().__init__()
self.fc1 = nn.Linear(input_size, 512)
self.fc2 = nn.Linear(512, 256)
self.fc3 = nn.Linear(256, 128)
self.fc4 = nn.Linear(128, 64)
self.fc5 = nn.Linear(64,10)
self.dropout = nn.Dropout(p=0.2)
def forward(self, input_data):
x1 = input_data.view(input_data.shape[0], -1)
x2 = self.dropout(F.relu(self.fc1(x1)))
x3 = self.dropout(F.relu(self.fc2(x2)))
x4 = self.dropout(F.relu(self.fc3(x3)))
x5 = self.dropout(F.relu(self.fc4(x4)))
x6 = F.log_softmax(self.fc5(x5), dim=1)
return x6
# Create the network, define the loss_function and optimizer
model = CustomPytorchModel()
# move model to GPU if CUDA is available
if use_cuda:
model = model.cuda()
print(model)
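For comparison, the same stack of layers could also be written with nn.Sequential; this is only an illustrative sketch, and the class-based model above is the one we actually train:

sequential_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 512), nn.ReLU(), nn.Dropout(p=0.2),
    nn.Linear(512, 256), nn.ReLU(), nn.Dropout(p=0.2),
    nn.Linear(256, 128), nn.ReLU(), nn.Dropout(p=0.2),
    nn.Linear(128, 64), nn.ReLU(), nn.Dropout(p=0.2),
    nn.Linear(64, 10), nn.LogSoftmax(dim=1)
)
print(sequential_model)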
The loss measures how far the model's predicted output is from the actual target. Because the network's forward pass ends in log_softmax (log-probabilities), we use the negative log-likelihood loss, nn.NLLLoss.
loss_function = nn.NLLLoss()
loss_function
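Because the model outputs log-probabilities, nn.NLLLoss simply picks out the negative log-probability of the correct class for each sample and averages over the batch. A tiny worked example with made-up numbers:

# Two samples, three classes: log-probabilities plus the true class indices.
log_probs = F.log_softmax(torch.tensor([[2.0, 0.5, 0.1],
                                        [0.2, 1.5, 0.3]]), dim=1)
targets = torch.tensor([0, 1])
print(nn.NLLLoss()(log_probs, targets))          # mean of -log_probs[0, 0] and -log_probs[1, 1]
print(-(log_probs[0, 0] + log_probs[1, 1]) / 2)  # the same value, computed by hand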
The weights are updated by an optimizer. torch.optim is a package implementing various optimization algorithms. To use torch.optim, we construct an optimizer object that holds the current state and updates the parameters based on the computed gradients.
To construct an Optimizer you have to give it an iterable containing the parameters to optimize. Then, you can specify optimizer-specific options such as the learning rate, weight decay, etc.
from torch import optim
optimizer = optim.Adam(model.parameters(), lr=0.001)
optimizer
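Optimizer-specific options are passed the same way; for instance, switching to SGD with momentum and weight decay would only change this one line (a sketch for illustration; Adam above is what the rest of the notebook uses):

sgd_optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
sgd_optimizer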
def train(start_epochs, n_epochs, model):
for epoch in range(start_epochs, n_epochs + 1):
print(f"epoch = {epoch}")
pass
# return trained model
return model
pass
train(0, 2, model)
def train(start_epochs, n_epochs, model):
for epoch in range(start_epochs, n_epochs + 1):
# initialize variables to monitor training and validation loss
train_loss = 0.0
valid_loss = 0.0
#Set the model in training mode
model.train()
print(f"epoch = {epoch}")
# return trained model
return model
pass
train(0, 2, model)
def train(start_epochs, n_epochs, model, loaders):
for epoch in range(start_epochs, n_epochs + 1):
# initialize variables to monitor training and validation loss
train_loss = 0.0
valid_loss = 0.0
#Set the model in training mode
model.train()
print(f"batch started: ")
for batch_idx, (data, target) in enumerate(loaders['train']):
#print(f"batch_idx: {batch_idx}")
if batch_idx % 50 == 0:
print(f"{batch_idx}, ", end = "")
pass
print(f"epoch = {epoch}")
# return trained model
return model
pass
train(0, 2, model, loaders)
def train_process_batches(model, loaders, optimizer, loss_function, verbose = True ):
train_loss = 0.0
model.train()
if verbose:
print(f"Training data batch process: ", end = "")
for batch_idx, (data, target) in enumerate(loaders['train']):
# move to GPU
if use_cuda:
data, target = data.cuda(), target.cuda()
#we need to set the gradients to zero before starting to do backpropragation
#because PyTorch accumulates the gradients on subsequent backward passes
optimizer.zero_grad()
#forward pass: compute predicted outputs by passing inputs to the model
output = model(data)
#calculate the batch loss
loss = loss_function(output, target)
#backward pass: compute gradient of the loss with respect to model parameters
loss.backward()
# perform a single optimization step (parameter update)
optimizer.step()
## calculate train_loss
        train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.item() - train_loss))
if batch_idx % 50 == 0:
if verbose:
print(f"\t{batch_idx}, {train_loss}", end = "\n")
else:
print(f"\t{batch_idx}, ", end = "")
pass
return train_loss
pass
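The update train_loss + (1 / (batch_idx + 1)) * (loss - train_loss) is just an incremental (running) mean of the per-batch losses, so we never have to keep a list of them. A quick numeric check with made-up values:

losses = [0.9, 0.7, 0.5]
running = 0.0
for i, l in enumerate(losses):
    running = running + (1 / (i + 1)) * (l - running)
print(running, sum(losses) / len(losses))  # both print 0.7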
def train(start_epochs, n_epochs, model, loaders):
for epoch in range(start_epochs, n_epochs + 1):
print(f"Epoch: {epoch}, ", end = "\n")
# initialize variables to monitor training and validation loss
valid_loss = 0.0
#train model
train_loss = train_process_batches(model, loaders, optimizer, loss_function)
print(f"\ntrain_loss = {train_loss}")
# return trained model
return model
train(0, 1, model, loaders)
def eval_process_batches(model, loaders, optimizer, loss_function, verbose = True ):
valid_loss = 0.0
model.eval()
if verbose:
print(f"Test data batch process: ", end = "")
for batch_idx, (data, target) in enumerate(loaders['test']):
# move to GPU
if use_cuda:
data, target = data.cuda(), target.cuda()
## update the average validation loss
# forward pass: compute predicted outputs by passing inputs to the model
output = model(data)
# calculate the batch loss
loss = loss_function(output, target)
# update average validation loss
        valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.item() - valid_loss))
if batch_idx % 20 == 0:
if verbose:
print(f"\t{batch_idx}, {valid_loss}", end = "\n")
else:
print(f"\t{batch_idx}, ", end = "")
pass
print()
return valid_loss
pass
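As a side note, since no parameters are updated during evaluation, the forward passes in eval_process_batches could also be wrapped in torch.no_grad() to skip gradient bookkeeping; a minimal sketch of the pattern:

model.eval()
with torch.no_grad():  # disable autograd for faster, memory-light inference
    for data, target in loaders['test']:
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        loss = loss_function(output, target)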
def train(start_epochs, n_epochs, model, loaders):
for epoch in range(start_epochs, n_epochs+1):
print(f"Epoch: {epoch}, ", end = "\n")
# initialize variables to monitor training and validation loss
valid_loss = 0.0
#train model
train_loss = train_process_batches(model, loaders, optimizer, loss_function, verbose = False)
valid_loss = eval_process_batches(model, loaders, optimizer, loss_function, verbose = True)
print(f"\ntrain_loss = {train_loss}")
print(f"\nvalid_loss = {valid_loss}")
# return trained model
return model
train(0, 1, model, loaders)
def train(start_epochs, n_epochs, model, loaders):
for epoch in range(start_epochs, n_epochs+1):
print(f"Epoch: {epoch}, ", end = "\n")
# initialize variables to monitor training and validation loss
valid_loss = 0.0
#train model
train_loss = train_process_batches(model, loaders, optimizer,
loss_function, verbose = False)
valid_loss = eval_process_batches(model, loaders, optimizer,
loss_function, verbose = False)
        # train_loss and valid_loss are already running averages over the batches,
        # so no further normalisation by the dataset size is needed here.
# print training/validation statistics
print('Epoch: {} Training Loss: {:.6f}, Validation Loss: {:.6f}'.format(
epoch,
train_loss,
valid_loss
))
print(f" Over")
# return trained model
return model
train(0, 10, model, loaders)
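After training, a quick sanity check is to measure accuracy on the test set; a minimal sketch using the existing loaders (the exact figure depends on how long the model was trained):

correct, total = 0, 0
model.eval()
with torch.no_grad():
    for data, target in loaders['test']:
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)           # log-probabilities of shape [batch, 10]
        pred = output.argmax(dim=1)    # most likely class per sample
        correct += (pred == target).sum().item()
        total += target.size(0)
print(f"Test accuracy: {correct / total:.4f}")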