import torch
import torch.nn as nn #n
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from tqdm.autonotebook import tqdm
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_openml
# Load data from https://www.openml.org/d/554
# as_frame=False forces plain NumPy arrays. Since scikit-learn 0.24 the default
# (as_frame='auto') returns a pandas DataFrame / Series for this dataset, and
# the positional indexing used further down (X[index, :]) does not work on a
# DataFrame.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
print('Shape of X : ',X.shape)
print('Shape of y : ',y.shape)
class SimpleDataset(Dataset):
    """Map-style dataset over a 2-D feature matrix and a matching label vector.

    X -- array-like of shape (n_samples, n_features); NumPy arrays and pandas
         DataFrames are both accepted
    y -- array-like of n_samples labels; each entry must be convertible with
         int() (e.g. the strings '0'..'9')
    """

    def __init__(self, X, y):
        super().__init__()
        # Coerce to NumPy so the positional indexing in __getitem__ also works
        # when the caller hands in a pandas DataFrame / Series.
        self.X = np.asarray(X)
        self.y = np.asarray(y)

    def __getitem__(self, index):
        """Return (features as float32 tensor, label as int64 tensor) for one row."""
        features = torch.tensor(self.X[index, :], dtype=torch.float32)
        target = torch.tensor(int(self.y[index]), dtype=torch.long)
        return features, target

    def __len__(self):
        """Number of samples (rows of X)."""
        return self.X.shape[0]
# Wrap the raw arrays so PyTorch utilities can index them sample by sample.
dataset = SimpleDataset(X, y)

# Quick sanity check on the first sample: print its shape and label, then
# show it as a 28x28 image.
example, label = dataset[0]
print(example.shape)
print(label)
plt.imshow(example.reshape((28, 28)))

# 80% of the samples go to training, the remainder to testing.
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
)
print("{} examples for training and {} for testing".format(len(train_dataset), len(test_dataset)))

# Only the training loader shuffles; no batch_size is passed to either loader.
training_loader = DataLoader(train_dataset, shuffle=True)
testing_loader = DataLoader(test_dataset, shuffle=False)
def train_simple_network_with_input_reshape(model, loss_func, train_loader, val_loader=None, score_funcs=None,
                                            epochs=50, device="cpu", checkpoint_file=None):
    """Train a network on flattened inputs and record per-epoch metrics.

    Same training loop as the one provided in the lecture, except that every
    batch is reshaped to (batch, -1) before it is passed to ``model`` so that
    multi-dimensional inputs can feed a stack of linear layers.

    Keyword arguments:
    model -- the PyTorch model / "Module" to train
    loss_func -- callable taking (model outputs, labels) and returning a scalar loss
    train_loader -- PyTorch DataLoader yielding (input, label) batches
    val_loader -- optional PyTorch DataLoader evaluated after every epoch
    score_funcs -- optional dict mapping a metric name to a function called as
        score_func(y_true, y_pred); None means "no extra metrics"
    epochs -- the number of training epochs to perform
    device -- the compute location used for the model and every batch
    checkpoint_file -- optional path; when given, the model state, optimizer
        state and the results so far are written there

    Returns a pandas DataFrame with one row per epoch and the columns
    "epoch", "total time", "train loss", plus "val loss" and
    "train <name>" / "val <name>" per score function where applicable.
    Losses are the sum over batches of (batch loss * batch size), not a mean.
    """
    # The signature defaults score_funcs to None; iterating None would raise,
    # so treat it as an empty metric set.
    if score_funcs is None:
        score_funcs = {}

    to_track = ["epoch", "total time", "train loss"]
    if val_loader is not None:
        to_track.append("val loss")
    for eval_score in score_funcs:
        to_track.append("train " + eval_score)
        if val_loader is not None:
            to_track.append("val " + eval_score)

    total_train_time = 0  # Seconds spent inside the training loop only
    results = {item: [] for item in to_track}

    # Plain stochastic gradient descent with a fixed learning rate.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
    # Place the model on the requested compute resource (CPU or GPU).
    model.to(device)

    for epoch in tqdm(range(epochs), desc="Epoch"):
        model = model.train()  # Training mode
        running_loss = 0.0
        y_true = []
        y_pred = []

        start = time.time()
        for inputs, labels in tqdm(train_loader, desc="Train Batch", leave=False):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            # Flatten each sample so it fits the linear model.
            y_hat = model(inputs.view(inputs.size(0), -1))
            loss = loss_func(y_hat, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)

            # Move labels and predictions back to the CPU for scoring; extend()
            # adds one entry per sample of this batch.
            y_true.extend(labels.detach().cpu().numpy())
            y_pred.extend(y_hat.detach().cpu().numpy())
        end = time.time()
        total_train_time += (end - start)

        results["epoch"].append(epoch)
        results["total time"].append(total_train_time)
        results["train loss"].append(running_loss)

        y_pred = np.asarray(y_pred)
        # More than one output column means classification: convert to labels.
        if y_pred.ndim > 1 and y_pred.shape[1] > 1:
            y_pred = np.argmax(y_pred, axis=1)
        for name, score_func in score_funcs.items():
            results["train " + name].append(score_func(y_true, y_pred))

        if val_loader is not None:
            model = model.eval()  # Evaluation mode: no parameter updates happen below
            y_true = []
            y_pred = []
            running_loss = 0.0

            # No gradients are needed for evaluation.
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    y_hat = model(inputs.view(inputs.size(0), -1))
                    loss = loss_func(y_hat, labels)
                    running_loss += loss.item() * inputs.size(0)

                    # Collect every sample of *this* batch; the size of the
                    # last training batch must not be reused here.
                    y_true.extend(labels.detach().cpu().numpy())
                    y_pred.extend(y_hat.detach().cpu().numpy())

            results["val loss"].append(running_loss)
            y_pred = np.asarray(y_pred)
            if y_pred.ndim > 1 and y_pred.shape[1] > 1:
                y_pred = np.argmax(y_pred, axis=1)
            for name, score_func in score_funcs.items():
                results["val " + name].append(score_func(y_true, y_pred))

        # NOTE(review): the source lost its indentation; checkpointing is
        # placed inside the epoch loop (one overwrite per epoch) -- confirm.
        if checkpoint_file is not None:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'results' : results
                }, checkpoint_file)

    return pd.DataFrame.from_dict(results)
Create two models with different numbers of layers and hidden-state sizes to increase the accuracy of the MNIST classifier.
# First candidate: two Tanh hidden layers, 784 -> 784 -> 120 -> 10.
model1 = nn.Sequential(
    nn.Linear(784, 784), nn.Tanh(),
    nn.Linear(784, 120), nn.Tanh(),
    nn.Linear(120, 10),
)
loss_func = nn.CrossEntropyLoss()

# Train for 10 epochs, scoring accuracy on both loaders and checkpointing to disk.
results_pd1 = train_simple_network_with_input_reshape(
    model1,
    loss_func,
    training_loader,
    val_loader=testing_loader,
    score_funcs={'Acc': accuracy_score},
    epochs=10,
    checkpoint_file='model1.pt',
)
Checking Accuracy of the first model
# Accuracy per epoch for model1: training curve first, then validation.
for _column, _legend in (('train Acc', 'Train'), ('val Acc', 'Validation')):
    sns.lineplot(x='epoch', y=_column, data=results_pd1, label=_legend)
# Second candidate: one more hidden layer, 784 -> 784 -> 120 -> 84 -> 10.
model2 = nn.Sequential(
    nn.Linear(784, 784), nn.Tanh(),
    nn.Linear(784, 120), nn.Tanh(),
    nn.Linear(120, 84), nn.Tanh(),
    nn.Linear(84, 10),
)
loss_func = nn.CrossEntropyLoss()

# Same training settings as model1 so the two runs are comparable.
results_pd2 = train_simple_network_with_input_reshape(
    model2,
    loss_func,
    training_loader,
    val_loader=testing_loader,
    score_funcs={'Acc': accuracy_score},
    epochs=10,
    checkpoint_file='model2.pt',
)
Checking Accuracy of the second model
# Accuracy per epoch for model2: training curve first, then validation.
for _column, _legend in (('train Acc', 'Train'), ('val Acc', 'Validation')):
    sns.lineplot(x='epoch', y=_column, data=results_pd2, label=_legend)

# Switch to the torchvision copy of MNIST; ToTensor converts each image to a tensor.
transform = transforms.ToTensor()

mnist_training_data = datasets.MNIST(root='../Data', train=True, download=True, transform=transform)
print('mnist_train_data : \n',mnist_training_data)

mnist_testing_data = datasets.MNIST(root='../Data', train=False, download=True, transform=transform)
print('mnist_testing_data : \n',mnist_testing_data)

# Inspect the first training sample and draw it as a 28x28 image.
image, label = mnist_training_data[0]
print('Shape:', image.shape, '\nLabel:', label)
_first_image = mnist_training_data[0][0]
plt.imshow(_first_image.reshape((28, 28)), cmap="gist_yarg")

# Only the training loader shuffles; no batch_size is passed to either loader.
mnist_training_loader = DataLoader(mnist_training_data, shuffle=True)
mnist_testing_loader = DataLoader(mnist_testing_data, shuffle=False)
Create two models with different numbers of layers and hidden-state sizes to increase the accuracy of the MNIST classifier.
# First torchvision-MNIST candidate: 784 -> 784 -> 84 -> 10 with Tanh activations.
model_1 = nn.Sequential(
    nn.Linear(784, 784), nn.Tanh(),
    nn.Linear(784, 84), nn.Tanh(),
    nn.Linear(84, 10),
)
loss_func = nn.CrossEntropyLoss()

# The training helper flattens each image batch before the first layer.
results_pd_1 = train_simple_network_with_input_reshape(
    model_1,
    loss_func,
    mnist_training_loader,
    val_loader=mnist_testing_loader,
    score_funcs={'Acc': accuracy_score},
    epochs=10,
    checkpoint_file='model_1.pt',
)
Checking Accuracy of the first model
# Accuracy per epoch for model_1: training curve first, then validation.
for _column, _legend in (('train Acc', 'Train'), ('val Acc', 'Validation')):
    sns.lineplot(x='epoch', y=_column, data=results_pd_1, label=_legend)

# Second torchvision-MNIST candidate: 784 -> 784 -> 120 -> 84 -> 10.
model_2 = nn.Sequential(
    nn.Linear(784, 784), nn.Tanh(),
    nn.Linear(784, 120), nn.Tanh(),
    nn.Linear(120, 84), nn.Tanh(),
    nn.Linear(84, 10),
)

# Reuses the loss_func object created for model_1.
results_pd_2 = train_simple_network_with_input_reshape(
    model_2,
    loss_func,
    mnist_training_loader,
    val_loader=mnist_testing_loader,
    score_funcs={'Acc': accuracy_score},
    epochs=10,
    checkpoint_file='model_2.pt',
)
Checking Accuracy of the second model
# Accuracy per epoch for model_2: training curve first, then validation.
for _column, _legend in (('train Acc', 'Train'), ('val Acc', 'Validation')):
    sns.lineplot(x='epoch', y=_column, data=results_pd_2, label=_legend)
Saving best model using the checkpoint option
# Persist only the weights of model_2 under the 'model_state_dict' key.
_best_model_state = {'model_state_dict': model_2.state_dict()}
torch.save(_best_model_state, 'mnist_best_model.pt')
Loading it back
# Rebuild the same layer layout as model_2 so the saved weights line up
# with the parameter names of the new module.
model_new = nn.Sequential(
    nn.Linear(784, 784), nn.Tanh(),
    nn.Linear(784, 120), nn.Tanh(),
    nn.Linear(120, 84), nn.Tanh(),
    nn.Linear(84, 10),
)

# Map the stored tensors onto the CPU while loading, then restore the weights.
device = torch.device("cpu")
checkpoint_dict = torch.load('mnist_best_model.pt', map_location=device)
_saved_weights = checkpoint_dict['model_state_dict']
model_new.load_state_dict(_saved_weights)
Applying it to the MNIST test set
# Pull the whole MNIST test set through the reloaded model in a single batch.
# Prof: batch size of 10k! You should probably never use that big! Keeping it
# in 32 <= b <= 1024 is a more normal range!
mnist_test_load_all = DataLoader(mnist_testing_data, batch_size=10000, shuffle=False)

with torch.no_grad():
    correct = 0
    for X_test, y_test in mnist_test_load_all:
        # Flatten every image before it reaches the first linear layer.
        flat_batch = X_test.view(len(X_test), -1)
        y_val = model_new(flat_batch)
        # Index of the largest output per row is the predicted class.
        predicted = torch.max(y_val, 1)[1]
        correct += (predicted == y_test).sum()

print(f'Test accuracy: {correct.item()}/{len(mnist_testing_data)} = {correct.item()*100/(len(mnist_testing_data)):7.3f}%')
Loading the CIFAR10 dataset using PyTorch
# Both CIFAR10 splits share the same root folder, download flag and transform.
transform = transforms.ToTensor()
_cifar10_args = dict(root='../Data', download=True, transform=transform)
cifar10_training_data = datasets.CIFAR10(train=True, **_cifar10_args)
cifar10_testing_data = datasets.CIFAR10(train=False, **_cifar10_args)

print('cifar10_training_data : \n',cifar10_training_data)
print('cifar10_testing_data : \n',cifar10_testing_data)

# Fix the seed before building the loaders, for reproducible results.
torch.manual_seed(101)
cifar10_training_loader = DataLoader(cifar10_training_data, shuffle=True)
cifar10_testing_loader = DataLoader(cifar10_testing_data, shuffle=False)

# Notebook-style inspection of one training image: its tensor shape, then its
# flattened length.
# Prof: leave notes about why you are printing these out! Helps future you when
# working on something more complex that needs to be maintained.
cifar10_training_data[0][0].shape
cifar10_training_data[0][0].view(-1).size()
Linear model creation for CIFAR10 Dataset
# Flattened input length: 32 * 32 pixels times 3 channels.
D = 32*32*3
# Number of channels in the input
C = 3
# Number of classes
classes = 10

# One Tanh hidden layer of 32*3*3 units between input and class scores.
model_linear1 = nn.Sequential(
    nn.Linear(D, 32*3*3), nn.Tanh(),
    nn.Linear(32*3*3, classes),
)
loss_func = nn.CrossEntropyLoss()

# The training helper flattens each image batch; device is left at its default.
fc_results1 = train_simple_network_with_input_reshape(
    model_linear1,
    loss_func,
    cifar10_training_loader,
    val_loader=cifar10_testing_loader,
    score_funcs={'Accuracy': accuracy_score},
    epochs=10,
)

# Accuracy per epoch: training curve first, then validation.
for _column, _legend in (('train Accuracy', 'Train'), ('val Accuracy', 'Validation')):
    sns.lineplot(x='epoch', y=_column, data=fc_results1, label=_legend)
# Evaluate model_linear1 on the CIFAR10 test set and show where it goes wrong.
cifar10_testing_load_all = DataLoader(cifar10_testing_data, batch_size=10000, shuffle=False)

# Labels and predictions are collected over *all* batches so the confusion
# matrix stays correct even if batch_size is later made smaller than the
# test set (previously only the last batch was used).
all_labels = []
all_predictions = []
with torch.no_grad():
    correct = 0
    for X_test, y_test in cifar10_testing_load_all:
        # Flatten every image before it reaches the first linear layer.
        y_val = model_linear1(X_test.view(len(X_test), -1))
        predicted = torch.max(y_val,1)[1]
        correct += (predicted == y_test).sum()
        all_labels.extend(y_test.view(-1).detach().cpu().numpy())
        all_predictions.extend(predicted.view(-1).detach().cpu().numpy())
print(f'Test accuracy: {correct.item()}/{len(cifar10_testing_data)} = {correct.item()*100/(len(cifar10_testing_data)):7.3f}%')

# Display the confusion matrix as a heatmap
class_names = ['plane', ' car', ' bird', ' cat', ' deer', ' dog', ' frog', 'horse', ' ship', 'truck']
# Pin the label set so the matrix always has one row/column per class name,
# even if some class never occurs in the labels or predictions.
arr = confusion_matrix(all_labels, all_predictions, labels=list(range(len(class_names))))
df_cm = pd.DataFrame(arr, class_names, class_names)
plt.figure(figsize = (9,6))
sns.heatmap(df_cm, annot=True, fmt="d", cmap='BuGn')
plt.xlabel("prediction")
plt.ylabel("label (ground truth)")
# Prof: above and beyond on the confusion matrix! If you do something like this,
# you should look at the errors. What does it tell you about the nature of the
# mistakes? e.g., cat/dog and truck/car are confused more than a bird & truck!
plt.show()
Linear model creation for CIFAR10 Dataset
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class Flatten(nn.Module):
    """Layer that keeps the first (batch) dimension and collapses all others.

    A tensor of shape (N, C, W, H) becomes one of shape (N, D=C*W*H), so the
    layer can sit between convolutional and linear layers in nn.Sequential.
    """

    def forward(self, input):
        n_samples = input.size(0)
        return input.view(n_samples, -1)
# Flattened input length: 32 * 32 pixels times 3 channels.
D = 32*32*3
# Number of channels in the input
C = 3
# Number of classes
classes = 10

# Same layer sizes as model_linear1, but flattening is the model's first layer.
model_linear2 = nn.Sequential(
    Flatten(),
    nn.Linear(D, 32*3*3), nn.Tanh(),
    nn.Linear(32*3*3, classes),
)
def train_simple_network(model, loss_func, train_loader, val_loader=None, score_funcs=None,
                         epochs=50, device="cpu", checkpoint_file=None):
    """Train a network with SGD and record per-epoch metrics.

    Batches are passed to ``model`` exactly as the loader yields them (after
    being moved to ``device``); any reshaping must happen inside the model.

    Keyword arguments:
    model -- the PyTorch model / "Module" to train
    loss_func -- callable taking (model outputs, labels) and returning a scalar loss
    train_loader -- PyTorch DataLoader yielding (input, label) batches
    val_loader -- optional PyTorch DataLoader evaluated after every epoch
    score_funcs -- optional dict mapping a metric name to a function called as
        score_func(y_true, y_pred); None means "no extra metrics"
    epochs -- the number of training epochs to perform
    device -- the compute location used for the model and every batch
    checkpoint_file -- optional path; when given, the model state, optimizer
        state and the results so far are written there

    Returns a pandas DataFrame with one row per epoch and the columns
    "epoch", "total time", "train loss", plus "val loss" and
    "train <name>" / "val <name>" per score function where applicable.
    Losses are the sum over batches of (batch loss * batch size), not a mean.
    """
    # The signature defaults score_funcs to None; iterating None would raise,
    # so treat it as an empty metric set.
    if score_funcs is None:
        score_funcs = {}

    to_track = ["epoch", "total time", "train loss"]
    if val_loader is not None:
        to_track.append("val loss")
    for eval_score in score_funcs:
        to_track.append("train " + eval_score)
        if val_loader is not None:
            to_track.append("val " + eval_score)

    total_train_time = 0  # Seconds spent inside the training loop only
    results = {item: [] for item in to_track}

    # Plain stochastic gradient descent with a fixed learning rate.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
    # Place the model on the requested compute resource (CPU or GPU).
    model.to(device)

    for epoch in tqdm(range(epochs), desc="Epoch"):
        model = model.train()  # Training mode
        running_loss = 0.0
        y_true = []
        y_pred = []

        start = time.time()
        for inputs, labels in tqdm(train_loader, desc="Train Batch", leave=False):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            y_hat = model(inputs)
            loss = loss_func(y_hat, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)

            # Move labels and predictions back to the CPU for scoring; extend()
            # adds one entry per sample of this batch.
            y_true.extend(labels.detach().cpu().numpy())
            y_pred.extend(y_hat.detach().cpu().numpy())
        end = time.time()
        total_train_time += (end - start)

        results["epoch"].append(epoch)
        results["total time"].append(total_train_time)
        results["train loss"].append(running_loss)

        y_pred = np.asarray(y_pred)
        # More than one output column means classification: convert to labels.
        if y_pred.ndim > 1 and y_pred.shape[1] > 1:
            y_pred = np.argmax(y_pred, axis=1)
        for name, score_func in score_funcs.items():
            results["train " + name].append(score_func(y_true, y_pred))

        if val_loader is not None:
            model = model.eval()  # Evaluation mode: no parameter updates happen below
            y_true = []
            y_pred = []
            running_loss = 0.0

            # No gradients are needed for evaluation.
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    y_hat = model(inputs)
                    loss = loss_func(y_hat, labels)
                    running_loss += loss.item() * inputs.size(0)

                    # Collect every sample of *this* batch; the size of the
                    # last training batch must not be reused here.
                    y_true.extend(labels.detach().cpu().numpy())
                    y_pred.extend(y_hat.detach().cpu().numpy())

            results["val loss"].append(running_loss)
            y_pred = np.asarray(y_pred)
            if y_pred.ndim > 1 and y_pred.shape[1] > 1:
                y_pred = np.argmax(y_pred, axis=1)
            for name, score_func in score_funcs.items():
                results["val " + name].append(score_func(y_true, y_pred))

        # NOTE(review): the source lost its indentation; checkpointing is
        # placed inside the epoch loop (one overwrite per epoch) -- confirm.
        if checkpoint_file is not None:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'results' : results
                }, checkpoint_file)

    return pd.DataFrame.from_dict(results)
loss_func = nn.CrossEntropyLoss()

# model_linear2 flattens its own input, so the plain training helper is used,
# this time on the selected device.
fc_results2 = train_simple_network(
    model_linear2,
    loss_func,
    cifar10_training_loader,
    val_loader=cifar10_testing_loader,
    score_funcs={'Accuracy': accuracy_score},
    epochs=10,
    device=device,
)

# Accuracy per epoch: training curve first, then validation.
for _column, _legend in (('train Accuracy', 'Train'), ('val Accuracy', 'Validation')):
    sns.lineplot(x='epoch', y=_column, data=fc_results2, label=_legend)
# Evaluate model_linear2 on the CIFAR10 test set and show where it goes wrong.
cifar10_testing_load_all = DataLoader(cifar10_testing_data, batch_size=10000, shuffle=False)

# Labels and predictions are collected over *all* batches so the confusion
# matrix stays correct even if batch_size is later made smaller than the
# test set (previously only the last batch was used).
all_labels = []
all_predictions = []
with torch.no_grad():
    correct = 0
    for X_test, y_test in cifar10_testing_load_all:
        # The batch must live on the same device as the trained model.
        X_test = X_test.to(device)
        y_test = y_test.to(device)
        y_val = model_linear2(X_test.view(len(X_test), -1))
        predicted = torch.max(y_val,1)[1]
        correct += (predicted == y_test).sum()
        all_labels.extend(y_test.view(-1).detach().cpu().numpy())
        all_predictions.extend(predicted.view(-1).detach().cpu().numpy())
print(f'Test accuracy: {correct.item()}/{len(cifar10_testing_data)} = {correct.item()*100/(len(cifar10_testing_data)):7.3f}%')

# Display the confusion matrix as a heatmap
class_names = ['plane', ' car', ' bird', ' cat', ' deer', ' dog', ' frog', 'horse', ' ship', 'truck']
# Pin the label set so the matrix always has one row/column per class name,
# even if some class never occurs in the labels or predictions.
arr = confusion_matrix(all_labels, all_predictions, labels=list(range(len(class_names))))
df_cm = pd.DataFrame(arr, class_names, class_names)
plt.figure(figsize = (9,6))
sns.heatmap(df_cm, annot=True, fmt="d", cmap='BuGn')
plt.xlabel("prediction")
plt.ylabel("label (ground truth)")
plt.show()
Convolutional network for CIFAR10 Dataset
# Pixels per channel: 32 * 32 images
D = 32*32
# Number of channels in the input
C = 3
# Number of classes
classes = 10

# Two conv -> max-pool -> Tanh stages followed by a single linear classifier.
# The linear layer takes 32 * D / 16 inputs: 32 channels from the last conv,
# with D reduced by a factor of 4 by each of the two 2x2 poolings.
model_cnn_pool = nn.Sequential(
    nn.Conv2d(C, 32, (3,3), padding=1),
    nn.MaxPool2d((2,2)),
    nn.Tanh(),

    nn.Conv2d(32, 32, (3,3), padding=1),
    nn.MaxPool2d((2,2)),
    nn.Tanh(),

    Flatten(),
    nn.Linear(32*D//(4**2), classes),
)

# Reuses the loss_func object created earlier in the notebook.
cnn_results_with_pool = train_simple_network(
    model_cnn_pool,
    loss_func,
    cifar10_training_loader,
    val_loader=cifar10_testing_loader,
    score_funcs={'Accuracy': accuracy_score},
    epochs=10,
    device=device,
)

# Accuracy per epoch: training curve first, then validation.
for _column, _legend in (('train Accuracy', 'Train'), ('val Accuracy', 'Validation')):
    sns.lineplot(x='epoch', y=_column, data=cnn_results_with_pool, label=_legend)
# Evaluate model_cnn_pool on the CIFAR10 test set and show where it goes wrong.
cifar10_testing_load_all = DataLoader(cifar10_testing_data, batch_size=10000, shuffle=False)

# Labels and predictions are collected over *all* batches so the confusion
# matrix stays correct even if batch_size is later made smaller than the
# test set (previously only the last batch was used).
all_labels = []
all_predictions = []
with torch.no_grad():
    correct = 0
    for X_test, y_test in cifar10_testing_load_all:
        # The batch must live on the same device as the trained model.
        X_test = X_test.to(device)
        y_test = y_test.to(device)
        # Images are passed unflattened; the model flattens internally.
        y_val = model_cnn_pool(X_test)
        predicted = torch.max(y_val,1)[1]
        correct += (predicted == y_test).sum()
        all_labels.extend(y_test.view(-1).detach().cpu().numpy())
        all_predictions.extend(predicted.view(-1).detach().cpu().numpy())
print(f'Test accuracy: {correct.item()}/{len(cifar10_testing_data)} = {correct.item()*100/(len(cifar10_testing_data)):7.3f}%')

# Display the confusion matrix as a heatmap
class_names = ['plane', ' car', ' bird', ' cat', ' deer', ' dog', ' frog', 'horse', ' ship', 'truck']
# Pin the label set so the matrix always has one row/column per class name,
# even if some class never occurs in the labels or predictions.
arr = confusion_matrix(all_labels, all_predictions, labels=list(range(len(class_names))))
df_cm = pd.DataFrame(arr, class_names, class_names)
plt.figure(figsize = (9,6))
sns.heatmap(df_cm, annot=True, fmt="d", cmap='BuGn')
plt.xlabel("prediction")
plt.ylabel("label (ground truth)")
plt.show()
Good first assignment! Your code is good; you could improve it by defining a function to build your network and using a for loop to test more options. I think the biggest item for you to work on is the notes and text you leave. The last paragraph was a good one. In the future, try to aim for one paragraph like that in each section to explain the results.