import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from tqdm.autonotebook import tqdm
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from sklearn.metrics import accuracy_score
from google.colab import drive
drive.mount('/content/gdrive/')
import sys
sys.path.append('/content/gdrive/My Drive/MPDL/')
from mpdl import train_network, Flatten, View, weight_reset
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)

class View(nn.Module):
    def __init__(self, *shape):
        super(View, self).__init__()
        self.shape = shape
    def forward(self, input):
        return input.view(*self.shape)
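# Illustrative sanity check (an addition, not in the original notebook):
# Flatten collapses everything after the batch dimension, and View undoes it.
_demo = torch.randn(32, 1, 28, 28)
print(Flatten()(_demo).shape)                       # torch.Size([32, 784])
print(View(-1, 1, 28, 28)(Flatten()(_demo)).shape)  # torch.Size([32, 1, 28, 28])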
def moveTo(obj, device):
    if isinstance(obj, tuple):
        return tuple([moveTo(x, device) for x in obj])
    elif isinstance(obj, list):
        return [moveTo(x, device) for x in obj]
    elif isinstance(obj, torch.Tensor):
        return obj.to(device)
    else:
        return obj
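# Illustrative check (an addition): moveTo recurses through tuples and lists,
# so an arbitrarily nested batch ends up entirely on the target device.
_nested = (torch.zeros(2), [torch.ones(2), torch.ones(2)])
_nested = moveTo(_nested, device)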
def train_network(model, loss_func, train_loader, val_loader=None, score_funcs=None,
                  epochs=50, device="cpu", checkpoint_file=None,
                  lr_schedule=None, optimizer=None, disable_tqdm=False
                  ):
    """Train simple neural networks

    Keyword arguments:
    model -- the PyTorch model / "Module" to train
    loss_func -- the loss function that takes two arguments, the model outputs and the labels, and returns a score
    train_loader -- PyTorch DataLoader object that returns tuples of (input, label) pairs.
    val_loader -- optional PyTorch DataLoader to evaluate on after every epoch
    score_funcs -- a dictionary of scoring functions used to evaluate the performance of the model
    epochs -- the number of training epochs to perform
    device -- the compute location to perform training on
    """
    if score_funcs is None:
        score_funcs = {}  # Empty dict
    to_track = ["epoch", "total time", "train loss"]
    if val_loader is not None:
        to_track.append("val loss")
    for eval_score in score_funcs:
        to_track.append("train " + eval_score)
        if val_loader is not None:
            to_track.append("val " + eval_score)
    total_train_time = 0  # How long have we spent in the training loop?
    results = {}
    # Initialize every item with an empty list
    for item in to_track:
        results[item] = []
    if optimizer is None:
        # The AdamW optimizer is a good default optimizer
        optimizer = torch.optim.AdamW(model.parameters())
        del_opt = True
    else:
        del_opt = False
    # Place the model on the correct compute resource (CPU or GPU)
    model.to(device)
    for epoch in tqdm(range(epochs), desc="Epoch", disable=disable_tqdm):
        model = model.train()  # Put our model in training mode
        running_loss = 0.0
        y_true = []
        y_pred = []
        start = time.time()
        for inputs, labels in tqdm(train_loader, desc="Train Batch", leave=False, disable=disable_tqdm):
            # Move the batch to the device we are using.
            inputs = moveTo(inputs, device)
            labels = moveTo(labels, device)
            batch_size = labels.shape[0]
            # PyTorch accumulates gradients in a mutable data structure, so we need to
            # reset it to a clean state before we use it. Otherwise, it will hold stale
            # information from a previous iteration.
            optimizer.zero_grad()
            y_hat = model(inputs)  # this just computed f_Θ(x(i))
            # Compute loss.
            loss = loss_func(y_hat, labels)
            loss.backward()  # ∇_Θ just got computed by this one call!
            # Now we just need to update all the parameters!
            optimizer.step()  # Θ_{k+1} = Θ_k − η * ∇_Θ ℓ(y_hat, y)
            # Now we are just grabbing some information we would like to have
            running_loss += loss.item() * batch_size
            if len(score_funcs) > 0:
                # moving labels & predictions back to CPU for computing / storing predictions
                labels = labels.detach().cpu().numpy()
                y_hat = y_hat.detach().cpu().numpy()
                for i in range(batch_size):
                    y_true.append(labels[i])
                    y_pred.append(y_hat[i, :])
        # end training epoch
        end = time.time()
        total_train_time += (end - start)
        results["epoch"].append(epoch)
        results["total time"].append(total_train_time)
        results["train loss"].append(running_loss)
        y_pred = np.asarray(y_pred)
        if len(y_pred.shape) == 2 and y_pred.shape[1] > 1:  # We have a classification problem, convert to labels
            y_pred = np.argmax(y_pred, axis=1)
        for name, score_func in score_funcs.items():
            results["train " + name].append(score_func(y_true, y_pred))
        if val_loader is not None:  # Let's find out validation performance as we go!
            model = model.eval()  # Set the model to "evaluation" mode, b/c we don't want to make any updates!
            y_true = []
            y_pred = []
            val_running_loss = 0.0
            with torch.no_grad():  # no gradients needed for evaluation
                for inputs, labels in val_loader:
                    # Move the batch to the device we are using.
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    batch_size = labels.shape[0]
                    y_hat = model(inputs)
                    loss = loss_func(y_hat, labels)
                    # Now we are just grabbing some information we would like to have
                    val_running_loss += loss.item() * batch_size
                    if len(score_funcs) > 0:
                        # moving labels & predictions back to CPU for computing / storing predictions
                        labels = labels.detach().cpu().numpy()
                        y_hat = y_hat.detach().cpu().numpy()
                        for i in range(batch_size):
                            y_true.append(labels[i])
                            y_pred.append(y_hat[i, :])
            results["val loss"].append(val_running_loss)
            y_pred = np.asarray(y_pred)
            if len(y_pred.shape) == 2 and y_pred.shape[1] > 1:  # We have a classification problem, convert to labels
                y_pred = np.argmax(y_pred, axis=1)
            for name, score_func in score_funcs.items():
                results["val " + name].append(score_func(y_true, y_pred))
        # In PyTorch, the convention is to update the learning rate after every epoch
        if lr_schedule is not None:
            if isinstance(lr_schedule, torch.optim.lr_scheduler.ReduceLROnPlateau):
                lr_schedule.step(val_running_loss)
            else:
                lr_schedule.step()
        if checkpoint_file is not None:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'results': results
            }, checkpoint_file)
    if del_opt:
        del optimizer
    return pd.DataFrame.from_dict(results)
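# A small helper (an addition, not part of the mpdl module) showing how to
# restore a checkpoint saved via train_network's checkpoint_file argument.
def load_checkpoint(model, checkpoint_file, optimizer=None):
    checkpoint = torch.load(checkpoint_file, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # Return the stored training history and the last completed epoch
    return checkpoint['results'], checkpoint['epoch']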
1) Build a Convolutional Denoising Auto Encoder on the MNIST dataset.
Creating a DataLoader for the MNIST Dataset
class AutoEncodeDataset(Dataset):
    """Takes a dataset with (x, y) label pairs and converts it to (x, x) pairs.
    This makes it easy to re-use other code."""
    def __init__(self, dataset):
        self.dataset = dataset
    def __len__(self):
        return len(self.dataset)
    def __getitem__(self, idx):
        x, y = self.dataset[idx]
        return x, x
train_data = AutoEncodeDataset(torchvision.datasets.MNIST("./", train=True, transform=transforms.ToTensor(), download=True))
test_data_xy = torchvision.datasets.MNIST("./", train=False, transform=transforms.ToTensor(), download=True)
test_data_xx = AutoEncodeDataset(test_data_xy)
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data_xx, batch_size=128)
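# Quick sanity check (illustrative): the wrapped dataset returns (x, x) pairs,
# so the "label" is just the input image itself.
_x, _target = train_data[0]
assert torch.equal(_x, _target) and _x.shape == (1, 28, 28)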
normal = torch.distributions.Normal(0, 0.5)
def addNoise(x, device='cpu'):
    """
    We will use this helper function to add noise to some data.
    x: the data we want to add noise to
    device: the CPU or GPU that the input is located on.
    """
    return x + normal.sample(sample_shape=torch.Size(x.shape)).to(device)
class AdditiveGausNoise(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self, x):
        # every PyTorch Module object has a self.training boolean which can be used
        # to check if we are in training (True) or evaluation (False) mode.
        if self.training:
            return addNoise(x, device=device)
        else:
            return x
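# Illustrative check (an addition): the noise layer is only active in training
# mode, so in eval mode it is an identity function and inputs pass through clean.
_noise = AdditiveGausNoise().eval()
_clean = torch.zeros(1, 1, 28, 28)
assert torch.equal(_noise(_clean), _clean)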
def showEncodeDecode(encode_decode, x):
    # Put the model in evaluation mode so noise layers and batch norm are inactive
    encode_decode = encode_decode.eval().cpu()
    with torch.no_grad():
        x_recon = encode_decode(x.cpu())
    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(x.numpy()[0, :])
    axarr[1].imshow(x_recon.numpy()[0, 0, :])
#How many values are in the input? We use this to help determine the size of subsequent layers
D = 28*28 #28 * 28 images
#How many channels are in the input?
C = 1
#How many classes are there?
classes = 10
Convolutional Denoising Auto Encoder
dnauto_encoder_conv_big = nn.Sequential(
    AdditiveGausNoise(),
    View(-1, 1, 28, 28),
    nn.Conv2d(C, 2, (3,3), padding=1),  # 1*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1)   # 2*28*28 -> 2*28*28
)
dnauto_decoder_conv_big = nn.Sequential(
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, C, (3,3), padding=1)   # 2*28*28 -> 1*28*28
)
dnauto_encode_decode_conv_big = nn.Sequential(
    dnauto_encoder_conv_big,
    dnauto_decoder_conv_big
)
mse_loss = nn.MSELoss()
train_network(dnauto_encode_decode_conv_big, mse_loss, train_loader, val_loader=test_loader, epochs=10, device=device)
showEncodeDecode(dnauto_encode_decode_conv_big, test_data_xy[6][0])
showEncodeDecode(dnauto_encode_decode_conv_big, addNoise(test_data_xy[6][0]))
showEncodeDecode(dnauto_encode_decode_conv_big, test_data_xy[23][0])
showEncodeDecode(dnauto_encode_decode_conv_big, addNoise(test_data_xy[23][0]))
dnauto_encoder_conv_big2 = nn.Sequential(
    AdditiveGausNoise(),
    View(-1, 1, 28, 28),
    nn.Conv2d(C, 2, (3,3), padding=1),  # 1*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(2, 2, (3,3), padding=1)   # 2*28*28 -> 2*28*28
)
dnauto_decoder_conv_big2 = nn.Sequential(
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, C, (3,3), padding=1)   # 2*28*28 -> 1*28*28
)
dnauto_encode_decode_conv_big2 = nn.Sequential(
    dnauto_encoder_conv_big2,
    dnauto_decoder_conv_big2
)
train_network(dnauto_encode_decode_conv_big2, mse_loss, train_loader, val_loader=test_loader, epochs=10, device=device)
showEncodeDecode(dnauto_encode_decode_conv_big2, test_data_xy[6][0])
showEncodeDecode(dnauto_encode_decode_conv_big2, addNoise(test_data_xy[6][0]))
showEncodeDecode(dnauto_encode_decode_conv_big2, test_data_xy[23][0])
showEncodeDecode(dnauto_encode_decode_conv_big2, addNoise(test_data_xy[23][0]))
Convolutional Denoising Auto Encoder with ConvTranspose2d
Conv2d
$$H_{out} = \left\lfloor \frac{H_{in} + 2 \times padding[0] - dilation[0] \times (kernel\_size[0] - 1) - 1}{stride[0]} + 1 \right\rfloor$$
$$W_{out} = \left\lfloor \frac{W_{in} + 2 \times padding[1] - dilation[1] \times (kernel\_size[1] - 1) - 1}{stride[1]} + 1 \right\rfloor$$
ConvTranspose2d
$$H_{out} = (H_{in} - 1) \times stride[0] - 2 \times padding[0] + dilation[0] \times (kernel\_size[0] - 1) + output\_padding[0] + 1$$
$$W_{out} = (W_{in} - 1) \times stride[1] - 2 \times padding[1] + dilation[1] \times (kernel\_size[1] - 1) + output\_padding[1] + 1$$
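As a worked check of these formulas (an illustration added here): a 3×3 Conv2d with stride 2 and padding 1 maps 28 to ⌊(28 + 2 − 2 − 1)/2⌋ + 1 = 14, and the matching ConvTranspose2d with output_padding=1 maps 14 back to (14 − 1)×2 − 2 + 2 + 1 + 1 = 28.
_x = torch.zeros(1, 1, 28, 28)
_down = nn.Conv2d(1, 8, (3,3), stride=2, padding=1)
_up = nn.ConvTranspose2d(8, 1, (3,3), stride=2, padding=1, output_padding=1)
print(_down(_x).shape)       # torch.Size([1, 8, 14, 14])
print(_up(_down(_x)).shape)  # torch.Size([1, 1, 28, 28])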
dnauto_encoder_conv_convtranspose_big = nn.Sequential(
    AdditiveGausNoise(),
    View(-1, 1, 28, 28),
    nn.Conv2d(C, 8, (3,3), stride=2, padding=1),    # 1*28*28 -> 8*14*14
    nn.BatchNorm2d(8),
    nn.ReLU(),
    nn.Conv2d(8, 32, (3,3), stride=2, padding=1),   # 8*14*14 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.Conv2d(32, 32, (3,3), stride=1, padding=1),  # 32*7*7 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.Conv2d(32, 32, (3,3), stride=1, padding=1)   # 32*7*7 -> 32*7*7
)
dnauto_decoder_conv_convtranspose_big = nn.Sequential(
    nn.ConvTranspose2d(32, 32, (3,3), padding=1),   # 32*7*7 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.ConvTranspose2d(32, 32, (3,3), padding=1),   # 32*7*7 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.ConvTranspose2d(32, 8, (3,3), stride=2, padding=1, output_padding=1),  # 32*7*7 -> 8*14*14
    nn.BatchNorm2d(8),
    nn.ReLU(),
    nn.ConvTranspose2d(8, C, (3,3), stride=2, padding=1, output_padding=1)    # 8*14*14 -> 1*28*28
)
dnauto_encode_decode_conv_convtranspose_big = nn.Sequential(
    dnauto_encoder_conv_convtranspose_big,
    dnauto_decoder_conv_convtranspose_big
)
train_network(dnauto_encode_decode_conv_convtranspose_big, mse_loss, train_loader, val_loader=test_loader, epochs=10, device=device)
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big, test_data_xy[6][0])
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big, addNoise(test_data_xy[6][0]))
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big, test_data_xy[23][0])
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big, addNoise(test_data_xy[23][0]))
dnauto_encoder_conv_convtranspose_big2 = nn.Sequential(
    AdditiveGausNoise(),
    View(-1, 1, 28, 28),
    nn.Conv2d(C, 8, (3,3), stride=2, padding=1),    # 1*28*28 -> 8*14*14
    nn.BatchNorm2d(8),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(8, 32, (3,3), stride=2, padding=1),   # 8*14*14 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(32, 32, (3,3), stride=1, padding=1),  # 32*7*7 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(32, 32, (3,3), stride=1, padding=1)   # 32*7*7 -> 32*7*7
)
dnauto_decoder_conv_convtranspose_big2 = nn.Sequential(
    nn.ConvTranspose2d(32, 32, (3,3), padding=1),   # 32*7*7 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.ConvTranspose2d(32, 32, (3,3), padding=1),   # 32*7*7 -> 32*7*7
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.ConvTranspose2d(32, 8, (3,3), stride=2, padding=1, output_padding=1),  # 32*7*7 -> 8*14*14
    nn.BatchNorm2d(8),
    nn.ReLU(),
    nn.ConvTranspose2d(8, C, (3,3), stride=2, padding=1, output_padding=1)    # 8*14*14 -> 1*28*28
)
dnauto_encode_decode_conv_convtranspose_big2 = nn.Sequential(
    dnauto_encoder_conv_convtranspose_big2,
    dnauto_decoder_conv_convtranspose_big2
)
train_network(dnauto_encode_decode_conv_convtranspose_big2, mse_loss, train_loader, val_loader=test_loader, epochs=10, device=device)
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big2, test_data_xy[6][0])
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big2, addNoise(test_data_xy[6][0]))
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big2, test_data_xy[23][0])
showEncodeDecode(dnauto_encode_decode_conv_convtranspose_big2, addNoise(test_data_xy[23][0]))
Convolutional Denoising Auto Encoder with Maxpool2d and ConvTranspose2d
MaxPool2d
$$H_{out} = \left\lfloor \frac{H_{in} + 2 \times padding[0] - dilation[0] \times (kernel\_size[0] - 1) - 1}{stride[0]} + 1 \right\rfloor$$
$$W_{out} = \left\lfloor \frac{W_{in} + 2 \times padding[1] - dilation[1] \times (kernel\_size[1] - 1) - 1}{stride[1]} + 1 \right\rfloor$$
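For the 2×2 pooling used below (stride defaults to the kernel size, padding 0), this gives ⌊(28 − 1 − 1)/2⌋ + 1 = 14, which a quick check confirms (an illustration, not part of the original):
print(nn.MaxPool2d((2,2))(torch.zeros(1, 8, 28, 28)).shape)  # torch.Size([1, 8, 14, 14])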
dnauto_encoder_conv_max_big = nn.Sequential(
    AdditiveGausNoise(),
    View(-1, 1, 28, 28),
    nn.Conv2d(C, 2, (3,3), padding=1),  # 1*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 8, (3,3), padding=1),  # 2*28*28 -> 8*28*28
    nn.BatchNorm2d(8),
    nn.ReLU(),
    nn.MaxPool2d((2,2))  # 8*28*28 -> 8*14*14; a 2x2 kernel with stride 2 halves each spatial dimension
)
dnauto_decoder_conv_max_big = nn.Sequential(
    nn.ConvTranspose2d(8, 8, (2,2), stride=2),  # 8*14*14 -> 8*28*28
    nn.BatchNorm2d(8),
    nn.ReLU(),
    nn.Conv2d(8, 2, (3,3), padding=1),  # 8*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, C, (3,3), padding=1)   # 2*28*28 -> 1*28*28
)
dnauto_encode_decode_conv_max_big = nn.Sequential(
    dnauto_encoder_conv_max_big,
    dnauto_decoder_conv_max_big
)
train_network(dnauto_encode_decode_conv_max_big, mse_loss, train_loader, val_loader=test_loader, epochs=10, device=device)
showEncodeDecode(dnauto_encode_decode_conv_max_big, test_data_xy[6][0])
showEncodeDecode(dnauto_encode_decode_conv_max_big, addNoise(test_data_xy[6][0]))
showEncodeDecode(dnauto_encode_decode_conv_max_big, test_data_xy[23][0])
showEncodeDecode(dnauto_encode_decode_conv_max_big, addNoise(test_data_xy[23][0]))
dnauto_encoder_conv_max_big2 = nn.Sequential(
    AdditiveGausNoise(),
    View(-1, 1, 28, 28),
    nn.Conv2d(C, 2, (3,3), padding=1),  # 1*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.Conv2d(2, 8, (3,3), padding=1),  # 2*28*28 -> 8*28*28
    nn.BatchNorm2d(8),
    nn.ReLU(),
    AdditiveGausNoise(),
    nn.MaxPool2d((2,2))  # 8*28*28 -> 8*14*14; a 2x2 kernel with stride 2 halves each spatial dimension
)
dnauto_decoder_conv_max_big2 = nn.Sequential(
    nn.ConvTranspose2d(8, 8, (2,2), stride=2),  # 8*14*14 -> 8*28*28
    nn.BatchNorm2d(8),
    nn.ReLU(),
    nn.Conv2d(8, 2, (3,3), padding=1),  # 8*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, 2, (3,3), padding=1),  # 2*28*28 -> 2*28*28
    nn.BatchNorm2d(2),
    nn.ReLU(),
    nn.Conv2d(2, C, (3,3), padding=1)   # 2*28*28 -> 1*28*28
)
dnauto_encode_decode_conv_max_big2 = nn.Sequential(
    dnauto_encoder_conv_max_big2,
    dnauto_decoder_conv_max_big2
)
train_network(dnauto_encode_decode_conv_max_big2, mse_loss, train_loader, val_loader=test_loader, epochs=10, device=device)
showEncodeDecode(dnauto_encode_decode_conv_max_big2, test_data_xy[6][0])
showEncodeDecode(dnauto_encode_decode_conv_max_big2, addNoise(test_data_xy[6][0]))
showEncodeDecode(dnauto_encode_decode_conv_max_big2, test_data_xy[23][0])
showEncodeDecode(dnauto_encode_decode_conv_max_big2, addNoise(test_data_xy[23][0]))
2) Compare the Denoising CNN and the large Denoising Auto Encoder from the lecture numerically and qualitatively. Which one is better? Why?
Comparing the Denoising CNN and the large Denoising Auto Encoder from the lecture
Numerical Comparison
The Denoising CNN Auto Encoder's training and validation losses (listed below) are much lower than those of the large Denoising Auto Encoder (873.606800) and of the large Denoising Auto Encoder with noise added to the input of several layers (913.972139).
Training loss and validation loss for:
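Since train_network returns a pandas DataFrame of per-epoch metrics, the final-epoch losses can be read off directly by capturing the return value of one of the training calls above (a sketch; the variable name is hypothetical):
results_df = train_network(dnauto_encode_decode_conv_big, mse_loss, train_loader,
                           val_loader=test_loader, epochs=10, device=device)
print(results_df[["train loss", "val loss"]].iloc[-1])  # final-epoch losses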
Qualitative Comparison
The reconstructions show that the network learns to clean up digit images without being explicitly told about the concept of a 5, or even that distinct digits are present.
Reasons
3) Tell me your initial project idea and, if you are going to have a partner, who the partner is. (The limit is teams of 2.)
Wow, above and beyond on this homework, very good job! Enjoy the extra-credit bonus for doing so much extra!
My one comment would be that your use of only 2 filters in many of your CNNs is exceptionally small. While it does work on MNIST, due to MNIST's simplicity, it is generally not useful to try unless you have a very specific hypothesis you are testing. I might do that if I thought there was a bug in my code, or a data quality problem, and I wanted to see if it could get better results than it should. In general, I would use a minimum of 32 filters for most real-world problems.
Hopefully the recent lecture clarified when / where to use a transposed convolution. This was unnecessary for your architecture's design, but it doesn't hurt to try new things :)
We have talked about your project before, and it's still good by me! Remember that a good project doesn't necessarily have to be working/complete. I'm looking for the kind of stuff you have in this HW: detailed results showing what you did/tried, progress, and what you understood / learned.