!pip install wget
!pip install clean-text
!pip install torchtext==0.6.0
!pip install sentencepiece
!pip install transformers==2.8.0
import re
import os
import csv
import time
import math
import json
import random
import collections
import numpy as np
import pandas as pd
from cleantext import clean
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torchtext import data
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel, DistilBertTokenizer, DistilBertModel, GPT2Tokenizer, GPT2LMHeadModel 
from torchsummary import summary
from torchtext.data.metrics import bleu_score
from tqdm.autonotebook import tqdm
Collecting wget
Successfully installed wget-3.2
Collecting clean-text
Successfully installed clean-text-0.3.0 emoji-0.6.0 ftfy-5.8
Collecting torchtext==0.6.0
Successfully installed sentencepiece-0.1.94 torchtext-0.6.0
Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (0.1.94)
Collecting transformers==2.8.0
ERROR: botocore 1.19.27 has requirement urllib3<1.27,>=1.25.4; python_version != "3.4", but you'll have urllib3 1.24.3 which is incompatible.
Successfully installed boto3-1.16.27 botocore-1.19.27 jmespath-0.10.0 s3transfer-0.3.3 sacremoses-0.0.43 tokenizers-0.5.2 transformers-2.8.0
Since the GPL-licensed package `unidecode` is not installed, using Python's `unicodedata` package which yields worse results.
from google.colab import drive
drive.mount('/content/gdrive/',force_remount=True)
Mounted at /content/gdrive/
Data Preprocessing
"""
Device
"""
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
"""
Batch Size
"""
BATCH_SIZE = 6
isTrain = False
def set_seed(seed):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
set_seed(915)
def textPreprocess(txt):
    """
    Convert to Lowercase and Trim the text
    """
    txt = txt.lower().strip()
    """
    Fix various unicode errors
    transliterate to closest ASCII representation
    """
    txt = clean(txt, fix_unicode=True, to_ascii=True)
    """
    Removing zero-width character
    """
    txt = re.sub(u"\ufe0f", r" ", txt)
    """
    Remove URL
    """
    txt = re.sub(r"https?://[A-Za-z0-9./]*", r" ", txt)
    """
    Remove Specific Special character
    """
    txt = re.sub(r"[-.!?()_]+", r" ", txt)
    """
    Remove charatcter like special characters, punctuations except alphanumeric charatcter.
    """
    txt = re.sub(r"[^0-9a-zA-Z]+", r" ", txt)
    """
    Remove Extra spaces which are appearing from previous processing steps.
   """
    txt = re.sub(r"\s+", r" ", txt).strip()
    return txt
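A quick sanity check of the cleaning pipeline on a made-up support tweet. The exact output of clean() can vary slightly depending on whether unidecode is installed, but it should look like the comment below.
"""
Illustrative check of textPreprocess (the sample string is made up).
"""
print(textPreprocess("Check https://support.apple.com NOW!! My iPhone-7 won't charge."))
# expected output (approximately): check now my iphone 7 won t charge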
Required Directories Creation
"""
Required functions for directory creation
"""
def check_if_dir_exists(directory):
    """
    Checks if 'directory' exists
    """
    return(os.path.isdir(directory))
def make_dir(directory):
    """
    Create directory
    """
    if not check_if_dir_exists(directory):
        os.mkdir(directory)
        print("Directory %s created successfully." %directory)
    else:
        print("Directory %s exists." %directory)
print("We are in:",os.getcwd())
"""
Required directory creation
"""
chatbot_dir="/content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta"
make_dir(chatbot_dir)
os.chdir("/content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta")
chatbot_data_dir = chatbot_dir + "/ChatBot_Data/"
make_dir(chatbot_data_dir)
chatbot_result_dir = chatbot_dir + "/ChatBot_Results/"
make_dir(chatbot_result_dir)
chatbot_checkpoint_dir = chatbot_dir + "/ChatBot_Checkpoint/"
make_dir(chatbot_checkpoint_dir)
print(chatbot_data_dir)
We are in: /content
Directory /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta exists.
Directory /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta/ChatBot_Data/ exists.
Directory /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta/ChatBot_Results/ exists.
Directory /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta/ChatBot_Checkpoint/ exists.
/content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta/ChatBot_Data/
Listing Directories
def list_dir(dir_path):
  """
  List directories for a given path
  """
  print("Directory %s contains : " %dir_path)
  for dir_or_file in os.listdir(dir_path):
    print(dir_or_file)
  print("\n")
"""
List created directories
"""
print('Current directory : ', os.getcwd(),'\n')
list_dir(chatbot_dir)
list_dir(chatbot_data_dir)
list_dir(chatbot_checkpoint_dir)
Current directory :  /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta

Directory /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta contains :
ChatBot_Data
ChatBot_Results
ChatBot_Checkpoint
.vector_cache
Images

Directory /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta/ChatBot_Data/ contains :
.kaggle
dev_v2.1.json.gz
dev_v2.1.json
question_asked.tsv
QAWhats.tsv
twcs.csv
QA_Pair.tsv
Apple_QA_Pair.tsv
Q_asked
question_asked

Directory /content/gdrive/My Drive/DATA_SCIENCE_Capstone_Project_ChatBot_Sofia_Dutta/ChatBot_Checkpoint/ contains :
QA_Seq2Seq_ChatBot_BestModel.pt
QA_Seq2Seq_ChatBot.pt
QA_GPT_Seq2seq_ChatBot.pt
QA_GPT_Seq2seq_Results.csv
Apple_QA_Seq2Seq_ChatBot.pt
Apple_QA_Seq2Seq_ChatBot_BestModel.pt
Apple_QA_GPT_Seq2seq_ChatBot.pt
Apple_QA_GPT_Seq2seq_Results.csv
"""
Batch Size, Embedding Size, Hidden Size
"""
BATCH_SZ, EMBEDDING_SIZE, HIDDEN_SIZE, NUM_LAYERS, EPOCHS = 32, 64, 256, 3, 20
"""
Sentence start, end and pad token
"""
SOS_TOKEN, EOS_TOKEN, PAD_TOKEN = "<SOS>", "<EOS>", "<_PADDING_>"
"""
QA Pair, word to integer map And integer to word map Creation Function
"""
def getVocab(qa_df):
     
    qa_pairs, vocab2idx, idx2vocab = [], {}, {}
    """
    Question Answer Pair Creation
    """
    qa_pairs=[(qa_df.iloc[idx]['question'],qa_df.iloc[idx]['answer']) for idx in range(len(qa_df))]
    print('Number of question and answer pairs : ',len(qa_pairs))
   
    """
    Creation of word to integer map.
    """
    vocab=set(word for question, answer in qa_pairs for sentence in (question, answer) for word in sentence.split(" "))
    print('Number of vocab : ',len(vocab))
    vocab2idx = {w:i for i,w in enumerate(vocab,3)}
    vocab2idx[PAD_TOKEN], vocab2idx[SOS_TOKEN], vocab2idx[EOS_TOKEN] = 0, 1, 2
    print('Number of keys in vocab2idx : ',len(vocab2idx))
    """
    Creation of integer to word map.
    """
    idx2vocab = {idx:word for word, idx in vocab2idx.items()}
    print('Number of keys in idx2vocab : ',len(idx2vocab))
      
    return qa_pairs, vocab2idx, idx2vocab
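A small, purely illustrative check of getVocab on a two-row toy DataFrame (not part of the real data):
toy_df = pd.DataFrame({'question': ['hi there', 'help me'],
                       'answer':   ['hello', 'sure thing']})
toy_pairs, toy_vocab2idx, toy_idx2vocab = getVocab(toy_df)
# toy_pairs     -> [('hi there', 'hello'), ('help me', 'sure thing')]
# toy_vocab2idx -> 7 unique words + 3 special tokens = 10 keys; toy_idx2vocab is its inverse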
"""
QA DataSet Creation Function
"""
class QADataset(Dataset):
    def __init__(self, lang_pairs, vocab2idx):
        self.lang_pairs = lang_pairs
        self.vocab2idx = vocab2idx
    def __len__(self):
        return len(self.lang_pairs)
    def addToken(self, txt, flag):
        if flag:
            return SOS_TOKEN + " " + txt + " " + EOS_TOKEN
        else:
            return txt + " " + EOS_TOKEN
    def getTensor(self, txt):
        return torch.tensor([self.vocab2idx[wrd] for wrd in txt.split(" ")], dtype=torch.int64)
    def __getitem__(self, idx):
        x, y = self.getTensor(self.addToken(self.lang_pairs[idx][0],True)), self.getTensor(self.addToken(self.lang_pairs[idx][1],False))
        return x, y
"""
Pad items in the batch to the length of the longest item in the batch
"""
def collate(batch):
    len_tuples=[(i[0].size(0) , i[1].size(0))  for i in batch]
    max_x, max_y = tuple(map(max, zip(*len_tuples)))
    getBatch = lambda batch, idx, max_len : torch.stack([F.pad(src_trg[idx], (0,max_len-src_trg[idx].size(0)), value=PAD) for src_trg in batch])
    
    X,Y = getBatch(batch, 0, max_x), getBatch(batch, 1, max_y)
    
    return (X, Y), Y
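A minimal illustration of collate on a toy batch of two (question, answer) tensor pairs. It assumes PAD is 0, which matches vocab2idx[PAD_TOKEN] assigned further below.
PAD = 0   # placeholder; re-assigned below to vocab2idx[PAD_TOKEN], which is also 0
toy_batch = [(torch.tensor([1, 5, 2]), torch.tensor([7, 2])),
             (torch.tensor([1, 9, 8, 4, 2]), torch.tensor([6, 3, 2]))]
(X, Y), _ = collate(toy_batch)
print(X.shape, Y.shape)   # torch.Size([2, 5]) torch.Size([2, 3]); shorter rows are right-padded with PAD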
"""
Load Refined Data
"""
qa_final_df=pd.read_csv(chatbot_data_dir+'Apple_QA_Pair.tsv',sep='\t')
print('Shape of qa_final_df : ',qa_final_df.shape)
qa_final_df.head()
"""
Create qa_pairs, vocab2idx, idx2vocab
"""
qa_pairs, vocab2idx, idx2vocab = getVocab(qa_final_df)
PAD = vocab2idx[PAD_TOKEN]
SOS = vocab2idx[SOS_TOKEN]
EOS = vocab2idx[EOS_TOKEN]
"""
Create QA DataSet
"""
qa_dataset = QADataset(qa_pairs, vocab2idx)
"""
Data Split
"""
train_size, test_size = round(len(qa_dataset)*0.8),len(qa_dataset)-round(len(qa_dataset)*0.8)
train_dataset, test_dataset = torch.utils.data.random_split(qa_dataset, [train_size, test_size])
"""
Create DataLoader
"""
train_loader = DataLoader(train_dataset, batch_size = BATCH_SZ, shuffle = True, collate_fn = collate)
test_loader = DataLoader(test_dataset, batch_size = BATCH_SZ, collate_fn = collate)
print("\n\nHow does the tensor look?\n",train_dataset[8])
Shape of qa_final_df :  (96750, 2)
Number of question and answer pairs :  96750
Number of vocab :  35075
Number of keys in vocab2idx :  35078
Number of keys in idx2vocab :  35078
How does the tensor look?
 (tensor([    1,  1530, 31549,  2713, 33282, 22879,  4114,  3806,  9206, 34541,
        12057,  3820, 12720, 23014, 12187, 12057,  3475,     2]), tensor([28294, 12187, 20625,  4868,  2639,  3456,  7267, 12859, 12057,  9073,
        30162, 23118, 20185, 31533,  2378, 28581, 12070,  4251, 13627,     2]))
"""
Attention Mechanism Layers
"""
class Attention(nn.Module):
    def __init__(self):
        super(Attention, self).__init__()
    def dot_score(self, hidden_encoded, hidden_decoded):
        """
        *******
        Input
        *******
        hidden_encoded  : (B, T, D) 
        hidden_decoded  : (B, D)
        *******
        Output
        *******
        attention_score :  (B, T, 1)
        """
        return torch.bmm(hidden_encoded, hidden_decoded.unsqueeze(2)) / np.sqrt(hidden_encoded.size(2))
        
        
    def forward(self, hidden_encoded, hidden_decoded, mask=None):
        """
        *******
        Input
        *******
        hidden_encoded   : (B, T, D)
        hidden_decoded   : (B, D)
        attention_scores : (B, T, 1) 
        mask             : (B, T) 
            
        *******
        Output
        *******
        context           : (B, D) 
        attention_weight  : (B, T, 1)
        """
        """
        Attention scores
        """
        attention_scores = self.dot_score(hidden_encoded, hidden_decoded)
        if mask is not None:
            attention_scores[~mask] = float(-1000)
        
        """
        Attention weight
        """
        attention_weight = F.softmax(attention_scores, dim=1)
        
        """
        (B, T, D) * (B, T, 1) to (B, D)
        """
        context = (hidden_encoded * attention_weight).sum(dim=1)
        
        return context, attention_weight
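A quick shape check of the attention block with random tensors (sizes are arbitrary and for illustration only):
attention_demo = Attention()
enc_demo = torch.randn(2, 5, 16)   # hidden_encoded : (B, T, D)
dec_demo = torch.randn(2, 16)      # hidden_decoded : (B, D)
ctx_demo, weight_demo = attention_demo(enc_demo, dec_demo)
print(ctx_demo.shape, weight_demo.shape)   # torch.Size([2, 16]) torch.Size([2, 5, 1])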
def maskedFill(input, time_dimension=1, fill=0):
    """
    Generate Mask of shape (B, T) to determine input sequence length.
    """
    dimensions = list(range(1,len(input.shape))) 
    
    if time_dimension in dimensions:
        dimensions.remove(time_dimension)
       
    with torch.no_grad():
        if len(dimensions) == 0:
            return (input != fill)
        
        mask = torch.sum((input != fill), dim=dimensions) > 0
    return mask
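maskedFill returns a (B, T) boolean mask marking the non-padded time steps; padded positions have all-zero embeddings because nn.Embedding is given padding_idx. An illustrative check on a toy tensor:
toy_embed = torch.zeros(2, 4, 3)   # (B, T, D) with all-zero rows acting as padding
toy_embed[0, :2] = 1.0             # first sequence has 2 real time steps
toy_embed[1, :3] = 1.0             # second sequence has 3 real time steps
print(maskedFill(toy_embed))
# tensor([[ True,  True, False, False],
#         [ True,  True,  True, False]])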
class EncoderRNN(nn.Module):
    def __init__(self, embedding_size, hidden_size, n_layers=1, bidirectional=True):
        super(EncoderRNN, self).__init__()
        self.encoder_layer = nn.GRU(input_size = embedding_size, 
                                    hidden_size = hidden_size//2, 
                                    num_layers = n_layers, 
                                    bidirectional = bidirectional)
    def forward(self, question_embd, question_len):
        
        """
        Pack the sequences as question sequences are of varying length.
        """
        embed_packed = pack_padded_sequence(question_embd,
                                            question_len, 
                                            batch_first=True,
                                            enforce_sorted=False)
        
        enc_outs, h_enc = self.encoder_layer(embed_packed)
        """
        As bidirectional : (B, T, 2, D//2)
        """
        enc_outs, _ = pad_packed_sequence(enc_outs) 
        
        batch_size, time_step = question_embd.size(0), question_embd.size(1)
        """
        (B, T, 2, D//2) to (B, T, D)
        """
        enc_outs = enc_outs.view(batch_size, time_step , -1) 
        hidden_size = enc_outs.size(2) 
        """
        Reshape h_enc to (n_layers, num_directions, batch_size, hidden_size // 2).
        Take the last layer's output.
        """
        h_enc = h_enc.view(-1, 2, batch_size, hidden_size//2)[-1,:,:,:] 
        """
        Reordering to (B, 2, D/2) and reshaping to (B, D)
        """
        h_enc = h_enc.permute(1, 0, 2).reshape(batch_size, -1)
        return enc_outs, h_enc
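An illustrative shape check of the encoder with random inputs (arbitrary sizes, not taken from the real model):
encoder_demo = EncoderRNN(8, 16, n_layers=2)      # embedding size 8, hidden size 16
dummy_embed  = torch.randn(4, 7, 8)               # (B, T, D_embed), already "embedded"
dummy_lengths = torch.tensor([7, 5, 3, 2])        # true lengths of the 4 sequences
demo_outs, demo_h = encoder_demo(dummy_embed, dummy_lengths)
print(demo_outs.shape, demo_h.shape)              # torch.Size([4, 7, 16]) torch.Size([4, 16])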
class AttentionDecoderRNN(nn.Module):
    def __init__(self,vocab_size, embedding_size, hidden_size, n_layers=1):
        super(AttentionDecoderRNN, self).__init__()
        """
        The decoder is uni-directional and uses GRUCells so that decoding is done one step at a time.
        """
        self.decoder_layers = nn.ModuleList([nn.GRUCell(embedding_size, hidden_size)] + 
                                            [nn.GRUCell(hidden_size, hidden_size) for i in range(n_layers-1)])
        
        self.attention = Attention()
        """
        Prediction head: a fully connected network that converts the attention context and the decoded context into the predicted next token.
        """
        self.prediction = nn.Sequential(nn.Linear(2*hidden_size, hidden_size),
                                        nn.LeakyReLU(),
                                        nn.LayerNorm(hidden_size),
                                        nn.Linear(hidden_size, hidden_size),
                                        nn.LeakyReLU(),
                                        nn.LayerNorm(hidden_size),
                                        nn.Linear(hidden_size, vocab_size)
                                      )
    def forward(self, decoder_input, h_previous, encoded_outs, mask):
        
        for layer in range(len(self.decoder_layers)):  
            next_hidden_state = self.decoder_layers[layer](decoder_input, h_previous[layer])
            
            h_previous[layer], decoder_input = next_hidden_state, next_hidden_state
            
        """
        (B, D)
        """    
        answer_decoded = decoder_input 
        """
        Attention mechanism, to get relevant information from the previous encoded states.
        (B, T, 1)
        """ 
        attention_context, attention_weights = self.attention(encoded_outs, answer_decoded, mask=mask)
        
        """
        Concatenating the attention context and the decoded context.
        (B, D) + (B, D)  to (B, 2*D)
        """
        pred_token = torch.cat((attention_context, answer_decoded), dim=1) 
        """
        Predict the next token.
        (B, 2*D) to (B, V)
        """
        pred_token = self.prediction(pred_token) 
        return attention_weights, pred_token, h_previous
The heart of the chatbot is a sequence-to-sequence (seq2seq) model. The goal of a seq2seq model is to take a variable-length question sequence as input and return a variable-length answer sequence as output.
Components :
I have used an nn.Embedding layer to convert tokens into feature vectors.
Next, I have used nn.GRU, an encoding RNN that takes a tensor of shape $(B, T, D)$, since it expects all $T$ items at once. As the entire question is available up front, I used a bidirectional nn.GRU.
For the decoder RNN I have used nn.GRUCell, a uni-directional decoding RNN, as it generates the output one item at a time.
To prevent an infinite loop in the case of a bad prediction, I have set a limit of 22 decode_steps to control the maximum number of decoding steps.
class Seq2SeqAttention(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size, pad_idx=None, n_layers=1, decode_steps=22):
        super(Seq2SeqAttention, self).__init__()
        self.pad_idx = pad_idx
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Embedding(vocab_size, embedding_size, padding_idx=self.pad_idx)
        
        
        self.encoder = EncoderRNN(embedding_size,
                                  hidden_size,
                                  n_layers)
        
        self.attndecoder = AttentionDecoderRNN(vocab_size,
                                               embedding_size,
                                               hidden_size,
                                               n_layers)
        
        self.decode_steps = decode_steps               
    
    def forward(self, question):
        """
        question either (B, T) or ((B, T), (B, T'))
        """
        if isinstance(question, tuple):
            question, answer = question
        else:
            answer = None
        batch_size, time_dimension  = question.size(0), question.size(1)
        """
        Embedding
        (B, T) to (B, T, D)
        """
        embeded = self.embedding(question) 
        device = self.embedding.weight.device
        mask = maskedFill(embeded)
        question_lengths = mask.sum(dim=1).view(-1).cpu()  # pack_padded_sequence expects lengths on the CPU
        """
        Encoding
        """
        encoded_outs, h_encoded = self.encoder(embeded, question_lengths)
        """
        Attention Decoding 
        encoded_outs : encoded feature vectors of the question data.
        h_encoded : the initial input for the decoder.
        """
        h_previous = [h_encoded for _ in range(self.n_layers)]
        attention_weights, predictions = [], []
        
        """
        Last token of question, EOS marker as the first input for the decoder.
        """
        decoder_input = self.embedding(question[:,-1]) 
        decode_steps = self.decode_steps
        """
        Training : Given Question and Answer pairs gives exact decode length.
        Testing  : Given decode_steps.
        """
        if answer is not None: 
            decode_steps = answer.size(1)
        
        """
        Either Teacher Forcing OR Auto-Regressive
        """
        teacher_forcing = np.random.choice((True,False))
        for decode_step in range(decode_steps):
            """
            (B, D)
            """
            decoder_in = decoder_input   
            attention_weight, pred_token, h_previous = self.attndecoder(decoder_in, h_previous, encoded_outs, mask)
            
            attention_weights.append(attention_weight.detach())  
            predictions.append(pred_token)
            
            """
            Selecting the token for the next time step. 
            torch.no_grad() : In-order to prevent the gradient to pass through the question tokens.
            """
            with torch.no_grad():
                if self.training:
                    if answer is not None and teacher_forcing:
                        """
                        Teacher Forcing : next correct token.
                        """
                        next_token = answer[:,decode_step].squeeze()
                    else:
                        """
                        Auto-Regressive : next token based on the prediction.
                        """
                        next_token = torch.multinomial(F.softmax(pred_token, dim=1), 1)[:,-1]
                else:
                    """
                    For testing : selecting most likely token.
                    """
                    next_token = torch.argmax(pred_token, dim=1)
            
            """
            The selected token becomes the decoder input for the next time step's prediction.
            """
            decoder_input = self.embedding(next_token.to(device))
        
        prediction, attention_score = torch.stack(predictions, dim=1),  torch.stack(attention_weights, dim=1).squeeze()
        
        return prediction, attention_score
checkpointFile = chatbot_checkpoint_dir +'Apple_QA_Seq2Seq_ChatBot.pt'
checkpointFileBestModel = chatbot_checkpoint_dir +'Apple_QA_Seq2Seq_ChatBot_BestModel.pt'
seq2seq_model = Seq2SeqAttention(vocab_size      = len(vocab2idx), 
                                 embedding_size  = EMBEDDING_SIZE, 
                                 hidden_size     = HIDDEN_SIZE,
                                 pad_idx         = PAD, 
                                 n_layers        = NUM_LAYERS
                                 )
"""
Gradient Clipping
"""
for param in seq2seq_model.parameters():
    param.register_hook(lambda grad: torch.clamp(grad, -10, 10))
"""
Load State Dict of Best Model
"""
checkpoint_dict = torch.load(checkpointFileBestModel)
seq2seq_model.load_state_dict(checkpoint_dict['model_state_dict'])
seq2seq_model = seq2seq_model.eval().cpu()
getWords = lambda x : [idx2vocab[idx] for idx in x.cpu().numpy()]
Get question_tensor and answer_tensor from test_dataset.
Pass question_tensor to the seq2seq model in eval mode, inside torch.no_grad() so that no gradients are computed. This gives the predicted answer tensor.
Convert the predicted answer tensor and answer_tensor to words using the index-to-vocabulary mapping.
Compute the BLEU score between the reference answers and the predicted answers, as the toy call below illustrates.
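torchtext's bleu_score expects a list of tokenized candidates and, for each candidate, a list of tokenized reference answers, which is why calculateBleuScore below wraps each reference in an extra list. A tiny, made-up illustration:
"""
Toy illustration of the input nesting bleu_score expects (made-up tokens).
"""
demo_predictions = [['we', 'want', 'to', 'help']]
demo_references  = [[['we', 'want', 'to', 'help']]]
print(bleu_score(demo_predictions, demo_references))   # 1.0 for an exact match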
"""
Function that calculates the BLEU Score
"""
def calculateBleuScore(model):
    
    answers, pred_answers = [], []
    for idx in tqdm(range(len(test_dataset)), desc="BLEU", disable=False):
      
        question_tensor, answer_tensor = test_dataset[idx]
  
        with torch.no_grad():
            predictions, attn_score = model(question_tensor.unsqueeze(0))
            pred = torch.argmax(predictions, dim=2)
        
        ans_words, pred_ans = getWords(answer_tensor), getWords(pred[0,:])
        answers.append([ans_words[:-1]])
        pred_answers.append(pred_ans)
    
    return bleu_score(pred_answers, answers)
"""
Calculate BLEU Score
"""
bleu = calculateBleuScore(seq2seq_model)
print('BLEU Score : {:.4f}'.format(bleu))
BLEU Score : 0.4861
Get question_tensor and answer_tensor from test_dataset.
Pass question_tensor to the seq2seq model in eval mode, inside torch.no_grad() so that no gradients are computed. This gives the predicted answer tensor.
Convert the predicted answer tensor and answer_tensor to words using the index-to-vocabulary mapping.
Count the number of words the two share.
Calculate precision and recall from that count.
Calculate the F1 Score based on the following formula. The F1 Score takes co-occurring words into account regardless of their order, as the example after the formula illustrates.
F1 Score : $precision = \frac{\text{common words}}{\text{predicted length}}$, $recall = \frac{\text{common words}}{\text{reference length}}$, $F_1 = \frac{2 \cdot precision \cdot recall}{precision + recall}$
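To make the counting step concrete, here is a purely illustrative check of the precision/recall/F1 arithmetic on two made-up token lists (collections is already imported above):
demo_reference = "we want to help which device are you using".split()
demo_predicted = "we want to help which iphone are you using".split()
demo_common = sum((collections.Counter(demo_reference) & collections.Counter(demo_predicted)).values())
demo_precision = demo_common / len(demo_predicted)     # 8 / 9
demo_recall    = demo_common / len(demo_reference)     # 8 / 9
demo_f1 = (2 * demo_precision * demo_recall) / (demo_precision + demo_recall)
print(round(demo_f1, 4))                               # 0.8889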
def calculate_f1_score(model):
    f1_scores = []
    for idx in tqdm(range(len(test_dataset)), desc="F1 Score", disable=False):
        question_tensor, answer_tensor = test_dataset[idx]
        
        with torch.no_grad():
            predictions, attention_score = model(question_tensor.unsqueeze(0))
            pred = torch.argmax(predictions, dim=2)
        answer_words, pred_ans = getWords(answer_tensor), getWords(pred[0,:])
        number_of_common_words = sum((collections.Counter(answer_words) & collections.Counter(pred_ans)).values())
        if number_of_common_words == 0:
            f1_score =  0
        else:
            precision = 1.0 * number_of_common_words / len(pred_ans)
            recall = 1.0 * number_of_common_words / len(answer_words)
            f1_score = (2 * precision * recall) / (precision + recall)
            
        f1_scores.append(f1_score)
    return f1_scores
f1_scores = calculate_f1_score(seq2seq_model)
f1_score =(sum(f1_scores)/len(f1_scores))
print('F1 Score : {:.4f}'.format(f1_score))
F1 Score : 0.9053
"Recall-Oriented Understudy for Gisting Evaluation. It includes measures to automatically determine the quality of a summary by comparing it to other (ideal) summaries created by humans. The measures count the number of overlapping units such as n-gram, word sequences, and word pairs between the computer-generated summary to be evaluated and the ideal summaries created by humans.
"Given two sequences X and Y, the longest common subsequence (LCS) of X and recall reflects the proportion of words in X (reference summary sentence) that are also present in Y (candidate summary sentence); while unigram precision is the proportion of words in Y that are also in X. Unigram recall and precision count all cooccurring words regardless their orders; while ROUGE-L counts only in-sequence co-occurrences."
ROUGE-L is one type of ROUGE measures. It is calculated by taking into account longest common subsequence (LCS) between two sequences.It counts only in-sequence co-occurrences.
Get question_tensor and answer_tensor from test_dataset.
Pass question_tensor to the seq2seq model in eval mode, inside torch.no_grad() so that no gradients are computed. This gives the predicted answer tensor.
Convert the predicted answer tensor and answer_tensor to words using the index-to-vocabulary mapping.
Compute the ROUGE-L score between the reference answer and the predicted answer from the longest common subsequence (LCS) of the two sequences.
Applying the ROUGE-L Score formula (with $\beta = 1.2$, as in the code below): $R_{lcs} = \frac{LCS(X,Y)}{m}$, $P_{lcs} = \frac{LCS(X,Y)}{n}$, $F_{lcs} = \frac{(1 + \beta^2)\, R_{lcs} P_{lcs}}{R_{lcs} + \beta^2 P_{lcs}}$, where $m$ and $n$ are the lengths of the reference and predicted sequences.
def longest_common_subsequence(str1, str2):
        
    """
    Make a grid of 0's with len(str2) + 1 columns and len(str1) + 1 rows.
    """
    dp = [[0] * (len(str2) + 1) for _ in range(len(str1) + 1)]
    
    """
    Iterate up each column, starting from the last one.
    """
    for col in reversed(range(len(str2))):
        for row in reversed(range(len(str1))):
            
            if str2[col] == str1[row]:
                """
                If the corresponding characters for this cell are the same.
                """
                dp[row][col] = 1 + dp[row + 1][col + 1]
            
            else:
                """
                Otherwise they must be different.
                """
                dp[row][col] = max(dp[row + 1][col], dp[row][col + 1])
    
    """
    The original problem's answer is in dp[0][0]. Return it.
    """
    return dp[0][0]
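A quick, illustrative sanity check of the LCS helper on made-up token lists:
print(longest_common_subsequence(['a', 'b', 'c', 'd'], ['a', 'x', 'c', 'd']))                       # 3  ('a', 'c', 'd')
print(longest_common_subsequence('thanks for the info'.split(), 'thanks for that info'.split()))    # 3  ('thanks', 'for', 'info')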
    
def rougel_score(ans, pred):
    
    BETA, answers, pred_answers = 1.2, [], []
        
    if len(pred) == 0 or len(ans) == 0:
        return 0.0
    
    for idx in range(min(len(pred),len(ans))):
        pred_words, ans_words = pred[idx], ans[idx]
        long_cmmn_subseq = longest_common_subsequence(ans_words, pred_words)
        answers.append(long_cmmn_subseq/float(len(ans_words)))
        pred_answers.append(long_cmmn_subseq/float(len(pred_words)))
        
    max_ans, max_pred = max(answers), max(pred_answers)
    
    """
    Rouge-L Score
    """
    return ((1 + BETA**2)* max_pred * max_ans)/float(max_ans + BETA**2 * max_pred) if (max_ans !=0 and max_pred !=0) else 0.0
def calculateRougeLScore(model):
    
    answers, pred_answers = [], []
    for idx in tqdm(range(len(test_dataset)), desc="ROUGE-L", disable=False):
      
        question_tensor, answer_tensor = test_dataset[idx]
        with torch.no_grad():
            predictions, attn_score = model(question_tensor.unsqueeze(0))
            pred = torch.argmax(predictions, dim=2)
        
        answer_words, pred_ans = getWords(answer_tensor), getWords(pred[0,:])
        pred_answers.append(pred_ans)
        answers.append(answer_words[:-1])
    return rougel_score(answers, pred_answers)
rouge_l_score = calculateRougeLScore(seq2seq_model)
print('ROUGE-L Score : {:.4f}'.format(rouge_l_score))
ROUGE-L Score : 0.9606
Start Conversation with the Bot
def bot_response(question):
    question = SOS_TOKEN + " " + textPreprocess(question) + " " + EOS_TOKEN
    
    question_tensor = torch.tensor([vocab2idx[w] for w in question.split(" ")], dtype=torch.int64)
    
    with torch.no_grad():
        predictions, attention_score = seq2seq_model(question_tensor.unsqueeze(0))
        pred = torch.argmax(predictions, dim=2)
 
    pred_words = getWords(pred[0,:]) 
    return " ".join([w for w in pred_words if not (w == '<EOS>')])
question = ''
print('Bot : Hi, Did you want to chat with me?')
while question.lower()[:3] != 'bye':
  try:
    while True:
        print('Me : ', end='')
        question = input()
        if question:
            break
    if question.lower()[:3] != 'bye':
        response = bot_response(question)
        print('Bot: ' + response)
    else:
        print('Bot: Bye!! Stay safe. Have a nice day.')
  except KeyError:
            print("Sorry, I am not sure what you are talking about :/")
Bot : Hi, Did you want to chat with me?
Me : My last os update is not working.
Bot: we'd like to help dm us the details of the issues you're experiencing and we'll go from there <url>
Me : This needs to be fixed as my music randomly pauses.
Bot: we want to help which iphone and ios version are you using
Me : iphone and ios version is 11.
Bot: thanks for letting us know let's continue in dm <url>
Me : Also after the update,unable to connect to wifi automatically.
Bot: we want to help which device are you using
Me : iphone.
Bot: thanks for that info let's continue in dm <url>
Me : Thanks for your help!!!
Bot: you're welcome we're glad to hear reach out to us if you need any more help have a great
Me : Bye!!!
Bot: Bye!! Stay safe. Have a nice day.