import copy
import math
import operator
import os
import random
import time

import matplotlib.lines as mlines
import numpy as np
import pandas as pd
import pickle as pkl
import torch
from scipy.stats import norm
from sklearn.metrics import f1_score
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import DataLoader, Dataset
from torchinfo import summary


def find_replace(seq, token, word):
    # Replace every occurrence of the adjacent pair `token` in `seq` with the
    # merged symbol `word`.
    # NOTE: only "st = 0" and the start of a "while st ..." loop survived the
    # truncation in the source; the loop body below is a plausible BPE-style
    # reconstruction, not the original code.
    st = 0
    while st < len(seq) - 1:
        if tuple(seq[st:st + 2]) == tuple(token):
            seq = list(seq[:st]) + [word] + list(seq[st + 2:])
        st += 1
    return seq


# NOTE: everything between the loop above and the save-and-return block below
# was lost to truncation in the source. The surviving tail is wrapped in a
# helper whose name and signature are assumed, so the file stays valid Python.
def save_vocab(vocab, vocabdict, words, paircounter, savefilename):
    print('...', savefilename)  # the original message was lost to truncation
    with open(savefilename, 'wb') as f:
        pkl.dump([vocab, vocabdict, words, paircounter], f)
    return savefilename


def sort_by_key_len(d):
    # Sort a dict's entries by key length, longest first, and return them as a
    # list of single-entry dicts. (Parameter renamed from `dict` to avoid
    # shadowing the builtin.)
    key_lens = {key: len(key) for key in d.keys()}
    sorted_key_list = sorted(key_lens.items(), key=operator.itemgetter(1), reverse=True)
    return [{item[0]: d[item[0]]} for item in sorted_key_list]


def getPair(word, N=2):
    # Slide a length-N window (stride 1) over `word` and return every adjacent
    # N-gram together with the word itself.
    word = np.array(word)
    slid = 1
    sub_windows = (
        np.expand_dims(np.arange(N), 0)
        + np.expand_dims(np.arange(0, word.shape[0] - N + 1, slid), 0).T
    ).astype(int)
    return word[sub_windows], word


def flattenTuple(x):
    # Recursively flatten a nested tuple into a flat list of 1-tuples.
    flatten = []
    for oo in list(x):
        if type(oo) == int:
            flatten.append(tuple([oo]))
        elif len(oo) == 1:
            flatten.append(oo)
        else:
            flatten = flatten + flattenTuple(oo)
    return flatten


def equalTuple(x, y):
    # Two nested tuples are considered equal when they contain the same
    # elements and flatten to the same sequence.
    if set(x) == set(y) and flattenTuple(x) == flattenTuple(y):
        return True
    return False


def findTuple(target, candidates):
    for x in candidates:
        if equalTuple(target, x):
            return True
    return False


def uniqueSubwords(pairs, ignore):
    # Collect the distinct subword pairs as hashable tuples of tuples
    # (`ignore` is unused in the surviving code).
    subwords = set([])
    for pair in pairs:
        subword = []
        for x in list(pair):
            if len(x) == 1:
                subword.append(tuple(x))
            else:
                subword.append(list(x))
        subword = tuple(map(tuple, subword))
        subwords.add(subword)
    return subwords


def setup_seed(seed):
    # Seed torch, CUDA, numpy and random for reproducible runs.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True


def poolsegmentTokenise(gkey, gdata, win_len, vocabdict, mark, BPEvocab=None):
    # Split one group's event sequence into non-overlapping windows of length
    # `win_len` and tokenise them against `vocabdict` (and, optionally, a BPE
    # vocabulary). `gkey` and `mark` are presumably used in the truncated part.
    unknowntoken = vocabdict['unknown']
    paddingtoken = vocabdict['padding']
    slid = win_len
    windowTokendict, windowBPEdict = {}, {}
    events = np.array(list(gdata.event.values) + ['OVER'])
    sub_windows = (
        np.expand_dims(np.arange(win_len), 0)
        + np.expand_dims(np.arange(0, len(gdata) - win_len + 1, slid), 0).T
    ).astype(int)
    windowOri = events[sub_windows].tolist()
    if len(sub_windows) == 0:
        lastidx = 0
    else:
        lastidx = sub_windows[-1][-1]
    if lastidx < len(events) - 1:  # condition assumed; truncated in the source
        # NOTE: the source is truncated from here through the start of
        # PositionalEncoding.forward below. The lost code presumably tokenised
        # each window in `windowOri`, padded the final partial window with
        # `paddingtoken`, and mapped unseen events to `unknowntoken`; it could
        # not be recovered.
        pass
    return windowTokendict, windowBPEdict  # return value assumed


class PositionalEncoding(nn.Module):
    # NOTE: the class header and __init__ were lost to the truncation above;
    # only the forward body survived. The standard batch-first sinusoidal
    # encoding is reconstructed here to match that surviving body.
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        x = x + self.pe[:, :x.shape[1]]
        return self.dropout(x)


class SupervisedTransformerv2(nn.Module):
    def __init__(self, **params):
        super().__init__()
        self.params = params
        self.pos_encoder = PositionalEncoding(d_model=params['d_model'],
                                              dropout=params['dropout'])
        if 'ntokens' in params.keys():
            self.embedding = nn.Embedding(num_embeddings=params['ntokens'],
                                          embedding_dim=params['d_model'])
        self.transformer_encoder = TransformerEncoder(
            TransformerEncoderLayer(d_model=params['d_model'], nhead=params['nhead'],
                                    dim_feedforward=params['d_model'] * 4,
                                    dropout=params['dropout'], activation='relu',
                                    batch_first=True),
            params['n_layers'])
        # Classify from the concatenation of all token embeddings in a window.
        self.linear = nn.Linear(params['d_model'] * params['win_len'], params['nlabels'])

    def forward(self, encoder_input, paddingmask):
        encoder_embed = self.embedding(encoder_input) * math.sqrt(self.params['d_model'])
        encoder_pos = self.pos_encoder(encoder_embed)
        encoder_output = self.transformer_encoder(src=encoder_pos,
                                                  src_key_padding_mask=paddingmask)
        output = encoder_output.view(encoder_output.shape[0], -1)
        final_output = self.linear(output)
        return final_output
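
# Usage sketch (added for illustration, not part of the original source): a
# minimal forward-pass smoke test for SupervisedTransformerv2. The function
# name and every hyperparameter value below are placeholder assumptions.
def _demo_supervised_transformer():
    params = {'d_model': 32, 'dropout': 0.1, 'ntokens': 50, 'nhead': 4,
              'n_layers': 2, 'win_len': 16, 'nlabels': 4}
    model = SupervisedTransformerv2(**params)
    tokens = torch.randint(0, params['ntokens'], (8, params['win_len']))
    mask = torch.zeros(8, params['win_len'], dtype=torch.bool)  # no padding
    logits = model(tokens, mask)
    assert logits.shape == (8, params['nlabels'])
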
def evaluate(model, criterion, val_loader, device):
    # Compute mean loss and macro-F1 over the validation loader.
    model = model.to(device)
    model.eval()
    losses, f1s = [], []
    with torch.no_grad():
        for batchdata, batchlabel, batchmask in val_loader:
            predictions = model(batchdata.to(device), batchmask.to(device))
            loss = criterion(predictions, batchlabel.reshape(-1).to(device).long())
            if np.isnan(loss.item()):
                raise ValueError('Loss NaN!')
            losses.append(loss.item())
            pred_label = np.argmax(predictions.detach().cpu().numpy(), axis=1)
            f1 = f1_score(batchlabel.numpy(), pred_label, average='macro')
            f1s.append(f1)
    return np.mean(losses), np.mean(f1s)


class DatasetPadding(Dataset):
    # Wraps token windows with their padding masks (and labels, if supervised).
    def __init__(self, data, paddingtoken=None, label=None):
        self.data = data
        self.label = label
        if paddingtoken is not None:
            self.mask = data == paddingtoken

    def __getitem__(self, idx):
        if self.label is None:
            return self.data[idx], self.mask[idx]
        return self.data[idx], self.label[idx], self.mask[idx]

    def __len__(self):
        return len(self.data)


def fit_transformer(traindata, trainlabel, testdata, testlabel, args, device,
                    paddingtoken, nLabels, savemodel='model', nTokens=None):
    if os.path.exists(savemodel):
        return
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    traindata = traindata.astype(int)
    params = {'nlabels': nLabels, 'batch_size': args.batch_size, 'd_model': args.d_model,
              'optimizer_name': args.optimizer_name, 'nhead': args.nhead,
              'dropout': args.dropout, 'win_len': traindata.shape[1], 'lr': args.lr,
              'n_layers': args.n_layers, 'ntokens': nTokens}
    trainset = DatasetPadding(data=traindata, paddingtoken=paddingtoken, label=trainlabel)
    testset = DatasetPadding(data=testdata, paddingtoken=paddingtoken, label=testlabel)
    trainloader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, num_workers=0)
    testloader = DataLoader(testset, batch_size=params['batch_size'], shuffle=True, num_workers=0)
    model = SupervisedTransformerv2(**params).to(device)
    model.train()
    optimizer = getattr(torch.optim, params['optimizer_name'])(
        model.parameters(), lr=params['lr'], betas=(0.9, 0.999), weight_decay=0.01)
    # Log (and checkpoint) roughly 20 times per epoch.
    LOG = max(1, len(trainloader) // 20)
    trloss, valoss, trf1, vaf1 = [], [], [], []
    evaloss, evaf1 = 0, 0
    for epoch in range(1, args.epochs + 1):
        for batch, (batchdata, batchlabel, batchmask) in enumerate(trainloader):
            predictions = model(batchdata.to(device), batchmask.to(device))
            loss = criterion(predictions, batchlabel.reshape(-1).to(device).long())
            if np.isnan(loss.item()):
                raise ValueError('Loss NaN!')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            evaloss += loss.item()
            pred_label = np.argmax(predictions.detach().cpu().numpy(), axis=1)
            evaf1 += f1_score(batchlabel.numpy(), pred_label, average='macro')
            if batch % LOG == 0 or batch == len(trainloader) - 1:
                cur_valoss, cur_vaf1 = evaluate(model, criterion, testloader, device)
                model.train()
                trloss.append(evaloss / LOG)
                valoss.append(cur_valoss)
                trf1.append(evaf1 / LOG)
                vaf1.append(cur_vaf1)
                evaloss, evaf1 = 0, 0
                print('Epoch [{}/{}], Batch [{}/{}], Train Loss: {:.4f}, Train F1: {:.4f}, '
                      'Val Loss: {:.4f}, Val F1: {:.4f}'.format(
                          epoch, args.epochs, batch, len(trainloader),
                          trloss[-1], trf1[-1], valoss[-1], vaf1[-1]))
                # Save a CPU snapshot without moving the live model off `device`.
                torch.save([copy.deepcopy(model).cpu(), [trloss, valoss, trf1, vaf1]],
                           savemodel + '%d.pkl' % epoch)
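
# End-to-end usage sketch (added for illustration, not part of the original
# source): trains fit_transformer for one epoch on random synthetic windows.
# SimpleNamespace stands in for the argparse namespace the original presumably
# builds elsewhere; all names and values here are assumptions.
if __name__ == '__main__':
    from types import SimpleNamespace
    setup_seed(42)
    args = SimpleNamespace(batch_size=32, d_model=32, nhead=4, dropout=0.1,
                           lr=1e-4, n_layers=2, epochs=1, optimizer_name='AdamW')
    ntokens, win_len, nlabels, padding = 50, 16, 4, 0
    Xtr = np.random.randint(1, ntokens, size=(256, win_len))
    ytr = np.random.randint(0, nlabels, size=(256,))
    Xte = np.random.randint(1, ntokens, size=(64, win_len))
    yte = np.random.randint(0, nlabels, size=(64,))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    fit_transformer(Xtr, ytr, Xte, yte, args, device, paddingtoken=padding,
                    nLabels=nlabels, savemodel='demo_model', nTokens=ntokens)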