# human-gaze-guided-neural-at.../joint_paraphrase_model/libs/fixation_generation/main.py

from collections import OrderedDict
import logging
import sys
from .self_attention import Transformer
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_sequence, pack_padded_sequence, pad_packed_sequence
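
# Module overview (descriptive summary of the code below): a fixation-prediction network
# (embedding layer -> BiLSTM -> optional Transformer self-attention -> per-token fixation
# score), together with a random-embedding initialiser and two loss functions
# (token-level negative log-likelihood and length-masked MSE).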


def random_embedding(vocab_size, embedding_dim):
    """Return a (vocab_size, embedding_dim) matrix of U(-scale, scale) random embeddings."""
    pretrain_emb = np.empty([vocab_size, embedding_dim])
    scale = np.sqrt(3.0 / embedding_dim)
    for index in range(vocab_size):
        pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
    return pretrain_emb


def neg_log_likelihood_loss(outputs, batch_label, batch_size, seq_len):
    """Token-level negative log-likelihood loss; returns (loss, predicted tag sequence)."""
    outputs = outputs.view(batch_size * seq_len, -1)
    score = F.log_softmax(outputs, 1)
    # ignore_index=0 skips padding tokens; sum over tokens, then normalise by batch size.
    loss = nn.NLLLoss(ignore_index=0, reduction="sum")(
        score, batch_label.view(batch_size * seq_len)
    )
    loss = loss / batch_size
    _, tag_seq = torch.max(score, 1)
    tag_seq = tag_seq.view(batch_size, seq_len)
    # print(score[0], tag_seq[0])
    return loss, tag_seq


def mse_loss(outputs, batch_label, batch_size, seq_len, word_seq_length):
    """Masked MSE between sigmoid fixation scores and labels; padding positions are zeroed out."""
    # score = torch.nn.functional.softmax(outputs, 1)
    score = torch.sigmoid(outputs)
    # Mask out positions beyond each sequence's true length before computing the loss.
    mask = torch.zeros_like(score)
    for i, v in enumerate(word_seq_length):
        mask[i, 0:v] = 1
    score = score * mask
    loss = nn.MSELoss(reduction="sum")(
        score.view(batch_size, seq_len), batch_label.view(batch_size, seq_len)
    )
    loss = loss / batch_size
    return loss, score.view(batch_size, seq_len)


class Network(nn.Module):
    """Fixation prediction network: embeddings -> BiLSTM -> optional self-attention -> per-token score."""

    def __init__(
        self,
        embedding_type,
        vocab_size,
        embedding_dim,
        dropout,
        hidden_dim,
        embeddings=None,
        attention=True,
    ):
        super().__init__()
        self.logger = logging.getLogger(__name__)
        prelayers = OrderedDict()
        postlayers = OrderedDict()
        if embedding_type in ("w2v", "glove"):
            if embeddings is not None:
                prelayers["embedding_layer"] = nn.Embedding.from_pretrained(embeddings)
            else:
                prelayers["embedding_layer"] = nn.Embedding(vocab_size, embedding_dim)
            prelayers["embedding_dropout_layer"] = nn.Dropout(dropout)
            embedding_dim = 300  # w2v/glove vectors are assumed to be 300-dimensional
        elif embedding_type == "bert":
            embedding_dim = 768  # BERT-base hidden size
        self.lstm = BiLSTM(embedding_dim, hidden_dim // 2, num_layers=1)
        postlayers["lstm_dropout_layer"] = nn.Dropout(dropout)
        if attention:
            # Experiment notes: complexity was increased to 1024D with 16 heads and 16 layers
            # for the no-attention vs. attention experiments; the initial attention and
            # pretraining runs used 4 heads, 4 layers, and 128D; the IUI results were
            # obtained with 128D, 4 heads, and 1 layer.
            ### postlayers["position_encodings"] = PositionalEncoding(hidden_dim)
            postlayers["attention_layer"] = Transformer(
                d_model=hidden_dim, n_heads=4, n_layers=1
            )
        postlayers["ff_layer"] = nn.Linear(hidden_dim, hidden_dim // 2)
        postlayers["ff_activation"] = nn.ReLU()
        postlayers["output_layer"] = nn.Linear(hidden_dim // 2, 1)
        self.logger.info(f"prelayers: {prelayers.keys()}")
        self.logger.info(f"postlayers: {postlayers.keys()}")
        self.pre = nn.Sequential(prelayers)
        self.post = nn.Sequential(postlayers)

    def forward(self, x, word_seq_length):
        x = self.pre(x)
        x = self.lstm(x, word_seq_length)
        # Debug helper: print model parameters.
        # for p in self.parameters():
        #     print(p.data)
        #     break
        # The BiLSTM output is (seq_len, batch, hidden); transpose to batch-first for the post layers.
        return self.post(x.transpose(1, 0))


class BiLSTM(nn.Module):
    """Thin wrapper around a bidirectional LSTM operating on packed, batch-first sequences."""

    def __init__(self, embedding_dim, lstm_hidden, num_layers):
        super().__init__()
        self.net = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=lstm_hidden,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, x, word_seq_length):
        packed_words = pack_padded_sequence(
            x, word_seq_length, batch_first=True, enforce_sorted=False
        )
        lstm_out, hidden = self.net(packed_words)
        # Unpacked output is (seq_len, batch, 2 * lstm_hidden) because batch_first is not set here.
        lstm_out, _ = pad_packed_sequence(lstm_out)
        return lstm_out
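

if __name__ == "__main__":
    # Minimal smoke-test sketch (not part of the original module): runs the fixation model on
    # random data with illustrative, assumed hyperparameters. attention=False is used so the
    # example does not depend on the interface of the sibling Transformer module, although
    # importing this file still requires the self_attention module to be present.
    batch_size, seq_len, vocab_size, hidden_dim = 2, 5, 100, 128

    model = Network(
        embedding_type="w2v",
        vocab_size=vocab_size,
        embedding_dim=300,
        dropout=0.1,
        hidden_dim=hidden_dim,
        attention=False,
    )

    tokens = torch.randint(0, vocab_size, (batch_size, seq_len))  # padded token ids
    lengths = [seq_len, 3]                                        # true length of each example
    labels = torch.rand(batch_size, seq_len)                      # fixation targets in [0, 1]

    outputs = model(tokens, lengths)                              # (batch, seq_len, 1)
    loss, scores = mse_loss(outputs, labels, batch_size, seq_len, lengths)
    print(outputs.shape, loss.item(), scores.shape)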