vlcn/core/model/utils.py

164 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Utility helpers from the PyTorch DNC (Differentiable Neural Computer)
implementation at:
https://github.com/ixaxaar/pytorch-dnc
"""
import torch.nn as nn
import torch as T
import torch.nn.functional as F
import numpy as np
import torch
from torch.autograd import Variable
import re
import string
def recursiveTrace(obj):
    """Print a debugging trace of the autograd objects reachable from ``obj``.

    The type of ``obj`` is always printed first. Then:

    * if ``obj`` has a ``grad_fn`` attribute, that attribute is printed and
      traced recursively;
    * otherwise, if ``obj`` has ``saved_variables``, its ``requires_grad``
      flag and the sizes of ``saved_tensors`` / ``saved_variables`` are
      printed, followed by every saved variable, followed by a recursive
      trace of each saved variable's ``grad_fn``.

    Arguments:
        obj -- any object; objects with neither attribute only get their
            type printed

    Returns:
        None -- all output goes to stdout
    """
    print(type(obj))

    if hasattr(obj, 'grad_fn'):
        print(obj.grad_fn)
        recursiveTrace(obj.grad_fn)
        return

    if not hasattr(obj, 'saved_variables'):
        return

    print(obj.requires_grad, len(obj.saved_tensors), len(obj.saved_variables))
    # First list every saved variable, then descend into each one, so the
    # listing is printed as one contiguous group before any recursion output.
    for saved in obj.saved_variables:
        print(saved)
    for saved in obj.saved_variables:
        recursiveTrace(saved.grad_fn)
def cuda(x, grad=False, gpu_id=-1):
    """Wrap ``x`` in a ``FloatTensor`` and set its ``requires_grad`` flag.

    Arguments:
        x -- data to wrap; tensors are cast with ``.float()`` first, anything
            else is handed to ``T.FloatTensor`` as is

    Keyword Arguments:
        grad {bool} -- value assigned to ``requires_grad`` (default: {False})
        gpu_id {number} -- ``-1`` keeps the result on the CPU; any other value
            pins ``x`` and moves the result with ``.cuda(gpu_id)``
            (default: {-1})

    Returns:
        Tensor -- the resulting float tensor
    """
    if T.is_tensor(x):
        x = x.float()

    if gpu_id == -1:
        wrapped = T.FloatTensor(x)
    else:
        # The GPU path calls x.pin_memory(), so x must provide that method.
        wrapped = T.FloatTensor(x.pin_memory()).cuda(gpu_id)

    wrapped.requires_grad = grad
    return wrapped
def cudavec(x, grad=False, gpu_id=-1):
    """Convert a NumPy array into a float32 tensor, optionally on a GPU.

    The array is cast with ``.float()`` so that arrays of any numeric dtype
    (for example NumPy's default ``float64``) are accepted; wrapping a
    non-float32 tensor in ``T.Tensor(...)`` is rejected by PyTorch with a
    dtype-mismatch error.

    Arguments:
        x {numpy.ndarray} -- numeric array to convert

    Keyword Arguments:
        grad {bool} -- value assigned to ``requires_grad`` (default: {False})
        gpu_id {number} -- ``-1`` keeps the tensor on the CPU; any other value
            pins the tensor and moves it with ``.cuda(gpu_id)``
            (default: {-1})

    Returns:
        Tensor -- float32 tensor holding the values of ``x``
    """
    # .float() is a no-op for float32 input, so that case is not copied here.
    t = T.from_numpy(x).float()
    if gpu_id != -1:
        t = t.pin_memory().cuda(gpu_id)
    t.requires_grad = grad
    return t
def cudalong(x, grad=False, gpu_id=-1):
    """Convert a NumPy array into an int64 (long) tensor, optionally on a GPU.

    The array is cast with ``np.int64`` rather than the ``np.long`` alias:
    the alias is absent from some NumPy releases and, where present, follows
    the platform's C ``long`` instead of being a fixed 64-bit type.

    Arguments:
        x {numpy.ndarray} -- array to convert; values are cast to int64

    Keyword Arguments:
        grad {bool} -- value assigned to ``requires_grad`` (default: {False}).
            NOTE(review): PyTorch is expected to reject ``True`` for integer
            tensors -- confirm whether any caller passes it.
        gpu_id {number} -- ``-1`` keeps the tensor on the CPU; any other value
            pins the tensor and moves it with ``.cuda(gpu_id)``
            (default: {-1})

    Returns:
        Tensor -- int64 tensor holding the values of ``x``
    """
    t = T.from_numpy(x.astype(np.int64))
    if gpu_id != -1:
        t = t.pin_memory().cuda(gpu_id)
    t.requires_grad = grad
    return t
def θ(a, b, normBy=2):
    """Batchwise cosine similarity between the rows of ``a`` and ``b``.

    Arguments:
        a {Tensor} -- A 3D Tensor (b * m * w)
        b {Tensor} -- A 3D Tensor (b * r * w)

    Keyword Arguments:
        normBy {number} -- order of the norm used for the row lengths
            (default: {2})

    Returns:
        Tensor -- Batchwise cosine similarity (b * r * m)
    """
    # Pairwise dot products between every row of a and every row of b.
    similarity = T.bmm(a, b.transpose(1, 2))

    # Row norms, shaped so that their product broadcasts over `similarity`.
    length_a = T.norm(a, normBy, dim=2, keepdim=True)
    length_b = T.norm(b, normBy, dim=2).unsqueeze(1)

    # δ keeps the denominator away from zero for all-zero rows.
    scaled = similarity / (length_a * length_b + δ)
    return scaled.transpose(1, 2).contiguous()
def σ(input, axis=1):
    """Softmax on an axis

    Delegates to ``F.softmax`` with ``dim=axis`` instead of manually
    transposing the axis to the end and flattening to 2D. The values and the
    output shape are the same, and tensors whose softmax axis has size zero
    are handled instead of failing in the intermediate ``view``.

    Arguments:
        input {Tensor} -- input Tensor

    Keyword Arguments:
        axis {number} -- axis on which to take softmax on (default: {1})

    Returns:
        Tensor -- Softmax output Tensor, same shape as ``input``
    """
    return F.softmax(input, dim=axis)
# Small constant added to the denominator of the cosine similarity in θ so the
# division never hits an exact zero.
δ = 1e-6
def register_nan_checks(model):
    """Attach a backward hook to every module of ``model`` that reports NaNs.

    During the backward pass, each hooked module prints
    ``'NaN gradient in grad_input <ModuleClass>'`` when any of its
    ``grad_input`` tensors contains at least one NaN. A gradient is reported
    as soon as a single element is NaN; requiring every element to be NaN
    would let partially corrupted gradients go unnoticed.

    NOTE(review): newer PyTorch releases deprecate ``register_backward_hook``
    in favour of ``register_full_backward_hook``; the original call is kept
    so hook semantics stay as they were -- confirm before switching.

    Arguments:
        model {nn.Module} -- module whose sub-modules (and itself) get hooked

    Returns:
        None
    """
    def check_grad(module, grad_input, grad_output):
        # grad_input entries can be None for inputs that need no gradient.
        has_nan = any(
            bool(T.isnan(gi).any()) for gi in grad_input if gi is not None
        )
        if has_nan:
            print('NaN gradient in grad_input ' + type(module).__name__)

    model.apply(lambda module: module.register_backward_hook(check_grad))
def apply_dict(dic):
    """Run ``apply_var`` over every value of ``dic`` and over module internals.

    Each ``(key, value)`` pair is passed to ``apply_var``. When the value is
    an ``nn.Module``, every attribute listed by ``dir`` whose name does not
    start with ``'__'`` is passed as well, followed by every entry of the
    module's ``_parameters`` mapping.

    Arguments:
        dic {dict} -- mapping from names to objects to inspect

    Returns:
        None
    """
    for name, value in dic.items():
        apply_var(value, name)

        if not isinstance(value, nn.Module):
            continue

        # Non-dunder attributes of the module.
        for attr_name in dir(value):
            if attr_name.startswith('__'):
                continue
            apply_var(getattr(value, attr_name), attr_name)

        # Parameters registered directly on the module.
        for param_name, param in value._parameters.items():
            apply_var(param, param_name)
def apply_var(v, k):
    """Register a NaN-gradient hook on ``v`` if it is a trainable Variable.

    Nothing happens unless ``v`` is a ``Variable`` instance with
    ``requires_grad`` set; in that case the hook built by
    ``check_nan_gradient(k)`` is registered on it.

    Arguments:
        v -- candidate object
        k -- name handed to ``check_nan_gradient`` for the hook's message

    Returns:
        None
    """
    if not isinstance(v, Variable):
        return
    if v.requires_grad:
        v.register_hook(check_nan_gradient(k))
def check_nan_gradient(name=''):
    """Build a gradient hook that reports when a gradient's mean is NaN.

    Keyword Arguments:
        name {str} -- label inserted into the printed message (default: {''})

    Returns:
        function -- hook taking a gradient tensor. When the mean of the
            tensor is NaN it prints ``'\\nnan gradient of <name> :'`` and
            returns the tensor it was given; otherwise it returns ``None``.
    """
    def f(tensor):
        mean_value = tensor.mean().data.cpu().numpy()
        if not np.isnan(mean_value):
            return None
        print('\nnan gradient of {} :'.format(name))
        return tensor

    return f
def ptr(tensor):
    """Return the data pointer of the storage behind ``tensor``.

    Arguments:
        tensor -- a tensor, an object exposing ``.data``, or anything else

    Returns:
        For tensors, ``tensor.storage().data_ptr()``. For non-tensors that
        have a ``data`` attribute, the storage pointer of the data of a clone
        of the object. Any other object is returned unchanged.
    """
    if T.is_tensor(tensor):
        return tensor.storage().data_ptr()

    if hasattr(tensor, 'data'):
        cloned = tensor.clone()
        return cloned.data.storage().data_ptr()

    return tensor
def ensure_gpu(tensor, gpu_id):
    """Place ``tensor`` on the device selected by ``gpu_id``.

    Device and type are detected with ``T.is_tensor`` / ``is_cuda`` /
    ``isinstance`` rather than by searching ``str(type(tensor))`` for
    ``"cuda"`` or ``"Tensor"``: the type name does not reliably carry the
    device, and tensor subclasses such as ``nn.Parameter`` do not contain
    ``"Tensor"`` in their type name.

    Arguments:
        tensor -- a tensor, a NumPy array, or any other object
        gpu_id {number} -- ``-1`` selects the CPU, any other value is passed
            to ``.cuda(gpu_id)``

    Returns:
        * tensor input: the tensor on GPU ``gpu_id``, or on the CPU when
          ``gpu_id`` is ``-1`` (a tensor already on the CPU is returned as is)
        * ``numpy.ndarray`` input: ``cudavec(tensor, gpu_id=gpu_id).data``
        * anything else: the object unchanged
    """
    if T.is_tensor(tensor):
        if gpu_id != -1:
            return tensor.cuda(gpu_id)
        return tensor.cpu() if tensor.is_cuda else tensor

    if isinstance(tensor, np.ndarray):
        return cudavec(tensor, gpu_id=gpu_id).data

    return tensor
def print_gradient(x, name):
    """Register a hook on ``x`` that prints its gradient when it is computed.

    The printed line starts with ``"Gradient of <name>"`` followed by a dashed
    rule, then the gradient with ``squeeze()`` applied.

    Arguments:
        x {Tensor} -- tensor on which the hook is registered
        name {str} -- label used in the printed header

    Returns:
        None
    """
    header = "Gradient of " + name + " ----------------------------------"

    def show(grad):
        print(header, grad.squeeze())

    x.register_hook(show)