-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Closed
Description
This is probably related to the earlier issue #2264 (cc @gchanan).
Running many iterations of double backward through BatchNorm2d can cause an out-of-memory error, so there may be a memory leak somewhere. Code to reproduce the error:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
class BatchNormTest(nn.Module):
    """Minimal module: a single ``BatchNorm2d`` layer followed by a ReLU.

    Serves as the network in the double-backward reproduction script.
    """

    def __init__(self, c, num_classes=2):
        """Build the module.

        Args:
            c: Number of channels passed to ``nn.BatchNorm2d``.
            num_classes: Accepted for interface compatibility; not used
                anywhere in this module.
        """
        super(BatchNormTest, self).__init__()
        self.bn = nn.BatchNorm2d(c)

    def forward(self, x):
        """Return ``relu(bn(x))`` for the input ``x``."""
        out = x
        out = self.bn(out)
        out = F.relu(out)
        return out
c = 100
net = BatchNormTest(c)
use_cuda = True
inputs = Variable(torch.rand(100, c, 100, 100), requires_grad=True)
if use_cuda:
    net.cuda()
    inputs = inputs.cuda()

# on my server it fails at iteration 14
T = 100
for i in range(T):
    output = net(inputs)
    loss1 = torch.sum(output)
    # create_graph=True keeps the graph of this gradient computation so
    # that the gradient itself can be differentiated below.
    grad_params = torch.autograd.grad(loss1, inputs, create_graph=True)
    grad = grad_params[0]
    loss = torch.sum(grad)
    # Second backward pass (double backward through the batch norm layer).
    loss.backward()
    # Printed after each completed iteration, so the last number shown is
    # the last iteration that finished.
    print(i)
The error message:
THCudaCheck FAIL file=/private/home/hongyizmit/pytorch/torch/lib/THC/generic/THCStorage.cu line=66 error=2 : out of memory
Traceback (most recent call last):
File "models/testres.py", line 37, in <module>
loss.backward()
File "/private/home/hongyizmit/.conda/envs/torchmaster/lib/python2.7/site-packages/torch/autograd/variable.py", line 156, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
File "/private/home/hongyizmit/.conda/envs/torchmaster/lib/python2.7/site-packages/torch/autograd/__init__.py", line 98, in backward
variables, grad_variables, retain_graph)
File "/private/home/hongyizmit/.conda/envs/torchmaster/lib/python2.7/site-packages/torch/nn/_functions/thnn/batchnorm_double_backwards.py", line 80, in batchnorm_double_backwards_fn
gG = ggI * first_back_grad_input(gO, 1)
File "/private/home/hongyizmit/.conda/envs/torchmaster/lib/python2.7/site-packages/torch/nn/_functions/thnn/batchnorm_double_backwards.py", line 73, in first_back_grad_input
input_sub_mu.div(sigma2_eps) * sum_exclude_dim1(gO * input_sub_mu))
File "/private/home/hongyizmit/.conda/envs/torchmaster/lib/python2.7/site-packages/torch/autograd/variable.py", line 829, in __mul__
return self.mul(other)
File "/private/home/hongyizmit/.conda/envs/torchmaster/lib/python2.7/site-packages/torch/autograd/variable.py", line 339, in mul
return Mul.apply(self, other)
File "/private/home/hongyizmit/.conda/envs/torchmaster/lib/python2.7/site-packages/torch/autograd/_functions/basic_ops.py", line 48, in forward
return a.mul(b)
RuntimeError: cuda runtime error (2) : out of memory at /private/home/hongyizmit/pytorch/torch/lib/THC/generic/THCStorage.cu:66
zxj32
Metadata
Metadata
Assignees
Labels
No labels