Dear Yuhang,
I have noticed that you are using a ResNet20 for CIFAR10 with 11.3 Million parameters. In the original ResNet publication of He et al [1] the definition of ResNet20 on CIFAR10 is given and results in 0.27 Million parameters. I know that it is somewhat "conventional" to use the implementation of ResNet20 you are using, the problem is that I am really interested in the one with the smaller number of parameters : P
I have defined the "original" ResNet20 for CIFAR10 with 0.27 M parameters as shown below. I added the file under models
in your repository and first ran the ANN training and then the SNN calibration on it:
python -m SNN_Calibration.CIFAR.main_train --dataset CIFAR10 --arch orgres20 --dpath 'datasets/CIFAR10/' --usebn
python -m SNN_Calibration.CIFAR.main_calibration --dataset CIFAR10 --arch orgres20 --T 16 --usebn --calib advanced --dpath 'datasets/CIFAR10/'
The ANN training works well and reaches 93.5% accuracy. But for some reason the SNN calibration doesn't work on the network below and yields only 20% accuracy. Please help me get the SNN calibration working on this network : ) I would very much appreciate understanding the issue here.
[1] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2016-December, 770–778. https://doi.org/10.1109/CVPR.2016.90
'''
ResNet20 on CIFAR10 with the correct number of parameters (0.27M) as in the original publication [1].
References:
[1] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learning for image recognition. In CVPR, 2016.
[2] K. He, X. Zhang, S. Ren, and J. Sun. Identity mappings in deep residual networks. In ECCV, 2016.
'''
import torch
import torch.nn as nn
import math
# @anna: I fixed the following relative imports
from ...CIFAR.models.utils import AvgPoolConv, StraightThrough
from ...CIFAR.models.spiking_layer import SpikeModel, SpikeModule, Union
import torch.nn.functional as F
from .resnet import SpikeBasicBlock
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with a padding of 1.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (defaults to 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a skip connection.

    ``BN`` and ``ReLU`` are resolved as module-level names at construction
    time, so they must already be defined when a block is instantiated.

    Args:
        inplanes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input on the skip path;
            ``None`` means the input is added unchanged.
    """

    # Multiplier applied to ``planes`` by the network builder to obtain the
    # block's output channel count.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.stride = stride
        # The order of the assignments below fixes the order in which the
        # sub-modules show up in modules() and in the state dict.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = BN(planes)
        self.relu1 = ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = BN(planes)
        self.downsample = downsample
        self.relu2 = ReLU(inplace=True)

    def forward(self, x):
        """Run the two-convolution path, add the skip path, apply ``relu2``."""
        out = self.relu1(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Skip path: identity unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu2(out)
class Org_ResNet_Cifar_Modified(nn.Module):
    """Residual network with a three-convolution stem and three residual stages.

    Layout: stem (three 3x3 convolutions with 64 channels, the last one with
    stride 2) -> three residual stages of 16, 32 and 64 planes -> ``AvgPoolConv``
    -> linear classifier ``fc_save``.

    NOTE(review): the stem is wider and deeper than a single 16-channel 3x3
    convolution; confirm this is intended if the target is the small
    CIFAR ResNet20 parameter budget.

    Args:
        block: Residual block class. It must expose an ``expansion`` attribute
            and accept ``(inplanes, planes, stride, downsample)``.
        layers: Sequence giving the number of blocks in each of the three
            stages (indices 0, 1 and 2 are read).
        num_classes: Number of outputs of the final linear layer.
        use_bn: When true, normalisation layers are ``nn.BatchNorm2d``;
            otherwise ``StraightThrough`` is used in their place.
    """

    def __init__(self, block, layers, num_classes=10, use_bn=True):
        super(Org_ResNet_Cifar_Modified, self).__init__()
        # BasicBlock resolves ``BN`` and ``ReLU`` as module-level names, so
        # they are published as globals before any block is built. A later
        # instantiation with a different ``use_bn`` overwrites them for every
        # block created afterwards.
        global BN
        BN = nn.BatchNorm2d if use_bn else StraightThrough
        global ReLU
        ReLU = nn.ReLU

        # Channel count entering the first residual stage (= stem output).
        self.inplanes = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            BN(64),
            ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
            BN(64),
            ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False),
            BN(64),
            ReLU(inplace=True),
        )
        self.layer1 = self._make_layer(block, 16, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
        # With 32x32 inputs the three stride-2 steps leave a 4x4 map here.
        # Assumes AvgPoolConv with kernel 4 reduces that to 1x1 so that the
        # flattened tensor has the 64 features ``fc_save`` expects — confirm.
        self.avgpool = AvgPoolConv(kernel_size=4, stride=1, input_channel=64)
        self.fc_save = nn.Linear(64, num_classes)

        for m in self.modules():
            # AvgPoolConv is explicitly excluded from the convolution
            # initialisation so its weights are left as constructed.
            if isinstance(m, nn.Conv2d) and not isinstance(m, AvgPoolConv):
                # Normal init with std sqrt(2 / (kh * kw * out_channels)).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                # Normal init with std 1 / in_features, zero bias.
                n = m.weight.size(1)
                m.weight.data.normal_(0, 1.0 / float(n))
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage and advance ``self.inplanes``.

        Args:
            block: Residual block class (see the class docstring).
            planes: Base channel count of the stage.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block; later blocks use the block's
                default stride.

        Returns:
            An ``nn.Sequential`` holding the stage's blocks.
        """
        downsample = None
        # A 1x1 projection is needed on the skip path whenever the first
        # block changes the spatial size or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                BN(planes * block.expansion)
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the classifier outputs for the input batch ``x``.

        No debugging output is written during the forward pass.
        """
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc_save(x)
        return x
def org_resnet20(**kwargs):
    """Create an ``Org_ResNet_Cifar_Modified`` with three ``BasicBlock``s per stage.

    Args:
        **kwargs: Forwarded unchanged to the ``Org_ResNet_Cifar_Modified``
            constructor (e.g. ``num_classes``, ``use_bn``).

    Returns:
        The constructed network.
    """
    blocks_per_stage = [3, 3, 3]
    return Org_ResNet_Cifar_Modified(BasicBlock, blocks_per_stage, **kwargs)
# Maps the plain residual block class to the spiking block class imported from .resnet.
# Presumably consumed by the ANN-to-SNN conversion code to swap blocks — verify against the caller.
# NOTE(review): SpikeBasicBlock comes from .resnet; confirm it is compatible with the BasicBlock
# defined in this file (attributes conv1/bn1/relu1/conv2/bn2/relu2/downsample).
res_specials = {BasicBlock: SpikeBasicBlock}