Printing a Network Model's conv.weight (Part 1)
Today we go over a few common ways of building simple network models, and the problems encountered when using

for name, module in model.named_modules():
    print('modules:', module)

to print conv.weight.
I. First, we need to know what structure

print('modules:', module)

actually prints. Take a simple network and have a look:
import torch
import torch.nn as nn

class MyNet(nn.Module):
    def __init__(self, inplanes, planes):
        super(MyNet, self).__init__()
        self.conv_block = torch.nn.Sequential()
        self.conv_block.add_module("conv1", torch.nn.Conv2d(inplanes, planes, 3, 1, 1))
        self.conv_block.add_module("relu1", torch.nn.ReLU())
        self.conv_block.add_module("pool1", torch.nn.MaxPool2d(2))
        self.dense_block = torch.nn.Sequential()
        self.dense_block.add_module("dense1", torch.nn.Linear(32 * 3 * 3, 128))
        self.dense_block.add_module("relu2", torch.nn.ReLU())
        self.dense_block.add_module("dense2", torch.nn.Linear(128, 10))

    def forward(self, x):
        conv_out = self.conv_block(x)
        res = conv_out.view(conv_out.size(0), -1)
        out = self.dense_block(res)
        return out

model = MyNet(3, 32)
# print(model.conv_block.conv1.weight)
for name, module in model.named_modules():
    print('modules:', module)
    print("******************")
modules: MyNet(
  (conv_block): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU()
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense_block): Sequential(
    (dense1): Linear(in_features=288, out_features=128, bias=True)
    (relu2): ReLU()
    (dense2): Linear(in_features=128, out_features=10, bias=True)
  )
)
******************
modules: Sequential(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
******************
modules: Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
******************
modules: ReLU()
******************
modules: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
******************
modules: Sequential(
  (dense1): Linear(in_features=288, out_features=128, bias=True)
  (relu2): ReLU()
  (dense2): Linear(in_features=128, out_features=10, bias=True)
)
******************
modules: Linear(in_features=288, out_features=128, bias=True)
******************
modules: ReLU()
******************
modules: Linear(in_features=128, out_features=10, bias=True)
******************
We can see that the printed structure follows a "whole-then-parts" pattern: the overall structure is displayed first, and then each submodule is broken down layer by layer. This concept is important; we will use it later.
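To make the "whole-then-parts" order concrete, here is a minimal sketch (reusing the model above) that prints the name attached to each yielded module; the root model comes back with an empty name, followed by its children in definition order:

for name, module in model.named_modules():
    # first iteration: the whole model, with name ''
    # then: 'conv_block', 'conv_block.conv1', ..., 'dense_block.dense2'
    print('name:', repr(name), '->', type(module).__name__)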
II. Printing conv.weight

1) Defining a Conv2d directly also lets you print its weight:
x = nn.Conv2d(in_channels=32, out_channels=6, kernel_size=3, stride=1, padding=0)
print(x.weight)
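For reference, the printed parameter has shape [out_channels, in_channels, kernel_h, kernel_w], which for this layer is torch.Size([6, 32, 3, 3]):

print(x.weight.shape)   # torch.Size([6, 32, 3, 3])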
2) Defining every layer by hand, without building with Sequential, and using

for name, module in model.named_modules():
import torch
import torch.nn as nn

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

x = Bottleneck(32, 32)
# print(x.conv1.weight)
for name, module in x.named_modules():
    print('modules:', module)
    print(module.conv1.weight)   # only prints once before erroring
modules: Bottleneck(
  (conv1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
)
Parameter containing:
tensor([[[[ 0.0640]], ... ]], requires_grad=True)    (weight values truncated)
modules: Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
Explanation: the "whole" part of the whole-then-parts printout comes out fine, and module.conv1.weight is printed along with it. But as soon as the loop reaches a "part", it crashes with an AttributeError along the lines of "'Conv2d' object has no attribute 'conv1'". The reason is that only the first module yielded by named_modules() is the whole Bottleneck, which really does have a conv1 attribute; every later module is a submodule (Conv2d, BatchNorm2d, ...), and asking it for .conv1 fails. So the "whole" can always print its weights, but the moment a "part" comes up, the code errors out.
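One way to avoid the crash, shown as a minimal sketch: guard the attribute access with hasattr, so that only modules that actually own a conv1 (here, just the whole Bottleneck) are asked for its weight:

for name, module in x.named_modules():
    if hasattr(module, 'conv1'):   # only the root Bottleneck has a .conv1 attribute
        print('modules:', module)
        print(module.conv1.weight)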
3) Building with Sequential
import torch
import torch.nn as nn

# x = nn.Conv2d(in_channels=32, out_channels=6, kernel_size=3, stride=1, padding=0)
# print(x.weight)

class MyNet(nn.Module):
    def __init__(self, inplanes, planes):
        super(MyNet, self).__init__()
        self.conv_block = torch.nn.Sequential()
        self.conv_block.add_module("conv1", torch.nn.Conv2d(inplanes, planes, 3, 1, 1))
        self.conv_block.add_module("relu1", torch.nn.ReLU())
        self.conv_block.add_module("pool1", torch.nn.MaxPool2d(2))
        self.dense_block = torch.nn.Sequential()
        self.dense_block.add_module("dense1", torch.nn.Linear(32 * 3 * 3, 128))
        self.dense_block.add_module("relu2", torch.nn.ReLU())
        self.dense_block.add_module("dense2", torch.nn.Linear(128, 10))

    def forward(self, x):
        conv_out = self.conv_block(x)
        res = conv_out.view(conv_out.size(0), -1)
        out = self.dense_block(res)
        return out

model = MyNet(3, 32)
# print(model.conv_block.conv1.weight)
for name, module in model.named_modules():
    print('modules:', module)
    print(module.conv_block.conv1.weight)
modules: MyNet(
  (conv_block): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU()
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense_block): Sequential(
    (dense1): Linear(in_features=288, out_features=128, bias=True)
    (relu2): ReLU()
    (dense2): Linear(in_features=128, out_features=10, bias=True)
  )
)
Parameter containing:
tensor([[[[ 1.7723e-01, -1.5411e-01, -1.1020e-01],
          ... ]]], requires_grad=True)    (remaining weight values truncated)
modules: Sequential(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
It has to be module.conv_block.conv1.weight to print anything, because the network definition declares conv_block first and conv1 inside it. And as before, once the "whole" has printed its weights, the loop errors out as soon as a "part" comes up: the Sequential yielded next has no conv_block attribute.
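If the goal is simply to print every convolution's weight no matter how the model was assembled, a simpler route is to filter by module type instead of spelling out attribute paths. A minimal sketch (the isinstance filter is standard PyTorch practice, not something from the runs above):

for name, module in model.named_modules():
    if isinstance(module, nn.Conv2d):   # pick out the conv layers themselves
        print(name, module.weight.shape)
        # print(module.weight)          # uncomment to dump the full tensor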
III. Defining the model with Sequential and printing two weights: it also errors when it reaches a "part"
import torch
import torch.nn as nn
from collections import OrderedDict

class MyNet(torch.nn.Module):
    def __init__(self, inplanes, planes):
        super(MyNet, self).__init__()
        self.conv1 = torch.nn.Sequential(OrderedDict([
            ("conv", torch.nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)),
            ("relu1", torch.nn.ReLU()),
            ("pool", torch.nn.MaxPool2d(2))
        ]))
        self.dense = torch.nn.Sequential(OrderedDict([
            ("dense1", torch.nn.Linear(32 * 3 * 3, 128)),
            ("relu2", torch.nn.ReLU()),
            ("dense2", torch.nn.Linear(128, 10))
        ]))

    def forward(self, x):
        conv_out = self.conv1(x)
        res = conv_out.view(conv_out.size(0), -1)
        out = self.dense(res)
        return out

model = MyNet(3, 32)
# print(model.conv1.conv.weight)
# for name, module in model.named_children():
#     print('children module:', name)
for name, module in model.named_modules():
    print("***************")
    print('names:', name)
    print('modules:', module)
    print(module.conv1.conv.weight)   # only the conv.weight inside conv1 gets printed
    print(module.dense.dense1.weight)
***************
names:
modules: MyNet(
  (conv1): Sequential(
    (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (relu1): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense): Sequential(
    (dense1): Linear(in_features=288, out_features=128, bias=True)
    (relu2): ReLU()
    (dense2): Linear(in_features=128, out_features=10, bias=True)
  )
)
Parameter containing:
tensor([[[[ 0.1538]], ... ]], requires_grad=True)    (conv weight values truncated)
Parameter containing:
tensor([[ 0.0487, -0.0190, -0.0245, ...,  0.0512, -0.0006, -0.0239],
        ... ], requires_grad=True)    (dense weight values truncated)
***************
names: conv1
modules: Sequential(
  (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Conclusion: the module in module.conv1.conv.weight only resolves when module is the "whole" model. So to avoid the error, you need to add a guard that keeps only the "whole", i.e. only the modules that actually carry the attribute you are after.
Solution: follow RepVGG's approach. Pick out the modules that contain a repvgg_convert function and hand only those to the next step:
for name, module in model.named_modules():   # named_modules() yields every branch/submodule
    if hasattr(module, 'repvgg_convert'):    # guard: keep only modules that define repvgg_convert
        kernel, bias = module.repvgg_convert()
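The same hasattr/isinstance guard solves the weight-printing problem from the earlier sections too. And if all you want is every weight tensor, iterating named_parameters() sidesteps attribute paths entirely; a minimal sketch (the endswith filter is my own choice of what to keep):

for name, param in model.named_parameters():
    if name.endswith('.weight'):   # skip biases; keep conv/linear weights
        print(name, param.shape)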
