Printing a Network Model's conv.weight (Part 1)
Today we walk through several common ways of building simple network models, and the problems you run into when printing conv.weight via:

for name, module in model.named_modules():
    print('modules:', module)
1. First, understand what structure print('modules:', module) actually produces

Let's look at a simple network:
import torch
import torch.nn as nn

class MyNet(nn.Module):
    def __init__(self, inplanes, planes):
        super(MyNet, self).__init__()
        self.conv_block = torch.nn.Sequential()
        self.conv_block.add_module("conv1", torch.nn.Conv2d(inplanes, planes, 3, 1, 1))
        self.conv_block.add_module("relu1", torch.nn.ReLU())
        self.conv_block.add_module("pool1", torch.nn.MaxPool2d(2))
        self.dense_block = torch.nn.Sequential()
        self.dense_block.add_module("dense1", torch.nn.Linear(32 * 3 * 3, 128))
        self.dense_block.add_module("relu2", torch.nn.ReLU())
        self.dense_block.add_module("dense2", torch.nn.Linear(128, 10))

    def forward(self, x):
        conv_out = self.conv_block(x)
        res = conv_out.view(conv_out.size(0), -1)  # flatten for the dense block
        out = self.dense_block(res)
        return out

model = MyNet(3, 32)
# print(model.conv_block.conv1.weight)
for name, module in model.named_modules():
    print('modules:', module)
    print("******************")
modules: MyNet(
  (conv_block): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU()
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense_block): Sequential(
    (dense1): Linear(in_features=288, out_features=128, bias=True)
    (relu2): ReLU()
    (dense2): Linear(in_features=128, out_features=10, bias=True)
  )
)
******************
modules: Sequential(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
******************
modules: Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
******************
modules: ReLU()
******************
modules: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
******************
modules: Sequential(
  (dense1): Linear(in_features=288, out_features=128, bias=True)
  (relu2): ReLU()
  (dense2): Linear(in_features=128, out_features=10, bias=True)
)
******************
modules: Linear(in_features=288, out_features=128, bias=True)
******************
modules: ReLU()
******************
modules: Linear(in_features=128, out_features=10, bias=True)
******************
We can see that the output follows a "whole-then-parts" pattern: the full model is printed first, then each sub-block, then every individual layer. This concept matters; we will rely on it below.
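To make the hierarchy explicit, you can print the name alongside each module; a minimal sketch (the empty name belongs to the top-level model, dotted names to nested layers):

for name, module in model.named_modules():
    # name is '' for the model itself, 'conv_block' for a sub-block,
    # 'conv_block.conv1' for a leaf layer, and so on
    print('name:', name)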
2. Printing conv.weight
1) A Conv2d defined directly also lets you print its weight
x = nn.Conv2d(in_channels=32, out_channels=6, kernel_size=3, stride=1, padding=0)
print(x.weight)
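For orientation, a Conv2d weight is a Parameter of shape (out_channels, in_channels, kH, kW); printing the shape is often more readable than dumping all the values:

print(x.weight.shape)   # torch.Size([6, 32, 3, 3])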
2) Defining every layer yourself, without Sequential, then iterating with

for name, module in model.named_modules():
import torch
import torch.nn as nn

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

x = Bottleneck(32, 32)
# print(x.conv1.weight)
for name, module in x.named_modules():
    print('modules:', module)
    print(module.conv1.weight)   # succeeds only once (see below)
modules: Bottleneck(
  (conv1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
)
Parameter containing:
tensor([[[[ 0.0640]],
        ...
        (remaining 32×32×1×1 weight values elided)
       ]]]], requires_grad=True)
modules: Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
Explanation: the "whole" entry printed fine, and module.conv1.weight printed along with it. But as soon as the loop reached a "part", it crashed with an AttributeError along the lines of "object has no attribute 'conv1'". The reason is that named_modules() yields the model itself first and then every submodule: on the first iteration, module is the whole Bottleneck, which really does have a conv1 attribute; on later iterations, module is a leaf layer (for example, the Conv2d itself), which has no conv1 attribute, so the lookup fails. In short, module.conv1.weight only makes sense when module is the "whole", never a "part".
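One way around this, as a minimal sketch: filter by type instead of assuming every yielded module has a conv1 attribute, so only the leaf Conv2d layers are touched:

for name, module in x.named_modules():
    if isinstance(module, nn.Conv2d):   # leaf conv layers only
        print(name, module.weight.shape)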
3) Building with Sequential
import torch
import torch.nn as nn

# x = nn.Conv2d(in_channels=32, out_channels=6, kernel_size=3, stride=1, padding=0)
# print(x.weight)

class MyNet(nn.Module):
    def __init__(self, inplanes, planes):
        super(MyNet, self).__init__()
        self.conv_block = torch.nn.Sequential()
        self.conv_block.add_module("conv1", torch.nn.Conv2d(inplanes, planes, 3, 1, 1))
        self.conv_block.add_module("relu1", torch.nn.ReLU())
        self.conv_block.add_module("pool1", torch.nn.MaxPool2d(2))
        self.dense_block = torch.nn.Sequential()
        self.dense_block.add_module("dense1", torch.nn.Linear(32 * 3 * 3, 128))
        self.dense_block.add_module("relu2", torch.nn.ReLU())
        self.dense_block.add_module("dense2", torch.nn.Linear(128, 10))

    def forward(self, x):
        conv_out = self.conv_block(x)
        res = conv_out.view(conv_out.size(0), -1)
        out = self.dense_block(res)
        return out

model = MyNet(3, 32)
# print(model.conv_block.conv1.weight)
for name, module in model.named_modules():
    print('modules:', module)
    print(module.conv_block.conv1.weight)
modules: MyNet(
  (conv_block): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU()
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense_block): Sequential(
    (dense1): Linear(in_features=288, out_features=128, bias=True)
    (relu2): ReLU()
    (dense2): Linear(in_features=128, out_features=10, bias=True)
  )
)
Parameter containing:
tensor([[[[ 1.7723e-01, -1.5411e-01, -1.1020e-01],
        ...
        (remaining 32×3×3×3 weight values elided)
       ]]]], requires_grad=True)
modules: Sequential(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
It has to be module.conv_block.conv1.weight to print, because in the network definition conv1 lives inside conv_block, so the attribute path must follow that nesting. And once again, the "whole" iteration prints the weights fine, but as soon as the loop reaches a "part" (here, the conv_block Sequential, which has no conv_block attribute of its own), it raises an AttributeError.
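An alternative that sidesteps the problem entirely, as a minimal sketch: named_parameters() iterates over the parameters themselves rather than over modules, so no attribute lookup on child modules is needed:

for name, param in model.named_parameters():
    if name.endswith('weight'):
        print(name, param.shape)   # e.g. conv_block.conv1.weight torch.Size([32, 3, 3, 3])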
3. A model defined with Sequential(OrderedDict(...)): printing two weights still fails once the loop reaches a "part"
import torch
import torch.nn as nn
from collections import OrderedDict

class MyNet(torch.nn.Module):
    def __init__(self, inplanes, planes):
        super(MyNet, self).__init__()
        self.conv1 = torch.nn.Sequential(
            OrderedDict([
                ("conv", torch.nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)),
                ("relu1", torch.nn.ReLU()),
                ("pool", torch.nn.MaxPool2d(2))
            ])
        )
        self.dense = torch.nn.Sequential(
            OrderedDict([
                ("dense1", torch.nn.Linear(32 * 3 * 3, 128)),
                ("relu2", torch.nn.ReLU()),
                ("dense2", torch.nn.Linear(128, 10))
            ])
        )

    def forward(self, x):
        conv_out = self.conv1(x)
        res = conv_out.view(conv_out.size(0), -1)
        out = self.dense(res)
        return out

model = MyNet(3, 32)
# print(model.conv1.conv.weight)
# for name, module in model.named_children():
#     print('children module:', name)
for name, module in model.named_modules():
    print("***************")
    print('names:', name)
    print('modules:', module)
    print(module.conv1.conv.weight)    # only prints the conv.weight inside conv1, and only on the first iteration
    print(module.dense.dense1.weight)
***************
names:
modules: MyNet(
  (conv1): Sequential(
    (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (relu1): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense): Sequential(
    (dense1): Linear(in_features=288, out_features=128, bias=True)
    (relu2): ReLU()
    (dense2): Linear(in_features=128, out_features=10, bias=True)
  )
)
Parameter containing:
tensor([[[[ 0.1538]],
        ...
        (remaining 32×3×1×1 weight values elided)
       ]]]], requires_grad=True)
Parameter containing:
tensor([[ 0.0487, -0.0190, -0.0245,  ...,  0.0512, -0.0006, -0.0239],
        ...
        (remaining 128×288 weight values elided)],
       requires_grad=True)
***************
names: conv1
modules: Sequential(
  (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
Conclusion: the module in module.conv1.conv.weight only works when it refers to the "whole" model. So to avoid the error, add a guard that lets through only the modules that actually carry the attribute you want.

Solution: borrow the pattern from RepVGG, which picks out exactly the modules that define a repvgg_convert method before taking the next step:
for name, module in model.named_modules():    # yields the model and every submodule
    if hasattr(module, 'repvgg_convert'):     # guard: keep only modules that define it
        kernel, bias = module.repvgg_convert()
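The same guard pattern solves our weight-printing problem directly; a minimal sketch (hasattr keeps the loop from raising AttributeError on layers such as ReLU or MaxPool2d that have no weight):

for name, module in model.named_modules():
    if hasattr(module, 'weight') and module.weight is not None:
        print(name, module.weight.shape)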