VGG
1. VGG block
- 3x3 Conv, padding 1 (n layers, each followed by ReLU; m output channels, usually double the previous block's)
- 2x2 MaxPool, stride 2 (halves the height and width once per block)
It turns out that stacking more 3x3 Conv layers (a deeper network) works better than using fewer 5x5 Conv layers.
2. Architecture
- multiple VGG blocks
- Dense (4096) (Flatten, Linear, ReLU, Dropout)
- Dense (4096) (Linear, ReLU, Dropout)
- Dense (1000)
3. Code
import torch
from torch import nn
def vgg_block(num_conv, in_channels, out_channels) -> nn.Sequential:
    """Build one VGG block: ``num_conv`` 3x3 convolutions, then one 2x2 max-pool.

    Args:
        num_conv: Number of (Conv2d, ReLU) pairs in the block.
        in_channels: Channel count of the tensor entering the block.
        out_channels: Channel count produced by every convolution in the block.

    Returns:
        An ``nn.Sequential`` holding the Conv2d/ReLU pairs followed by a single
        ``MaxPool2d`` layer.
    """
    layers: list[nn.Module] = []
    for _ in range(num_conv):
        # kernel_size=3 with padding=1 leaves height and width unchanged.
        layers.append(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        )
        layers.append(nn.ReLU())
        # Only the first convolution sees the block's input channel count.
        in_channels = out_channels
    # kernel_size=2 with stride=2 halves height and width (rounding down).
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)
class vgg_net(nn.Module):
    """VGG-style classifier: stacked VGG blocks followed by three dense layers.

    Args:
        cfg: Sequence of ``(num_conv, out_channels)`` pairs, one per VGG block.
        in_channels: Channel count of the input image.
        num_classes: Width of the final linear layer. Defaults to 10.
        feature_size: Height/width of the feature map entering the dense head.
            The default 7 corresponds to a 224x224 input passed through five
            blocks (224 / 2**5 = 7); change it for other input sizes or depths.
    """

    def __init__(
        self,
        cfg: tuple,
        in_channels,
        *,
        num_classes: int = 10,
        feature_size: int = 7,
    ) -> None:
        super().__init__()
        layers: list[nn.Module] = []
        for num_conv, out_channels in cfg:
            layers.append(vgg_block(num_conv, in_channels, out_channels))
            # The next block consumes what this block produced.
            in_channels = out_channels
        # After the loop ``in_channels`` is the channel count of the last block.
        flat_features = in_channels * feature_size * feature_size
        self.model = nn.Sequential(
            *layers,
            nn.Flatten(),
            nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(p=0.5),
            nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, X: torch.Tensor, *, verbose: bool = True) -> torch.Tensor:
        """Run ``X`` through every layer and return class probabilities.

        Args:
            X: Input batch; it is fed to the first layer of ``self.model``.
            verbose: When true (the default), print each layer's class name and
                output shape, then the final pre-softmax tensor.

        Returns:
            The last layer's output with softmax applied over dimension 1.
        """
        for layer in self.model:
            X = layer(X)
            if verbose:
                print(layer.__class__.__name__, "output shape:\t", X.shape)
        if verbose:
            print(X)
        # NOTE: the result is already normalized; losses that expect raw
        # logits (e.g. nn.CrossEntropyLoss) should not be fed this output.
        return torch.softmax(X, dim=1)  # Classification
# Demo: push one random 224x224 RGB image through the network.
# The input is drawn before the model is built, then the model is called once.
X = torch.rand(size=(1, 3, 224, 224), dtype=torch.float32)

# One (num_conv, out_channels) pair per VGG block.
cfg = (
    (3, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)

net = vgg_net(cfg, in_channels=3)
net(X)