본문 바로가기
AI데이터 엔지니어, 새싹

59th_11_28(Tue)_VGG Implementation

by Leetora 2023. 11. 29.

✔️nn.Sequential을 통한 레이어 요약

import torch
import torch.nn as nn

H, W = 224, 224


class VGG11(nn.Module):
    """VGG-11: 8 convolutional layers followed by 3 fully connected layers.

    Args:
        num_classes: Length of the logits vector produced by the classifier.
            Defaults to 1000, the value that used to be hard-coded.

    The first classifier layer expects ``512 * 7 * 7`` features, which is what
    ``self.feature`` yields for 224x224 inputs (five stride-2 poolings: 224 -> 7).
    """

    def __init__(self, num_classes=1000):
        super().__init__()
        # Wrapping the layers in nn.Sequential keeps forward() short.
        self.feature = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.feature(x)  # (b, 3, H, W) -> (b, 512, H/32, W/32)
        # The MLP needs 2-D input: (b, c, h, w) -> (b, c*h*w) == (b, 512*7*7)
        x = torch.flatten(x, start_dim=1)
        # No softmax here: nn.CrossEntropyLoss applies it inside the loss.
        return self.classifier(x)


if __name__ == "__main__":
    # Smoke test after modelling: feed a random tensor with the expected input
    # shape and check the output shape ourselves.
    input_tensor = torch.randn(size=(8, 3, H, W))
    model = VGG11()
    # Call the module itself (not .forward) so hooks run, and skip autograd
    # bookkeeping because this pass is only a shape check.
    with torch.no_grad():
        output = model(input_tensor)
    print(output.shape)  # expected: torch.Size([8, 1000])
  • self.레이어 명을 지정해 코드를 간소화시킬 수 있음
  • 파라미터와 Output의 shape을 알고 싶다면?
from torchsummary import summary

model = VGG11()
# torchsummary defaults to device="cuda" and then creates its dummy input on
# the GPU whenever one is available. The model built here still lives on the
# CPU, so pin the device explicitly to avoid a CPU/GPU tensor mismatch.
summary(model, input_size=(3, 224, 224), device="cpu")

이렇게 torchsummary에서 summary를 import해 사용할 수 있고, 모델을 요약한 결과는 아래와 같다

스크린샷 2023-11-28 오후 6.48.45.jpg

✔️VGG13 & VGG19 구현

  • VGG11뿐만 아니라 weight layer(Conv + FC)가 13개·19개인 VGG13·VGG19처럼 네트워크를 더 깊게 쌓아 Accuracy를 높일 수 있다.

VGG13

import torch
import torch.nn as nn

H, W = 224, 224


class VGG13(nn.Module):
    """VGG-13: five stages of two 3x3 convs each (10 convs) plus 3 fully connected layers.

    Args:
        num_classes: Length of the logits vector produced by the classifier.
            Defaults to 1000, the value that used to be hard-coded.

    Every stage ends with a stride-2 max pooling, so a 224x224 input reaches
    the classifier as a 512x7x7 feature map.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)

        # (b, 512, 7, 7) -> (b, 512*7*7) for the fully connected layers
        x = torch.flatten(x, start_dim=1)

        return self.classifier(x)


if __name__ == "__main__":
    # Shape check only: no gradients are needed, and skipping them avoids
    # keeping every intermediate activation of a batch-16 forward pass alive.
    input_tensor = torch.randn(size=(16, 3, H, W))
    model = VGG13()
    with torch.no_grad():
        output = model(input_tensor)
    print(output.shape)  # expected: torch.Size([16, 1000])

VGG19

class VGG19(nn.Module):
    """VGG-19: stages of 2, 2, 4, 4 and 4 convs (16 convs) plus 3 fully connected layers.

    Args:
        num_classes: Length of the logits vector produced by the classifier.
            Defaults to 1000, the value that used to be hard-coded.

    Every stage ends with a stride-2 max pooling, so a 224x224 input reaches
    the classifier as a 512x7x7 feature map.
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)

        # (b, 512, 7, 7) -> (b, 512*7*7) for the fully connected layers
        x = torch.flatten(x, start_dim=1)

        return self.classifier(x)


H, W = 224, 224

if __name__ == "__main__":
    # Shape check only: run without autograd so the intermediate activations
    # of the 16 conv layers are not kept alive for a backward pass.
    input_tensor = torch.randn(size=(8, 3, H, W))
    model = VGG19()
    with torch.no_grad():
        output = model(input_tensor)
    print(output.shape)  # expected: torch.Size([8, 1000])

✔️ConvBlock

  • 같은 코드를 반복해서 작성하는 것은 비효율적이다
  • Module로 만들어서 사용하면 가독성이 높아진다
import torch.nn as nn


class ConvBlock(nn.Module):
    """One VGG stage: (3x3 conv + ReLU) repeated, then a 2x2 max pooling.

    Args:
        in_channels: Channels of the block input; only the first conv uses it.
        out_channels: Channels produced by every conv in the block.
        n_layers: Number of conv layers. The first conv is always created, so
            values below 1 behave like 1.
    """

    def __init__(self, in_channels, out_channels, n_layers):
        super().__init__()

        # The first conv layer is the only one that consumes in_channels.
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, padding=1),
            nn.ReLU(),
        ]

        # Runs only when n_layers >= 2: the remaining convs keep the channel count.
        for _ in range(n_layers - 1):
            layers.append(nn.Conv2d(in_channels=out_channels, out_channels=out_channels,
                                    kernel_size=3, padding=1))
            layers.append(nn.ReLU())

        # Max pooling closes the block.
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        # Unpack the list so nn.Sequential registers every layer as a submodule.
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block; spatial size is halved by the final pooling."""
        return self.layers(x)

self.layers를 빈 리스트로 시작해 반복문만으로 layer를 쌓으면 아래와 같이 구현할 수 있다.

class ConvBlock(nn.Module):
    """One VGG stage: ``n_layers`` x (3x3 conv + ReLU), then a 2x2 max pooling.

    Args:
        in_channels: Channels of the block input; only the first conv uses it.
        out_channels: Channels produced by every conv in the block.
        n_layers: Number of conv layers to stack before the pooling.
    """

    def __init__(self, in_channels, out_channels, n_layers):
        super().__init__()

        layers = []
        for _ in range(n_layers):
            layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                    kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            # Every conv after the first one takes the previous conv's output.
            in_channels = out_channels

        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        # Unpack the list so nn.Sequential registers every layer as a submodule.
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block and return the resulting tensor.

        The tensor itself is returned (not its shape) so that blocks can be
        chained one after another.
        """
        return self.layers(x)
  • pooling을 하면 stride값이 2가 되면서 image size가 절반이 된다.
  • convolutional layer의 출력이 fully connected layer로 넘어갈 때는 (b, c, h, w)를 (b, c*h*w)로 flatten해서 한 줄로 펴서 전달한다
  • activation(ReLU)는 연산만 하기 때문에 크기는 계속 유지된다
  • weight / bias가 딥러닝에서 파라미터라는 건 변함없는 사실
  • ConvBlock에서의 Output Shape, Parameter 개수 생각해보기

① Case1 (n_layers=1)

     
Layer Output Shape Parameter
Conv2d-1 (64, 100, 100) 1,792
ReLU-2 (64, 100, 100) 0
MaxPool2d-3 (64, 50, 50) 0

② Case2 (n_layers=2)

     
Layer Output Shape Parameter
Conv2d-1 (64, 100, 100) 1,792
ReLU-2 (64, 100, 100) 0
Conv2d-3 (64, 100, 100) 36,928
ReLU-4 (64, 100, 100) 0
MaxPool2d-5 (64, 50, 50) 0

  • - nn.Conv2d 레이어의 파라미터 수 계산
  • Convolutional Layer에서는 weight를 kernel이 갖고 있음. 즉 (3, 3) kernel이라면 9개의 weight를 갖고 있는 것
  • (kernel size × kernel channel 수(input channel 수) × kernel 수(output channel 수)) + bias 수(output channel 수)
  • (3*3)X3X64 + 64

스크린샷 2023-11-28 오후 3.21.10.jpg

  • 첫번째 conv2d: {(3X3)X3 + 1}X64 = 1792
  • 두번째 conv2d: {(3X3)X64 + 1}X64 = 36928
    • 두번째 conv2d의 input channel 수는 64이기 때문에 3 대신 64를 곱해준다

output_shape가 (64, 100, 100)으로 유지됨
convolutional layer가 더 추가되더라도 input/output channel이 모두 64이기 때문에, 추가되는 layer 하나당 parameter 수는 36,928개로 동일하다

ConvBlock구현

  • VGG13 / VGG19 모두를 구현

VGG13

class VGG13Block(nn.Module):
    """VGG-13 assembled from five ``ConvBlock`` stages (two convs each) and a 3-layer classifier.

    The classifier's first layer expects ``512 * 7 * 7`` features, i.e. a
    224x224 input halved by the pooling at the end of each of the five stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=2)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=2)
        # conv4 must widen to 512 channels: conv5 takes 512 input channels.
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=2)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=2)

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, 1000)."""
        # There is no single feature extractor attribute; run the stages in order.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)  # (b, c, h, w) -> (b, c*h*w)
        x = self.classifier(x)
        return x

VGG19

class VGG19Block(nn.Module):
    """VGG-19 assembled from five ``ConvBlock`` stages (2, 2, 4, 4, 4 convs) and a 3-layer classifier.

    The classifier's first layer expects ``512 * 7 * 7`` features, i.e. a
    224x224 input halved by the pooling at the end of each of the five stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=2)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=4)
        # conv4 must widen to 512 channels: conv5 takes 512 input channels.
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=4)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=4)

        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, 1000)."""
        # There is no single feature extractor attribute; run the stages in order.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)  # (b, c, h, w) -> (b, c*h*w)
        x = self.classifier(x)
        return x

이후엔 CIFAR를 통해 LeNet과 VGG모두를 트레이닝
- 이미지 사이즈가 다르기 때문에 사이즈 찾아보기
- VGG19 성능이 낮은데, 이 모델 데이터셋을 커스텀해 시도

✍🏻VGG19 모델로 학습시키기

from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, transforms
from torch.utils.data import DataLoader
from torch.optim import SGD
from tqdm import tqdm

BATCH_SIZE = 64
LR = 0.1  # NOTE(review): high for plain SGD on a VGG without batch norm - lower it if the loss does not decrease
EPOCHS = 10

# CIFAR-10 images are 32x32. After VGG19's five stride-2 poolings that leaves a
# 512x1x1 feature map, which does not match the classifier's 512*7*7 input.
# Upscale to 224x224 so the flattened features have the expected size.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    ToTensor(),
])

dataset = CIFAR10(root='data', train=True, download=True, transform=transform)
# Shuffle so every epoch sees the training samples in a different order.
data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
n_sample = len(dataset)

if torch.cuda.is_available():
    DEVICE = 'cuda'
elif torch.backends.mps.is_available():
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'

model = VGG19().to(DEVICE)
loss_function = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=LR)

losses, accs = [], []
for epoch in range(EPOCHS):
    epoch_loss, n_corrects = 0., 0
    for X, y in tqdm(data_loader):
        X, y = X.to(DEVICE), y.to(DEVICE)

        pred = model(X)
        loss = loss_function(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # loss is the batch mean; weight it by the batch size so the epoch
        # average stays correct when the last batch is smaller.
        epoch_loss += loss.item() * len(X)
        n_corrects += (pred.argmax(dim=1) == y).sum().item()

    epoch_loss /= n_sample
    losses.append(epoch_loss)

    epoch_acc = n_corrects / n_sample
    accs.append(epoch_acc)

    print(f"Epoch: {epoch + 1}")
    print(f"Loss: {epoch_loss: .4f}, Accuracy: {epoch_acc: .4f}")