✔️nn.Sequential을 통한 레이어 요약
import torch
import torch.nn as nn

H, W = 224, 224
input_tensor = torch.randn(size=(8, 3, H, W))


class VGG11(nn.Module):
    """VGG-11 style network for 3-channel inputs producing 1000 logits.

    ``feature`` holds eight 3x3 convolutions (each followed by ReLU) with five
    2x2 max-pools in between; ``classifier`` holds three fully connected
    layers.  The first linear layer expects 512*7*7 features, which is what
    five halvings of a 224x224 input produce.
    """

    def __init__(self):
        super(VGG11, self).__init__()
        self.feature = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, 1000) for image batch ``x``."""
        x = self.feature(x)  # (b, c, h, w)
        # Flatten everything but the batch axis: (b, c, h, w) -> (b, c*h*w),
        # i.e. (b, 512*7*7) for a 224x224 input.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        # No softmax here: nn.CrossEntropyLoss applies log-softmax itself.
        return x


model = VGG11()
# Sanity check: push a random batch through the model to confirm that the
# layer shapes line up.  Call the module itself rather than ``forward`` so
# that nn.Module.__call__ (and any registered hooks) runs.
model(input_tensor)
- self.레이어 명을 지정해 코드를 간소화시킬 수 있음
- 파라미터와 Output의 shape을 알고 싶다면?
from torchsummary import summary

model = VGG11()
# torchsummary's ``summary`` defaults to device="cuda" and then builds a CUDA
# input tensor whenever CUDA is available.  The model here was never moved
# off the CPU, so request a CPU summary explicitly to avoid an input/weight
# device mismatch on GPU machines.
summary(model, input_size=(3, 224, 224), device="cpu")
이렇게 torchsummary를 통해 summary를 import할 수 있고, 이를 요약한 내용은 아래와 같다
✔️VGG13 & VGG19 구현
- VGG11뿐만 아니라 weight layer(Conv + FC)가 13개·19개인 VGG13·VGG19처럼 층을 더 깊게 쌓아 Accuracy를 높일 수 있다.
VGG13
import torch
import torch.nn as nn

H, W = 224, 224
input_tensor = torch.randn(size=(16, 3, H, W))


class VGG13(nn.Module):
    """VGG-13 style network: five double-conv stages plus a 3-layer classifier.

    Every stage is conv3x3 -> ReLU -> conv3x3 -> ReLU -> 2x2 max-pool, so the
    spatial size is halved five times before the fully connected head.
    """

    @staticmethod
    def _double_conv(n_in, n_out):
        """Build one stage: two 3x3 conv + ReLU pairs followed by a 2x2 max-pool."""
        return nn.Sequential(
            nn.Conv2d(in_channels=n_in, out_channels=n_out, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=n_out, out_channels=n_out, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def __init__(self):
        super().__init__()
        self.conv1 = self._double_conv(3, 64)
        self.conv2 = self._double_conv(64, 128)
        self.conv3 = self._double_conv(128, 256)
        self.conv4 = self._double_conv(256, 512)
        self.conv5 = self._double_conv(512, 512)

        head = [
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the five conv stages, flatten per sample, and return the logits."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        flat = x.view(x.size(0), -1)
        return self.classifier(flat)


model = VGG13()
model(input_tensor)
VGG19
class VGG19(nn.Module):
    """VGG-19 style network: conv stages of depth 2, 2, 4, 4, 4 plus a 3-layer classifier.

    Each stage is a run of 3x3 conv + ReLU pairs closed by a 2x2 max-pool.
    """

    @staticmethod
    def _stage(n_in, n_out, depth):
        """Build ``depth`` 3x3 conv + ReLU pairs followed by a 2x2 max-pool."""
        modules = []
        channels = n_in
        for _ in range(depth):
            modules.append(
                nn.Conv2d(in_channels=channels, out_channels=n_out, kernel_size=3, padding=1, stride=1)
            )
            modules.append(nn.ReLU())
            # Only the first conv of a stage changes the channel count.
            channels = n_out
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*modules)

    def __init__(self):
        super().__init__()
        self.conv1 = self._stage(3, 64, depth=2)
        self.conv2 = self._stage(64, 128, depth=2)
        self.conv3 = self._stage(128, 256, depth=4)
        self.conv4 = self._stage(256, 512, depth=4)
        self.conv5 = self._stage(512, 512, depth=4)
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        )

    def forward(self, x):
        """Apply conv1..conv5 in order, flatten per sample, and return the logits."""
        out = self.conv5(self.conv4(self.conv3(self.conv2(self.conv1(x)))))
        out = out.view(out.size(0), -1)
        return self.classifier(out)


H, W = 224, 224
input_tensor = torch.randn(size=(8, 3, H, W))

model = VGG19()
model(input_tensor)
✔️ConvBlock
- 같은 코드를 반복해서 작성하는 것은 비효율적이다
- Module로 만들어서 사용하면 가독성이 높아진다
import torch.nn as nn


class ConvBlock(nn.Module):
    """A run of 3x3 conv + ReLU pairs closed by a 2x2 max-pool.

    Args:
        in_channels: channel count of the block input (used by the first conv).
        out_channels: channel count produced by every conv in the block.
        n_layers: number of conv + ReLU pairs.  One pair is always created;
            values of 2 or more append ``n_layers - 1`` additional pairs.
    """

    def __init__(self, in_channels, out_channels, n_layers):
        super(ConvBlock, self).__init__()

        # The first conv is the only one that sees ``in_channels``.
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]

        # Runs only when n_layers >= 2: the remaining convs keep the channel
        # count at ``out_channels``.
        for _ in range(n_layers - 1):
            layers.append(
                nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1)
            )
            layers.append(nn.ReLU())

        # Close the block with max pooling.
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        # Unpack the list into nn.Sequential so the layers are registered as
        # sub-modules of this block.
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Return the pooled feature map produced by the stacked layers."""
        return self.layers(x)
self.layers에 아무런 값을 넣지 않고 만들 시, 아래와 같은 결과를 확인할 수 있다.
class ConvBlock(nn.Module):
    """``n_layers`` 3x3 conv + ReLU pairs followed by a 2x2 max-pool.

    Same block as the list-seeded variant, but the layer list starts empty and
    every conv is added inside the loop.

    Args:
        in_channels: channel count of the block input (used by the first conv).
        out_channels: channel count produced by every conv in the block.
        n_layers: number of conv + ReLU pairs to stack.
    """

    def __init__(self, in_channels, out_channels, n_layers):
        super(ConvBlock, self).__init__()

        layers = []
        for _ in range(n_layers):
            layers.append(
                nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1)
            )
            layers.append(nn.ReLU())
            # After the first conv, every following conv takes out_channels as input.
            in_channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Return the pooled feature map.

        The earlier version returned ``self.intermediate_layers, x.shape``;
        that attribute is never defined, and callers that chain blocks need
        the tensor itself, so the tensor is returned.
        """
        return self.layers(x)
- pooling을 하면 stride값이 2가 되면서 image size가 절반이 된다.
- convolutional layer에서 fully connected layer로 넘어갈 때는 feature map (c, h, w)를 1차원 벡터(c*h*w)로 펼쳐서(flatten) 입력한다
- activation(ReLU)는 연산만 하기 때문에 크기는 계속 유지된다
- weight / bias가 딥러닝에서 파라미터라는 건 변함없는 사실
- ConvBlock에서의 Output Shape, Parameter 개수 생각해보기
① Case1 (n_layers=1)
Layer | Output Shape | Parameter |
Conv2d-1 | (64, 100, 100) | 1,792 |
ReLU-2 | (64, 100, 100) | 0 |
MaxPool2d-3 | (64, 50, 50) | 0 |
② Case2 (n_layers=2)
Layer | Output Shape | Parameter |
Conv2d-1 | (64, 100, 100) | 1,792 |
ReLU-2 | (64, 100, 100) | 0 |
Conv2d-3 | (64, 100, 100) | 36,928 |
ReLU-4 | (64, 100, 100) | 0 |
MaxPool2d-5 | (64, 50, 50) | 0 |
- nn.Conv2d 레이어의 파라미터 수 계산
- Convolutional Layer에서는 weight를 kernel이 갖고 있음. 즉 (3, 3) kernel이라면 9개의 weight를 갖고 있는 것
- {kernel size × kernel channel 수(= input channel 수) × kernel 수(= output channel 수)} + bias 수(= output channel 수)
- (3*3)X3X64 + 64
- 첫번째 conv2d: {(3X3)X3 + 1}X64 = 1792
- 두번째 conv2d: {(3X3)X64 + 1}X64 = 36928
- 두번째 conv2d의 input channel 수는 첫번째 conv2d의 output channel 수인 64이기 때문에 3 대신 64를 곱해준다
3x3 kernel에 padding=1, stride=1을 사용하므로 conv를 통과해도 output_shape가 (64, 100, 100)으로 유지됨
n_layers를 늘려 convolutional layer를 더 추가하더라도, 추가되는 layer는 모두 input/output channel이 64이기 때문에 layer당 parameter 수는 36,928로 동일하다
ConvBlock구현
- VGG13 / VGG19 모두를 구현
VGG13
class VGG13Block(nn.Module):
    """VGG-13 assembled from five ``ConvBlock`` stages plus a 3-layer classifier.

    Channel plan per stage: 3 -> 64 -> 128 -> 256 -> 512 -> 512, two convs per
    stage.  Assumes ``ConvBlock.forward`` returns the pooled feature tensor
    and that inputs are 224x224, so the flattened size is 512*7*7.
    """

    def __init__(self):
        super(VGG13Block, self).__init__()
        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=2)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=2)
        # conv4 must widen to 512 channels: conv5 takes 512 input channels.
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=2)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=2)
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        )

    def forward(self, x):
        """Run conv1..conv5 in order, flatten per sample, and return the logits."""
        # There is no ``self.feature`` attribute on this class; the stages are
        # applied one by one.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
VGG19
class VGG19Block(nn.Module):
    """VGG-19 assembled from five ``ConvBlock`` stages plus a 3-layer classifier.

    Channel plan per stage: 3 -> 64 -> 128 -> 256 -> 512 -> 512 with 2, 2, 4,
    4, 4 convs respectively.  Assumes ``ConvBlock.forward`` returns the pooled
    feature tensor and that inputs are 224x224, so the flattened size is
    512*7*7.
    """

    def __init__(self):
        super(VGG19Block, self).__init__()
        self.conv1 = ConvBlock(in_channels=3, out_channels=64, n_layers=2)
        self.conv2 = ConvBlock(in_channels=64, out_channels=128, n_layers=2)
        self.conv3 = ConvBlock(in_channels=128, out_channels=256, n_layers=4)
        # conv4 must widen to 512 channels: conv5 takes 512 input channels.
        self.conv4 = ConvBlock(in_channels=256, out_channels=512, n_layers=4)
        self.conv5 = ConvBlock(in_channels=512, out_channels=512, n_layers=4)
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512 * 7 * 7, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000),
        )

    def forward(self, x):
        """Run conv1..conv5 in order, flatten per sample, and return the logits."""
        # There is no ``self.feature`` attribute on this class; the stages are
        # applied one by one.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
이후엔 CIFAR를 통해 LeNet과 VGG모두를 트레이닝
- 이미지 사이즈가 다르기 때문에 사이즈 찾아보기
- VGG19 성능이 낮은데, 이 모델 데이터셋을 커스텀해 시도
✍🏻VGG19 모델로 학습시키기
import torch
import torch.nn as nn
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, transforms
from torch.utils.data import DataLoader
from torch.optim import SGD
from tqdm import tqdm

BATCH_SIZE = 64
# NOTE(review): 0.1 is a large step size for plain SGD on a deep network
# without normalization; consider lowering it if the loss does not decrease.
LR = 0.1
EPOCHS = 10
# VGG19's classifier expects 512*7*7 features, i.e. a 224x224 input.
IMAGE_SIZE = 224

# CIFAR-10 images are 32x32.  Five 2x2 poolings would shrink them to 1x1 and
# the flattened size (512) would not match the classifier, so the images are
# resized to the resolution the model was built for.
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    ToTensor(),
])
dataset = CIFAR10(root='data', train=True, download=True, transform=transform)
# Shuffle so each epoch sees the training samples in a different order.
data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
n_sample = len(dataset)

# Pick the best available device: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
elif torch.backends.mps.is_available():
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'

model = VGG19().to(DEVICE)
loss_function = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=LR)

# Per-epoch history of the sample-averaged loss and accuracy.
losses, accs = [], []
for epoch in range(EPOCHS):
    epoch_loss, n_corrects = 0., 0
    for X, y in tqdm(data_loader):
        X, y = X.to(DEVICE), y.to(DEVICE)

        pred = model(X)
        loss = loss_function(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # loss is the batch mean; weight it by the batch size so the epoch
        # average stays correct when the last batch is smaller.
        epoch_loss += loss.item() * len(X)
        n_corrects += (pred.argmax(dim=1) == y).sum().item()

    epoch_loss /= n_sample
    losses.append(epoch_loss)

    epoch_acc = n_corrects / n_sample
    accs.append(epoch_acc)

    print(f"Epoch: {epoch + 1}")
    print(f"Loss: {epoch_loss: .4f}, Accuracy: {epoch_acc: .4f}")
'AI데이터 엔지니어, 새싹' 카테고리의 다른 글
61th_11_30(Thu)_ResNet (1) | 2023.11.30 |
---|---|
60th_11_29(Wed)_GoogLeNet (0) | 2023.11.29 |
58th_11_27(Mon)_Convolutinal Neural Networks (1) | 2023.11.27 |
3주차(2) - [pandas] 데이터 전처리 & 데이터 핸들링 (0) | 2023.09.13 |
3주차(1) - [Data]데이터 분석 프로세스 (0) | 2023.09.13 |