Backbone of Computer Vision
This page focuses on building convolution layers from scratch so we can understand what is happening under the hood.
import torch
import torch.nn as nn
import torch.nn.functional as F
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier.

    The network applies two ``Conv2d -> ReLU -> MaxPool2d`` stages followed by
    two fully connected layers. With the default arguments it expects a
    single-channel 28x28 input and produces 10 output scores per sample.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output scores produced per sample.
        image_size: Height and width of the (square) input images. Each of
            the two pooling stages halves it (rounding down), which
            determines the input width of the first fully connected layer.

    Raises:
        ValueError: If ``image_size`` is smaller than 4, which would leave
            nothing after the two 2x2 pooling stages.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10,
                 image_size: int = 28) -> None:
        super().__init__()
        if image_size < 4:
            raise ValueError(
                f"image_size must be at least 4, got {image_size}"
            )

        # First convolutional layer. kernel_size=3 with padding=1 and stride=1
        # keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=32,
                               kernel_size=3, stride=1, padding=1)
        # Pooling layer, shared by both stages; halves height and width.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Second convolutional layer (again size-preserving).
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)

        # Two pooling stages shrink each side by a factor of 4 (28 -> 14 -> 7
        # for the default), so the flattened feature vector has
        # 64 * pooled * pooled entries.
        pooled = image_size // 2 // 2
        self.flat_features = 64 * pooled * pooled

        # Fully connected layers
        self.fc1 = nn.Linear(self.flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channels, image_size, image_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding the raw output of
            the last linear layer (no activation is applied to it).
        """
        # First convolutional layer followed by ReLU activation and pooling
        x = self.pool(F.relu(self.conv1(x)))
        # Second convolutional layer followed by ReLU activation and pooling
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, features)``, this never folds a wrongly sized input into
        # extra batch rows; a size mismatch surfaces as an error in ``fc1``.
        x = torch.flatten(x, start_dim=1)
        # First fully connected layer with ReLU
        x = F.relu(self.fc1(x))
        # Second fully connected layer (output layer)
        return self.fc2(x)
# Instantiate the network with its default configuration.
model = SimpleCNN()
# Dummy batch drawn from a standard normal distribution:
# 1 image, 1 channel, 28x28 pixels.
example_input = torch.randn((1, 1, 28, 28))
# Forward pass; calling the module invokes its forward() method.
output = model(example_input)
# Excerpt: the three layer definitions from SimpleCNN.__init__, repeated here
# with one argument per line. `self` is only defined inside that method, so
# this snippet is for reading and does not run on its own.

# First convolutional layer: 1 input channel -> 32 output channels.
# A 3x3 kernel with stride 1 and padding 1 keeps height and width unchanged.
self.conv1 = nn.Conv2d(
in_channels=1,
out_channels=32,
kernel_size=3,
stride=1,
padding=1
)
# Pooling layer: a 2x2 window moved with stride 2 halves height and width.
self.pool = nn.MaxPool2d(
kernel_size=2,
stride=2
)
# Second convolutional layer: takes the 32 channels produced by conv1 and
# outputs 64 channels, with the same size-preserving kernel/stride/padding.
self.conv2 = nn.Conv2d(
in_channels=32,
out_channels=64,
kernel_size=3,
stride=1,
padding=1
)
We will be looking at the Conv1d through Conv3d layers and their implementations.
