— Backbone of Computer Vision
CNNs are a type of neural network made for images. They work by scanning small parts of an image, not the whole thing at once.
On this page I will go through the implementation of a CNN, its forward pass, and how the input (an image) interacts with the model.
— Alexi
import torch
import torch.nn as nn
import torch.nn.functional as F
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for 28x28 images.

    Architecture: (3x3 conv -> ReLU -> 2x2 max-pool) twice, followed by two
    fully connected layers. Each padded 3x3 convolution keeps the spatial
    size and each pooling step halves it, so a 28x28 input is reduced to
    7x7 with 64 channels before the classifier head.

    Args:
        in_channels: Number of channels in the input images (1 = grayscale).
        num_classes: Number of logits produced by the output layer.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        # First convolutional layer: in_channels -> 32 feature maps.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        # Pooling layer (stateless, so it is shared by both conv stages).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Second convolutional layer: 32 -> 64 feature maps.
        self.conv2 = nn.Conv2d(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        # Fully connected layers. 64 * 7 * 7 is the flattened feature count
        # for a 28x28 input after two 2x2 poolings (28 -> 14 -> 7).
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (N, in_channels, 28, 28). A single unbatched
                image of shape (in_channels, 28, 28) is also accepted and is
                treated as a batch of one.

        Returns:
            Tensor of shape (N, num_classes) holding unnormalized logits.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                64 * 7 * 7 features expected by the first linear layer.
        """
        if x.dim() == 3:
            # Unbatched (C, H, W) input: add the batch dimension explicitly
            # so the per-sample flatten below keeps working.
            x = x.unsqueeze(0)
        # First conv stage: convolution, ReLU, then 2x2 max-pooling.
        x = self.pool(F.relu(self.conv1(x)))
        # Second conv stage: convolution, ReLU, then 2x2 max-pooling.
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Reshaping with
        # view(-1, 64 * 7 * 7) would let the batch dimension absorb a size
        # mismatch and silently return the wrong number of rows.
        x = torch.flatten(x, start_dim=1)
        # Hidden fully connected layer with ReLU.
        x = F.relu(self.fc1(x))
        # Output layer: raw logits, no activation.
        return self.fc2(x)
# Instantiate the network with its default configuration.
model = SimpleCNN()

# Dummy batch in (N, C, H, W) layout: one single-channel 28x28 image
# filled with standard-normal noise.
example_input = torch.randn((1, 1, 28, 28))

# Run the forward pass on the dummy batch.
output = model(example_input)
# NOTE: the statements below repeat the layer definitions from
# SimpleCNN.__init__ with one argument per line; they reference `self`
# and are meant to be read as part of that method, not run on their own.
# First convolutional layer: 1 input channel -> 32 feature maps.
# A 3x3 kernel with stride 1 and padding 1 keeps height and width unchanged.
self.conv1 = nn.Conv2d(
in_channels=1,
out_channels=32,
kernel_size=3,
stride=1,
padding=1
)
# Pooling layer: 2x2 window moved in steps of 2, which halves height and width.
self.pool = nn.MaxPool2d(
kernel_size=2,
stride=2
)
# Second convolutional layer: 32 -> 64 feature maps, same 3x3 / stride 1 /
# padding 1 configuration, so the spatial size is again preserved.
self.conv2 = nn.Conv2d(
in_channels=32,
out_channels=64,
kernel_size=3,
stride=1,
padding=1
)
We will be looking at conv1d to conv3d layers and their implementations.

Source: https://medium.com/hackernoon/visualizing-parts-of-convolutional-neural-networks-using-keras-and-cats-5cc01b214e59
