The CIFAR-10 dataset is widely used in the fields of machine learning and computer vision. It comprises 60,000 color images categorized into 10 distinct classes, with each class containing 6,000 images. The dataset is designed for image classification tasks, where each image must be assigned to one of these 10 categories.
Image Categories: The CIFAR-10 dataset includes the following 10 image categories:
Airplane
Automobile
Bird
Cat
Deer
Dog
Frog
Horse
Ship
Truck
Image Characteristics: Each image has a resolution of 32x32 pixels and three color channels (red, green, blue), so each image consists of 32x32x3 = 3,072 pixel values.
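As a quick illustration, torchvision's ToTensor transform (used in the next section) turns such an image into a channels-first tensor of shape (3, 32, 32). A minimal sketch, using a dummy array in place of a real CIFAR-10 image:

import numpy as np
import torchvision.transforms as transforms

# Dummy stand-in for one CIFAR-10 image: 32x32 pixels, 3 color channels (HWC layout)
fake_image = np.zeros((32, 32, 3), dtype=np.uint8)
tensor = transforms.ToTensor()(fake_image)  # converts to channels-first (CHW) and scales to [0, 1]
print(tensor.shape)    # torch.Size([3, 32, 32])
print(tensor.numel())  # 3072 values per image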
2. Download the CIFAR-10 dataset
You can use the following code to download the dataset:
import torch
import torchvision
import torchvision.transforms as transforms

BATCHSIZE_TRAIN = 64
BATCHSIZE_TEST = 32

# We will make some changes to this transform later
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
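The snippet above only sets up the training transform. The main code in section 4 calls a ReadData() helper from a data module that is not shown in this excerpt; presumably it also builds a test transform, the CIFAR-10 datasets (with download=True), and the DataLoaders. A minimal sketch of what data.py could look like, under those assumptions:

import torch
import torchvision
import torchvision.transforms as transforms

BATCHSIZE_TRAIN = 64
BATCHSIZE_TEST = 32

def ReadData():
    # Assumed implementation of the ReadData() helper used in the main code.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    # download=True fetches CIFAR-10 into ./data on the first run
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)
    test_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCHSIZE_TRAIN,
                                               shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCHSIZE_TEST,
                                              shuffle=False, num_workers=2)
    return train_loader, test_loader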
3. Build ResNet18
The core building unit of ResNet18 is the residual block, so we define it first (as a ResBlock class in ResNet.py):

import torch.nn as nn


class ResBlock(nn.Module):
    expansion = 1  # The basic block does not expand the channel count

    def __init__(self, inchannel, outchannel, stride=1):
        super(ResBlock, self).__init__()
        # Define main conv layers within the residual block
        self.conv1 = nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outchannel)  # Normalize inputs to accelerate training
        self.ReLU = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outchannel)

        # Define the shortcut
        self.shortcut = nn.Sequential()
        # When the input size and output size are different, or when the channel dimensions differ,
        # the shortcut needs to unify the size and dimensions.
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.bn1(self.conv1(x))
        out = self.ReLU(out)
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)  # Add the shortcut path to the main path
        out = self.ReLU(out)
        return out
Next, stack the residual blocks into the full network and expose a ResNet18() constructor:

class ResNet(nn.Module):
    def __init__(self, ResBlock, num_classes=10):
        super(ResNet, self).__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer1 = self._make_layer(ResBlock, 64, 2, stride=1)
        self.layer2 = self._make_layer(ResBlock, 128, 2, stride=2)
        self.layer3 = self._make_layer(ResBlock, 256, 2, stride=2)
        self.layer4 = self._make_layer(ResBlock, 512, 2, stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, channels, num_blocks, stride):
        # Typically, the first residual block changes the size,
        # while the subsequent residual blocks keep the size and use a stride of 1.
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = nn.functional.avg_pool2d(out, 4)  # Average-pool the 4x4 feature map down to 1x1
        out = out.view(out.size(0), -1)  # Flatten into one-dimensional data
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(ResBlock)
if __name__ == '__main__':
    net = ResNet18()
    print(net)
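Beyond printing the architecture, you can sanity-check it by pushing a dummy CIFAR-10-sized batch through the network and confirming that the output has one logit per class. A quick sketch, assuming the definitions above are in scope:

import torch

net = ResNet18()
dummy = torch.randn(1, 3, 32, 32)  # one fake CIFAR-10 image: batch x channels x height x width
out = net(dummy)
print(out.shape)  # expected: torch.Size([1, 10]), one score per class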
If this is your first time working with ResNet18, I strongly recommend building it yourself in PyTorch as above; you will come away with a much deeper understanding of the architecture.
Alternatively, you can simply use the ResNet18 implementation provided by torchvision:
import torch
import torchvision.models as models

net = models.resnet18()
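Note that torchvision's resnet18 is configured for ImageNet: its final fully connected layer has 1,000 outputs, and its first convolution uses a 7x7 kernel with stride 2, which is fairly aggressive for 32x32 inputs. If you go this route, at minimum replace the classifier head for the 10 CIFAR-10 classes; a small sketch:

import torch.nn as nn
import torchvision.models as models

net = models.resnet18()                     # ImageNet-style ResNet18
net.fc = nn.Linear(net.fc.in_features, 10)  # Swap the 1000-class head for 10 CIFAR-10 classes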
4. Main Code
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from ResNet import ResNet18
from data import ReadData

train_loader, test_loader = ReadData()
# use GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# net = models.resnet18()
net = ResNet18().to(device)
# Define the loss function as cross-entropy loss
criterion = nn.CrossEntropyLoss()
# Use the SGD optimizer
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4)
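LEARNING_RATE is referenced above but not defined in this excerpt; a value such as 0.1, defined before the optimizer, is a common choice for SGD with momentum 0.9 and weight decay 5e-4 on CIFAR-10. The excerpt also stops before the training loop itself, so the following is only a minimal sketch of how the loaders, network, loss, and optimizer typically fit together (the epoch count is an assumption, not taken from the original):

EPOCHS = 20  # assumed value, not from the original article

for epoch in range(EPOCHS):
    # --- training ---
    net.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # --- evaluation ---
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Epoch {epoch + 1}: train loss {running_loss / len(train_loader):.3f}, '
          f'test accuracy {100.0 * correct / total:.2f}%')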