The CIFAR-10 dataset is widely used in the fields of machine learning and computer vision. It comprises 60,000 color images categorized into 10 distinct classes, with each class containing 6,000 images. The dataset is designed for image classification tasks, where each image must be assigned to one of these 10 categories.
Image Categories: The CIFAR-10 dataset includes the following 10 image categories:
Airplane
Automobile
Bird
Cat
Deer
Dog
Frog
Horse
Ship
Truck
Image Characteristics: Each image has a resolution of 32x32 pixels and three color channels (red, green, blue), so each image consists of 32x32x3 = 3,072 pixel values.
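As a quick illustration, torchvision's ToTensor transform (used in the next section) turns such an image into a channels-first tensor of shape (3, 32, 32). A minimal sketch, using a dummy array in place of a real CIFAR-10 image:

import numpy as np
import torchvision.transforms as transforms

# Dummy stand-in for one CIFAR-10 image: 32x32 pixels, 3 color channels (HWC layout)
fake_image = np.zeros((32, 32, 3), dtype=np.uint8)
tensor = transforms.ToTensor()(fake_image)  # converts to channels-first (CHW) and scales to [0, 1]
print(tensor.shape)    # torch.Size([3, 32, 32])
print(tensor.numel())  # 3072 values per image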
2. Download the CIFAR-10 dataset
You can use the following code to download the dataset:
import torch
import torchvision
import torchvision.transforms as transforms

BATCHSIZE_TRAIN = 64
BATCHSIZE_TEST = 32

# We will make some changes to this transform later
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
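The snippet above only sets up the training transform. The main code in section 4 calls a ReadData() helper from a data module that is not shown in this excerpt; presumably it also builds a test transform, the CIFAR-10 datasets (with download=True), and the DataLoaders. A minimal sketch of what data.py could look like, under those assumptions:

import torch
import torchvision
import torchvision.transforms as transforms

BATCHSIZE_TRAIN = 64
BATCHSIZE_TEST = 32

def ReadData():
    # Assumed implementation of the ReadData() helper used in the main code.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    # download=True fetches CIFAR-10 into ./data on the first run
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)
    test_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCHSIZE_TRAIN,
                                               shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCHSIZE_TEST,
                                              shuffle=False, num_workers=2)
    return train_loader, test_loader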
3. Build ResNet18
The core building unit of ResNet18 is the residual block, so we define it first (as a ResBlock class in ResNet.py):

import torch.nn as nn


class ResBlock(nn.Module):
    expansion = 1  # The basic block does not expand the channel count

    def __init__(self, inchannel, outchannel, stride=1):
        super(ResBlock, self).__init__()
        # Define main conv layers within the residual block
        self.conv1 = nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outchannel)  # Normalize inputs to accelerate training
        self.ReLU = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outchannel)

        # Define the shortcut
        self.shortcut = nn.Sequential()
        # When the input size and output size are different, or when the channel dimensions differ,
        # the shortcut needs to unify the size and dimensions.
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.bn1(self.conv1(x))
        out = self.ReLU(out)
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)  # Add the shortcut path to the main path
        out = self.ReLU(out)
        return out
Next, stack the residual blocks into the full network and expose a ResNet18() constructor:

class ResNet(nn.Module):
    def __init__(self, ResBlock, num_classes=10):
        super(ResNet, self).__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer1 = self._make_layer(ResBlock, 64, 2, stride=1)
        self.layer2 = self._make_layer(ResBlock, 128, 2, stride=2)
        self.layer3 = self._make_layer(ResBlock, 256, 2, stride=2)
        self.layer4 = self._make_layer(ResBlock, 512, 2, stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, channels, num_blocks, stride):
        # Typically, the first residual block changes the size,
        # while the subsequent residual blocks keep the size and use a stride of 1.
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = nn.functional.avg_pool2d(out, 4)  # Average-pool the 4x4 feature map down to 1x1
        out = out.view(out.size(0), -1)  # Flatten into one-dimensional data
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(ResBlock)
if __name__ == '__main__':
    net = ResNet18()
    print(net)
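Beyond printing the architecture, you can sanity-check it by pushing a dummy CIFAR-10-sized batch through the network and confirming that the output has one logit per class. A quick sketch, assuming the definitions above are in scope:

import torch

net = ResNet18()
dummy = torch.randn(1, 3, 32, 32)  # one fake CIFAR-10 image: batch x channels x height x width
out = net(dummy)
print(out.shape)  # expected: torch.Size([1, 10]), one score per class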
If this is your first time working with ResNet18, I strongly recommend building it yourself in PyTorch as above; you will come away with a much deeper understanding of the architecture.
Alternatively, you can simply use the ResNet18 implementation provided by torchvision:
import torch
import torchvision.models as models

net = models.resnet18()
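Note that torchvision's resnet18 is configured for ImageNet: its final fully connected layer has 1,000 outputs, and its first convolution uses a 7x7 kernel with stride 2, which is fairly aggressive for 32x32 inputs. If you go this route, at minimum replace the classifier head for the 10 CIFAR-10 classes; a small sketch:

import torch.nn as nn
import torchvision.models as models

net = models.resnet18()                     # ImageNet-style ResNet18
net.fc = nn.Linear(net.fc.in_features, 10)  # Swap the 1000-class head for 10 CIFAR-10 classes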
4. Main Code
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from ResNet import ResNet18
from data import ReadData

train_loader, test_loader = ReadData()
# use GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# net = models.resnet18()
net = ResNet18().to(device)
# Define the loss function as cross-entropy loss
criterion = nn.CrossEntropyLoss()
# Use the SGD optimizer
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9, weight_decay=5e-4)
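LEARNING_RATE is referenced above but not defined in this excerpt; a value such as 0.1, defined before the optimizer, is a common choice for SGD with momentum 0.9 and weight decay 5e-4 on CIFAR-10. The excerpt also stops before the training loop itself, so the following is only a minimal sketch of how the loaders, network, loss, and optimizer typically fit together (the epoch count is an assumption, not taken from the original):

EPOCHS = 20  # assumed value, not from the original article

for epoch in range(EPOCHS):
    # --- training ---
    net.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # --- evaluation ---
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Epoch {epoch + 1}: train loss {running_loss / len(train_loader):.3f}, '
          f'test accuracy {100.0 * correct / total:.2f}%')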