前言
实现经典卷积神经网络LeNet(LeNet-5)识别数字,这里将激活函数从sigmoid换成ReLU,参考资料《动手学深度学习》。
环境
python+pytorch
实现
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
from torch.utils import data
from torchvision import transforms
import cv2 as cv
# Fetch the MNIST dataset; `root` is the on-disk cache directory.
# Images are converted straight to tensors by the ToTensor transform.
trans = transforms.ToTensor()
mnist_train = torchvision.datasets.MNIST(
    root="./drive/MyDrive/ex/data", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.MNIST(
    root="./drive/MyDrive/ex/data", train=False, transform=trans, download=True)
# 定义网络
class Reshape(nn.Module):
    """Coerce any input batch into LeNet's expected (N, 1, 28, 28) shape."""

    def forward(self, x):
        batched = x.view(-1, 1, 28, 28)
        return batched
# LeNet-5 topology, with ReLU in place of the original sigmoid activations.
net = nn.Sequential(
    Reshape(),                                  # (N, 784) -> (N, 1, 28, 28)
    nn.Conv2d(1, 6, kernel_size=5, padding=2),  # padding keeps 28x28
    nn.ReLU(),
    nn.AvgPool2d(kernel_size=2, stride=2),      # 28 -> 14
    nn.Conv2d(6, 16, kernel_size=5),            # 14 -> 10
    nn.ReLU(),
    nn.AvgPool2d(kernel_size=2, stride=2),      # 10 -> 5
    nn.Flatten(),                               # (N, 16*5*5)
    nn.Linear(16 * 5 * 5, 120),
    nn.ReLU(),
    nn.Linear(120, 84),
    nn.ReLU(),
    nn.Linear(84, 10),                          # 10 digit classes
)
# Build mini-batch loaders over the downloaded splits.
batch_size = 256
# shuffle=True randomizes training order each epoch; on the test split it
# only changes iteration order, not the evaluated accuracy.
train_iter = data.DataLoader(mnist_train, shuffle=True, batch_size=batch_size, num_workers=4)
test_iter = data.DataLoader(mnist_test, shuffle=True, batch_size=batch_size, num_workers=4)
class Accumulator:
    """Running sums over `n` independent counters."""

    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        # Fold each incoming value onto its positional counter.
        totals = []
        for current, increment in zip(self.data, args):
            totals.append(current + float(increment))
        self.data = totals

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def accuracy(y_hat, y):
    """Count how many predictions in `y_hat` match the labels `y`."""
    preds = y_hat
    # A 2-D score matrix is reduced to class indices via argmax;
    # a 1-D input is assumed to already hold class indices.
    if len(preds.shape) > 1 and preds.shape[1] > 1:
        preds = preds.argmax(axis=1)
    hits = preds.type(y.dtype) == y
    return float(hits.type(y.dtype).sum())
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute `net`'s classification accuracy over `data_iter` on `device`.

    When `device` is not given, the model's own parameter device is used.
    """
    if isinstance(net, nn.Module):
        net.eval()  # inference mode: freezes dropout / batch-norm statistics
        if not device:
            device = next(iter(net.parameters())).device
    # metric[0] = number of correct predictions, metric[1] = number of samples
    metric = Accumulator(2)
    for X, y in data_iter:
        # Some pipelines yield a list of tensors; move each piece to the device.
        X = [x.to(device) for x in X] if isinstance(X, list) else X.to(device)
        y = y.to(device)
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def train(net, train_iter, test_iter, num_epochs, lr, device):
    """Train `net` with SGD + cross-entropy on `device`.

    Args:
        net: the model to train (weights are re-initialized in place).
        train_iter / test_iter: DataLoaders for the two splits.
        num_epochs: number of passes over the training data.
        lr: SGD learning rate.
        device: torch.device to run on.

    Prints per-epoch training loss, training accuracy, and test accuracy.
    """
    def init_weights(m):
        # Xavier-uniform init for every conv and linear layer.
        # isinstance replaces the original `type(m) ==` comparison (lint B? /
        # idiom: isinstance is the canonical type check).
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # metric[0] = summed loss, metric[1] = correct count, metric[2] = samples
        metric = Accumulator(3)
        net.train()
        # The original enumerated the loader but never used the index.
        for X, y in train_iter:
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                # Undo the mean reduction (l * batch size) so epoch loss
                # averages correctly over a ragged final batch.
                metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
        train_l = metric[0] / metric[2]
        train_acc = metric[1] / metric[2]
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
              f'test acc {test_acc:.3f}')
# Hyperparameters: SGD learning rate 0.1, 10 training epochs.
lr, num_epochs = 0.1, 10
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train(net, train_iter, test_iter, num_epochs, lr, device)
def get_mnist_labels(labels):
    """Map numeric class indices to their digit strings ('0'..'9')."""
    digit_names = [str(d) for d in range(10)]
    return [digit_names[int(index)] for index in labels]
def show_image(img_tensor):
    """Display one dataset image.

    Args:
        img_tensor: image tensor as produced by ToTensor — presumably
            shape (1, 28, 28); TODO confirm against callers.
    """
    # Bug fix: the original body referenced an undefined name `img` instead
    # of the `img_tensor` parameter (NameError at call time). squeeze() drops
    # the leading channel dim so imshow receives a 2-D array.
    plt.imshow(img_tensor.squeeze().numpy())
    plt.axis('off')
def predict(net, device=None):
    """Classify a single 28*28 image (black background, white digit) and print the label.

    Args:
        net: trained model.
        device: optional torch.device; defaults to the model's parameter device.
    """
    if isinstance(net, nn.Module):
        net.eval()  # evaluation mode
        if not device:
            device = next(iter(net.parameters())).device
    # Path of the image to classify; expected to be 28*28, black background, white digit.
    img = cv.imread('./drive/MyDrive/ex/data/MNIST/test.png')
    # Bug fix: cv.imread returns BGR channel order, so the conversion flag
    # must be COLOR_BGR2GRAY (the original RGB2GRAY swaps the R/B weights).
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    transf = transforms.ToTensor()
    img_tensor = transf(img)
    show_image(img_tensor)
    # Bug fix: move the input to the model's device (the original left it on
    # the CPU, which fails when the model sits on the GPU). no_grad skips
    # gradient tracking during inference.
    with torch.no_grad():
        logits = net(img_tensor.to(device))
    pred = get_mnist_labels(logits.argmax(axis=1))
    print(pred)
predict(net)
结果
loss:0.046, train accuracy:0.986, test accuracy:0.978
导入下面的预测图片
下图为导入图片的显示效果以及识别出的数字。