[SENet Module Study] Learning the SENet module through the MNIST handwritten digit recognition task
Basics of the SENet module
· Prerequisite introductory image-classification task, MNIST handwritten digit recognition: [Image Classification Basics] MNIST Handwritten Digit Recognition - CSDN Blog
1. The SENet module
import torch
import torch.nn as nn


class SENet_Layer(nn.Module):
    def __init__(self, channel, reduction=16):  # default reduction ratio r is 16
        super(SENet_Layer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # adaptive average pooling, output size 1*1
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid(),  # squash the channel weights into the range 0-1
        )

    def forward(self, x):
        b, c, _, _ = x.size()  # x is 4-D; extract the batch size b and the channel count c
        y = self.avg_pool(x).view(b, c)  # squeeze: pooling gives b*c*1*1, flattened to b*c for the fully connected (excitation) layers
        y = self.fc(y).view(b, c, 1, 1)  # excitation: channel weights reshaped back to the original 4-D layout for multiplication
        return x * y.expand_as(x)  # element-wise multiplication, channel by channel
The code above is the basic structure of an SE layer. A global average pooling layer (the squeeze) first compresses the height and width of the feature map down to 1*1. The excitation stage that follows consists of two fully connected layers: the first reduces the channel dimension to C/r, which keeps model complexity in check, improves computational efficiency, and strengthens the feature abstraction behind the attention mechanism, and is followed by a ReLU activation that adds nonlinearity; the second fully connected layer restores the channel dimension to C, and a sigmoid activation scales the resulting channel weights into the range 0-1. A quick shape check is sketched below.
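As a quick sanity check, the snippet below (a minimal sketch; the batch size, channel count, and spatial size are arbitrary illustrative values, not taken from the post) runs a random feature map through the SE layer and confirms that the output keeps the input shape while only re-weighting the channels:

import torch

se = SENet_Layer(channel=64, reduction=16)
feat = torch.randn(8, 64, 14, 14)  # dummy feature map: batch of 8, 64 channels, 14*14 spatial size
out = se(feat)
print(out.shape)  # torch.Size([8, 64, 14, 14]) -- same shape as the input, channels re-weighted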
2. The complete model
class CNN(nn.Module):  # the model used for training
    def __init__(self):
        super(CNN, self).__init__()
        # input is 1*28*28
        self.layer01 = nn.Sequential(
            # 1. convolution (h2 = w2 = (28-5+2*2)/1 + 1 = 28)
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=5, stride=1, padding=2, bias=True),
            # 2. batch normalization (BN layer)
            nn.BatchNorm2d(64),
            # 3. activation, using ReLU
            nn.ReLU(inplace=True),
            # 4. max pooling
            nn.MaxPool2d(2)
        )  # after layer01 the feature map is 64*14*14
        self.Se_layer01 = SENet_Layer(64)
        self.dropout = nn.Dropout(p=0.5)  # dropout
        self.layer02 = nn.Sequential(
            # 1. convolution (h3 = w3 = (14-3+1*2)/1 + 1 = 14)
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )  # after layer02 the feature map is 32*7*7
        self.Se_layer02 = SENet_Layer(32)
        self.fc = torch.nn.Linear(in_features=7 * 7 * 32, out_features=10)  # output is b*10

    def forward(self, x):
        x = self.layer01(x)
        x = self.Se_layer01(x)
        x = self.dropout(x)
        x = self.layer02(x)
        x = self.Se_layer02(x)
        x = x.view(x.size()[0], -1)  # flatten the feature map into one vector per sample
        x = self.fc(x)
        return x
The complete model is shown above, with an SE layer inserted after each of the two original convolutional blocks. For a simple task such as MNIST handwritten digit recognition, a fairly small convolutional network already achieves very good results, and making the architecture overly complex can instead cause problems such as overfitting; a dropout layer is therefore added in between to guard against overfitting when the training set is small. A quick shape check of the model is sketched below.
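As a minimal sketch (the batch size of 4 is an arbitrary illustrative value), a dummy MNIST-sized batch can be pushed through the model to confirm that it produces one logit per digit class:

import torch

model = CNN()
model.eval()  # disable dropout for the shape check
dummy = torch.randn(4, 1, 28, 28)  # a fake batch of 4 grayscale 28*28 images
logits = model(dummy)
print(logits.shape)  # torch.Size([4, 10]) -- 10 class scores per image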
3. Complete code
import os
import time

import numpy as np
import torch
import torchvision.datasets as dataset
import torchvision.transforms as transforms
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt


class SENet_Layer(nn.Module):  # SENet module
    def __init__(self, channel, reduction=16):  # default reduction ratio r is 16
        super(SENet_Layer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # adaptive average pooling, output size 1*1
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid(),  # squash the channel weights into the range 0-1
        )

    def forward(self, x):
        b, c, _, _ = x.size()  # x is 4-D; extract the batch size b and the channel count c
        y = self.avg_pool(x).view(b, c)  # squeeze: pooling gives b*c*1*1, flattened to b*c for the fully connected (excitation) layers
        y = self.fc(y).view(b, c, 1, 1)  # excitation: channel weights reshaped back to the original 4-D layout for multiplication
        return x * y.expand_as(x)  # element-wise multiplication, channel by channel
class CNN(nn.Module):  # the model used for training
    def __init__(self):
        super(CNN, self).__init__()
        # input is 1*28*28
        self.layer01 = nn.Sequential(
            # 1. convolution (h2 = w2 = (28-5+2*2)/1 + 1 = 28)
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=5, stride=1, padding=2, bias=True),
            # 2. batch normalization (BN layer)
            nn.BatchNorm2d(64),
            # 3. activation, using ReLU
            nn.ReLU(inplace=True),
            # 4. max pooling
            nn.MaxPool2d(2)
        )  # after layer01 the feature map is 64*14*14
        self.Se_layer01 = SENet_Layer(64)
        self.dropout = nn.Dropout(p=0.5)  # dropout
        self.layer02 = nn.Sequential(
            # 1. convolution (h3 = w3 = (14-3+1*2)/1 + 1 = 14)
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )  # after layer02 the feature map is 32*7*7
        self.Se_layer02 = SENet_Layer(32)
        self.fc = torch.nn.Linear(in_features=7 * 7 * 32, out_features=10)  # output is b*10

    def forward(self, x):
        x = self.layer01(x)
        x = self.Se_layer01(x)
        x = self.dropout(x)
        x = self.layer02(x)
        x = self.Se_layer02(x)
        x = x.view(x.size()[0], -1)  # flatten the feature map into one vector per sample
        x = self.fc(x)
        return x
def train_val(train_loader, val_loader, device, model, loss, optimizer, epochs, save_path):  # main training function
    model = model.to(device)
    plt_train_loss = []  # per-epoch mean training loss
    plt_train_acc = []   # per-epoch training accuracy
    plt_val_loss = []    # validation counterparts
    plt_val_acc = []
    max_acc = 0  # the best validation accuracy so far decides which model is kept
    for epoch in range(epochs):  # start training
        train_loss = 0.0
        train_acc = 0.0
        val_acc = 0.0
        val_loss = 0.0
        start_time = time.time()
        model.train()
        for index, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()  # reset the gradients
            pred = model(images)
            bat_loss = loss(pred, labels)  # cross-entropy loss
            bat_loss.backward()  # back-propagate the gradients
            optimizer.step()  # update the model parameters
            train_loss += bat_loss.item()
            # note that pred is a 64*10 tensor here (one score distribution per sample)
            pred = pred.argmax(dim=1)  # index of the maximum along dimension 1
            train_acc += (pred == labels).sum().item()
            print("Epoch {} training, batch {}/{}, batch loss: {} | correct predictions: {}"
                  .format(epoch + 1, index + 1, len(train_data) // config["batch_size"],
                          bat_loss.item(), (pred == labels).sum().item()))
        # compute this epoch's training loss and accuracy and store them:
        plt_train_loss.append(train_loss / train_loader.dataset.__len__())
        plt_train_acc.append(train_acc / train_loader.dataset.__len__())
        model.eval()  # switch the model to evaluation mode
        with torch.no_grad():  # validation needs no back-propagation, so gradient tracking is disabled
            for index, (images, labels) in enumerate(val_loader):
                images, labels = images.to(device), labels.to(device)
                pred = model(images)
                bat_loss = loss(pred, labels)  # cross-entropy loss
                val_loss += bat_loss.item()
                pred = pred.argmax(dim=1)
                val_acc += (pred == labels).sum().item()
                print("Epoch {} validation, batch {}/{}, batch loss: {} | correct predictions: {}"
                      .format(epoch + 1, index + 1, len(val_data) // config["batch_size"],
                              bat_loss.item(), (pred == labels).sum().item()))
        val_acc = val_acc / val_loader.dataset.__len__()
        if val_acc > max_acc:
            max_acc = val_acc
            torch.save(model, save_path)
        plt_val_loss.append(val_loss / val_loader.dataset.__len__())
        plt_val_acc.append(val_acc)
        print('Epoch finished: [%03d/%03d] %2.2fsec(s) TrainAcc:%3.6f TrainLoss:%3.6f | valAcc:%3.6f valLoss:%3.6f \n\n'
              % (epoch + 1, epochs, time.time() - start_time, plt_train_acc[-1], plt_train_loss[-1], plt_val_acc[-1], plt_val_loss[-1]))
    plt.plot(plt_train_loss)  # plot the curves
    plt.plot(plt_val_loss)
    plt.title('loss')
    plt.legend(['train', 'val'])
    plt.show()
    plt.plot(plt_train_acc)
    plt.plot(plt_val_acc)
    plt.title('Accuracy')
    plt.legend(['train', 'val'])
    # plt.savefig('./acc.png')
    plt.show()
def test(save_path, test_loader, device, loss):  # test function
    best_model = torch.load(save_path).to(device)
    best_model.eval()  # make sure dropout and BN run in evaluation mode
    test_loss = 0.0
    test_acc = 0.0
    start_time = time.time()
    with torch.no_grad():
        for index, (images, labels) in enumerate(test_loader):
            images, labels = images.to(device), labels.to(device)
            pred = best_model(images)
            bat_loss = loss(pred, labels)  # cross-entropy loss
            test_loss += bat_loss.item()
            pred = pred.argmax(dim=1)
            test_acc += (pred == labels).sum().item()
            print("Final testing: batch {}/{}, batch loss: {} | correct predictions: {}"
                  .format(index + 1, len(test_data) // config["batch_size"],
                          bat_loss.item(), (pred == labels).sum().item()))
    print('Final test finished: %2.2fsec(s) TestAcc:%.2f%% TestLoss:%.4f \n\n'
          % (time.time() - start_time, test_acc / test_loader.dataset.__len__() * 100, test_loss / test_loader.dataset.__len__()))
# load the MNIST data
ori_data = dataset.MNIST(
    root="./data",
    train=True,
    transform=transforms.ToTensor(),
    download=True
)
test_data = dataset.MNIST(
    root="./data",
    train=False,
    transform=transforms.ToTensor(),
    download=True
)
# print(ori_data)
# print(test_data)
# inspect a single sample
# image, label = ori_data[0]
# print(f"Image shape: {image.shape}, Label: {label}")
# image = image.squeeze().numpy()
# plt.imshow(image)
# plt.title(f'Label: {label}')
# plt.show()

config = {
    "train_size_perc": 0.8,
    "batch_size": 64,
    "learning_rate": 0.01,
    "epochs": 10,
    "save_path": "model_save/best_model.pth"
}
os.makedirs(os.path.dirname(config["save_path"]), exist_ok=True)  # make sure the save directory exists

# set the train/validation split ratio
train_size = int(config["train_size_perc"] * len(ori_data))  # 80% for training
val_size = len(ori_data) - train_size  # 20% for validation
# split the dataset with random_split
train_data, val_data = random_split(ori_data, [train_size, val_size])
train_loader = DataLoader(dataset=train_data, batch_size=config["batch_size"], shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=config["batch_size"], shuffle=False)
test_loader = DataLoader(dataset=test_data, batch_size=config["batch_size"], shuffle=True)
# print(f'Training Set Size: {len(train_loader.dataset)}')
# print(f'Validation Set Size: {len(val_loader.dataset)}')
# print(f'Testing Set Size: {len(test_loader.dataset)}')

model = CNN()
loss = nn.CrossEntropyLoss()  # cross-entropy loss function
optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])  # optimizer
# optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=1e-4)
device = "cuda" if torch.cuda.is_available() else "cpu"
# print(device)
train_val(train_loader, val_loader, device, model, loss, optimizer, config["epochs"], config["save_path"])
test(config["save_path"], test_loader, device, loss)
4. Training results
The final accuracy still sits at around 99%, with no significant improvement over the plain CNN.
The MNIST handwritten digit recognition task without the SENet module: [Image Classification Basics] MNIST Handwritten Digit Recognition - CSDN Blog