参考文档:
https://blog.csdn.net/gaotihong/article/details/80763813
简单来说,卷积层相当于一个特征检测层。每个卷积层有特定数目的通道,每个通道能够检测出图像中的具体特征。需要检测的每个特征常常被叫做核(kernel)或过滤器,它们都有固定大小,通常为3X3。
# 导入需要的包
import torch
import torch.nn as nn
class SimpleNet(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    The network is four 3x3 convolution + ReLU stages with a single 2x2
    max-pool after the second stage, followed by one linear layer. The
    linear layer is sized for a 16x16 feature map with 24 channels, i.e. a
    32x32 input halved once by the pooling layer.
    """

    def __init__(self, num_classes=10):
        """Create the layers.

        Args:
            num_classes: Number of outputs of the final linear layer.
        """
        super(SimpleNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        # Halves the spatial size (32x32 -> 16x16 for the intended input).
        self.pool = nn.MaxPool2d(kernel_size=2)

        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()

        self.fc = nn.Linear(in_features=16 * 16 * 24, out_features=num_classes)

    def forward(self, input):
        """Compute class scores for a batch of images.

        Args:
            input: Tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes).

        Raises:
            RuntimeError: If the feature map does not hold exactly
                16 * 16 * 24 values per sample (wrong input size).
        """
        output = self.conv1(input)
        output = self.relu1(output)

        output = self.conv2(output)
        output = self.relu2(output)

        output = self.pool(output)

        output = self.conv3(output)
        output = self.relu3(output)

        output = self.conv4(output)
        output = self.relu4(output)

        # Flatten per sample. The batch size is given explicitly (instead of
        # -1) so that a wrongly sized input raises here rather than being
        # silently reinterpreted as a larger batch.
        output = output.view(input.size(0), 16 * 16 * 24)

        output = self.fc(output)
        return output
nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
因为我们的输入是有 3 个通道(红-绿-蓝)的 RGB 图像,所以我们指明 in_channels 的数量为 3。接着我们想在图像上应用 12 个特征检测器,所以我们指明 out_channels 的数量为 12。这里我们使用标准大小为 3X3 的核。步幅(stride)设定为 1,后面也一直如此,除非你计划缩减图像的维度。步幅为 1 表示卷积核每次移动 1 个像素。最后,我们设定填充(padding)为 1:这样会在图像四周以 0 填充,从而保持输入和输出大小一致。
nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
注意:在把最后一个卷积-ReLU 层输出的特征图输入线性层之前,我们必须把整个特征图压平。最后一层有 24 个输出通道,由于 2X2 的最大池化,此时我们的图像变成了 16 X 16(32/2 = 16)。压平后的维度是 16 x 16 x 24,实现代码如下:
output = output.view(-1, 16 * 16 * 24)
在我们的线性层中,我们必须指明 in_features 的数目同样为 16 x 16 x 24,out_features 的数目应和我们所希望的类的数量一致。
class Unit(nn.Module):
    """Convolution -> batch normalization -> ReLU building block.

    The convolution uses a 3x3 kernel with stride 1 and padding 1, so the
    spatial size of the input is preserved.
    """

    def __init__(self, in_channels, out_channels):
        """Create the three layers.

        Args:
            in_channels: Number of channels of the incoming feature map.
            out_channels: Number of channels produced by the convolution
                (and normalized by the batch-norm layer).
        """
        super(Unit, self).__init__()

        self.conv = nn.Conv2d(in_channels=in_channels, kernel_size=3, out_channels=out_channels, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()

    def forward(self, input):
        """Apply convolution, batch normalization and ReLU in that order.

        Args:
            input: Tensor of shape (batch, in_channels, H, W).

        Returns:
            Tensor of shape (batch, out_channels, H, W).
        """
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)

        return output
class Unit(nn.Module):
    """Reusable block: 3x3 convolution, then batch norm, then ReLU."""

    def __init__(self, in_channels, out_channels):
        """Set up the block.

        Args:
            in_channels: Channel count of the input feature map.
            out_channels: Channel count of the output feature map.
        """
        super(Unit, self).__init__()

        # stride 1 with padding 1 keeps height and width unchanged.
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()

    def forward(self, input):
        """Return ``relu(bn(conv(input)))``."""
        convolved = self.conv(input)
        normalized = self.bn(convolved)
        return self.relu(normalized)
class SimpleNet(nn.Module):
    """Deeper CNN built from 14 ``Unit`` blocks for 3-channel 32x32 images.

    Three 2x2 max-pools reduce a 32x32 input to 4x4, and the final 4x4
    average pool reduces it to 1x1 with 128 channels, which is what the
    linear classifier expects.
    """

    def __init__(self, num_classes=10):
        """Create the layers.

        Args:
            num_classes: Number of outputs of the final linear layer.
        """
        super(SimpleNet, self).__init__()

        # Create 14 layers of the unit with max pooling in between
        self.unit1 = Unit(in_channels=3, out_channels=32)
        self.unit2 = Unit(in_channels=32, out_channels=32)
        self.unit3 = Unit(in_channels=32, out_channels=32)

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.unit4 = Unit(in_channels=32, out_channels=64)
        self.unit5 = Unit(in_channels=64, out_channels=64)
        self.unit6 = Unit(in_channels=64, out_channels=64)
        self.unit7 = Unit(in_channels=64, out_channels=64)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.unit8 = Unit(in_channels=64, out_channels=128)
        self.unit9 = Unit(in_channels=128, out_channels=128)
        self.unit10 = Unit(in_channels=128, out_channels=128)
        self.unit11 = Unit(in_channels=128, out_channels=128)

        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.unit12 = Unit(in_channels=128, out_channels=128)
        self.unit13 = Unit(in_channels=128, out_channels=128)
        self.unit14 = Unit(in_channels=128, out_channels=128)

        self.avgpool = nn.AvgPool2d(kernel_size=4)

        # Add all the units into the Sequential layer in exact order
        self.net = nn.Sequential(
            self.unit1, self.unit2, self.unit3, self.pool1,
            self.unit4, self.unit5, self.unit6, self.unit7, self.pool2,
            self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,
            self.unit12, self.unit13, self.unit14, self.avgpool,
        )

        self.fc = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, input):
        """Compute class scores for a batch of images.

        Args:
            input: Tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes).

        Raises:
            RuntimeError: If the pooled feature map does not hold exactly
                128 values per sample (wrong input size).
        """
        output = self.net(input)
        # Flatten per sample. The batch size is given explicitly (instead of
        # -1) so that a wrongly sized input raises here rather than being
        # silently reinterpreted as a larger batch.
        output = output.view(input.size(0), 128)
        output = self.fc(output)
        return output
我们的整个神经网络出来了,它由 14 个卷积层、14 个 ReLU 层、14 个批量归一化层、4 个池化层和 1 个线性层组成,总共 47 个层!
self.net = nn.Sequential(self.unit1, self.unit2, self.unit3, self.pool1, self.unit4, self.unit5, self.unit6, self.unit7, self.pool2, self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,self.unit12, self.unit13, self.unit14, self.avgpool)
注意最后一个单元之后的平均池化层的核大小为 4:
self.avgpool = nn.AvgPool2d(kernel_size=4)
因此,线性层会有1X1X128=128个输入特征。
self.fc = nn.Linear(in_features=128,out_features=num_classes)
我们同样会压平神经网络的输出,让它有128个特征。
output = output.view(-1,128)
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
# Training-time preprocessing: random horizontal flip and a random 32x32 crop
# (after 4 pixels of padding) as augmentation, then tensor conversion and
# per-channel normalization with mean 0.5 and std 0.5.
train_transformations = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 training split stored under ./data (download requested).
train_set = CIFAR10(
    root="./data",
    train=True,
    transform=train_transformations,
    download=True,
)

# Shuffled mini-batches of 32 images, loaded with 4 worker processes.
train_loader = DataLoader(
    train_set,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)
首先,我们把一组转换传入 transforms.Compose。RandomHorizontalFlip 会随机水平翻转图像,RandomCrop 会随机裁剪图像。
# Test-time preprocessing: no augmentation, only tensor conversion and the
# same mean 0.5 / std 0.5 normalization used for training.
test_transformations = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 test split: note train=False.
test_set = CIFAR10(
    root="./data",
    train=False,
    transform=test_transformations,
    download=True,
)

# Evaluation loader: note shuffle=False, so batches come in dataset order.
test_loader = DataLoader(
    test_set,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)
from torch.optim import Adam
# Detect once whether a CUDA device can be used.
cuda_avail = torch.cuda.is_available()

# Build the 10-class network and move it to the GPU when one is available.
model = SimpleNet(num_classes=10)
if cuda_avail:
    model.cuda()

# Cross-entropy classification loss, optimized with Adam (initial learning
# rate 0.001, weight decay 0.0001).
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=0.0001,
)
# Learning rate adjustment function that divides the learning rate by 10
# every 30 epochs
def adjust_learning_rate(epoch):
    """Write the scheduled learning rate for ``epoch`` into the optimizer.

    The base rate is 0.001. It is divided by 10 once ``epoch`` exceeds 30,
    by 100 once it exceeds 60, and so on up to a division by 1,000,000 once
    it exceeds 180. The result is stored in every parameter group of the
    module-level ``optimizer``.

    Args:
        epoch: Epoch index used to pick the rate.
    """
    base_lr = 0.001

    # (epoch threshold, divisor) pairs, checked from the highest threshold
    # down so that the first match is the strongest applicable reduction.
    schedule = (
        (180, 1000000),
        (150, 100000),
        (120, 10000),
        (90, 1000),
        (60, 100),
        (30, 10),
    )

    new_lr = base_lr
    for threshold, divisor in schedule:
        if epoch > threshold:
            new_lr = base_lr / divisor
            break

    for group in optimizer.param_groups:
        group["lr"] = new_lr
def save_models(epoch):
    """Save the weights of the module-level ``model`` for the given epoch.

    The state dict is written to ``cifar10model_<epoch>.model`` in the
    current working directory.

    Args:
        epoch: Value substituted into the checkpoint file name.
    """
    torch.save(model.state_dict(), "cifar10model_{}.model".format(epoch))
    print("Checkpoint saved")
def test():
    """Evaluate the module-level ``model`` on ``test_loader``.

    Returns:
        float: Fraction of test samples whose highest-scoring class equals
        the label (0.0 if the loader yields no samples).
    """
    model.eval()
    correct = 0
    total = 0

    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for images, labels in test_loader:
            if cuda_avail:
                images = images.cuda()
                labels = labels.cuda()

            # Predict classes using images from the test set
            outputs = model(images)
            _, prediction = torch.max(outputs, 1)

            # .item() turns the 0-dim count tensor into a Python int so the
            # running total is not subject to integer tensor division.
            correct += (prediction == labels).sum().item()
            total += labels.size(0)

    # Average over the samples actually seen rather than a hard-coded 10000.
    if total == 0:
        return 0.0
    return float(correct) / total
def train(num_epochs):
    """Train the module-level ``model`` and checkpoint the best epochs.

    Each epoch runs one pass over ``train_loader``, updates the learning
    rate through ``adjust_learning_rate``, evaluates with ``test`` and calls
    ``save_models`` whenever the test accuracy beats the best seen so far.
    A one-line summary is printed per epoch.

    Args:
        num_epochs: Number of passes over the training data.
    """
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        correct = 0
        loss_sum = 0.0
        seen = 0

        for images, labels in train_loader:
            # Move the images and labels to the GPU if one is available
            if cuda_avail:
                images = images.cuda()
                labels = labels.cuda()

            # Clear all accumulated gradients
            optimizer.zero_grad()
            # Predict classes using images from the training set
            outputs = model(images)
            # Compute the loss from the predictions and the true labels
            loss = loss_fn(outputs, labels)
            # Backpropagate the loss
            loss.backward()
            # Adjust the parameters according to the computed gradients
            optimizer.step()

            batch_size = images.size(0)
            # loss is a 0-dim tensor; .item() extracts the Python float.
            loss_sum += loss.item() * batch_size
            _, prediction = torch.max(outputs.detach(), 1)
            correct += (prediction == labels).sum().item()
            seen += batch_size

        # Call the learning rate adjustment function
        adjust_learning_rate(epoch)

        # Average over the samples actually seen rather than a hard-coded
        # 50000, guarding against an empty loader.
        train_acc = float(correct) / seen if seen else 0.0
        train_loss = loss_sum / seen if seen else 0.0

        # Evaluate on the test set
        test_acc = test()

        # Save the model if the test accuracy is the best so far
        if test_acc > best_acc:
            save_models(epoch)
            best_acc = test_acc

        # Print the metrics
        print("Epoch {}, Train Accuracy: {} , TrainLoss: {} , Test Accuracy: {}".format(epoch, train_acc, train_loss,
                                                                                        test_acc))
for i, (images,labels) in enumerate(train_loader):
接着,如果可以用GPU,我们就将图像和标签移往GPU:
if cuda_avail:
images = Variable(images.cuda())
labels = Variable(labels.cuda())
optimizer.zero_grad()
train_loss += loss.cpu().data[0] * images.size(0)
_, prediction = torch.max(outputs.data, 1)
train_acc += torch.sum(prediction == labels.data)
https://gist.github.com/johnolafenwa/96b3322aabb61d4d36fd870a77f02aa3
# 导入需要的包
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import requests
import shutil
from io import open
import os
from PIL import Image
import json
# Build SqueezeNet 1.1 with pretrained weights and switch it to evaluation
# mode in one expression (Module.eval() returns the module itself).
model = squeezenet1_1(pretrained=True).eval()
注意,在上面的代码中,通过将pretrained设为True,Squeezenet模型在你首次运行函数时就会被下载。模型的大小只有4.7 MB。
接着,创建一个预测函数,如下:
def predict_image(image_path):
    """Classify one image file with the module-level ``model``.

    Args:
        image_path: Path of an image file that PIL can open.

    Returns:
        int: Index of the highest-scoring output class.
    """
    print("Prediction in progress")

    # Force three channels: PNG files may be RGBA, palette or greyscale,
    # which would not match the 3-channel normalization below.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # ImageNet models are trained with image size 224 and expect inputs
    # normalized with the ImageNet channel statistics. A custom model trained
    # with other statistics must use its own values here.
    transformation = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Preprocess the image and add a batch dimension, since the model
    # expects a batch of images.
    image_tensor = transformation(image).float().unsqueeze(0)

    # Run on the device that holds the model weights. Tensor.cuda() returns
    # a copy, so calling it without using the result has no effect.
    device = next(model.parameters()).device
    image_tensor = image_tensor.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(image_tensor)

    return output.argmax().item()
if __name__ == "__main__":
    imagefile = "image.png"
    imagepath = os.path.join(os.getcwd(), imagefile)

    # Download the sample image if it doesn't exist
    if not os.path.exists(imagepath):
        data = requests.get(
            "https://github.com/OlafenwaMoses/ImageAI/raw/master/images/3.jpg", stream=True)
        # Fail loudly instead of saving an HTTP error page as the image.
        data.raise_for_status()

        with open(imagepath, "wb") as file:
            shutil.copyfileobj(data.raw, file)

        del data

    index_file = "class_index_map.json"
    indexpath = os.path.join(os.getcwd(), index_file)

    # Download the class index if it doesn't exist
    if not os.path.exists(indexpath):
        data = requests.get('https://github.com/OlafenwaMoses/ImageAI/raw/master/imagenet_class_index.json')
        # Fail loudly instead of saving an HTTP error page as the index.
        data.raise_for_status()

        with open(indexpath, "w", encoding="utf-8") as file:
            file.write(data.text)

    # Read the index through a context manager so the file handle is closed.
    with open(indexpath, encoding="utf-8") as file:
        class_map = json.load(file)

    # Run the prediction function and obtain the predicted class index
    index = predict_image(imagepath)

    prediction = class_map[str(index)][1]

    print("Predicted Class ", prediction)
这是推断过程的完整代码:
# Import needed packages
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as F
import requests
import shutil
from io import open
import os
from PIL import Image
import json
""" Instantiate model, this downloads tje 4.7 mb squzzene the first time it is called.
To use with your own model, re-define your trained networks ad load weights as below
checkpoint = torch.load("pathtosavemodel")
model = SimpleNet(num_classes=10)
model.load_state_dict(checkpoint)
model.eval()
"""
# Build SqueezeNet 1.1 with pretrained weights and switch it to evaluation
# mode in one expression (Module.eval() returns the module itself).
model = squeezenet1_1(pretrained=True).eval()
def predict_image(image_path):
    """Classify one image file with the module-level ``model``.

    Args:
        image_path: Path of an image file that PIL can open.

    Returns:
        int: Index of the highest-scoring output class.
    """
    print("Prediction in progress")

    # Force three channels: PNG files may be RGBA, palette or greyscale,
    # which would not match the 3-channel normalization below.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # ImageNet models are trained with image size 224 and expect inputs
    # normalized with the ImageNet channel statistics. A custom model trained
    # with other statistics must use its own values here.
    transformation = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Preprocess the image and add a batch dimension, since the model
    # expects a batch of images.
    image_tensor = transformation(image).float().unsqueeze(0)

    # Run on the device that holds the model weights. Tensor.cuda() returns
    # a copy, so calling it without using the result has no effect.
    device = next(model.parameters()).device
    image_tensor = image_tensor.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(image_tensor)

    return output.argmax().item()
if __name__ == "__main__":
    imagefile = "image.png"
    imagepath = os.path.join(os.getcwd(), imagefile)

    # Download the sample image if it doesn't exist
    if not os.path.exists(imagepath):
        data = requests.get(
            "https://github.com/OlafenwaMoses/ImageAI/raw/master/images/3.jpg", stream=True)
        # Fail loudly instead of saving an HTTP error page as the image.
        data.raise_for_status()

        with open(imagepath, "wb") as file:
            shutil.copyfileobj(data.raw, file)

        del data

    index_file = "class_index_map.json"
    indexpath = os.path.join(os.getcwd(), index_file)

    # Download the class index if it doesn't exist
    if not os.path.exists(indexpath):
        data = requests.get('https://github.com/OlafenwaMoses/ImageAI/raw/master/imagenet_class_index.json')
        # Fail loudly instead of saving an HTTP error page as the index.
        data.raise_for_status()

        with open(indexpath, "w", encoding="utf-8") as file:
            file.write(data.text)

    # Read the index through a context manager so the file handle is closed.
    with open(indexpath, encoding="utf-8") as file:
        class_map = json.load(file)

    # Run the prediction function and obtain the predicted class index
    index = predict_image(imagepath)

    prediction = class_map[str(index)][1]

    print("Predicted Class ", prediction)
如果你想用自己搭建的网络进行推断,比如我们前面搭建的SimpleNet,你只需替换模型的加载部分:
# Load the saved state dict. map_location="cpu" lets a checkpoint that was
# written from a GPU model load on a machine without CUDA as well.
checkpoint = torch.load("pathtosavemodel", map_location="cpu")
model = SimpleNet(num_classes=10)
model.load_state_dict(checkpoint)
# Switch to evaluation mode before running inference.
model.eval()
尚无评论!