图像增广(image augmentation)通过对训练图像做一系列随机改变来提高模型的泛化能力:随机裁剪、翻转等操作可以减轻模型对物体出现位置的依赖,还可以调整亮度、色彩等来降低模型对颜色的敏感度。
import time

import matplotlib.pyplot as plt
import torch
import torchvision
from PIL import Image
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Sample picture that the augmentation demos operate on.
image = Image.open('cat.jpg')
plt.imshow(image)
# Notebook output: <matplotlib.image.AxesImage at 0x7fec61069240>


def show_images(imgs, num_rows, num_cols, scale=2):
    """Draw ``imgs`` on a ``num_rows`` x ``num_cols`` grid with hidden axes.

    Args:
        imgs: Indexable collection holding at least ``num_rows * num_cols``
            images that ``Axes.imshow`` can display; it is read in row-major
            order.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        scale: Size of each grid cell (figure size is
            ``(num_cols * scale, num_rows * scale)``).

    Returns:
        The two-dimensional array of matplotlib Axes that was drawn on.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column; without it `axes[i][j]` fails for 1 x N and N x 1 grids.
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    for i in range(num_rows):
        for j in range(num_cols):
            ax = axes[i][j]
            ax.imshow(imgs[i * num_cols + j])
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    return axes


def apply(img, aug, num_rows=2, num_cols=4, scale=3):
    """Apply ``aug`` to ``img`` repeatedly and show every result in a grid.

    ``aug`` is called ``num_rows * num_cols`` times on the same input, so a
    random augmentation yields a different picture in each cell.

    Args:
        img: Input image passed unchanged to ``aug`` on every call.
        aug: Callable mapping an image to an augmented image.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        scale: Cell size forwarded to ``show_images``.
    """
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    show_images(Y, num_rows, num_cols, scale)


# Image augmentation covers cropping, random cropping, flipping, rotation,
# colour and contrast changes and so on; the names are self-explanatory, so
# they are not described one by one here.
# Random flips: first left-right, then up-down.
for flip_cls in (torchvision.transforms.RandomHorizontalFlip,
                 torchvision.transforms.RandomVerticalFlip):
    apply(image, flip_cls())

# Random crop (scale 0.1-1, aspect ratio 0.5-2) resized to 200 pixels.
shape_aug = torchvision.transforms.RandomResizedCrop(
    200,
    scale=(0.1, 1),
    ratio=(0.5, 2),
)
apply(image, shape_aug)

# Colour can be changed in four respects:
# brightness, contrast, saturation and hue.
# A jitter amount of 0.5 varies the value by up to 0.5x in either direction,
# i.e. a factor between 0.5 and 1.5.
apply(image, torchvision.transforms.ColorJitter(brightness=0.5))
apply(image, torchvision.transforms.ColorJitter(hue=0.5))
# Several properties can also be jittered at the same time.
apply(
    image,
    torchvision.transforms.ColorJitter(
        brightness=0.5, saturation=0.5, contrast=0.5
    ),
)

# Several augmentations can be stacked with Compose.
augs = torchvision.transforms.Compose(
    [torchvision.transforms.RandomHorizontalFlip(), shape_aug]
)
apply(image, augs)

# To get deterministic results at prediction time, image augmentation is
# usually applied to training samples only; augmentations that contain random
# operations are not used when predicting.
微调(fine-tuning)是迁移学习中的常用技术。(之所以需要它,就是因为目标数据集不够大,从头训练容易过拟合。)
微调由以下四步构成:(1)在源数据集上预训练一个网络模型,即源模型。(2)创建新的神经网络模型(目标模型),它复制了源模型上除了输出层外的所有模型设计和参数。(3)为目标模型添加输出层,其输出个数等于目标数据集的类别数,并随机初始化该层的模型参数。(4)在目标数据集上训练目标模型:输出层从头训练,而其余层的参数都是基于源模型的参数微调得到。torchvision 的 models 包提供了常用的预训练模型。
import os

from torchvision import models
from torchvision import transforms
from torchvision.datasets import ImageFolder

data_dir = './'
os.listdir(os.path.join(data_dir, 'hotdog'))
# Notebook output: ['test', 'train']

# Shell cell: !ls -al hotdog
#   total 2
#   drwxrwxr-x 4 admin admin 4096 Sep 3 21:50 .
#   drwxr-xr-x 10 admin admin 4096 Sep 3 22:42 ..
#   drwxrwxr-x 4 admin admin 4096 Oct 28 2017 test
#   drwxrwxr-x 5 admin admin 4096 Sep 3 21:27 train

# Create ImageFolder instances that read every file of the training set and
# of the test set respectively.
train_imgs = ImageFolder(os.path.join(data_dir, 'hotdog/train'))
print(train_imgs.class_to_idx)
test_imgs = ImageFolder(os.path.join(data_dir, 'hotdog/test'))
print(test_imgs.class_to_idx)
# Printed output:
#   {'hotdog': 0, 'not-hotdog': 1}
#   {'hotdog': 0, 'not-hotdog': 1}

# Shell cell: rm -rf hotdog/train/.ipynb_checkpoints/

# Take a look at the first eight and the last eight training samples; element
# [1] of each sample is its class index.
hotdogs = [train_imgs[idx][1] for idx in range(8)]
not_hotdogs = [train_imgs[-(idx + 1)][1] for idx in range(8)]
not_hotdogs
# Notebook output: [1, 1, 1, 1, 1, 1, 1, 1]

# During training a random region is cropped from the image and scaled to a
# 224-pixel input. During testing the image is first resized to 256 pixels and
# the central 224-pixel region is cropped as the input. In addition, the RGB
# channel values are standardised: subtract the mean, then divide by the
# standard deviation.
为什么要这样处理呢?
因为使用预训练模型时,输入图像的预处理(输入尺寸以及标准化所用的均值和标准差)一定要和预训练时保持一致。
# Image preprocessing. The per-channel mean/std must match the values that
# were used when the network was pre-trained.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
test_augs = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    normalize,
])

# Build the architecture without downloading weights, then load them from a
# local checkpoint. Calling resnet18() with no arguments is equivalent to the
# deprecated `pretrained=False` keyword.
pretrained_net = models.resnet18()
pretrained_net.load_state_dict(torch.load('resnet18-5c106cde.pth'))
# Notebook output: <All keys matched successfully>
print(pretrained_net.fc)
# Printed output: Linear(in_features=512, out_features=1000, bias=True)

# Replace the output layer with a freshly initialised two-class layer; the
# input width is read from the layer being replaced (512 here).
pretrained_net.fc = nn.Linear(pretrained_net.fc.in_features, 2)
print(pretrained_net.fc)
# Printed output: Linear(in_features=512, out_features=2, bias=True)

# Split the parameters into the new output layer and everything else, so the
# output layer can be trained with a 10x larger learning rate.
output_params = list(map(id, pretrained_net.fc.parameters()))
feature_params = [p for p in pretrained_net.parameters()
                  if id(p) not in output_params]
lr = 0.01
optimizer = optim.SGD(
    [
        {'params': feature_params},
        {'params': pretrained_net.fc.parameters(), 'lr': lr * 10},
    ],
    lr=lr,
    weight_decay=0.001,
)


def evaluate_accuracy(data_iter, net,
                      device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable of ``(X, y)`` batches.
        net: Model to evaluate; the class is taken as ``argmax`` over dim 1 of
            its output.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Switch to evaluation mode once (e.g. turns dropout off) and afterwards
    # restore whichever mode the caller had, instead of toggling per batch and
    # unconditionally ending in training mode.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for X, y in data_iter:
                y = y.to(device)
                y_hat = net(X.to(device))
                acc_sum += (y_hat.argmax(dim=1) == y).float().sum().cpu().item()
                n += y.shape[0]
    finally:
        net.train(was_training)
    return acc_sum / n


def train_fine_tuning(net, optimizer, batch_size=128, num_epochs=5):
    """Fine-tune ``net`` on the hotdog data set and report per-epoch metrics.

    Training images are read from ``hotdog/train`` with ``train_augs`` and
    shuffled; test images are read from ``hotdog/test`` with ``test_augs``.
    Cross-entropy is used as the loss and training runs on the module-level
    ``device``.

    Args:
        net: Model to fine-tune.
        optimizer: Optimizer already bound to the parameters of ``net``.
        batch_size: Batch size of both data loaders.
        num_epochs: Number of passes over the training set.
    """
    train_iter = DataLoader(
        ImageFolder(os.path.join(data_dir, 'hotdog/train'), transform=train_augs),
        batch_size, shuffle=True)
    test_iter = DataLoader(
        ImageFolder(os.path.join(data_dir, 'hotdog/test'), transform=test_augs),
        batch_size)
    loss = torch.nn.CrossEntropyLoss()
    train(train_iter, test_iter, net, loss, optimizer, device, num_epochs)


def train(train_iter, test_iter, net, loss, optimizer, device, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics per epoch.

    After every epoch one line is printed with the mean training loss per
    batch, the training accuracy, the test accuracy and the elapsed time.

    Args:
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` test batches.
        net: Model to train; it is moved to ``device``.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        optimizer: Optimizer updating the parameters of ``net``.
        device: Device used for training and evaluation.
        num_epochs: Number of passes over ``train_iter``.
    """
    net = net.to(device)
    print('training on ', device)
    for epoch in range(num_epochs):
        # batch_count is reset together with the running sums. Keeping it
        # across epochs would divide each epoch's loss sum by the cumulative
        # batch count and understate the loss from the second epoch on.
        train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
        start = time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        # Evaluate on the same device the model was trained on.
        test_acc = evaluate_accuracy(test_iter, net, device)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n,
                 test_acc, time.time() - start))


train_fine_tuning(pretrained_net, optimizer)
# Recorded notebook output. It was produced before the per-epoch reset of
# batch_count, so the loss values of epochs 2-5 are understated:
#   training on cuda
#   epoch 1, loss 0.2794, train acc 0.917, test acc 0.916, time 31.2 sec
#   epoch 2, loss 0.1227, train acc 0.926, test acc 0.908, time 31.5 sec
#   epoch 3, loss 0.1145, train acc 0.901, test acc 0.938, time 31.3 sec
#   epoch 4, loss 0.0390, train acc 0.950, test acc 0.935, time 31.1 sec
#   epoch 5, loss 0.0276, train acc 0.949, test acc 0.949, time 30.9 sec

# The result is clear: thanks to transfer learning the accuracy is high, and
# more than 90% is already reached in the first training epoch.