Deep Learning from Scratch (Python implementation) -- Chapter 5: Error Backpropagation 03


# _*_ coding:UTF-8 _*_
# File name: Error_BackPropagation_03.py
# IDE: PyCharm
"""
The big picture of neural network learning
Premise: there exist suitable weights and biases; the process of adjusting them is called "learning",
which consists of the following four steps:
1. Mini-batch: randomly select a portion of the data from the training set
2. Compute the gradient: compute the gradient of the loss function with respect to each weight parameter
3. Update the parameters: update the weight parameters by a small amount in the direction of the gradient
4. Repeat: repeat steps 1, 2 and 3
"""
import sys, os
sys.path.append(os.pardir)  # so that files in the parent directory can be imported
import numpy as np
from dataset.mnist import load_mnist
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """
        Initialize the network
        :param input_size: number of neurons in the input layer
        :param hidden_size: number of neurons in the hidden layer
        :param output_size: number of neurons in the output layer
        :param weight_init_std: scale (standard deviation) of the initial weights
        """
        self.params = {}  # weights and biases
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # build the layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # x: input data, t: supervision (label) data
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # gradient by numerical differentiation (slow; used to verify backpropagation)
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    # gradient by error backpropagation (fast)
    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect the gradients stored in each layer
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads


(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # compute the gradient by error backpropagation
    # grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)

    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

# output (train accuracy, test accuracy, once per epoch):
# 0.1954 0.1975
# 0.9013333333333333 0.9078
# 0.9248833333333333 0.9276
# 0.9382166666666667 0.9378
# 0.9467166666666667 0.9422
# 0.9510666666666666 0.9471
# 0.9543166666666667 0.9504
# 0.9606833333333333 0.9565
# 0.9627333333333333 0.9588
# 0.9663333333333334 0.9629
# 0.9692333333333333 0.9642
# 0.9708666666666667 0.9659
# 0.9727666666666667 0.9668
# 0.9733833333333334 0.9683
# 0.97365 0.9677
# 0.97655 0.9701
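The script imports Relu, Affine, and SoftmaxWithLoss from the book's common.layers package and relies on their forward/backward methods and on the dW/db attributes that Affine stores during backward. For readers who do not have the book's repository at hand, the sketch below shows, under simplifying assumptions, what those layers implement; the softmax and cross_entropy_error helpers here are minimal stand-ins (in the book they live in common.functions and handle more input shapes).

```python
import numpy as np

def softmax(x):
    x = x - np.max(x, axis=-1, keepdims=True)  # subtract the max for numerical stability
    return np.exp(x) / np.sum(np.exp(x), axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    # assumes t is one-hot; average cross-entropy over the batch
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size

class Relu:
    def __init__(self):
        self.mask = None
    def forward(self, x):
        self.mask = (x <= 0)      # remember which elements were clipped
        out = x.copy()
        out[self.mask] = 0
        return out
    def backward(self, dout):
        dout[self.mask] = 0       # gradient does not flow through clipped elements
        return dout

class Affine:
    def __init__(self, W, b):
        self.W, self.b = W, b
        self.x = None
        self.dW, self.db = None, None
    def forward(self, x):
        self.x = x
        return np.dot(x, self.W) + self.b
    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)   # read out later as layer.dW
        self.db = np.sum(dout, axis=0)     # read out later as layer.db
        return dx

class SoftmaxWithLoss:
    def __init__(self):
        self.loss, self.y, self.t = None, None, None
    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss
    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        # gradient of softmax + cross-entropy with respect to the input scores
        return (self.y - self.t) / batch_size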
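The class keeps both numerical_gradient (numerical differentiation) and gradient (error backpropagation). A common way to gain confidence in the backprop implementation is a gradient check: compute both on a tiny batch and compare. The following is a minimal sketch, assuming TwoLayerNet and load_mnist are available exactly as above; the per-parameter differences should come out very small (e.g. around 1e-10 to 1e-8).

```python
import numpy as np
from dataset.mnist import load_mnist

(x_train, t_train), _ = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x_batch = x_train[:3]   # only a few samples, because numerical differentiation is slow
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    # mean absolute difference between the two gradients for each parameter
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))
```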
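The training loop records train_acc_list and test_acc_list but never visualizes them. A short sketch for plotting the two accuracy curves, assuming matplotlib is installed and the lists from the run above are in scope:

```python
import numpy as np
import matplotlib.pyplot as plt

epochs = np.arange(len(train_acc_list))
plt.plot(epochs, train_acc_list, label='train acc')
plt.plot(epochs, test_acc_list, linestyle='--', label='test acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
```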