From 30c11c317fe463bb62160e7fec12d510449c8330 Mon Sep 17 00:00:00 2001
From: bushuhui
Date: Fri, 18 Feb 2022 18:57:45 +0800
Subject: [PATCH] Improve LeNet5 & AlexNet

---
 .gitignore                                         |   2 +-
 7_deep_learning/1_CNN/02-LeNet5.ipynb              | 117 +++++++++++++--------
 7_deep_learning/1_CNN/03-AlexNet.ipynb             |  59 ++++++++---
 7_deep_learning/1_CNN/04-vgg.ipynb                 |   5 +-
 7_deep_learning/1_CNN/08-batch-normalization.ipynb |   2 +-
 7_deep_learning/1_CNN/utils.py                     |   8 +-
 6 files changed, 122 insertions(+), 71 deletions(-)

diff --git a/.gitignore b/.gitignore
index c5f40fa..ea59330 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,4 @@
 *.tar.gz
 *.pth
 __pycache__
-fig-res
+fig-res*
diff --git a/7_deep_learning/1_CNN/02-LeNet5.ipynb b/7_deep_learning/1_CNN/02-LeNet5.ipynb
index 3512f55..0390477 100644
--- a/7_deep_learning/1_CNN/02-LeNet5.ipynb
+++ b/7_deep_learning/1_CNN/02-LeNet5.ipynb
@@ -20,23 +20,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "import sys\n",
-    "sys.path.append('..')\n",
-    "\n",
-    "import numpy as np\n",
     "import torch\n",
     "from torch import nn\n",
-    "from torch.autograd import Variable\n",
     "import torch.nn.functional as F\n",
-    "from torchvision.datasets import CIFAR10\n",
-    "from torchvision import transforms as tfs\n",
-    "\n",
     "\n",
     "class LeNet5(nn.Module):\n",
     "    def __init__(self):\n",
@@ -64,23 +54,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LeNet5(\n",
-      "  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))\n",
-      "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
-      "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
-      "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
-      "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
-      ")\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "net = LeNet5()\n",
     "print(net)"
@@ -89,54 +65,103 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
+    "input = torch.randn(1, 1, 32, 32)\n",
+    "out = net(input)\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
     "from torchvision.datasets import mnist\n",
     "from torch.utils.data import DataLoader\n",
+    "from torchvision.datasets import mnist \n",
+    "from torchvision import transforms as tfs\n",
     "from utils import train\n",
     "\n",
     "# 使用数据增强\n",
     "def data_tf(x):\n",
     "    im_aug = tfs.Compose([\n",
     "        tfs.Resize(32),\n",
-    "        tfs.ToTensor(),\n",
-    "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
+    "        tfs.ToTensor() #,\n",
+    "        #tfs.Normalize([0.5], [0.5])\n",
     "    ])\n",
     "    x = im_aug(x)\n",
     "    return x\n",
     " \n",
-    "train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
+    "train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True) \n",
     "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
-    "test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
-    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
+    "test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True) \n",
+    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 显示其中一个数据\n",
+    "import matplotlib.pyplot as plt\n",
+    "plt.imshow(train_set.data[0], cmap='gray')\n",
+    "plt.title('%i' % train_set.targets[0])\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
     "\n",
-    "net = LeNet5()\n",
-    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-1)\n",
-    "criterion = nn.CrossEntropyLoss()"
+    "# 显示转化后的图像\n",
+    "for im, label in train_data:\n",
+    "    print(im.shape)\n",
+    "    print(label.shape)\n",
+    "    \n",
+    "    img = im[0,0,:,:]\n",
+    "    lab = label[0]\n",
+    "    plt.imshow(img, cmap='gray')\n",
+    "    plt.title('%i' % lab)\n",
+    "    plt.colorbar()\n",
+    "    plt.show()\n",
+    "\n",
+    "    print(im[0,0,:,:])\n",
+    "    break"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "scrolled": false
    },
    "outputs": [],
    "source": [
+    "net = LeNet5()\n",
+    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "\n",
     "res = train(net, train_data, test_data, 20, \n",
     "            optimizer, criterion,\n",
-    "            use_cuda=False)"
+    "            use_cuda=True)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
@@ -145,6 +170,7 @@
     "plt.plot(res[0], label='train')\n",
     "plt.plot(res[2], label='valid')\n",
     "plt.xlabel('epoch')\n",
+    "plt.ylabel('Loss')\n",
     "plt.legend(loc='best')\n",
     "plt.savefig('fig-res-lenet5-train-validate-loss.pdf')\n",
     "plt.show()\n",
@@ -152,6 +178,7 @@
     "plt.plot(res[1], label='train')\n",
     "plt.plot(res[3], label='valid')\n",
     "plt.xlabel('epoch')\n",
+    "plt.ylabel('Acc')\n",
     "plt.legend(loc='best')\n",
     "plt.savefig('fig-res-lenet5-train-validate-acc.pdf')\n",
     "plt.show()"
@@ -174,7 +201,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
   }
  },
  "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/03-AlexNet.ipynb b/7_deep_learning/1_CNN/03-AlexNet.ipynb
index 3ea3fd8..efcaa08 100644
--- a/7_deep_learning/1_CNN/03-AlexNet.ipynb
+++ b/7_deep_learning/1_CNN/03-AlexNet.ipynb
@@ -12,10 +12,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 1,
+   "metadata": {},
    "outputs": [],
    "source": [
     "import torch.nn as nn\n",
@@ -25,7 +23,7 @@
     "    def __init__(self, num_classes=1000, init_weights=False): \n",
     "        super(AlexNet, self).__init__()\n",
     "        self.features = nn.Sequential( \n",
-    "            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), \n",
+    "            nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=2), \n",
     "            nn.ReLU(inplace=True), #inplace 可以载入更大模型\n",
     "            nn.MaxPool2d(kernel_size=3, stride=2), \n",
     "\n",
@@ -76,16 +74,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 2,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "import sys\n",
-    "sys.path.append('..')\n",
-    "\n",
     "from torchvision.datasets import CIFAR10\n",
+    "from torch.utils.data import DataLoader\n",
     "from torchvision import transforms as tfs\n",
     "from utils import train\n",
     "\n",
@@ -103,14 +97,47 @@
     "train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
     "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
     "test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
-    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
-    "\n",
+    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "net = AlexNet(num_classes=10)\n",
-    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)\n",
+    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
     "criterion = nn.CrossEntropyLoss()\n",
     "\n",
     "res = train(net, train_data, test_data, 20, optimizer, criterion, use_cuda=False)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "plt.plot(res[0], label='train')\n",
+    "plt.plot(res[2], label='valid')\n",
+    "plt.xlabel('epoch')\n",
+    "plt.ylabel('Loss')\n",
+    "plt.legend(loc='best')\n",
+    "plt.savefig('fig-res-alexnet-train-validate-loss.pdf')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.plot(res[1], label='train')\n",
+    "plt.plot(res[3], label='valid')\n",
+    "plt.xlabel('epoch')\n",
+    "plt.ylabel('Acc')\n",
+    "plt.legend(loc='best')\n",
+    "plt.savefig('fig-res-alexnet-train-validate-acc.pdf')\n",
+    "plt.show()"
+   ]
   }
  ],
  "metadata": {
@@ -129,7 +156,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
   }
  },
  "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/04-vgg.ipynb b/7_deep_learning/1_CNN/04-vgg.ipynb
index 8149c64..96ee1da 100644
--- a/7_deep_learning/1_CNN/04-vgg.ipynb
+++ b/7_deep_learning/1_CNN/04-vgg.ipynb
@@ -74,9 +74,6 @@
   },
    "outputs": [],
    "source": [
-    "import sys\n",
-    "sys.path.append('..')\n",
-    "\n",
     "import numpy as np\n",
     "import torch\n",
     "from torch import nn\n",
@@ -478,7 +475,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
   }
  },
  "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/08-batch-normalization.ipynb b/7_deep_learning/1_CNN/08-batch-normalization.ipynb
index 5f11ffa..a00de6e 100644
--- a/7_deep_learning/1_CNN/08-batch-normalization.ipynb
+++ b/7_deep_learning/1_CNN/08-batch-normalization.ipynb
@@ -588,7 +588,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
  }
 },
 "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/utils.py b/7_deep_learning/1_CNN/utils.py
index f4b5a33..ca22ef4 100644
--- a/7_deep_learning/1_CNN/utils.py
+++ b/7_deep_learning/1_CNN/utils.py
@@ -57,11 +57,11 @@ def train(net, train_data, valid_data, num_epochs, optimizer, criterion, use_cud
             net = net.eval()
             for im, label in valid_data:
                 if use_cuda and torch.cuda.is_available():
-                    im = Variable(im.cuda(), volatile=True)
-                    label = Variable(label.cuda(), volatile=True)
+                    im = Variable(im.cuda())
+                    label = Variable(label.cuda())
                 else:
-                    im = Variable(im, volatile=True)
-                    label = Variable(label, volatile=True)
+                    im = Variable(im)
+                    label = Variable(label)
                 output = net(im)
                 loss = criterion(output, label)
                 valid_loss += loss.item()