From 30c11c317fe463bb62160e7fec12d510449c8330 Mon Sep 17 00:00:00 2001
From: bushuhui
Date: Fri, 18 Feb 2022 18:57:45 +0800
Subject: [PATCH] Improve LeNet5 & AlexNet

---
 .gitignore                                         |   2 +-
 7_deep_learning/1_CNN/02-LeNet5.ipynb              | 117 +++++++++++++--------
 7_deep_learning/1_CNN/03-AlexNet.ipynb             |  59 ++++++++---
 7_deep_learning/1_CNN/04-vgg.ipynb                 |   5 +-
 7_deep_learning/1_CNN/08-batch-normalization.ipynb |   2 +-
 7_deep_learning/1_CNN/utils.py                     |   8 +-
 6 files changed, 122 insertions(+), 71 deletions(-)

diff --git a/.gitignore b/.gitignore
index c5f40fa..ea59330 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,4 @@
 *.tar.gz
 *.pth
 __pycache__
-fig-res
+fig-res*
diff --git a/7_deep_learning/1_CNN/02-LeNet5.ipynb b/7_deep_learning/1_CNN/02-LeNet5.ipynb
index 3512f55..0390477 100644
--- a/7_deep_learning/1_CNN/02-LeNet5.ipynb
+++ b/7_deep_learning/1_CNN/02-LeNet5.ipynb
@@ -20,23 +20,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "import sys\n",
-    "sys.path.append('..')\n",
-    "\n",
-    "import numpy as np\n",
     "import torch\n",
     "from torch import nn\n",
-    "from torch.autograd import Variable\n",
     "import torch.nn.functional as F\n",
-    "from torchvision.datasets import CIFAR10\n",
-    "from torchvision import transforms as tfs\n",
-    "\n",
     "\n",
     "class LeNet5(nn.Module):\n",
     "    def __init__(self):\n",
@@ -64,23 +54,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LeNet5(\n",
-      "  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))\n",
-      "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
-      "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
-      "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
-      "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
-      ")\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "net = LeNet5()\n",
     "print(net)"
@@ -89,54 +65,103 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
+    "input = torch.randn(1, 1, 32, 32)\n",
+    "out = net(input)\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
     "from torchvision.datasets import mnist\n",
     "from torch.utils.data import DataLoader\n",
+    "from torchvision.datasets import mnist \n",
+    "from torchvision import transforms as tfs\n",
     "from utils import train\n",
     "\n",
     "# 使用数据增强\n",
     "def data_tf(x):\n",
     "    im_aug = tfs.Compose([\n",
     "        tfs.Resize(32),\n",
-    "        tfs.ToTensor(),\n",
-    "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
+    "        tfs.ToTensor() #,\n",
+    "        #tfs.Normalize([0.5], [0.5])\n",
     "    ])\n",
     "    x = im_aug(x)\n",
     "    return x\n",
     " \n",
-    "train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
+    "train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True) \n",
     "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
-    "test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
-    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
+    "test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True) \n",
+    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 显示其中一个数据\n",
+    "import matplotlib.pyplot as plt\n",
+    "plt.imshow(train_set.data[0], cmap='gray')\n",
+    "plt.title('%i' % train_set.targets[0])\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
     "\n",
-    "net = LeNet5()\n",
-    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-1)\n",
-    "criterion = nn.CrossEntropyLoss()"
+    "# 显示转化后的图像\n",
+    "for im, label in train_data:\n",
+    "    print(im.shape)\n",
+    "    print(label.shape)\n",
+    "    \n",
+    "    img = im[0,0,:,:]\n",
+    "    lab = label[0]\n",
+    "    plt.imshow(img, cmap='gray')\n",
+    "    plt.title('%i' % lab)\n",
+    "    plt.colorbar()\n",
+    "    plt.show()\n",
+    "\n",
+    "    print(im[0,0,:,:])\n",
+    "    break"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "scrolled": false
    },
    "outputs": [],
    "source": [
+    "net = LeNet5()\n",
+    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "\n",
     "res = train(net, train_data, test_data, 20, \n",
     "            optimizer, criterion,\n",
-    "            use_cuda=False)"
+    "            use_cuda=True)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
@@ -145,6 +170,7 @@
     "plt.plot(res[0], label='train')\n",
     "plt.plot(res[2], label='valid')\n",
     "plt.xlabel('epoch')\n",
+    "plt.ylabel('Loss')\n",
     "plt.legend(loc='best')\n",
     "plt.savefig('fig-res-lenet5-train-validate-loss.pdf')\n",
     "plt.show()\n",
@@ -152,6 +178,7 @@
     "plt.plot(res[1], label='train')\n",
     "plt.plot(res[3], label='valid')\n",
     "plt.xlabel('epoch')\n",
+    "plt.ylabel('Acc')\n",
     "plt.legend(loc='best')\n",
     "plt.savefig('fig-res-lenet5-train-validate-acc.pdf')\n",
     "plt.show()"
@@ -174,7 +201,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
   }
  },
  "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/03-AlexNet.ipynb b/7_deep_learning/1_CNN/03-AlexNet.ipynb
index 3ea3fd8..efcaa08 100644
--- a/7_deep_learning/1_CNN/03-AlexNet.ipynb
+++ b/7_deep_learning/1_CNN/03-AlexNet.ipynb
@@ -12,10 +12,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 1,
+   "metadata": {},
    "outputs": [],
    "source": [
     "import torch.nn as nn\n",
@@ -25,7 +23,7 @@
     "    def __init__(self, num_classes=1000, init_weights=False): \n",
     "        super(AlexNet, self).__init__()\n",
     "        self.features = nn.Sequential( \n",
-    "            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), \n",
+    "            nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=2), \n",
     "            nn.ReLU(inplace=True), #inplace 可以载入更大模型\n",
     "            nn.MaxPool2d(kernel_size=3, stride=2), \n",
     "\n",
@@ -76,16 +74,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 2,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "import sys\n",
-    "sys.path.append('..')\n",
-    "\n",
     "from torchvision.datasets import CIFAR10\n",
+    "from torch.utils.data import DataLoader\n",
     "from torchvision import transforms as tfs\n",
     "from utils import train\n",
     "\n",
@@ -103,14 +97,47 @@
     "train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
     "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
     "test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
-    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
-    "\n",
+    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "net = AlexNet(num_classes=10)\n",
-    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)\n",
+    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
     "criterion = nn.CrossEntropyLoss()\n",
     "\n",
     "res = train(net, train_data, test_data, 20, optimizer, criterion, use_cuda=False)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "plt.plot(res[0], label='train')\n",
+    "plt.plot(res[2], label='valid')\n",
+    "plt.xlabel('epoch')\n",
+    "plt.ylabel('Loss')\n",
+    "plt.legend(loc='best')\n",
+    "plt.savefig('fig-res-alexnet-train-validate-loss.pdf')\n",
+    "plt.show()\n",
+    "\n",
+    "plt.plot(res[1], label='train')\n",
+    "plt.plot(res[3], label='valid')\n",
+    "plt.xlabel('epoch')\n",
+    "plt.ylabel('Acc')\n",
+    "plt.legend(loc='best')\n",
+    "plt.savefig('fig-res-alexnet-train-validate-acc.pdf')\n",
+    "plt.show()"
+   ]
   }
  ],
  "metadata": {
@@ -129,7 +156,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
   }
  },
  "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/04-vgg.ipynb b/7_deep_learning/1_CNN/04-vgg.ipynb
index 8149c64..96ee1da 100644
--- a/7_deep_learning/1_CNN/04-vgg.ipynb
+++ b/7_deep_learning/1_CNN/04-vgg.ipynb
@@ -74,9 +74,6 @@
   },
    "outputs": [],
    "source": [
-    "import sys\n",
-    "sys.path.append('..')\n",
-    "\n",
     "import numpy as np\n",
     "import torch\n",
     "from torch import nn\n",
@@ -478,7 +475,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
   }
  },
  "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/08-batch-normalization.ipynb b/7_deep_learning/1_CNN/08-batch-normalization.ipynb
index 5f11ffa..a00de6e 100644
--- a/7_deep_learning/1_CNN/08-batch-normalization.ipynb
+++ b/7_deep_learning/1_CNN/08-batch-normalization.ipynb
@@ -588,7 +588,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.7.9"
  }
 },
 "nbformat": 4,
diff --git a/7_deep_learning/1_CNN/utils.py b/7_deep_learning/1_CNN/utils.py
index f4b5a33..ca22ef4 100644
--- a/7_deep_learning/1_CNN/utils.py
+++ b/7_deep_learning/1_CNN/utils.py
@@ -57,11 +57,11 @@ def train(net, train_data, valid_data, num_epochs, optimizer, criterion, use_cud
             net = net.eval()
             for im, label in valid_data:
                 if use_cuda and torch.cuda.is_available():
-                    im = Variable(im.cuda(), volatile=True)
-                    label = Variable(label.cuda(), volatile=True)
+                    im = Variable(im.cuda())
+                    label = Variable(label.cuda())
                 else:
-                    im = Variable(im, volatile=True)
-                    label = Variable(label, volatile=True)
+                    im = Variable(im)
+                    label = Variable(label)
                 output = net(im)
                 loss = criterion(output, label)
                 valid_loss += loss.item()