Improve LeNet5 & AlexNet

3 years ago · 30c11c317f
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,4 @@
 *.tar.gz
 *.pth
 __pycache__
 fig-res
 fig-res*
--- a/7_deep_learning/1_CNN/02-LeNet5.ipynb
+++ b/7_deep_learning/1_CNN/02-LeNet5.ipynb
@@ -20,23 +20,13 @@
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "from torch import nn\n",
    "from torch.autograd import Variable\n",
    "import torch.nn.functional as F\n",
    "from torchvision.datasets import CIFAR10\n",
    "from torchvision import transforms as tfs\n",
    "\n",
    "\n",
    "class LeNet5(nn.Module):\n",
    "    def __init__(self):\n",
@@ -64,23 +54,9 @@
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LeNet5(\n",
      "  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))\n",
      "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
      "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
      "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
      "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "outputs": [],
   "source": [
    "net = LeNet5()\n",
    "print(net)"
@@ -89,54 +65,103 @@
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "metadata": {},
   "outputs": [],
   "source": [
    "input = torch.randn(1, 1, 32, 32)\n",
    "out = net(input)\n",
    "print(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from torchvision.datasets import mnist\n",
    "from torch.utils.data import DataLoader\n",
    "from torchvision.datasets import mnist \n",
    "from torchvision import transforms as tfs\n",
    "from utils import train\n",
    "\n",
    "# 使用数据增强\n",
    "def data_tf(x):\n",
    "    im_aug = tfs.Compose([\n",
    "        tfs.Resize(32),\n",
    "        tfs.ToTensor(),\n",
    "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
    "        tfs.ToTensor() #,\n",
    "        #tfs.Normalize([0.5], [0.5])\n",
    "    ])\n",
    "    x = im_aug(x)\n",
    "    return x\n",
    "     \n",
    "train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
    "train_set  = mnist.MNIST('../../data/mnist', train=True,  transform=data_tf, download=True) \n",
    "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
    "test_set  = CIFAR10('../../data', train=False, transform=data_tf)\n",
    "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
    "test_set   = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True) \n",
    "test_data  = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 显示其中一个数据\n",
    "import matplotlib.pyplot as plt\n",
    "plt.imshow(train_set.data[0], cmap='gray')\n",
    "plt.title('%i' % train_set.targets[0])\n",
    "plt.colorbar()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "net = LeNet5()\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-1)\n",
    "criterion = nn.CrossEntropyLoss()"
    "# 显示转化后的图像\n",
    "for im, label in train_data:\n",
    "    print(im.shape)\n",
    "    print(label.shape)\n",
    "    \n",
    "    img = im[0,0,:,:]\n",
    "    lab = label[0]\n",
    "    plt.imshow(img, cmap='gray')\n",
    "    plt.title('%i' % lab)\n",
    "    plt.colorbar()\n",
    "    plt.show()\n",
    "\n",
    "    print(im[0,0,:,:])\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "net = LeNet5()\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "\n",
    "res = train(net, train_data, test_data, 20, \n",
    "            optimizer, criterion,\n",
    "            use_cuda=False)"
    "            use_cuda=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
@@ -145,6 +170,7 @@
    "plt.plot(res[0], label='train')\n",
    "plt.plot(res[2], label='valid')\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('Loss')\n",
    "plt.legend(loc='best')\n",
    "plt.savefig('fig-res-lenet5-train-validate-loss.pdf')\n",
    "plt.show()\n",
@@ -152,6 +178,7 @@
    "plt.plot(res[1], label='train')\n",
    "plt.plot(res[3], label='valid')\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('Acc')\n",
    "plt.legend(loc='best')\n",
    "plt.savefig('fig-res-lenet5-train-validate-acc.pdf')\n",
    "plt.show()"
@@ -174,7 +201,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
--- a/7_deep_learning/1_CNN/03-AlexNet.ipynb
+++ b/7_deep_learning/1_CNN/03-AlexNet.ipynb
@@ -12,10 +12,8 @@
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn as nn\n",
@@ -25,7 +23,7 @@
    "    def __init__(self, num_classes=1000, init_weights=False):   \n",
    "        super(AlexNet, self).__init__()\n",
    "        self.features = nn.Sequential( \n",
    "            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),  \n",
    "            nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=2),  \n",
    "            nn.ReLU(inplace=True), #inplace 可以载入更大模型\n",
    "            nn.MaxPool2d(kernel_size=3, stride=2),       \n",
    "\n",
@@ -76,16 +74,12 @@
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "from torchvision.datasets import CIFAR10\n",
    "from torch.utils.data import DataLoader\n",
    "from torchvision import transforms as tfs\n",
    "from utils import train\n",
    "\n",
@@ -103,14 +97,47 @@
    "train_set  = CIFAR10('../../data', train=True, transform=data_tf)\n",
    "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
    "test_set   = CIFAR10('../../data', train=False, transform=data_tf)\n",
    "test_data  = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
    "\n",
    "test_data  = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "net = AlexNet(num_classes=10)\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)\n",
    "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "\n",
    "res = train(net, train_data, test_data, 20, optimizer, criterion, use_cuda=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "plt.plot(res[0], label='train')\n",
    "plt.plot(res[2], label='valid')\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('Loss')\n",
    "plt.legend(loc='best')\n",
    "plt.savefig('fig-res-alexnet-train-validate-loss.pdf')\n",
    "plt.show()\n",
    "\n",
    "plt.plot(res[1], label='train')\n",
    "plt.plot(res[3], label='valid')\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('Acc')\n",
    "plt.legend(loc='best')\n",
    "plt.savefig('fig-res-alexnet-train-validate-acc.pdf')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
@@ -129,7 +156,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
--- a/7_deep_learning/1_CNN/04-vgg.ipynb
+++ b/7_deep_learning/1_CNN/04-vgg.ipynb
@@ -74,9 +74,6 @@
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "from torch import nn\n",
@@ -478,7 +475,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
--- a/7_deep_learning/1_CNN/08-batch-normalization.ipynb
+++ b/7_deep_learning/1_CNN/08-batch-normalization.ipynb
@@ -588,7 +588,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
--- a/7_deep_learning/1_CNN/utils.py
+++ b/7_deep_learning/1_CNN/utils.py
@@ -57,11 +57,11 @@ def train(net, train_data, valid_data, num_epochs, optimizer, criterion, use_cud
            net = net.eval()
            for im, label in valid_data:
                if use_cuda and torch.cuda.is_available():
                    im = Variable(im.cuda(), volatile=True)
                    label = Variable(label.cuda(), volatile=True)
                    im = Variable(im.cuda())
                    label = Variable(label.cuda())
                else:
                    im = Variable(im, volatile=True)
                    label = Variable(label, volatile=True)
                    im = Variable(im)
                    label = Variable(label)
                output = net(im)
                loss = criterion(output, label)
                valid_loss += loss.item()