Browse Source

Improve LeNet5 & AlexNet

pull/10/MERGE
bushuhui 3 years ago
parent
commit
30c11c317f
6 changed files with 122 additions and 71 deletions
  1. +1
    -1
      .gitignore
  2. +72
    -45
      7_deep_learning/1_CNN/02-LeNet5.ipynb
  3. +43
    -16
      7_deep_learning/1_CNN/03-AlexNet.ipynb
  4. +1
    -4
      7_deep_learning/1_CNN/04-vgg.ipynb
  5. +1
    -1
      7_deep_learning/1_CNN/08-batch-normalization.ipynb
  6. +4
    -4
      7_deep_learning/1_CNN/utils.py

+ 1
- 1
.gitignore View File

@@ -3,4 +3,4 @@
*.tar.gz
*.pth
__pycache__
fig-res
fig-res*

+ 72
- 45
7_deep_learning/1_CNN/02-LeNet5.ipynb View File

@@ -20,23 +20,13 @@
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"\n",
"import numpy as np\n",
"import torch\n",
"from torch import nn\n",
"from torch.autograd import Variable\n",
"import torch.nn.functional as F\n",
"from torchvision.datasets import CIFAR10\n",
"from torchvision import transforms as tfs\n",
"\n",
"\n",
"class LeNet5(nn.Module):\n",
" def __init__(self):\n",
@@ -64,23 +54,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LeNet5(\n",
" (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))\n",
" (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
" (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
" (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
" (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
")\n"
]
}
],
"outputs": [],
"source": [
"net = LeNet5()\n",
"print(net)"
@@ -89,54 +65,103 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"input = torch.randn(1, 1, 32, 32)\n",
"out = net(input)\n",
"print(out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from torchvision.datasets import mnist\n",
"from torch.utils.data import DataLoader\n",
"from torchvision.datasets import mnist \n",
"from torchvision import transforms as tfs\n",
"from utils import train\n",
"\n",
"# 使用数据增强\n",
"def data_tf(x):\n",
" im_aug = tfs.Compose([\n",
" tfs.Resize(32),\n",
" tfs.ToTensor(),\n",
" tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
" tfs.ToTensor() #,\n",
" #tfs.Normalize([0.5], [0.5])\n",
" ])\n",
" x = im_aug(x)\n",
" return x\n",
" \n",
"train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
"train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True) \n",
"train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
"test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
"test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
"test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True) \n",
"test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 显示其中一个数据\n",
"import matplotlib.pyplot as plt\n",
"plt.imshow(train_set.data[0], cmap='gray')\n",
"plt.title('%i' % train_set.targets[0])\n",
"plt.colorbar()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"net = LeNet5()\n",
"optimizer = torch.optim.Adam(net.parameters(), lr=1e-1)\n",
"criterion = nn.CrossEntropyLoss()"
"# 显示转化后的图像\n",
"for im, label in train_data:\n",
" print(im.shape)\n",
" print(label.shape)\n",
" \n",
" img = im[0,0,:,:]\n",
" lab = label[0]\n",
" plt.imshow(img, cmap='gray')\n",
" plt.title('%i' % lab)\n",
" plt.colorbar()\n",
" plt.show()\n",
"\n",
" print(im[0,0,:,:])\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"scrolled": false
},
"outputs": [],
"source": [
"net = LeNet5()\n",
"optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"res = train(net, train_data, test_data, 20, \n",
" optimizer, criterion,\n",
" use_cuda=False)"
" use_cuda=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
@@ -145,6 +170,7 @@
"plt.plot(res[0], label='train')\n",
"plt.plot(res[2], label='valid')\n",
"plt.xlabel('epoch')\n",
"plt.ylabel('Loss')\n",
"plt.legend(loc='best')\n",
"plt.savefig('fig-res-lenet5-train-validate-loss.pdf')\n",
"plt.show()\n",
@@ -152,6 +178,7 @@
"plt.plot(res[1], label='train')\n",
"plt.plot(res[3], label='valid')\n",
"plt.xlabel('epoch')\n",
"plt.ylabel('Acc')\n",
"plt.legend(loc='best')\n",
"plt.savefig('fig-res-lenet5-train-validate-acc.pdf')\n",
"plt.show()"
@@ -174,7 +201,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
"version": "3.7.9"
}
},
"nbformat": 4,


+ 43
- 16
7_deep_learning/1_CNN/03-AlexNet.ipynb View File

@@ -12,10 +12,8 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
@@ -25,7 +23,7 @@
" def __init__(self, num_classes=1000, init_weights=False): \n",
" super(AlexNet, self).__init__()\n",
" self.features = nn.Sequential( \n",
" nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), \n",
" nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=2), \n",
" nn.ReLU(inplace=True), #inplace 可以载入更大模型\n",
" nn.MaxPool2d(kernel_size=3, stride=2), \n",
"\n",
@@ -76,16 +74,12 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"\n",
"from torchvision.datasets import CIFAR10\n",
"from torch.utils.data import DataLoader\n",
"from torchvision import transforms as tfs\n",
"from utils import train\n",
"\n",
@@ -103,14 +97,47 @@
"train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
"train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
"test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
"test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
"\n",
"test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"net = AlexNet(num_classes=10)\n",
"optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)\n",
"optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"res = train(net, train_data, test_data, 20, optimizer, criterion, use_cuda=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"plt.plot(res[0], label='train')\n",
"plt.plot(res[2], label='valid')\n",
"plt.xlabel('epoch')\n",
"plt.ylabel('Loss')\n",
"plt.legend(loc='best')\n",
"plt.savefig('fig-res-alexnet-train-validate-loss.pdf')\n",
"plt.show()\n",
"\n",
"plt.plot(res[1], label='train')\n",
"plt.plot(res[3], label='valid')\n",
"plt.xlabel('epoch')\n",
"plt.ylabel('Acc')\n",
"plt.legend(loc='best')\n",
"plt.savefig('fig-res-alexnet-train-validate-acc.pdf')\n",
"plt.show()"
]
}
],
"metadata": {
@@ -129,7 +156,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
"version": "3.7.9"
}
},
"nbformat": 4,


+ 1
- 4
7_deep_learning/1_CNN/04-vgg.ipynb View File

@@ -74,9 +74,6 @@
},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"\n",
"import numpy as np\n",
"import torch\n",
"from torch import nn\n",
@@ -478,7 +475,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
"version": "3.7.9"
}
},
"nbformat": 4,


+ 1
- 1
7_deep_learning/1_CNN/08-batch-normalization.ipynb View File

@@ -588,7 +588,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
"version": "3.7.9"
}
},
"nbformat": 4,


+ 4
- 4
7_deep_learning/1_CNN/utils.py View File

@@ -57,11 +57,11 @@ def train(net, train_data, valid_data, num_epochs, optimizer, criterion, use_cud
net = net.eval()
for im, label in valid_data:
if use_cuda and torch.cuda.is_available():
im = Variable(im.cuda(), volatile=True)
label = Variable(label.cuda(), volatile=True)
im = Variable(im.cuda())
label = Variable(label.cuda())
else:
im = Variable(im, volatile=True)
label = Variable(label, volatile=True)
im = Variable(im)
label = Variable(label)
output = net(im)
loss = criterion(output, label)
valid_loss += loss.item()


Loading…
Cancel
Save