
Refine nn, logistic regression

pull/1/MERGE
Shuhui Bu 6 years ago
parent commit b4e8ebcb05
18 changed files with 473 additions and 736 deletions
  1. 2_pytorch/1_NN/deep-nn.ipynb (+4 -4)
  2. 2_pytorch/1_NN/deep-nn.py (+233 -0)
  3. 2_pytorch/1_NN/optimizer/adam.ipynb (+4 -8)
  4. 2_pytorch/1_NN/optimizer/adam.py (+2 -2)
  5. 2_pytorch/2_CNN/batch-normalization.ipynb (+14 -27)
  6. 2_pytorch/2_CNN/batch-normalization.py (+6 -6)
  7. 2_pytorch/2_CNN/densenet.ipynb (+8 -14)
  8. 2_pytorch/2_CNN/densenet.py (+2 -2)
  9. 2_pytorch/3_RNN/time-series/lstm-time-series.ipynb (+1 -1)
  10. 2_pytorch/3_RNN/time-series/lstm-time-series.py (+144 -0)
  11. 2_pytorch/PyTorch_quick_intro.ipynb (+0 -0)
  12. 2_pytorch/PyTorch快速入门.py (+0 -533)
  13. README.md (+3 -4)
  14. demo_code/2_logistic_regression_2.py (+26 -21)
  15. demo_code/2_poly_fitting.py (+1 -1)
  16. demo_code/3_CNN_CIFAR.py (+0 -0)
  17. demo_code/3_NN_FC.py (+25 -3)
  18. demo_code/Neural_Network.0.py (+0 -110)

+4 -4  2_pytorch/1_NN/deep-nn.ipynb

@@ -99,8 +99,8 @@
], ],
"source": [ "source": [
"# 使用内置函数下载 mnist 数据集\n", "# 使用内置函数下载 mnist 数据集\n",
"train_set = mnist.MNIST('./data', train=True, download=True)\n",
"test_set = mnist.MNIST('./data', train=False, download=True)"
"train_set = mnist.MNIST('../../data/mnist', train=True, download=True)\n",
"test_set = mnist.MNIST('../../data/mnist', train=False, download=True)"
] ]
}, },
{ {
@@ -491,7 +491,7 @@
" train_loss += loss.data[0]\n", " train_loss += loss.data[0]\n",
" # 计算分类的准确率\n", " # 计算分类的准确率\n",
" _, pred = out.max(1)\n", " _, pred = out.max(1)\n",
" num_correct = (pred == label).sum().data[0]\n",
" num_correct = float((pred == label).sum().data[0])\n",
" acc = num_correct / im.shape[0]\n", " acc = num_correct / im.shape[0]\n",
" train_acc += acc\n", " train_acc += acc\n",
" \n", " \n",
@@ -510,7 +510,7 @@
" eval_loss += loss.data[0]\n", " eval_loss += loss.data[0]\n",
" # 记录准确率\n", " # 记录准确率\n",
" _, pred = out.max(1)\n", " _, pred = out.max(1)\n",
" num_correct = (pred == label).sum().data[0]\n",
" num_correct = flot((pred == label).sum().data[0])\n",
" acc = num_correct / im.shape[0]\n", " acc = num_correct / im.shape[0]\n",
" eval_acc += acc\n", " eval_acc += acc\n",
" \n", " \n",


+233 -0  2_pytorch/1_NN/deep-nn.py

@@ -0,0 +1,233 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
# jupytext_format_version: '1.2'
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.5.2
# ---

# # Deep neural networks
# In the previous chapter we briefly introduced the basics of neural networks and showed how to build a fairly complex non-linear binary classifier with one. Neural networks are better suited to harder problems such as image classification, so below we use MNIST handwritten-digit classification, the entry-level dataset of deep learning, to show how well a deeper network performs.
#
# ## The MNIST dataset
# MNIST is a very well-known dataset that many networks use as a benchmark. It comes from the United States National Institute of Standards and Technology (NIST). The training set consists of digits handwritten by 250 different people, 50% of them high school students and 50% staff of the Census Bureau, for a total of 60000 images. The test set contains handwritten digits in the same proportions, 10000 images in total.
#
# Each image is a 28 x 28 grayscale picture, as shown below
#
# ![](https://ws3.sinaimg.cn/large/006tKfTcly1fmlx2wl5tqj30ge0au745.jpg)
#
# So the task is: given an image, decide which of the 10 digits from 0 to 9 it represents.
#
# ## Multi-class classification
# We covered binary classification earlier. The problem here is more involved: it is a 10-class problem, generally called multi-class classification, and for it we use a more elaborate loss function called the cross-entropy.
#
# ### softmax
# Before the cross-entropy, let us first look at the softmax function. We have already seen the sigmoid function
#
# $$s(x) = \frac{1}{1 + e^{-x}}$$
#
# which maps any value into the range 0 ~ 1. For a binary problem this is enough: if a sample does not belong to the first class it must belong to the second, so a single number for the probability of one class suffices. For a multi-class problem it is not enough, because we need the probability of every class, and this is where the softmax function comes in.
#
# An illustration of the softmax function is shown below
#
# ![](https://ws4.sinaimg.cn/large/006tKfTcly1fmlxtnfm4fj30ll0bnq3c.jpg)
#

# For the network outputs $z_1, z_2, \cdots z_k$ we first take the exponential of each one, giving $e^{z_1}, e^{z_2}, \cdots, e^{z_k}$, and then divide every term by their sum, that is
#
# $$
# z_i \rightarrow \frac{e^{z_i}}{\sum_{j=1}^{k} e^{z_j}}
# $$
#
# Summing all the terms after the softmax gives exactly 1, so each term can be read as the probability of belonging to the corresponding class.
#
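
# A minimal numpy sketch of the softmax computation above (numpy is assumed here; the
# tutorial only imports it a few cells later). Subtracting the maximum first is purely a
# numerical-stability trick and does not change the result.
import numpy as np
z = np.array([2.0, 1.0, 0.1])
exp_z = np.exp(z - z.max())
softmax_z = exp_z / exp_z.sum()
print(softmax_z, softmax_z.sum())  # every entry lies in (0, 1) and they sum to 1
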
# ## Cross-entropy
# The cross-entropy is a way of measuring how similar two distributions are. The loss we used earlier for binary classification is a special case of it. The general formula is
#
# $$
# cross\_entropy(p, q) = E_{p}[-\log q] = - \frac{1}{m} \sum_{x} p(x) \log q(x)
# $$
#
# For a binary problem it can be written as
#
# $$
# -\frac{1}{m} \sum_{i=1}^m \left(y^{i} \log sigmoid(x^{i}) + (1 - y^{i}) \log (1 - sigmoid(x^{i}))\right)
# $$
#
# which is exactly the binary-classification loss we used before. Back then we simply stated the formula and argued that it was sensible; the general formula above shows where that choice of loss actually comes from.
#
# Cross-entropy belongs to information theory and we will not expand on it here; for more detail see the following [link](http://blog.csdn.net/rtygbwwwerr/article/details/50778098)
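
# A small numpy illustration of the formula (an assumption-level sketch, not the
# notebook's training code): the cross-entropy between a one-hot target p and a
# predicted distribution q reduces to minus the log-probability of the true class.
import numpy as np
p = np.array([0.0, 1.0, 0.0])   # true label: class 1, one-hot encoded
q = np.array([0.2, 0.7, 0.1])   # predicted probabilities, e.g. a softmax output
print(-np.sum(p * np.log(q)))   # equals -log(0.7)
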
#
# Below we use mnist directly as an example and walk through a deeper neural network

# +
import numpy as np
import torch
from torchvision.datasets import mnist  # import the MNIST dataset built into pytorch

from torch import nn
from torch.autograd import Variable
# -

# download the mnist dataset with the built-in helper
train_set = mnist.MNIST('../../data/mnist', train=True, download=True)
test_set = mnist.MNIST('../../data/mnist', train=False, download=True)

# let us look at what one of the samples looks like

a_data, a_label = train_set[0]

a_data

a_label

# the data is read in as a PIL image, which is easy to convert to a numpy array

a_data = np.array(a_data, dtype='float32')
print(a_data.shape)

# here we can see that each image has size 28 x 28

print(a_data)

# We can display the array directly: 0 means black and 255 means white
#
# For the neural network, the input to the first layer is 28 x 28 = 784, so the data has to be transformed: we use reshape to flatten each image into a one-dimensional vector

# +
def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5  # standardize; this trick is discussed later
    x = x.reshape((-1,))  # flatten into a 1-D vector
    x = torch.from_numpy(x)
    return x

train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True)  # reload the dataset, declaring the transform defined above
test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)
# -

a, a_label = train_set[0]
print(a.shape)
print(a_label)

from torch.utils.data import DataLoader
# use pytorch's built-in DataLoader to define a data iterator
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)

# Such a data iterator really is necessary: when the data is too large to be read into memory all at once, a python iterator lets us produce one batch at a time

a, a_label = next(iter(train_data))

# print the size of one batch
print(a.shape)
print(a_label.shape)

# define a 4-layer neural network with Sequential
net = nn.Sequential(
    nn.Linear(784, 400),
    nn.ReLU(),
    nn.Linear(400, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)

net

# The cross-entropy is already built into pytorch. Computing it naively is numerically less stable, so the built-in function takes care of that problem for us

# define the loss function
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), 1e-1)  # stochastic gradient descent with learning rate 0.1
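
# A rough numpy illustration of the stability point above (an assumption-level sketch,
# not what nn.CrossEntropyLoss literally executes): taking the log of a naively computed
# softmax can overflow to nan/-inf, while the log-sum-exp form stays finite.
import numpy as np
z = np.array([1000.0, -1000.0])                             # extreme logits
naive = np.log(np.exp(z) / np.exp(z).sum())                 # overflow warnings, gives nan / -inf
stable = z - (z.max() + np.log(np.exp(z - z.max()).sum()))  # log-softmax via log-sum-exp
print(naive, stable)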

# + {"scrolled": true}
# 开始训练
losses = []
acces = []
eval_losses = []
eval_acces = []

for e in range(20):
train_loss = 0
train_acc = 0
net.train()
for im, label in train_data:
im = Variable(im)
label = Variable(label)
# 前向传播
out = net(im)
loss = criterion(out, label)
# 反向传播
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 记录误差
train_loss += loss.data[0]
# 计算分类的准确率
_, pred = out.max(1)
num_correct = float((pred == label).sum().data[0])
acc = num_correct / im.shape[0]
train_acc += acc
losses.append(train_loss / len(train_data))
acces.append(train_acc / len(train_data))
# 在测试集上检验效果
eval_loss = 0
eval_acc = 0
net.eval() # 将模型改为预测模式
for im, label in test_data:
im = Variable(im)
label = Variable(label)
out = net(im)
loss = criterion(out, label)
# 记录误差
eval_loss += loss.data[0]
# 记录准确率
_, pred = out.max(1)
num_correct = flot((pred == label).sum().data[0])
acc = num_correct / im.shape[0]
eval_acc += acc
eval_losses.append(eval_loss / len(test_data))
eval_acces.append(eval_acc / len(test_data))
print('epoch: {}, Train Loss: {:.6f}, Train Acc: {:.6f}, Eval Loss: {:.6f}, Eval Acc: {:.6f}'
.format(e, train_loss / len(train_data), train_acc / len(train_data),
eval_loss / len(test_data), eval_acc / len(test_data)))
# -

# plot the loss curve and the accuracy curve

import matplotlib.pyplot as plt
# %matplotlib inline

plt.title('train loss')
plt.plot(np.arange(len(losses)), losses)

plt.plot(np.arange(len(acces)), acces)
plt.title('train acc')

plt.plot(np.arange(len(eval_losses)), eval_losses)
plt.title('test loss')

plt.plot(np.arange(len(eval_acces)), eval_acces)
plt.title('test acc')

# You can see that our network with three hidden layers reaches 99.9% accuracy on the training set and 98.20% accuracy on the test set

# **Small exercise: read through the training loop above and work out how the accuracy is computed; pay particular attention to the `max` function**
#
# **Re-implement a network of your own: try changing the number of hidden layers and the activation functions and see what results you get**
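
# A tiny illustration for the first exercise, on toy tensors assumed here only for the
# example: `scores.max(1)` returns the per-row maximum values and the indices of those
# maxima, and the indices are exactly the predicted classes used in the accuracy above.
scores = torch.FloatTensor([[0.1, 2.0, 0.3],
                            [1.5, 0.2, 0.1]])
labels = torch.LongTensor([1, 0])
values, pred = scores.max(1)
print(pred)                    # predicted class per row: 1 and 0
print((pred == labels).sum())  # number of correct predictions, 2 here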

+4 -8  2_pytorch/1_NN/optimizer/adam.ipynb

@@ -47,9 +47,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def adam(parameters, vs, sqrs, lr, t, beta1=0.9, beta2=0.999):\n", "def adam(parameters, vs, sqrs, lr, t, beta1=0.9, beta2=0.999):\n",
@@ -65,9 +63,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import numpy as np\n", "import numpy as np\n",
@@ -87,8 +83,8 @@
" x = torch.from_numpy(x)\n", " x = torch.from_numpy(x)\n",
" return x\n", " return x\n",
"\n", "\n",
"train_set = MNIST('./data', train=True, transform=data_tf, download=True) # 载入数据集,申明定义的数据变换\n",
"test_set = MNIST('./data', train=False, transform=data_tf, download=True)\n",
"train_set = MNIST('../../../data/mnist', train=True, transform=data_tf, download=True) # 载入数据集,申明定义的数据变换\n",
"test_set = MNIST('../../../data/mnist', train=False, transform=data_tf, download=True)\n",
"\n", "\n",
"# 定义 loss 函数\n", "# 定义 loss 函数\n",
"criterion = nn.CrossEntropyLoss()" "criterion = nn.CrossEntropyLoss()"


+2 -2  2_pytorch/1_NN/optimizer/adam.py

@@ -79,8 +79,8 @@ def data_tf(x):
x = torch.from_numpy(x) x = torch.from_numpy(x)
return x return x


train_set = MNIST('./data', train=True, transform=data_tf, download=True) # 载入数据集,申明定义的数据变换
test_set = MNIST('./data', train=False, transform=data_tf, download=True)
train_set = MNIST('../../../data/mnist', train=True, transform=data_tf, download=True) # 载入数据集,申明定义的数据变换
test_set = MNIST('../../../data/mnist', train=False, transform=data_tf, download=True)


# 定义 loss 函数 # 定义 loss 函数
criterion = nn.CrossEntropyLoss() criterion = nn.CrossEntropyLoss()


+14 -27  2_pytorch/2_CNN/batch-normalization.ipynb

@@ -64,8 +64,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-23T06:50:51.579067Z", "end_time": "2017-12-23T06:50:51.579067Z",
"start_time": "2017-12-23T06:50:51.575693Z" "start_time": "2017-12-23T06:50:51.575693Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -82,8 +81,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-23T07:14:11.077807Z", "end_time": "2017-12-23T07:14:11.077807Z",
"start_time": "2017-12-23T07:14:11.060849Z" "start_time": "2017-12-23T07:14:11.060849Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -168,8 +166,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-23T07:32:48.025709Z", "end_time": "2017-12-23T07:32:48.025709Z",
"start_time": "2017-12-23T07:32:48.005892Z" "start_time": "2017-12-23T07:32:48.005892Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -196,9 +193,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 5,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import numpy as np\n", "import numpy as np\n",
@@ -215,8 +210,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# 使用内置函数下载 mnist 数据集\n", "# 使用内置函数下载 mnist 数据集\n",
"train_set = mnist.MNIST('./data', train=True)\n",
"test_set = mnist.MNIST('./data', train=False)\n",
"train_set = mnist.MNIST('../../data/mnist', train=True)\n",
"test_set = mnist.MNIST('../../data/mnist', train=False)\n",
"\n", "\n",
"def data_tf(x):\n", "def data_tf(x):\n",
" x = np.array(x, dtype='float32') / 255\n", " x = np.array(x, dtype='float32') / 255\n",
@@ -225,8 +220,8 @@
" x = torch.from_numpy(x)\n", " x = torch.from_numpy(x)\n",
" return x\n", " return x\n",
"\n", "\n",
"train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换\n",
"test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)\n",
"train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换\n",
"test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True)\n",
"train_data = DataLoader(train_set, batch_size=64, shuffle=True)\n", "train_data = DataLoader(train_set, batch_size=64, shuffle=True)\n",
"test_data = DataLoader(test_set, batch_size=128, shuffle=False)" "test_data = DataLoader(test_set, batch_size=128, shuffle=False)"
] ]
@@ -234,9 +229,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 7,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"class multi_network(nn.Module):\n", "class multi_network(nn.Module):\n",
@@ -263,9 +256,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 8,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"net = multi_network()" "net = multi_network()"
@@ -426,9 +417,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def data_tf(x):\n", "def data_tf(x):\n",
@@ -438,8 +427,8 @@
" x = x.unsqueeze(0)\n", " x = x.unsqueeze(0)\n",
" return x\n", " return x\n",
"\n", "\n",
"train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换\n",
"test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)\n",
"train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换\n",
"test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True)\n",
"train_data = DataLoader(train_set, batch_size=64, shuffle=True)\n", "train_data = DataLoader(train_set, batch_size=64, shuffle=True)\n",
"test_data = DataLoader(test_set, batch_size=128, shuffle=False)" "test_data = DataLoader(test_set, batch_size=128, shuffle=False)"
] ]
@@ -500,9 +489,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 76,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# 不使用批标准化\n", "# 不使用批标准化\n",


+6 -6  2_pytorch/2_CNN/batch-normalization.py

@@ -110,8 +110,8 @@ from torch.autograd import Variable


# + # +
# 使用内置函数下载 mnist 数据集 # 使用内置函数下载 mnist 数据集
train_set = mnist.MNIST('./data', train=True)
test_set = mnist.MNIST('./data', train=False)
train_set = mnist.MNIST('../../data/mnist', train=True)
test_set = mnist.MNIST('../../data/mnist', train=False)


def data_tf(x): def data_tf(x):
x = np.array(x, dtype='float32') / 255 x = np.array(x, dtype='float32') / 255
@@ -120,8 +120,8 @@ def data_tf(x):
x = torch.from_numpy(x) x = torch.from_numpy(x)
return x return x


train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换
test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)
train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换
test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=64, shuffle=True) train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False) test_data = DataLoader(test_set, batch_size=128, shuffle=False)
# - # -
@@ -193,8 +193,8 @@ def data_tf(x):
x = x.unsqueeze(0) x = x.unsqueeze(0)
return x return x


train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换
test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)
train_set = mnist.MNIST('../../data/mnist', train=True, transform=data_tf, download=True) # 重新载入数据集,申明定义的数据变换
test_set = mnist.MNIST('../../data/mnist', train=False, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=64, shuffle=True) train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False) test_data = DataLoader(test_set, batch_size=128, shuffle=False)




+8 -14  2_pytorch/2_CNN/densenet.ipynb

@@ -35,8 +35,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-22T15:38:31.113030Z", "end_time": "2017-12-22T15:38:31.113030Z",
"start_time": "2017-12-22T15:38:30.612922Z" "start_time": "2017-12-22T15:38:30.612922Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -64,8 +63,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-22T15:38:31.121249Z", "end_time": "2017-12-22T15:38:31.121249Z",
"start_time": "2017-12-22T15:38:31.115369Z" "start_time": "2017-12-22T15:38:31.115369Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -92,8 +90,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-22T15:38:31.145274Z", "end_time": "2017-12-22T15:38:31.145274Z",
"start_time": "2017-12-22T15:38:31.123363Z" "start_time": "2017-12-22T15:38:31.123363Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -163,8 +160,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-22T15:38:31.222120Z", "end_time": "2017-12-22T15:38:31.222120Z",
"start_time": "2017-12-22T15:38:31.215770Z" "start_time": "2017-12-22T15:38:31.215770Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -226,8 +222,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-22T15:38:31.318822Z", "end_time": "2017-12-22T15:38:31.318822Z",
"start_time": "2017-12-22T15:38:31.236857Z" "start_time": "2017-12-22T15:38:31.236857Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -298,8 +293,7 @@
"ExecuteTime": { "ExecuteTime": {
"end_time": "2017-12-22T15:38:32.894729Z", "end_time": "2017-12-22T15:38:32.894729Z",
"start_time": "2017-12-22T15:38:31.656356Z" "start_time": "2017-12-22T15:38:31.656356Z"
},
"collapsed": true
}
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -313,9 +307,9 @@
" x = torch.from_numpy(x)\n", " x = torch.from_numpy(x)\n",
" return x\n", " return x\n",
" \n", " \n",
"train_set = CIFAR10('./data', train=True, transform=data_tf)\n",
"train_set = CIFAR10('../../data', train=True, transform=data_tf)\n",
"train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n", "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)\n",
"test_set = CIFAR10('./data', train=False, transform=data_tf)\n",
"test_set = CIFAR10('../../data', train=False, transform=data_tf)\n",
"test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n", "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)\n",
"\n", "\n",
"net = densenet(3, 10)\n", "net = densenet(3, 10)\n",


+2 -2  2_pytorch/2_CNN/densenet.py

@@ -162,9 +162,9 @@ def data_tf(x):
x = torch.from_numpy(x) x = torch.from_numpy(x)
return x return x
train_set = CIFAR10('./data', train=True, transform=data_tf)
train_set = CIFAR10('../../data', train=True, transform=data_tf)
train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True) train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_set = CIFAR10('./data', train=False, transform=data_tf)
test_set = CIFAR10('../../data', train=False, transform=data_tf)
test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False) test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)


net = densenet(3, 10) net = densenet(3, 10)


+1 -1  2_pytorch/3_RNN/time-series/lstm-time-series.ipynb

@@ -377,7 +377,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.5.2"
} }
}, },
"nbformat": 4, "nbformat": 4,


+144 -0  2_pytorch/3_RNN/time-series/lstm-time-series.py

@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
# jupytext_format_version: '1.2'
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.5.2
# ---

# # RNNs for time-series analysis
# Earlier we used an RNN for a simple image-classification problem, but RNNs are not really good at that kind of task. Below we look at how to apply an RNN to time-series problems: for sequential data, later values depend on earlier ones, and the memory of an LSTM suits this scenario very well.

# First we read in the data, which is 10 years of monthly airline passenger traffic; plotting it gives the figure below.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline

data_csv = pd.read_csv('./data.csv', usecols=[1])

plt.plot(data_csv)

# First we preprocess the data: drop the `na` entries and then scale the values into the range 0 ~ 1.

# data preprocessing
data_csv = data_csv.dropna()
dataset = data_csv.values
dataset = dataset.astype('float32')
max_value = np.max(dataset)
min_value = np.min(dataset)
scalar = max_value - min_value
dataset = list(map(lambda x: x / scalar, dataset))

# Next we build the dataset. We want to predict the traffic of the current month from the traffic of the previous few months; for example, using the previous two months as the input and the current month as the output. We also need to split the data into a training set and a test set and use the test set to measure the model's performance; here we simply take the earlier years as the training set and the last two years as the test set.

def create_dataset(dataset, look_back=2):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)

# build the inputs and targets
data_X, data_Y = create_dataset(dataset)
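
# A quick sanity check of what `create_dataset` produces, on a toy sequence assumed here
# only for illustration: each input row holds the two previous values, the target is the next one.
toy_X, toy_Y = create_dataset(np.arange(6))
print(toy_X)  # [[0 1] [1 2] [2 3] [3 4]]
print(toy_Y)  # [2 3 4 5]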

# split into training and test sets, 70% for training
train_size = int(len(data_X) * 0.7)
test_size = len(data_X) - train_size
train_X = data_X[:train_size]
train_Y = data_Y[:train_size]
test_X = data_X[train_size:]
test_Y = data_Y[train_size:]

# Finally we need to reshape the data, because the RNN reads input with dimensions (seq, batch, feature). There is only one sequence here, so batch is 1, and the input feature is the number of months we condition on; we chose two months, so feature is 2.

# +
import torch

train_X = train_X.reshape(-1, 1, 2)
train_Y = train_Y.reshape(-1, 1, 1)
test_X = test_X.reshape(-1, 1, 2)

train_x = torch.from_numpy(train_X)
train_y = torch.from_numpy(train_Y)
test_x = torch.from_numpy(test_X)
# -

from torch import nn
from torch.autograd import Variable

# Here we define the model. The first part is a two-layer RNN: at each step it takes the two-month input as features and produces an output feature. A linear layer then regresses the RNN output to the actual traffic value. We need `view` to rearrange the tensor because `nn.Linear` does not accept three-dimensional input, so we first merge the first two dimensions, pass the result through the linear layer, and then split them apart again before returning the output.

# define the model
class lstm_reg(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=1, num_layers=2):
        super(lstm_reg, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers)  # rnn
        self.reg = nn.Linear(hidden_size, output_size)  # regression layer

    def forward(self, x):
        x, _ = self.rnn(x)  # (seq, batch, hidden)
        s, b, h = x.shape
        x = x.view(s * b, h)  # merge seq and batch so nn.Linear gets 2-D input
        x = self.reg(x)
        x = x.view(s, b, -1)
        return x

# +
net = lstm_reg(2, 4)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)
# -

# The network is now defined. The input dimension is 2 because we use two months of traffic as input; the hidden dimension can be chosen freely, and here we pick 4

# start training
for e in range(1000):
    var_x = Variable(train_x)
    var_y = Variable(train_y)
    # forward pass
    out = net(var_x)
    loss = criterion(out, var_y)
    # backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (e + 1) % 100 == 0:  # print the result every 100 epochs
        print('Epoch: {}, Loss: {:.5f}'.format(e + 1, loss.data[0]))

# After training we can use the trained model to predict the rest of the series

net = net.eval()  # switch to evaluation mode

data_X = data_X.reshape(-1, 1, 2)
data_X = torch.from_numpy(data_X)
var_data = Variable(data_X)
pred_test = net(var_data)  # predictions from the trained model

# reshape the output
pred_test = pred_test.view(-1).data.numpy()

# plot the real data and the predictions
plt.plot(pred_test, 'r', label='prediction')
plt.plot(dataset, 'b', label='real')
plt.legend(loc='best')

# The blue curve is the real data and the red curve is the prediction. You can see that the lstm produces results quite close to the real values, and the predicted trend matches the real series, because the lstm can remember earlier information; plain linear regression does not do nearly as well. This example also shows that RNNs perform very well on sequences.

# **Small exercise: try changing the number of features in the hidden state and see whether anything changes; also try a simple linear regression model and see what kind of result you get**
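
# One possible starting point for the second part of the exercise (a rough sketch, not part
# of the original tutorial): fit y ≈ w1*x1 + w2*x2 + b with ordinary least squares on the
# same two-month windows and compare the curve with the LSTM prediction above.
lin_X, lin_Y = create_dataset(dataset)
lin_X = np.asarray(lin_X, dtype='float32').reshape(-1, 2)
lin_Y = np.asarray(lin_Y, dtype='float32').reshape(-1)
A = np.hstack([lin_X, np.ones((len(lin_X), 1), dtype='float32')])  # add a bias column
w = np.linalg.lstsq(A, lin_Y, rcond=-1)[0]
plt.plot(A.dot(w), 'g', label='linear baseline')
plt.plot(dataset, 'b', label='real')
plt.legend(loc='best')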

2_pytorch/PyTorch快速入门.ipynb → 2_pytorch/PyTorch_quick_intro.ipynb


+0 -533  2_pytorch/PyTorch快速入门.py

@@ -1,533 +0,0 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
# jupytext_format_version: '1.2'
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.5.2
# ---

# # PyTorch快速入门
#
# PyTorch的简洁设计使得它入门很简单,在深入介绍PyTorch之前,本节将先介绍一些PyTorch的基础知识,使得读者能够对PyTorch有一个大致的了解,并能够用PyTorch搭建一个简单的神经网络。部分内容读者可能暂时不太理解,可先不予以深究,后续的课程将会对此进行深入讲解。
#
# 本节内容参考了PyTorch官方教程[^1]并做了相应的增删修改,使得内容更贴合新版本的PyTorch接口,同时也更适合新手快速入门。另外本书需要读者先掌握基础的Numpy使用,其他相关知识推荐读者参考CS231n的教程[^2]。
#
# [^1]: http://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html
# [^2]: http://cs231n.github.io/python-numpy-tutorial/

# ### Tensor
#
# Tensor是PyTorch中重要的数据结构,可认为是一个高维数组。它可以是一个数(标量)、一维数组(向量)、二维数组(矩阵)以及更高维的数组。Tensor和Numpy的ndarrays类似,但Tensor可以使用GPU进行加速。Tensor的使用和Numpy及Matlab的接口十分相似,下面通过几个例子来看看Tensor的基本使用。

from __future__ import print_function
import torch as t

# 构建 5x3 矩阵,只是分配了空间,未初始化
x = t.Tensor(5, 3)
x

# 使用[0,1]均匀分布随机初始化二维数组
x = t.rand(5, 3)
x

print(x.size()) # 查看x的形状
x.size()[1], x.size(1) # 查看列的个数, 两种写法等价

# `torch.Size` 是tuple对象的子类,因此它支持tuple的所有操作,如x.size()[0]

y = t.rand(5, 3)
# 加法的第一种写法
x + y

# 加法的第二种写法
t.add(x, y)

# 加法的第三种写法:指定加法结果的输出目标为result
result = t.Tensor(5, 3) # 预先分配空间
t.add(x, y, out=result) # 输入到result
result

# +
print('最初y')
print(y)

print('第一种加法,y的结果')
y.add(x) # 普通加法,不改变y的内容
print(y)

print('第二种加法,y的结果')
y.add_(x) # inplace 加法,y变了
print(y)
# -

# 注意,函数名后面带下划线**`_`** 的函数会修改Tensor本身。例如,`x.add_(y)`和`x.t_()`会改变 `x`,但`x.add(y)`和`x.t()`返回一个新的Tensor, 而`x`不变。

# Tensor的选取操作与Numpy类似
x[:, 1]

# Tensor还支持很多操作,包括数学运算、线性代数、选择、切片等等,其接口设计与Numpy极为相似。更详细的使用方法,会在第三章系统讲解。
#
# Tensor和Numpy的数组之间的互操作非常容易且快速。对于Tensor不支持的操作,可以先转为Numpy数组处理,之后再转回Tensor。

a = t.ones(5) # 新建一个全1的Tensor
a

b = a.numpy() # Tensor -> Numpy
b

import numpy as np
a = np.ones(5)
b = t.from_numpy(a) # Numpy->Tensor
print(a)
print(b)

# Tensor和numpy对象共享内存,所以他们之间的转换很快,而且几乎不会消耗什么资源。但这也意味着,如果其中一个变了,另外一个也会随之改变。

b.add_(1) # 以`_`结尾的函数会修改自身
print(a)
print(b) # Tensor和Numpy共享内存

# Tensor可通过`.cuda` 方法转为GPU的Tensor,从而享受GPU带来的加速运算。

# 在不支持CUDA的机器下,下一步不会运行
if t.cuda.is_available():
x = x.cuda()
y = y.cuda()
x + y

# 此处可能发现GPU运算的速度并未提升太多,这是因为x和y太小且运算也较为简单,而且将数据从内存转移到显存还需要花费额外的开销。GPU的优势需在大规模数据和复杂运算下才能体现出来。
#
# ### Autograd: 自动微分
#
# 深度学习的算法本质上是通过反向传播求导数,而PyTorch的**`Autograd`**模块则实现了此功能。在Tensor上的所有操作,Autograd都能为它们自动提供微分,避免了手动计算导数的复杂过程。
#
# `autograd.Variable`是Autograd中的核心类,它简单封装了Tensor,并支持几乎所有Tensor有的操作。Tensor在被封装为Variable之后,可以调用它的`.backward`实现反向传播,自动计算所有梯度。Variable的数据结构如图2-6所示。
#
#
# ![图2-6:Variable的数据结构](imgs/autograd_Variable.svg)
#
#
# Variable主要包含三个属性。
# - `data`:保存Variable所包含的Tensor
# - `grad`:保存`data`对应的梯度,`grad`也是个Variable,而不是Tensor,它和`data`的形状一样。
# - `grad_fn`:指向一个`Function`对象,这个`Function`用来反向传播计算输入的梯度,具体细节会在下一章讲解。

from torch.autograd import Variable

# + {"scrolled": true}
# 使用Tensor新建一个Variable
x = Variable(t.ones(2, 2), requires_grad = True)
x

# + {"scrolled": true}
y = x.sum()
y
# -

y.grad_fn

y.backward() # 反向传播,计算梯度

# y = x.sum() = (x[0][0] + x[0][1] + x[1][0] + x[1][1])
# 每个值的梯度都为1
x.grad

# 注意:`grad`在反向传播过程中是累加的(accumulated),**这意味着每一次运行反向传播,梯度都会累加之前的梯度,所以反向传播之前需把梯度清零。**

y.backward()
x.grad

# + {"scrolled": true}
y.backward()
x.grad
# -

# 以下划线结束的函数是inplace操作,就像add_
x.grad.data.zero_()

y.backward()
x.grad

# Variable和Tensor具有近乎一致的接口,在实际使用中可以无缝切换。

x = Variable(t.ones(4,5))
y = t.cos(x)
x_tensor_cos = t.cos(x.data)
print(y)
x_tensor_cos

# ### 神经网络
#
# Autograd实现了反向传播功能,但是直接用来写深度学习的代码在很多情况下还是稍显复杂,torch.nn是专门为神经网络设计的模块化接口。nn构建于 Autograd之上,可用来定义和运行神经网络。nn.Module是nn中最重要的类,可把它看成是一个网络的封装,包含网络各层定义以及forward方法,调用forward(input)方法,可返回前向传播的结果。下面就以最早的卷积神经网络:LeNet为例,来看看如何用`nn.Module`实现。LeNet的网络结构如图2-7所示。
#
# ![图2-7:LeNet网络结构](imgs/nn_lenet.png)
#
# 这是一个基础的前向传播(feed-forward)网络: 接收输入,经过层层传递运算,得到输出。
#
# #### 定义网络
#
# 定义网络时,需要继承`nn.Module`,并实现它的forward方法,把网络中具有可学习参数的层放在构造函数`__init__`中。如果某一层(如ReLU)不具有可学习的参数,则既可以放在构造函数中,也可以不放,但建议不放在其中,而在forward中使用`nn.functional`代替。

# +
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
def __init__(self):
# nn.Module子类的函数必须在构造函数中执行父类的构造函数
# 下式等价于nn.Module.__init__(self)
super(Net, self).__init__()
# 卷积层 '1'表示输入图片为单通道, '6'表示输出通道数,'5'表示卷积核为5*5
self.conv1 = nn.Conv2d(1, 6, 5)
# 卷积层
self.conv2 = nn.Conv2d(6, 16, 5)
# 仿射层/全连接层,y = Wx + b
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)

def forward(self, x):
# 卷积 -> 激活 -> 池化
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
# reshape,‘-1’表示自适应
x = x.view(x.size()[0], -1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x

net = Net()
print(net)
# -

# 只要在nn.Module的子类中定义了forward函数,backward函数就会自动被实现(利用`Autograd`)。在`forward` 函数中可使用任何Variable支持的函数,还可以使用if、for循环、print、log等Python语法,写法和标准的Python写法一致。
#
# 网络的可学习参数通过`net.parameters()`返回,`net.named_parameters`可同时返回可学习的参数及名称。

params = list(net.parameters())
print(len(params))

for name,parameters in net.named_parameters():
print(name,':',parameters.size())

# forward函数的输入和输出都是Variable,只有Variable才具有自动求导功能,而Tensor是没有的,所以在输入时,需把Tensor封装成Variable。

# + {"scrolled": true}
input = Variable(t.randn(1, 1, 32, 32))
out = net(input)
out.size()
# -

net.zero_grad() # 所有参数的梯度清零
out.backward(Variable(t.ones(1,10))) # 反向传播

# 需要注意的是,torch.nn只支持mini-batches,不支持一次只输入一个样本,即一次必须是一个batch。但如果只想输入一个样本,则用 `input.unsqueeze(0)`将batch_size设为1。例如 `nn.Conv2d` 输入必须是4维的,形如$nSamples \times nChannels \times Height \times Width$。可将nSample设为1,即$1 \times nChannels \times Height \times Width$。

# #### 损失函数
#
# nn实现了神经网络中大多数的损失函数,例如nn.MSELoss用来计算均方误差,nn.CrossEntropyLoss用来计算交叉熵损失。

# + {"scrolled": true}
output = net(input)
target = Variable(t.arange(0,10))
criterion = nn.MSELoss()
loss = criterion(output, target)
loss
# -

# 如果对loss进行反向传播溯源(使用`gradfn`属性),可看到它的计算图如下:
#
# ```
# input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
# -> view -> linear -> relu -> linear -> relu -> linear
# -> MSELoss
# -> loss
# ```
#
# 当调用`loss.backward()`时,该图会动态生成并自动微分,也即会自动计算图中参数(Parameter)的导数。

# 运行.backward,观察调用之前和调用之后的grad
net.zero_grad() # 把net中所有可学习参数的梯度清零
print('反向传播之前 conv1.bias的梯度')
print(net.conv1.bias.grad)
loss.backward()
print('反向传播之后 conv1.bias的梯度')
print(net.conv1.bias.grad)

# #### 优化器

# 在反向传播计算完所有参数的梯度后,还需要使用优化方法来更新网络的权重和参数,例如随机梯度下降法(SGD)的更新策略如下:
# ```
# weight = weight - learning_rate * gradient
# ```
#
# 手动实现如下:
#
# ```python
# learning_rate = 0.01
# for f in net.parameters():
# f.data.sub_(f.grad.data * learning_rate)# inplace 减法
# ```
#
# `torch.optim`中实现了深度学习中绝大多数的优化方法,例如RMSProp、Adam、SGD等,更便于使用,因此大多数时候并不需要手动写上述代码。

# +
import torch.optim as optim
#新建一个优化器,指定要调整的参数和学习率
optimizer = optim.SGD(net.parameters(), lr = 0.01)

# 在训练过程中
# 先梯度清零(与net.zero_grad()效果一样)
optimizer.zero_grad()

# 计算损失
output = net(input)
loss = criterion(output, target)

#反向传播
loss.backward()

#更新参数
optimizer.step()
# -

#
#
# #### 数据加载与预处理
#
# 在深度学习中数据加载及预处理是非常复杂繁琐的,但PyTorch提供了一些可极大简化和加快数据处理流程的工具。同时,对于常用的数据集,PyTorch也提供了封装好的接口供用户快速调用,这些数据集主要保存在torchvison中。
#
# `torchvision`实现了常用的图像数据加载功能,例如Imagenet、CIFAR10、MNIST等,以及常用的数据转换操作,这极大地方便了数据加载,并且代码具有可重用性。
#
#
# ### 小试牛刀:CIFAR-10分类
#
# 下面我们来尝试实现对CIFAR-10数据集的分类,步骤如下:
#
# 1. 使用torchvision加载并预处理CIFAR-10数据集
# 2. 定义网络
# 3. 定义损失函数和优化器
# 4. 训练网络并更新网络参数
# 5. 测试网络
#
# #### CIFAR-10数据加载及预处理
#
# CIFAR-10[^3]是一个常用的彩色图片数据集,它有10个类别: 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'。每张图片都是$3\times32\times32$,也即3-通道彩色图片,分辨率为$32\times32$。
#
# [^3]: http://www.cs.toronto.edu/~kriz/cifar.html

import torch as t
import torchvision as tv
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
show = ToPILImage() # 可以把Tensor转成Image,方便可视化

# +
# 第一次运行程序torchvision会自动下载CIFAR-10数据集,
# 大约100M,需花费一定的时间,
# 如果已经下载有CIFAR-10,可通过root参数指定

# 定义对数据的预处理
transform = transforms.Compose([
transforms.ToTensor(), # 转为Tensor
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), # 归一化
])

# 训练集
trainset = tv.datasets.CIFAR10(
root='../data/',
train=True,
download=True,
transform=transform)

trainloader = t.utils.data.DataLoader(
trainset,
batch_size=4,
shuffle=True,
num_workers=2)

# 测试集
testset = tv.datasets.CIFAR10(
'../data/',
train=False,
download=True,
transform=transform)

testloader = t.utils.data.DataLoader(
testset,
batch_size=4,
shuffle=False,
num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# -

# Dataset对象是一个数据集,可以按下标访问,返回形如(data, label)的数据。

# +
(data, label) = trainset[100]
print(classes[label])

# (data + 1) / 2是为了还原被归一化的数据
show((data + 1) / 2).resize((100, 100))
# -

# Dataloader是一个可迭代的对象,它将dataset返回的每一条数据拼接成一个batch,并提供多线程加速优化和数据打乱等操作。当程序对dataset的所有数据遍历完一遍之后,相应的对Dataloader也完成了一次迭代。

dataiter = iter(trainloader)
images, labels = dataiter.next() # 返回4张图片及标签
print(' '.join('%11s'%classes[labels[j]] for j in range(4)))
show(tv.utils.make_grid((images+1)/2)).resize((400,100))

# #### 定义网络
#
# 拷贝上面的LeNet网络,修改self.conv1第一个参数为3通道,因CIFAR-10是3通道彩图。

# +
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)

def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
x = x.view(x.size()[0], -1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x


net = Net()
print(net)
# -

# #### 定义损失函数和优化器(loss和optimizer)

from torch import optim
criterion = nn.CrossEntropyLoss() # 交叉熵损失函数
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# ### 训练网络
#
# 所有网络的训练流程都是类似的,不断地执行如下流程:
#
# - 输入数据
# - 前向传播+反向传播
# - 更新参数
#

# +
from torch.autograd import Variable

t.set_num_threads(8)
for epoch in range(2):
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
# 输入数据
inputs, labels = data
inputs, labels = Variable(inputs), Variable(labels)
# 梯度清零
optimizer.zero_grad()
# forward + backward
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
# 更新参数
optimizer.step()
# 打印log信息
running_loss += loss.data[0]
if i % 2000 == 1999: # 每2000个batch打印一下训练状态
print('[%d, %5d] loss: %.3f' \
% (epoch+1, i+1, running_loss / 2000))
running_loss = 0.0
print('Finished Training')
# -

# 此处仅训练了2个epoch(遍历完一遍数据集称为一个epoch),来看看网络有没有效果。将测试图片输入到网络中,计算它的label,然后与实际的label进行比较。

dataiter = iter(testloader)
images, labels = dataiter.next() # 一个batch返回4张图片
print('实际的label: ', ' '.join(\
'%08s'%classes[labels[j]] for j in range(4)))
show(tv.utils.make_grid(images / 2 - 0.5)).resize((400,100))


# 接着计算网络预测的label:

# +
# 计算图片在每个类别上的分数
outputs = net(Variable(images))
# 得分最高的那个类
_, predicted = t.max(outputs.data, 1)

print('预测结果: ', ' '.join('%5s'\
% classes[predicted[j]] for j in range(4)))
# -

# 已经可以看出效果,准确率50%,但这只是一部分的图片,再来看看在整个测试集上的效果。

# +
correct = 0 # 预测正确的图片数
total = 0 # 总共的图片数
for data in testloader:
images, labels = data
outputs = net(Variable(images))
_, predicted = t.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum()

print('10000张测试集中的准确率为: %d %%' % (100 * correct / total))
# -

# 训练的准确率远比随机猜测(准确率10%)好,证明网络确实学到了东西。

# #### 在GPU训练
# 就像之前把Tensor从CPU转到GPU一样,模型也可以类似地从CPU转到GPU。

if t.cuda.is_available():
net.cuda()
images = images.cuda()
labels = labels.cuda()
output = net(Variable(images))
loss= criterion(output,Variable(labels))

# 如果发现在GPU上并没有比CPU提速很多,实际上是因为网络比较小,GPU没有完全发挥自己的真正实力。

# 对PyTorch的基础介绍至此结束。总结一下,本节主要包含以下内容。
#
# 1. Tensor: 类似Numpy数组的数据结构,与Numpy接口类似,可方便地互相转换。
# 2. autograd/Variable: Variable封装了Tensor,并提供自动求导功能。
# 3. nn: 专门为神经网络设计的接口,提供了很多有用的功能(神经网络层,损失函数,优化器等)。
# 4. 神经网络训练: 以CIFAR-10分类为例演示了神经网络的训练流程,包括数据加载、网络搭建、训练及测试。
#
# 通过本节的学习,相信读者可以体会出PyTorch具有接口简单、使用灵活等特点。从下一章开始,本书将深入系统地讲解PyTorch的各部分知识。

+3 -4  README.md

@@ -1,8 +1,8 @@
# Python与机器学习 # Python与机器学习


This tutorial contains a set of notebooks for learning machine learning with Python; it guides you through the basics of Python and the theory of machine learning and practical programming, and teaches you how to solve real problems.
This tutorial contains a set of notebooks for learning machine learning with Python; it guides you through the basics of Python, the theory of machine learning and practical programming, and teaches you how to solve real problems.


Since **this course requires a large amount of programming practice to get good results**, the assignments and reports must be completed carefully. The assignments are at: https://gitee.com/machinelearning2018/pr_homework Please follow the instructions there.
Since **this course requires a large amount of programming practice to get good results**, the assignments and reports must be completed carefully. The assignments are at: https://gitee.com/machinelearning2018/pr_homework Please follow the instructions there and submit your assignments




## 内容 ## 内容
@@ -28,7 +28,7 @@
- [Multi-layer Perceptron & BP](1_nn/mlp_bp.ipynb) - [Multi-layer Perceptron & BP](1_nn/mlp_bp.ipynb)
- [Softmax & cross-entroy](1_nn/softmax_ce.ipynb) - [Softmax & cross-entroy](1_nn/softmax_ce.ipynb)
7. [PyTorch](2_pytorch/) 7. [PyTorch](2_pytorch/)
- [short tutorial](PyTorch快速入门.ipynb)
- [short tutorial](2_pytorch/PyTorch_quick_intro.ipynb)
- [basic/Tensor-and-Variable](2_pytorch/0_basic/Tensor-and-Variable.ipynb) - [basic/Tensor-and-Variable](2_pytorch/0_basic/Tensor-and-Variable.ipynb)
- [basic/autograd](2_pytorch/0_basic/autograd.ipynb) - [basic/autograd](2_pytorch/0_basic/autograd.ipynb)
- [basic/dynamic-graph](2_pytorch/0_basic/dynamic-graph.ipynb) - [basic/dynamic-graph](2_pytorch/0_basic/dynamic-graph.ipynb)
@@ -40,7 +40,6 @@
- [nn/param_initialize](2_pytorch/1_NN/param_initialize.ipynb) - [nn/param_initialize](2_pytorch/1_NN/param_initialize.ipynb)
- [optim/sgd](2_pytorch/1_NN/optimizer/sgd.ipynb) - [optim/sgd](2_pytorch/1_NN/optimizer/sgd.ipynb)
- [optim/adam](2_pytorch/1_NN/optimizer/adam.ipynb) - [optim/adam](2_pytorch/1_NN/optimizer/adam.ipynb)
- [optim/adam](2_pytorch/1_NN/optimizer/adam.ipynb)
- [cnn/basic_conv](2_pytorch/2_CNN/basic_conv.ipynb) - [cnn/basic_conv](2_pytorch/2_CNN/basic_conv.ipynb)
- [cnn/batch-normalization](2_pytorch/2_CNN/batch-normalization.ipynb) - [cnn/batch-normalization](2_pytorch/2_CNN/batch-normalization.ipynb)
- [cnn/regularization](2_pytorch/2_CNN/regularization.ipynb) - [cnn/regularization](2_pytorch/2_CNN/regularization.ipynb)


+26 -21  demo_code/2_logistic_regression_2.py

@@ -1,19 +1,24 @@
import time


import torch as t import torch as t
from torch import nn, optim from torch import nn, optim
import torch.nn.functional as F
from torch.autograd import Variable from torch.autograd import Variable
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torchvision import transforms from torchvision import transforms
from torchvision import datasets from torchvision import datasets
import time


# 定义超参数
"""
Use pytorch nn.Module to implement logistic regression
"""


# define hyper parameters
batch_size = 32 batch_size = 32
learning_rate = 1e-3 learning_rate = 1e-3
num_epoches = 100 num_epoches = 100


# 下载训练集 MNIST 手写数字训练集
# download/load MNIST dataset
dataset_path = "../data/mnist" dataset_path = "../data/mnist"


train_dataset = datasets.MNIST( train_dataset = datasets.MNIST(
@@ -26,7 +31,7 @@ train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False) test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)




# 定义 Logistic Regression 模型
# define Logistic Regression model
class Logstic_Regression(nn.Module): class Logstic_Regression(nn.Module):
def __init__(self, in_dim, n_class): def __init__(self, in_dim, n_class):
super(Logstic_Regression, self).__init__() super(Logstic_Regression, self).__init__()
@@ -37,18 +42,17 @@ class Logstic_Regression(nn.Module):
return out return out




model = Logstic_Regression(28 * 28, 10) # 图片大小是28x28
use_gpu = t.cuda.is_available() # 判断是否有GPU加速
if use_gpu:
model = model.cuda()
model = Logstic_Regression(28 * 28, 10) # model's input/output node size
use_gpu = t.cuda.is_available() # GPU use or not
if use_gpu: model = model.cuda()


# 定义loss和optimizer
# define loss & optimizer
criterion = nn.CrossEntropyLoss() criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate) optimizer = optim.SGD(model.parameters(), lr=learning_rate)


# 开始训练
# training
for epoch in range(num_epoches): for epoch in range(num_epoches):
print('*' * 10)
print('-' * 40)
print('epoch {}'.format(epoch + 1)) print('epoch {}'.format(epoch + 1))


since = time.time() since = time.time()
@@ -56,7 +60,7 @@ for epoch in range(num_epoches):
running_acc = 0.0 running_acc = 0.0
for i, data in enumerate(train_loader, 1): for i, data in enumerate(train_loader, 1):
img, label = data img, label = data
img = img.view(img.size(0), -1) # 将图片展开成 28x28
img = img.view(img.size(0), -1) # convert input image to dimensions of (n, 28x28)
if use_gpu: if use_gpu:
img = Variable(img).cuda() img = Variable(img).cuda()
label = Variable(label).cuda() label = Variable(label).cuda()
@@ -64,15 +68,15 @@ for epoch in range(num_epoches):
img = Variable(img) img = Variable(img)
label = Variable(label) label = Variable(label)


# 向前传播
# forward calculation
out = model(img) out = model(img)
loss = criterion(out, label) loss = criterion(out, label)
running_loss += loss.data[0] * label.size(0) running_loss += loss.data[0] * label.size(0)
_, pred = t.max(out, 1) _, pred = t.max(out, 1)
num_correct = (pred == label).sum() num_correct = (pred == label).sum()
running_acc += num_correct.data[0]
running_acc += float(num_correct.data[0])


# 向后传播
# bp
optimizer.zero_grad() optimizer.zero_grad()
loss.backward() loss.backward()
optimizer.step() optimizer.step()
@@ -104,12 +108,13 @@ for epoch in range(num_epoches):
eval_loss += loss.data[0] * label.size(0) eval_loss += loss.data[0] * label.size(0)
_, pred = t.max(out, 1) _, pred = t.max(out, 1)
num_correct = (pred == label).sum() num_correct = (pred == label).sum()
eval_acc += num_correct.data[0]
eval_acc += float(num_correct.data[0])


print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
test_dataset)), eval_acc / (len(test_dataset))))
print('Test Loss: {:.6f}, Acc: {:.6f}'.format(
eval_loss / (len(test_dataset)),
eval_acc / (len(test_dataset))))
print('Time:{:.1f} s'.format(time.time() - since)) print('Time:{:.1f} s'.format(time.time() - since))
print() print()


# 保存模型
t.save(model.state_dict(), './model_LogsticRegression.pth')
# save model's parameters
#t.save(model.state_dict(), './model_LogsticRegression.pth')

+1 -1  demo_code/2_poly_fitting.py

@@ -7,7 +7,7 @@ import matplotlib.pyplot as plt




""" """
Polynomial fitting by pytorch
Polynomial fitting by pytorch
""" """


# define the real model's parameters # define the real model's parameters


demo_code/CNN_CIFAR.py → demo_code/3_CNN_CIFAR.py


demo_code/Neural_Network.py → demo_code/3_NN_FC.py

@@ -31,9 +31,9 @@ test_loader = torch.utils.data.DataLoader(dataset=test_dataset,




# define Network # define Network
class Net(nn.Module):
class NN_FC1(nn.Module):
def __init__(self): def __init__(self):
super(Net, self).__init__()
super(NN_FC1, self).__init__()
self.l1 = nn.Linear(784, 520) self.l1 = nn.Linear(784, 520)
self.l2 = nn.Linear(520, 320) self.l2 = nn.Linear(520, 320)
self.l3 = nn.Linear(320, 240) self.l3 = nn.Linear(320, 240)
@@ -48,8 +48,30 @@ class Net(nn.Module):
x = F.relu(self.l4(x)) x = F.relu(self.l4(x))
return self.l5(x) return self.l5(x)


# Define the network
class NN_FC2(nn.Module):
    def __init__(self):
        super(NN_FC2, self).__init__()

        in_dim = 28*28
        n_hidden_1 = 300
        n_hidden_2 = 100
        out_dim = 10

        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = self.layer3(x)
        return x



model = Net()
# create the NN object
model = NN_FC2()


criterion = nn.CrossEntropyLoss() criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

+0 -110  demo_code/Neural_Network.0.py

@@ -1,110 +0,0 @@
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.nn.functional as F

from torchvision import transforms
from torchvision import datasets

# set parameters
batch_size = 32
learning_rate = 1e-2
num_epoches = 50

# download & load MNIST dataset
dataset_path = "../data/mnist"

train_dataset = datasets.MNIST(
root=dataset_path, train=True, transform=transforms.ToTensor(), download=True)

test_dataset = datasets.MNIST(
root=dataset_path, train=False, transform=transforms.ToTensor())

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Define the network
class NeuralNetwork(nn.Module):
def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
super(NeuralNetwork, self).__init__()
self.layer1 = nn.Linear(in_dim, n_hidden_1)
self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
self.layer3 = nn.Linear(n_hidden_2, out_dim)

def forward(self, x):
x = F.relu(self.layer1(x))
x = F.relu(self.layer2(x))
x = self.layer3(x)
return x


# create network & define loss function
model = NeuralNetwork(28 * 28, 300, 100, 10)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# train
for epoch in range(num_epoches):
print("epoch %6d" % int(epoch+1))
print('-' * 40)

running_loss = 0.0
running_acc = 0.0

for i, data in enumerate(train_loader, 1):
img, label = data
img = Variable(img.view(img.size(0), -1))
label = Variable(label)
# 向前传播
optimizer.zero_grad()

out = model(img)
loss = criterion(out, label)
running_loss += loss.data[0] * label.size(0)
pred = out.data.max(1, keepdim=True)[1]
running_acc += float(pred.eq(label.data.view_as(pred)).cpu().sum())
# 向后传播
loss.backward()
optimizer.step()

if i % 300 == 0:
print('[{}/{}] Loss: {:.6f}, Acc: {:.2f}%'.format(
epoch + 1, num_epoches,
1.0*running_loss / (batch_size * i),
100.0*running_acc / (batch_size * i)))
# do test
model.eval()
eval_loss = 0.
eval_acc = 0.

for data in test_loader:
img, label = data
img = img.view(img.size(0), -1)

img = Variable(img)
label = Variable(label)
out = model(img)
loss = criterion(out, label)
eval_loss += loss.data[0] * label.size(0)
pred = out.data.max(1, keepdim=True)[1]
eval_acc += float(pred.eq(label.data.view_as(pred)).cpu().sum())

print('\nTest Loss: {:.6f}, Acc: {:.2f}%'.format(
1.0*eval_loss / (len(test_dataset)),
100.0*eval_acc / (len(test_dataset))))
print()


# save model
torch.save(model.state_dict(), './model_Neural_Network.pth')
