
Add bp, rearrange files

fetches/feikei/master
Shuhui Bu, 6 years ago
parent commit cbc2df1b7a
23 changed files with 3121 additions and 1146 deletions
  1. 0_numpy_matplotlib_scipy_sympy/matplotlib_simple_tutorial.ipynb (+10, -27)
  2. 0_numpy_matplotlib_scipy_sympy/matplotlib_simple_tutorial.py (+123, -0)
  3. 0_numpy_matplotlib_scipy_sympy/numpy_tutorial.ipynb (+0, -0)
  4. 1_logistic_regression/Least_squares.ipynb (+39, -843)
  5. README.md (+5, -2)
  6. References_notes.md (+0, -49)
  7. nn/mlp_bp.ipynb (+2301, -221)
  8. nn/mlp_bp.py (+194, -4)
  9. references/References.md (+46, -0)
  10. references/nn/.nn_2.py.kate-swp (BIN)
  11. references/nn/nn_3.py (+46, -0)
  12. references/nn/nn_4.py (+125, -0)
  13. tips/InstallPython.md (+0, -0)
  14. tips/confusion matrix.ipynb (+0, -0)
  15. tips/confusion matrix.py (+0, -0)
  16. tips/datasets.ipynb (+156, -0)
  17. tips/datasets.py (+76, -0)
  18. tips/images/confusion_matrix1.png (+0, -0)
  19. tips/images/confusion_matrix2.png (+0, -0)
  20. tips/python/README.md (+0, -0)
  21. tips/python/pip.md (+0, -0)
  22. tips/python/virtualenv.md (+0, -0)
  23. tips/python/virtualenv_wrapper.md (+0, -0)

0_numpy_matplotlib_scipy_sympy/matplotlib_simple_tutorial.ipynb (+10, -27)
File diff suppressed because it is too large


0_numpy_matplotlib_scipy_sympy/matplotlib_simple_tutorial.py (+123, -0)

@@ -0,0 +1,123 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# # matplotlib
#
#

# ## 1. pyplot
# matplotlib.pyplot is a collection of command style functions that make matplotlib work like MATLAB. Each pyplot function makes some change to a figure: e.g., creates a figure, creates a plotting area in a figure, plots some lines in a plotting area, decorates the plot with labels, etc. In matplotlib.pyplot various states are preserved across function calls, so that it keeps track of things like the current figure and plotting area, and the plotting functions are directed to the current axes (please note that “axes” here and in most places in the documentation refers to the axes part of a figure and not the strict mathematical term for more than one axis).

# +
# This line configures matplotlib to show figures embedded in the notebook,
# instead of opening a new window for each figure. More about that later.
# If you are using an old version of IPython, try using '%pylab inline' instead.
# %matplotlib inline

import matplotlib.pyplot as plt
plt.plot([1,2,3,4])
plt.ylabel('some numbers')
plt.show()
# -

plt.plot([1, 2, 3, 4], [1, 4, 9, 16])


# For every x, y pair of arguments, there is an optional third argument which is the format string that indicates the color and line type of the plot. The letters and symbols of the format string are from MATLAB, and you concatenate a color string with a line style string. The default format string is 'b-', which is a solid blue line. For example, to plot the above with red circles, you would issue

import matplotlib.pyplot as plt
plt.plot([1,2,3,4], [1,4,9,16], 'ro')
plt.axis([0, 6, 0, 20])
plt.show()

# +
import numpy as np
import matplotlib.pyplot as plt

# evenly sampled time at 200ms intervals
t = np.arange(0., 5., 0.2)

# red dashes, blue squares and green triangles
plt.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')
plt.show()
# -

# ### [Controlling line properties](https://matplotlib.org/users/pyplot_tutorial.html#controlling-line-properties)
#
# Lines have many attributes that you can set: linewidth, dash style, antialiased, etc.; see matplotlib.lines.Line2D. There are several ways to set line properties, as the sketch below shows.
#
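# For example, a minimal sketch (with made-up data) of three common ways to set
# line properties: keyword arguments, Line2D setter methods, and plt.setp():

# +
x = [1, 2, 3, 4]
y = [1, 4, 9, 16]

# 1) keyword arguments to plot()
plt.plot(x, y, linewidth=2.0)

# 2) setter methods on the returned Line2D instance
line, = plt.plot(x, y, '-')
line.set_antialiased(False)

# 3) plt.setp() on a list of lines, MATLAB-style
lines = plt.plot(x, y)
plt.setp(lines, color='r', linewidth=2.0)
plt.show()
# -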

# ### Working with multiple figures and axes
#
# MATLAB, and pyplot, have the concept of the current figure and the current axes. All plotting commands apply to the current axes. The function gca() returns the current axes (a matplotlib.axes.Axes instance), and gcf() returns the current figure (matplotlib.figure.Figure instance). Normally, you don’t have to worry about this, because it is all taken care of behind the scenes. Below is a script to create two subplots.
#
#

# +
import numpy as np
import matplotlib.pyplot as plt

def f(t):
    return np.exp(-t) * np.cos(2*np.pi*t)

t1 = np.arange(0.0, 5.0, 0.1)
t2 = np.arange(0.0, 5.0, 0.02)

plt.figure(1)
plt.subplot(211)
plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k')

plt.subplot(212)
plt.plot(t2, np.cos(2*np.pi*t2), 'r--')
plt.show()
# -

# ## 2. Image

# +
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

# load image
img = mpimg.imread('example.png')

imgplot = plt.imshow(img)

# -

# ### Applying pseudocolor schemes to image plots

lum_img = img[:,:,0]
plt.imshow(lum_img)

# use 'hot' color map
plt.imshow(lum_img, cmap="hot")
plt.colorbar()

# ### Examining a specific data range
#

plt.hist(lum_img.ravel(), bins=256, range=(0.0, 1.0), fc='k', ec='k')
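
# Having identified an interesting range in the histogram, we can clip the color
# limits to it; a small sketch using the set_clim() method of the AxesImage
# object returned by imshow():

imgplot = plt.imshow(lum_img)
imgplot.set_clim(0.0, 0.7)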

# ## References
#
# * [Pyplot tutorial](https://matplotlib.org/users/pyplot_tutorial.html)
# * [Image tutorial](https://matplotlib.org/users/image_tutorial.html)

0_numpy_matplotlib_scipy_sympy/numpy.ipynb → 0_numpy_matplotlib_scipy_sympy/numpy_tutorial.ipynb


1_logistic_regression/Least_squares.ipynb (+39, -843)
File diff suppressed because it is too large


README.md (+5, -2)

@@ -2,6 +2,9 @@

This set of notebook tutorials uses Python to study machine learning; it guides you through the fundamentals of Python as well as the background of machine learning and hands-on programming.

Since this course requires a large amount of programming practice to achieve good results, please complete the assignments and reports carefully. The assignments are at https://gitee.com/machinelearning2018/pr_homework; follow the instructions there.


## Contents
1. [Python basics](0_python/)
2. [numpy & matplotlib](0_numpy_matplotlib_scipy_sympy/)
@@ -13,6 +16,6 @@
8. PyTorch

## Other references
- * [Installing the Python environment](InstallPython.md)
+ * [Installing the Python environment](tips/InstallPython.md)
* [References etc.](References.md)
- * [confusion matrix](metric/confusion_matrix.ipynb)
+ * [confusion matrix](tips/confusion_matrix.ipynb)

References_notes.md (+0, -49)

@@ -1,49 +0,0 @@
## Notebooks:

machineLearning/10_digits_classification.ipynb

MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb

MachineLearningNotebooks/08.%20Practical_NeuralNets.ipynb


## Exercise
http://sofasofa.io/competitions.php?type=practice
https://www.kaggle.com/competitions


Titanic
notebooks/data-science-ipython-notebooks/kaggle/titanic.ipynb


## Method

Programming Multiclass Logistic Regression
http://localhost:8889/notebooks/MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb

Equation for MLP
http://localhost:8889/notebooks/MachineLearningNotebooks/07.%20MLP%20Neural%20Networks.ipynb

Optimization methods
http://localhost:8889/notebooks/MachineLearningNotebooks/06.%20Optimization.ipynb


https://github.com/wmpscc/DataMiningNotesAndPractice/blob/master/2.KMeans%E7%AE%97%E6%B3%95%E4%B8%8E%E4%BA%A4%E9%80%9A%E4%BA%8B%E6%95%85%E7%90%86%E8%B5%94%E5%AE%A1%E6%A0%B8%E9%A2%84%E6%B5%8B.md

evaluation metrics
http://localhost:8889/notebooks/machineLearning/10_digits_classification.ipynb


model selection and assessment
http://localhost:8889/notebooks/machineLearning/notebooks/01%20-%20Model%20Selection%20and%20Assessment.ipynb


NN
Neural networks: gradient descent & backpropagation  https://blog.csdn.net/skullfang/article/details/78634317
Deep learning from scratch (3): neural networks and the backpropagation algorithm  https://www.zybuluo.com/hanbingtao/note/476663
How to explain the backpropagation algorithm intuitively?  https://www.zhihu.com/question/27239198
Understanding backpropagation in neural networks in one read (BackPropagation)  https://www.cnblogs.com/charlotte77/p/5629865.html

https://medium.com/@UdacityINDIA/how-to-build-your-first-neural-network-with-python-6819c7f65dbf
https://enlight.nyc/projects/neural-network/
https://www.python-course.eu/neural_networks_with_python_numpy.php

nn/mlp_bp.ipynb (+2301, -221)
File diff suppressed because it is too large


nn/mlp_bp.py (+194, -4)

@@ -256,6 +256,11 @@ import matplotlib.pyplot as plt
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)

# generate nn output target
t = np.zeros((X.shape[0], 2))
t[np.where(y==0), 0] = 1
t[np.where(y==1), 1] = 1
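# (t is the one-hot encoding of y: sample i becomes [1, 0] if y[i] == 0 and [0, 1] otherwise)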

# plot data
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.show()
@@ -269,7 +274,10 @@ class NN_Model:
nn = NN_Model()
nn.n_input_dim = X.shape[1] # input size
nn.n_output_dim = 2 # output node size
-nn.n_hide_dim = 3 # hidden node size
+nn.n_hide_dim = 4 # hidden node size

nn.X = X
nn.y = y

# initial weight array
nn.W1 = np.random.randn(nn.n_input_dim, nn.n_hide_dim) / np.sqrt(nn.n_input_dim)
@@ -291,13 +299,195 @@ def forward(n, X):
    n.z2 = sigmod(n.z1.dot(n.W2) + n.b2)
    return n


# use the random initial weights to predict
forward(nn, X)
y_pred = np.zeros(nn.z2.shape[0])
y_pred[np.where(nn.z2[:, 0] < nn.z2[:, 1])] = 1
print(y_pred)
print(nn.z2)

# plot data
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=plt.cm.Spectral)
plt.show()

# +
from sklearn.metrics import accuracy_score

y_true = np.array(nn.y).astype(float)

# back-propagation
def backpropagation(n, X, y):
    for i in range(n.n_epoch):
        # forward to calculate each node's output
        forward(n, X)
        # print loss, accuracy
        L = np.sum((n.z2 - y)**2)
        y_pred = np.zeros(n.z2.shape[0])
        y_pred[np.where(n.z2[:, 0] < n.z2[:, 1])] = 1
        acc = accuracy_score(y_true, y_pred)
        print("epoch [%4d] L = %f, acc = %f" % (i, L, acc))
        # calc weights update
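        # (delta rule for the squared-error loss with sigmoid units:
        #  d = f'(net) * error, and the sigmoid derivative is z*(1-z))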
        d2 = n.z2*(1-n.z2)*(y - n.z2)
        d1 = n.z1*(1-n.z1)*(np.dot(d2, n.W2.T))
        # update weights
        n.W2 += n.epsilon * np.dot(n.z1.T, d2)
        n.b2 += n.epsilon * np.sum(d2, axis=0)
        n.W1 += n.epsilon * np.dot(X.T, d1)
        n.b1 += n.epsilon * np.sum(d1, axis=0)

nn.n_epoch = 2000
backpropagation(nn, X, t)


# +
# plot data
y_pred = np.zeros(nn.z2.shape[0])
y_pred[np.where(nn.z2[:,0]<nn.z2[:,1])] = 1

plt.scatter(X[:, 0], X[:, 1], c=nn.y, cmap=plt.cm.Spectral)
plt.title("ground truth")
plt.show()

plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=plt.cm.Spectral)
plt.title("predicted")
plt.show()

# -

# ## How can we wrap a multi-layer neural network in a class?
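#
# The class below generalizes the two-layer network above to an arbitrary list of
# layer sizes. As a sketch of what its backpropagation() method computes, for the
# squared-error loss $L = \sum \|y - z_{out}\|^2$ with sigmoid activations the
# per-layer deltas are
#
# $$\delta_{out} = z_{out}(1-z_{out})\odot(y - z_{out}), \qquad
# \delta_j = z_j(1-z_j)\odot(\delta_{j+1} W_{j+1}^T),$$
#
# and each layer is then updated as $W_j \leftarrow W_j + \epsilon\, z_{j-1}^T \delta_j$
# (with $z_0 = X$) and $b_j \leftarrow b_j + \epsilon \sum_i \delta_{j,i}$.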

# +
# %matplotlib inline

import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


# define the sigmod (sigmoid) activation function
def sigmod(X):
    return 1.0/(1+np.exp(-X))


# generate the NN model
class NN_Model:
    def __init__(self, nodes=None):
        self.epsilon = 0.01   # learning rate
        self.n_epoch = 1000   # iteration number
        if not nodes:
            self.nodes = [2, 4, 2]  # default node sizes (from input -> output)
        else:
            self.nodes = nodes

    def init_weight(self):
        W = []
        B = []
        n_layer = len(self.nodes)
        for i in range(n_layer-1):
            w = np.random.randn(self.nodes[i], self.nodes[i+1]) / np.sqrt(self.nodes[i])
            b = np.random.randn(1, self.nodes[i+1])
            W.append(w)
            B.append(b)
        self.W = W
        self.B = B

    def forward(self, X):
        Z = []
        x0 = X
        for i in range(len(self.nodes)-1):
            z = sigmod(np.dot(x0, self.W[i]) + self.B[i])
            x0 = z
            Z.append(z)
        self.Z = Z

    # back-propagation
    def backpropagation(self, X, y, n_epoch=None, epsilon=None):
        if not n_epoch: n_epoch = self.n_epoch
        if not epsilon: epsilon = self.epsilon
        self.X = X
        self.Y = y
        for i in range(n_epoch):
            # forward to calculate each node's output
            self.forward(X)
            # calc weight updates, from the last layer back to the first
            W = self.W
            B = self.B
            Z = self.Z
            D = []
            d0 = y
            n_layer = len(self.nodes)
            for j in range(n_layer-1, 0, -1):
                jj = j - 1
                z = self.Z[jj]
                if j == n_layer - 1:
                    # output layer: delta from the prediction error
                    d = z*(1-z)*(d0 - z)
                else:
                    # hidden layer: delta propagated back through W[j],
                    # the weights leaving this layer (not W[jj])
                    d = z*(1-z)*np.dot(d0, W[j].T)
                d0 = d
                D.insert(0, d)
            # update weights
            for j in range(n_layer-1, 0, -1):
                jj = j - 1
                if jj != 0:
                    W[jj] += epsilon * np.dot(Z[jj-1].T, D[jj])
                else:
                    W[jj] += epsilon * np.dot(X.T, D[jj])
                B[jj] += epsilon * np.sum(D[jj], axis=0)

    def evaluate(self):
        z = self.Z[-1]
        # print loss, accuracy
        L = np.sum((z - self.Y)**2)
        y_pred = np.argmax(z, axis=1)
        y_true = np.argmax(self.Y, axis=1)
        acc = accuracy_score(y_true, y_pred)
        print("L = %f, acc = %f" % (L, acc))



# generate sample data
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)

# generate nn output target
t = np.zeros((X.shape[0], 2))
t[np.where(y==0), 0] = 1
t[np.where(y==1), 1] = 1

# plot data
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.show()


nn = NN_Model([2, 4, 2])
nn.init_weight()
nn.backpropagation(X, t)

nn.evaluate()

# -

# ## References


references/References.md (+46, -0)

@@ -0,0 +1,46 @@
## Notebooks:

* machineLearning/10_digits_classification.ipynb
* MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb
* MachineLearningNotebooks/08.%20Practical_NeuralNets.ipynb


## Exercise
* http://sofasofa.io/competitions.php?type=practice
* https://www.kaggle.com/competitions


* Titanic: notebooks/data-science-ipython-notebooks/kaggle/titanic.ipynb


## Method

* Programming Multiclass Logistic Regression
notebooks/MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb

* Equation for MLP
notebooks/MachineLearningNotebooks/07.%20MLP%20Neural%20Networks.ipynb

* Optimization methods
notebooks/MachineLearningNotebooks/06.%20Optimization.ipynb


* https://github.com/wmpscc/DataMiningNotesAndPractice/blob/master/2.KMeans%E7%AE%97%E6%B3%95%E4%B8%8E%E4%BA%A4%E9%80%9A%E4%BA%8B%E6%95%85%E7%90%86%E8%B5%94%E5%AE%A1%E6%A0%B8%E9%A2%84%E6%B5%8B.md

* evaluation metrics
http://localhost:8889/notebooks/machineLearning/10_digits_classification.ipynb


* model selection and assessment
http://localhost:8889/notebooks/machineLearning/notebooks/01%20-%20Model%20Selection%20and%20Assessment.ipynb


## NN
* Neural networks: gradient descent & backpropagation  https://blog.csdn.net/skullfang/article/details/78634317
* Deep learning from scratch (3): neural networks and the backpropagation algorithm  https://www.zybuluo.com/hanbingtao/note/476663
* How to explain the backpropagation algorithm intuitively?  https://www.zhihu.com/question/27239198
* Understanding backpropagation in neural networks in one read (BackPropagation)  https://www.cnblogs.com/charlotte77/p/5629865.html

* https://medium.com/@UdacityINDIA/how-to-build-your-first-neural-network-with-python-6819c7f65dbf
* https://enlight.nyc/projects/neural-network/
* https://www.python-course.eu/neural_networks_with_python_numpy.php

references/nn/.nn_2.py.kate-swp (BIN)


references/nn/nn_3.py (+46, -0)

@@ -0,0 +1,46 @@
%matplotlib nbagg

# NOTE: this snippet assumes nn, forward(), X and t are already defined
# (e.g. by the earlier mlp_bp cells in the same notebook session)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from sklearn.metrics import accuracy_score

fig = plt.figure()
imgs = []

y_true = np.array(nn.y).astype(float)

# back-propagation
def backpropagation(n, X, y):
    for i in range(n.n_epoch):
        # forward to calculate each node's output
        forward(n, X)
        # print loss, accuracy
        L = np.sum((n.z2 - y)**2)
        y_pred = np.zeros(n.z2.shape[0])
        y_pred[np.where(n.z2[:, 0] < n.z2[:, 1])] = 1
        acc = accuracy_score(y_true, y_pred)
        print("epoch [%4d] L = %f, acc = %f" % (i, L, acc))
        # calc weights update
        d2 = n.z2*(1-n.z2)*(y - n.z2)
        d1 = n.z1*(1-n.z1)*(np.dot(d2, n.W2.T))
        # update weights
        n.W2 += n.epsilon * np.dot(n.z1.T, d2)
        n.b2 += n.epsilon * np.sum(d2, axis=0)
        n.W1 += n.epsilon * np.dot(X.T, d1)
        n.b1 += n.epsilon * np.sum(d1, axis=0)
        # plot animation
        #img = plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=plt.cm.Spectral)
        #imgs.append(img)

nn.n_epoch = 2000
backpropagation(nn, X, t)

#ani = animation.ArtistAnimation(fig, imgs)
#plt.show()

references/nn/nn_4.py (+125, -0)

@@ -0,0 +1,125 @@
# %matplotlib inline

import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


# define the sigmod (sigmoid) activation function
def sigmod(X):
    return 1.0/(1+np.exp(-X))


# generate the NN model
class NN_Model:
    def __init__(self, nodes=None):
        self.epsilon = 0.01   # learning rate
        self.n_epoch = 1000   # iteration number
        if not nodes:
            self.nodes = [2, 4, 2]  # default node sizes (from input -> output)
        else:
            self.nodes = nodes

    def init_weight(self):
        W = []
        B = []
        n_layer = len(self.nodes)
        for i in range(n_layer-1):
            w = np.random.randn(self.nodes[i], self.nodes[i+1]) / np.sqrt(self.nodes[i])
            b = np.random.randn(1, self.nodes[i+1])
            W.append(w)
            B.append(b)
        self.W = W
        self.B = B

    def forward(self, X):
        Z = []
        x0 = X
        for i in range(len(self.nodes)-1):
            z = sigmod(np.dot(x0, self.W[i]) + self.B[i])
            x0 = z
            Z.append(z)
        self.Z = Z

    # back-propagation
    def backpropagation(self, X, y, n_epoch=None, epsilon=None):
        if not n_epoch: n_epoch = self.n_epoch
        if not epsilon: epsilon = self.epsilon
        self.X = X
        self.Y = y
        for i in range(n_epoch):
            # forward to calculate each node's output
            self.forward(X)
            # calc weight updates, from the last layer back to the first
            W = self.W
            B = self.B
            Z = self.Z
            D = []
            d0 = y
            n_layer = len(self.nodes)
            for j in range(n_layer-1, 0, -1):
                jj = j - 1
                z = self.Z[jj]
                if j == n_layer - 1:
                    # output layer: delta from the prediction error
                    d = z*(1-z)*(d0 - z)
                else:
                    # hidden layer: delta propagated back through W[j],
                    # the weights leaving this layer (not W[jj])
                    d = z*(1-z)*np.dot(d0, W[j].T)
                d0 = d
                D.insert(0, d)
            # update weights
            for j in range(n_layer-1, 0, -1):
                jj = j - 1
                if jj != 0:
                    W[jj] += epsilon * np.dot(Z[jj-1].T, D[jj])
                else:
                    W[jj] += epsilon * np.dot(X.T, D[jj])
                B[jj] += epsilon * np.sum(D[jj], axis=0)

    def evaluate(self):
        z = self.Z[-1]
        # print loss, accuracy
        L = np.sum((z - self.Y)**2)
        y_pred = np.argmax(z, axis=1)
        y_true = np.argmax(self.Y, axis=1)
        acc = accuracy_score(y_true, y_pred)
        print("L = %f, acc = %f" % (L, acc))


# generate sample data
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)

# generate nn output target
t = np.zeros((X.shape[0], 2))
t[np.where(y==0), 0] = 1
t[np.where(y==1), 1] = 1

# plot data
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.show()


nn = NN_Model([2, 4, 2])
nn.init_weight()
nn.backpropagation(X, t)

nn.evaluate()

InstallPython.md → tips/InstallPython.md


metric/confusion matrix.ipynb → tips/confusion matrix.ipynb


metric/confusion matrix.py → tips/confusion matrix.py


tips/datasets.ipynb (+156, -0)
File diff suppressed because it is too large


tips/datasets.py (+76, -0)

@@ -0,0 +1,76 @@
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# ## Datasets

# ## Moons
#

# +
# %matplotlib inline
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

# generate sample data
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)

# plot data
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.show()
# -

# ## XOR

# +
import numpy as np
import matplotlib.pyplot as plt


rng = np.random.RandomState(0)
X = rng.randn(200, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
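# (Y is a boolean array here; scatter() maps False/True to the two colormap ends.
#  Use Y.astype(int) if integer class labels are needed downstream.)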

# plot data
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Spectral)
plt.show()
# -

# ## Digits

# +
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

# load data
digits = load_digits()

# copied from notebook 02_sklearn_data.ipynb
fig = plt.figure(figsize=(6, 6)) # figure size in inches
fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

# plot the digits: each image is 8x8 pixels
for i in range(64):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(digits.images[i], cmap=plt.cm.binary)
    # label the image with the target value
    ax.text(0, 7, str(digits.target[i]))

metric/images/confusion_matrix1.png → tips/images/confusion_matrix1.png


metric/images/confusion_matrix2.png → tips/images/confusion_matrix2.png


0_python/tips/README.md → tips/python/README.md


0_python/tips/pip.md → tips/python/pip.md


0_python/tips/virtualenv.md → tips/python/virtualenv.md


0_python/tips/virtualenv_wrapper.md → tips/python/virtualenv_wrapper.md

