
Re-organize dir structure

pull/4/MERGE
bushuhui · 4 years ago · commit f1f3d7ebc5
82 changed files with 1417 additions and 83 deletions
  1. 6_pytorch/1_NN/3-nn-sequential-module.ipynb (+3, -2)
  2. 6_pytorch/1_NN/optimizer/6_1-sgd.ipynb (+1, -1)
  3. 6_pytorch/1_NN/optimizer/6_2-momentum.ipynb (+1, -1)
  4. 6_pytorch/1_NN/optimizer/6_3-adagrad.ipynb (+1, -1)
  5. 6_pytorch/1_NN/optimizer/6_4-rmsprop.ipynb (+1, -1)
  6. 6_pytorch/1_NN/optimizer/6_6-adam.ipynb (+1, -1)
  7. 6_pytorch/2_CNN/1-basic_conv.ipynb (+1, -1)
  8. 6_pytorch/2_CNN/2-batch-normalization.ipynb (+1, -1)
  9. README.md (+8, -8)
  10. References.md (+72, -3)
  11. references/References.md (+0, -62)
  12. references/nn/nn-from-scratch (+0, -1)
  13. references_tips/InstallPython.md (+0, -0)
  14. references_tips/InstallPython_EN.md (+0, -0)
  15. references_tips/Intro_to_Deep_Learning.pdf (+0, -0)
  16. references_tips/Matplotlib.ipynb (+0, -0)
  17. references_tips/SciPy.ipynb (+0, -0)
  18. references_tips/Scikit-learn.ipynb (+0, -0)
  19. references_tips/Seaborn.ipynb (+0, -0)
  20. references_tips/Statsmodels.ipynb (+0, -0)
  21. references_tips/The Matrix Calculus You Need For Deep Learning.pdf (+0, -0)
  22. references_tips/cheatsheet/All Cheat Sheets.pdf (+0, -0)
  23. references_tips/cheatsheet/Bokeh.pdf (+0, -0)
  24. references_tips/cheatsheet/Deep Learning Cheat Sheet-Hacker Noon.pdf (+0, -0)
  25. references_tips/cheatsheet/Keras.jpg (+0, -0)
  26. references_tips/cheatsheet/Matplotlib.png (+0, -0)
  27. references_tips/cheatsheet/Neural Network Cells.png (+0, -0)
  28. references_tips/cheatsheet/Neural Network Graphs.png (+0, -0)
  29. references_tips/cheatsheet/Neural Networks Zoo.png (+0, -0)
  30. references_tips/cheatsheet/Numpy.png (+0, -0)
  31. references_tips/cheatsheet/Pandas-1.jpg (+0, -0)
  32. references_tips/cheatsheet/Pandas-2.jpg (+0, -0)
  33. references_tips/cheatsheet/Pandas-3.png (+0, -0)
  34. references_tips/cheatsheet/README.md (+0, -0)
  35. references_tips/cheatsheet/Scikit Learn.pdf (+0, -0)
  36. references_tips/cheatsheet/Scikit Learn.png (+0, -0)
  37. references_tips/cheatsheet/Scipy.png (+0, -0)
  38. references_tips/cheatsheet/Seaborn.pdf (+0, -0)
  39. references_tips/cheatsheet/conda.pdf (+0, -0)
  40. references_tips/cheatsheet/python3.pdf (+0, -0)
  41. references_tips/confusion_matrix.ipynb (+0, -0)
  42. references_tips/dataset_CIFAR-10.py (+0, -0)
  43. references_tips/dataset_circles.csv (+0, -0)
  44. references_tips/datasets.ipynb (+0, -0)
  45. references_tips/images/confusion_matrix1.png (+0, -0)
  46. references_tips/images/confusion_matrix2.png (+0, -0)
  47. references_tips/images/dnn_tips_01.jpeg (+0, -0)
  48. references_tips/images/dnn_tips_02.jpeg (+0, -0)
  49. references_tips/images/dnn_tips_03.jpeg (+0, -0)
  50. references_tips/images/dnn_tips_04.jpeg (+0, -0)
  51. references_tips/images/dnn_tips_05.jpeg (+0, -0)
  52. references_tips/images/dnn_tips_06.jpeg (+0, -0)
  53. references_tips/images/dnn_tips_07.jpeg (+0, -0)
  54. references_tips/logistic_regression_demo/3a - Linear regression 1D.ipynb (+0, -0)
  55. references_tips/logistic_regression_demo/3b - Linear regression 2D.ipynb (+0, -0)
  56. references_tips/logistic_regression_demo/4 - Logistic Regression.ipynb (+0, -0)
  57. references_tips/logistic_regression_demo/data/artifical_lin.txt (+0, -0)
  58. references_tips/logistic_regression_demo/data/artifical_lin_2.txt (+0, -0)
  59. references_tips/logistic_regression_demo/data/breast-cancer-wisconsin.data (+0, -0)
  60. references_tips/logistic_regression_demo/ipython_notebook_config.py (+0, -0)
  61. references_tips/logistic_regression_demo/utility.py (+0, -0)
  62. references_tips/nn/nn-from-scratch/README.md (+13, -0)
  63. references_tips/nn/nn-from-scratch/ann_classification.py (+144, -0)
  64. references_tips/nn/nn-from-scratch/nn-3-layer-network.png (BIN)
  65. references_tips/nn/nn-from-scratch/nn-from-scratch.ipynb (+605, -0)
  66. references_tips/nn/nn-from-scratch/nn-from-scratch.py (+294, -0)
  67. references_tips/nn/nn-from-scratch/nn_from_scratch.py (+165, -0)
  68. references_tips/nn/nn-from-scratch/requirements.txt (+47, -0)
  69. references_tips/nn/nn-from-scratch/simple_classification.py (+59, -0)
  70. references_tips/nn/nn_1.py (+0, -0)
  71. references_tips/nn/nn_2.py (+0, -0)
  72. references_tips/nn/nn_3.py (+0, -0)
  73. references_tips/nn/nn_4.py (+0, -0)
  74. references_tips/notebook_tips.ipynb (+0, -0)
  75. references_tips/python/README.md (+0, -0)
  76. references_tips/python/pip.md (+0, -0)
  77. references_tips/python/virtualenv.md (+0, -0)
  78. references_tips/python/virtualenv_wrapper.md (+0, -0)
  79. references_tips/pytorch/tensor_divide_int.py (+0, -0)
  80. references_tips/supervised_learning/Recognizing hand-written digits - SVM.ipynb (+0, -0)
  81. references_tips/supervised_learning/supervised learning.ipynb (+0, -0)
  82. references_tips/构建深度神经网络的一些实战建议.md (+0, -0)

6_pytorch/1_NN/3-nn-sequential-module.ipynb (+3, -2)

@@ -229,8 +229,9 @@
"optimizer = torch.optim.SGD([w, b], 1e-1)\n",
"\n",
"def logistic_regression(x):\n",
" #FIXME: sigmod is included in nn.BCEWithLogitsLoss \n",
" return torch.mm(x, w) + b\n",
"\n",
" \n",
"criterion = nn.BCEWithLogitsLoss()"
]
},
@@ -1164,7 +1165,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,

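For context on the FIXME in the hunk above: nn.BCEWithLogitsLoss fuses the sigmoid with the binary cross-entropy loss, which is why logistic_regression() returns the raw logits from torch.mm(x, w) + b. A minimal sketch of that equivalence (the tensors below are illustrative, not taken from the notebook):

```python
import torch
import torch.nn as nn

logits = torch.randn(4, 1)                    # raw scores, e.g. torch.mm(x, w) + b
target = torch.randint(0, 2, (4, 1)).float()  # 0/1 labels

loss_fused = nn.BCEWithLogitsLoss()(logits, target)        # sigmoid applied internally
loss_manual = nn.BCELoss()(torch.sigmoid(logits), target)  # explicit sigmoid + BCE

print(torch.allclose(loss_fused, loss_manual))  # True; the fused form is more numerically stable
```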

6_pytorch/1_NN/optimizer/6_1-sgd.ipynb (+1, -1)

@@ -433,7 +433,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,


6_pytorch/1_NN/optimizer/6_2-momentum.ipynb (+1, -1)

@@ -388,7 +388,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,


6_pytorch/1_NN/optimizer/6_3-adagrad.ipynb (+1, -1)

@@ -256,7 +256,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,


6_pytorch/1_NN/optimizer/6_4-rmsprop.ipynb (+1, -1)

@@ -339,7 +339,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.6.9"
}
},
"nbformat": 4,


6_pytorch/1_NN/optimizer/6_6-adam.ipynb (+1, -1)

@@ -281,7 +281,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.6.9"
}
},
"nbformat": 4,


6_pytorch/2_CNN/1-basic_conv.ipynb (+1, -1)

@@ -347,7 +347,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,


6_pytorch/2_CNN/2-batch-normalization.ipynb (+1, -1)

@@ -561,7 +561,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,


README.md (+8, -8)

@@ -4,7 +4,7 @@

由于**本课程需要大量的编程练习才能取得比较好的学习效果**,因此需要认真去完成[《机器学习-作业和报告》](https://gitee.com/pi-lab/machinelearning_homework),写作业的过程可以查阅网上的资料,但是不能直接照抄,需要自己独立思考并独立写出代码。

为了让大家更好的自学本课程,课程讲座的视频会陆续上传到[b站 - 机器学习](https://www.bilibili.com/video/BV1oZ4y1N7ei/),欢迎大家观看学习。
为了让大家更好的自学本课程,课程讲座的视频会陆续上传到[《B站 - 机器学习》](https://www.bilibili.com/video/BV1oZ4y1N7ei/),欢迎大家观看学习。

![Machine Learning Cover](images/machine_learning.png)

@@ -81,17 +81,17 @@
## 3. 参考资料
* 资料速查
* [相关学习参考资料汇总](References.md)
* [一些速查手册](tips/cheatsheet)
* [一些速查手册](references_tips/cheatsheet)

* 机器学习方面技巧等
* [Confusion Matrix](tips/confusion_matrix.ipynb)
* [Datasets](tips/datasets.ipynb)
* [构建深度神经网络的一些实战建议](tips/构建深度神经网络的一些实战建议.md)
* [Intro to Deep Learning](tips/Intro_to_Deep_Learning.pdf)
* [Confusion Matrix](references_tips/confusion_matrix.ipynb)
* [Datasets](references_tips/datasets.ipynb)
* [构建深度神经网络的一些实战建议](references_tips/构建深度神经网络的一些实战建议.md)
* [Intro to Deep Learning](references_tips/Intro_to_Deep_Learning.pdf)

* Python技巧等
* [安装Python环境](tips/InstallPython.md)
* [Python tips](tips/python)
* [安装Python环境](references_tips/InstallPython.md)
* [Python tips](references_tips/python)

* Git
* [Git Tips - 常用方法速查,快速入门](https://gitee.com/pi-lab/learn_programming/blob/master/6_tools/git/git-tips.md)


References.md (+72, -3)

@@ -7,10 +7,13 @@

## References

* [形象直观了解谷歌大脑新型优化器LAMB](https://www.toutiao.com/i6687162064395305475/)

* 22 个神经网络结构设计/可视化工具
- https://www.toutiao.com/i6836884346155041292/
- https://github.com/ashishpatel26/Tools-to-Design-or-Visualize-Architecture-of-Neural-Network
- https://github.com/ashishpatel26/Tools-to-Design-or-Visualize-Architecture-of-Neural-Network
* CNN 可视化工具 https://m.toutiaocdn.com/group/6822123587156050435
- https://poloclub.github.io/cnn-explainer/
- https://github.com/poloclub/cnn-explainer
@@ -20,4 +23,70 @@
- https://github.com/minivision-ai/photo2cartoon

* [Awesome Deep Learning Project Ideas](https://github.com/NirantK/awesome-project-ideas)
* [Machine Learning From Scratch](https://github.com/eriklindernoren/ML-From-Scratch)

* [Machine Learning From Scratch](https://github.com/eriklindernoren/ML-From-Scratch)



## Notebooks

* machineLearning/10_digits_classification.ipynb
* MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb
* MachineLearningNotebooks/08.%20Practical_NeuralNets.ipynb


## Exercise
* http://sofasofa.io/competitions.php?type=practice
* https://www.kaggle.com/competitions
* Machine learning project ideas
* https://data-flair.training/blogs/machine-learning-project-ideas/
* https://data-flair.training/blogs/deep-learning-project-ideas/
* https://www.kdnuggets.com/2020/03/20-machine-learning-datasets-project-ideas.html


* Titanic: notebooks/data-science-ipython-notebooks/kaggle/titanic.ipynb
* 使用神经网络解决拼图游戏 https://www.toutiao.com/a6855437347463365133/
* [Sudoku-Solver](https://github.com/shivaverma/Sudoku-Solver)


## Method

* Programming Multiclass Logistic Regression
notebooks/MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb

* Equation for MLP
notebooks/MachineLearningNotebooks/07.%20MLP%20Neural%20Networks.ipynb

* Optimization methods
notebooks/MachineLearningNotebooks/06.%20Optimization.ipynb


* https://github.com/wmpscc/DataMiningNotesAndPractice/blob/master/2.KMeans%E7%AE%97%E6%B3%95%E4%B8%8E%E4%BA%A4%E9%80%9A%E4%BA%8B%E6%95%85%E7%90%86%E8%B5%94%E5%AE%A1%E6%A0%B8%E9%A2%84%E6%B5%8B.md

* evaluation metrics
http://localhost:8889/notebooks/machineLearning/10_digits_classification.ipynb


* model selection and assessment
http://localhost:8889/notebooks/machineLearning/notebooks/01%20-%20Model%20Selection%20and%20Assessment.ipynb


## NN
* 神经网络——梯度下降&反向传播 https://blog.csdn.net/skullfang/article/details/78634317
* 零基础入门深度学习(3) - 神经网络和反向传播算法 https://www.zybuluo.com/hanbingtao/note/476663
* 如何直观地解释 backpropagation 算法? https://www.zhihu.com/question/27239198
* 一文弄懂神经网络中的反向传播法——BackPropagation https://www.cnblogs.com/charlotte77/p/5629865.html

* https://medium.com/@UdacityINDIA/how-to-build-your-first-neural-network-with-python-6819c7f65dbf
* https://enlight.nyc/projects/neural-network/
* https://www.python-course.eu/neural_networks_with_python_numpy.php


## k-Means
* [如何使用 Keras 实现无监督聚类](http://m.sohu.com/a/236221126_717210)

## AutoEncoder (自编码/非监督学习)
* https://morvanzhou.github.io/tutorials/machine-learning/torch/4-04-autoencoder/
* https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/404_autoencoder.py
* pytorch AutoEncoder 自编码 https://www.jianshu.com/p/f0929f427d03
* Adversarial Autoencoders (with Pytorch) https://blog.paperspace.com/adversarial-autoencoders-with-pytorch/

references/References.md (+0, -62)

@@ -1,62 +0,0 @@
## Notebooks:

* machineLearning/10_digits_classification.ipynb
* MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb
* MachineLearningNotebooks/08.%20Practical_NeuralNets.ipynb


## Exercise
* http://sofasofa.io/competitions.php?type=practice
* https://www.kaggle.com/competitions
* Machine learning project ideas
* https://data-flair.training/blogs/machine-learning-project-ideas/
* https://data-flair.training/blogs/deep-learning-project-ideas/
* https://www.kdnuggets.com/2020/03/20-machine-learning-datasets-project-ideas.html


* Titanic: notebooks/data-science-ipython-notebooks/kaggle/titanic.ipynb
* 使用神经网络解决拼图游戏 https://www.toutiao.com/a6855437347463365133/
* [Sudoku-Solver](https://github.com/shivaverma/Sudoku-Solver)


## Method

* Programming Multiclass Logistic Regression
notebooks/MachineLearningNotebooks/05.%20Logistic%20Regression.ipynb

* Equation for MLP
notebooks/MachineLearningNotebooks/07.%20MLP%20Neural%20Networks.ipynb

* Optimization methods
notebooks/MachineLearningNotebooks/06.%20Optimization.ipynb


* https://github.com/wmpscc/DataMiningNotesAndPractice/blob/master/2.KMeans%E7%AE%97%E6%B3%95%E4%B8%8E%E4%BA%A4%E9%80%9A%E4%BA%8B%E6%95%85%E7%90%86%E8%B5%94%E5%AE%A1%E6%A0%B8%E9%A2%84%E6%B5%8B.md

* evaluation metrics
http://localhost:8889/notebooks/machineLearning/10_digits_classification.ipynb


* model selection and assessment
http://localhost:8889/notebooks/machineLearning/notebooks/01%20-%20Model%20Selection%20and%20Assessment.ipynb


## NN
* 神经网络——梯度下降&反向传播 https://blog.csdn.net/skullfang/article/details/78634317
* 零基础入门深度学习(3) - 神经网络和反向传播算法 https://www.zybuluo.com/hanbingtao/note/476663
* 如何直观地解释 backpropagation 算法? https://www.zhihu.com/question/27239198
* 一文弄懂神经网络中的反向传播法——BackPropagation https://www.cnblogs.com/charlotte77/p/5629865.html

* https://medium.com/@UdacityINDIA/how-to-build-your-first-neural-network-with-python-6819c7f65dbf
* https://enlight.nyc/projects/neural-network/
* https://www.python-course.eu/neural_networks_with_python_numpy.php


## k-Means
* [如何使用 Keras 实现无监督聚类](http://m.sohu.com/a/236221126_717210)

## AutoEncoder (自编码/非监督学习)
* https://morvanzhou.github.io/tutorials/machine-learning/torch/4-04-autoencoder/
* https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/404_autoencoder.py
* pytorch AutoEncoder 自编码 https://www.jianshu.com/p/f0929f427d03
* Adversarial Autoencoders (with Pytorch) https://blog.paperspace.com/adversarial-autoencoders-with-pytorch/

references/nn/nn-from-scratch (+0, -1)

@@ -1 +0,0 @@
Subproject commit 0b52553c84c8bd5fed4f0c890c98af802e9705e9

tips/InstallPython.md → references_tips/InstallPython.md
tips/InstallPython_EN.md → references_tips/InstallPython_EN.md
tips/Intro_to_Deep_Learning.pdf → references_tips/Intro_to_Deep_Learning.pdf
references/Matplotlib.ipynb → references_tips/Matplotlib.ipynb
references/SciPy.ipynb → references_tips/SciPy.ipynb
references/Scikit-learn.ipynb → references_tips/Scikit-learn.ipynb
references/Seaborn.ipynb → references_tips/Seaborn.ipynb
references/Statsmodels.ipynb → references_tips/Statsmodels.ipynb
tips/The Matrix Calculus You Need For Deep Learning.pdf → references_tips/The Matrix Calculus You Need For Deep Learning.pdf
tips/cheatsheet/All Cheat Sheets.pdf → references_tips/cheatsheet/All Cheat Sheets.pdf
tips/cheatsheet/Bokeh.pdf → references_tips/cheatsheet/Bokeh.pdf
tips/cheatsheet/Deep Learning Cheat Sheet-Hacker Noon.pdf → references_tips/cheatsheet/Deep Learning Cheat Sheet-Hacker Noon.pdf
tips/cheatsheet/Keras.jpg → references_tips/cheatsheet/Keras.jpg
tips/cheatsheet/Matplotlib.png → references_tips/cheatsheet/Matplotlib.png
tips/cheatsheet/Neural Network Cells.png → references_tips/cheatsheet/Neural Network Cells.png
tips/cheatsheet/Neural Network Graphs.png → references_tips/cheatsheet/Neural Network Graphs.png
tips/cheatsheet/Neural Networks Zoo.png → references_tips/cheatsheet/Neural Networks Zoo.png
tips/cheatsheet/Numpy.png → references_tips/cheatsheet/Numpy.png
tips/cheatsheet/Pandas-1.jpg → references_tips/cheatsheet/Pandas-1.jpg
tips/cheatsheet/Pandas-2.jpg → references_tips/cheatsheet/Pandas-2.jpg
tips/cheatsheet/Pandas-3.png → references_tips/cheatsheet/Pandas-3.png
tips/cheatsheet/README.md → references_tips/cheatsheet/README.md
tips/cheatsheet/Scikit Learn.pdf → references_tips/cheatsheet/Scikit Learn.pdf
tips/cheatsheet/Scikit Learn.png → references_tips/cheatsheet/Scikit Learn.png
tips/cheatsheet/Scipy.png → references_tips/cheatsheet/Scipy.png
tips/cheatsheet/Seaborn.pdf → references_tips/cheatsheet/Seaborn.pdf
tips/cheatsheet/conda.pdf → references_tips/cheatsheet/conda.pdf
tips/cheatsheet/python3.pdf → references_tips/cheatsheet/python3.pdf
tips/confusion_matrix.ipynb → references_tips/confusion_matrix.ipynb
tips/dataset_CIFAR-10.py → references_tips/dataset_CIFAR-10.py
tips/dataset_circles.csv → references_tips/dataset_circles.csv
tips/datasets.ipynb → references_tips/datasets.ipynb
tips/images/confusion_matrix1.png → references_tips/images/confusion_matrix1.png
tips/images/confusion_matrix2.png → references_tips/images/confusion_matrix2.png
tips/images/dnn_tips_01.jpeg → references_tips/images/dnn_tips_01.jpeg
tips/images/dnn_tips_02.jpeg → references_tips/images/dnn_tips_02.jpeg
tips/images/dnn_tips_03.jpeg → references_tips/images/dnn_tips_03.jpeg
tips/images/dnn_tips_04.jpeg → references_tips/images/dnn_tips_04.jpeg
tips/images/dnn_tips_05.jpeg → references_tips/images/dnn_tips_05.jpeg
tips/images/dnn_tips_06.jpeg → references_tips/images/dnn_tips_06.jpeg
tips/images/dnn_tips_07.jpeg → references_tips/images/dnn_tips_07.jpeg
references/logistic_regression_demo/3a - Linear regression 1D.ipynb → references_tips/logistic_regression_demo/3a - Linear regression 1D.ipynb
references/logistic_regression_demo/3b - Linear regression 2D.ipynb → references_tips/logistic_regression_demo/3b - Linear regression 2D.ipynb
references/logistic_regression_demo/4 - Logistic Regression.ipynb → references_tips/logistic_regression_demo/4 - Logistic Regression.ipynb
references/logistic_regression_demo/data/artifical_lin.txt → references_tips/logistic_regression_demo/data/artifical_lin.txt
references/logistic_regression_demo/data/artifical_lin_2.txt → references_tips/logistic_regression_demo/data/artifical_lin_2.txt
references/logistic_regression_demo/data/breast-cancer-wisconsin.data → references_tips/logistic_regression_demo/data/breast-cancer-wisconsin.data
references/logistic_regression_demo/ipython_notebook_config.py → references_tips/logistic_regression_demo/ipython_notebook_config.py
references/logistic_regression_demo/utility.py → references_tips/logistic_regression_demo/utility.py


references_tips/nn/nn-from-scratch/README.md (+13, -0)

@@ -0,0 +1,13 @@
[**Please read the blog post that goes with this code!**](http://www.wildml.com/2015/09/implementing-a-neural-network-from-scratch/)

### iPython notebook setup

```bash
# Create and activate new virtual environment (optional)
virtualenv venv
source venv/bin/activate
# Install requirements
pip install -r requirements.txt
# Start the notebook server
jupyter notebook
```

references_tips/nn/nn-from-scratch/ann_classification.py (+144, -0)

@@ -0,0 +1,144 @@
__author__ = 'm.bashari'
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt


class Config:
nn_input_dim = 2 # input layer dimensionality
nn_output_dim = 2 # output layer dimensionality
# Gradient descent parameters (I picked these by hand)
epsilon = 0.01 # learning rate for gradient descent
reg_lambda = 0.01 # regularization strength


def generate_data():
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)
return X, y


def visualize(X, y, model):
# plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap=plt.cm.Spectral)
# plt.show()
plot_decision_boundary(lambda x:predict(model,x), X, y)
plt.title("Logistic Regression")


def plot_decision_boundary(pred_func, X, y):
# Set min and max values and give it some padding
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.01
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the function value for the whole grid
Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot the contour and training examples
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.show()


# Helper function to evaluate the total loss on the dataset
def calculate_loss(model, X, y):
num_examples = len(X) # training set size
W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
# Forward propagation to calculate our predictions
z1 = X.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
# Calculating the loss
corect_logprobs = -np.log(probs[range(num_examples), y])
data_loss = np.sum(corect_logprobs)
# Add regularization term to loss (optional)
data_loss += Config.reg_lambda / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
return 1. / num_examples * data_loss


def predict(model, x):
W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
# Forward propagation
z1 = x.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
return np.argmax(probs, axis=1)


# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(X, y, nn_hdim, num_passes=20000, print_loss=False):
# Initialize the parameters to random values. We need to learn these.
num_examples = len(X)
np.random.seed(0)
W1 = np.random.randn(Config.nn_input_dim, nn_hdim) / np.sqrt(Config.nn_input_dim)
b1 = np.zeros((1, nn_hdim))
W2 = np.random.randn(nn_hdim, Config.nn_output_dim) / np.sqrt(nn_hdim)
b2 = np.zeros((1, Config.nn_output_dim))

# This is what we return at the end
model = {}

# Gradient descent. For each batch...
for i in range(0, num_passes):

# Forward propagation
z1 = X.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

# Backpropagation
delta3 = probs
delta3[range(num_examples), y] -= 1
dW2 = (a1.T).dot(delta3)
db2 = np.sum(delta3, axis=0, keepdims=True)
delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
dW1 = np.dot(X.T, delta2)
db1 = np.sum(delta2, axis=0)

# Add regularization terms (b1 and b2 don't have regularization terms)
dW2 += Config.reg_lambda * W2
dW1 += Config.reg_lambda * W1

# Gradient descent parameter update
W1 += -Config.epsilon * dW1
b1 += -Config.epsilon * db1
W2 += -Config.epsilon * dW2
b2 += -Config.epsilon * db2

# Assign new parameters to the model
model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

# Optionally print the loss.
# This is expensive because it uses the whole dataset, so we don't want to do it too often.
if print_loss and i % 1000 == 0:
print("Loss after iteration %i: %f" % (i, calculate_loss(model, X, y)))

return model


def classify(X, y):
# clf = linear_model.LogisticRegressionCV()
# clf.fit(X, y)
# return clf

pass


def main():
X, y = generate_data()
model = build_model(X, y, 3, print_loss=True)
visualize(X, y, model)


if __name__ == "__main__":
main()

references_tips/nn/nn-from-scratch/nn-3-layer-network.png (BIN)
Width: 1460 | Height: 988 | Size: 110 kB

references_tips/nn/nn-from-scratch/nn-from-scratch.ipynb (+605, -0)
File diff suppressed because it is too large


references_tips/nn/nn-from-scratch/nn-from-scratch.py (+294, -0)

@@ -0,0 +1,294 @@
# ---
# jupyter:
# jupytext_format_version: '1.2'
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.5.2
# ---

# # Implementing a Neural Network from Scratch - An Introduction
#
# In this post we will implement a simple 3-layer neural network from scratch. We won't derive all the math that's required, but I will try to give an intuitive explanation of what we are doing and will point to resources to read up on the details.
#
# In this post I'm assuming that you are familiar with basic Calculus and Machine Learning concepts, e.g. you know what classification and regularization are. Ideally you also know a bit about how optimization techniques like gradient descent work. But even if you're not familiar with any of the above, this post could still turn out to be interesting ;)
#
# But why implement a Neural Network from scratch at all? Even if you plan on using Neural Network libraries like [PyBrain](http://pybrain.org) in the future, implementing a network from scratch at least once is an extremely valuable exercise. It helps you gain an understanding of how neural networks work, and that is essential to designing effective models.
#
# One thing to note is that the code examples here aren't terribly efficient. They are meant to be easy to understand. In an upcoming post I will explore how to write an efficient Neural Network implementation using [Theano](http://deeplearning.net/software/theano/).

# +
# Package imports
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib

# Display plots inline and change default figure size
# %matplotlib inline
matplotlib.rcParams['figure.figsize'] = (10.0, 8.0)
# -

# ## Generating a dataset
#
# Let's start by generating a dataset we can play with. Fortunately, [scikit-learn](http://scikit-learn.org/) has some useful dataset generators, so we don't need to write the code ourselves. We will go with the [`make_moons`](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html) function.

# Generate a dataset and plot it
np.random.seed(0)
X, y = sklearn.datasets.make_moons(200, noise=0.20)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)

# The dataset we generated has two classes, plotted as red and blue points. You can think of the blue dots as male patients and the red dots as female patients, with the x- and y- axis being medical measurements.
#
# Our goal is to train a Machine Learning classifier that predicts the correct class (male or female) given the x- and y- coordinates. Note that the data is not *linearly separable*, we can't draw a straight line that separates the two classes. This means that linear classifiers, such as Logistic Regression, won't be able to fit the data unless you hand-engineer non-linear features (such as polynomials) that work well for the given dataset.
#
# In fact, that's one of the major advantages of Neural Networks. You don't need to worry about [feature engineering](http://machinelearningmastery.com/discover-feature-engineering-how-to-engineer-features-and-how-to-get-good-at-it/). The hidden layer of a neural network will learn features for you.

# ## Logistic Regression
#
# To demonstrate the point let's train a Logistic Regression classifier. Its input will be the x- and y-values and the output the predicted class (0 or 1). To make our life easy we use the Logistic Regression class from `scikit-learn`.

# Train the logistic regression classifier
clf = sklearn.linear_model.LogisticRegressionCV()
clf.fit(X, y)

# Helper function to plot a decision boundary.
# If you don't fully understand this function don't worry, it just generates the contour plot below.
def plot_decision_boundary(pred_func):
# Set min and max values and give it some padding
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.01
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the function value for the whole grid
Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot the contour and training examples
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)

# Plot the decision boundary
plot_decision_boundary(lambda x: clf.predict(x))
plt.title("Logistic Regression")

# The graph shows the decision boundary learned by our Logistic Regression classifier. It separates the data as well as it can using a straight line, but it's unable to capture the "moon shape" of our data.

# ## Training a Neural Network

# Let's now build a 3-layer neural network with one input layer, one hidden layer, and one output layer. The number of nodes in the input layer is determined by the dimensionality of our data, 2. Similarly, the number of nodes in the output layer is determined by the number of classes we have, also 2. (Because we only have 2 classes we could actually get away with only one output node predicting 0 or 1, but having 2 makes it easier to extend the network to more classes later on). The input to the network will be x- and y- coordinates and its output will be two probabilities, one for class 0 ("female") and one for class 1 ("male"). It looks something like this:
#
# <img src='./nn-3-layer-network.png' style='width: 50%'/>

# We can choose the dimensionality (the number of nodes) of the hidden layer. The more nodes we put into the hidden layer the more complex functions we will be able to fit. But higher dimensionality comes at a cost. First, more computation is required to make predictions and learn the network parameters. A bigger number of parameters also means we become more prone to overfitting our data.
#
# How to choose the size of the hidden layer? While there are some general guidelines and recommendations, it always depends on your specific problem and is more of an art than a science. We will play with the number of nodes in the hidden layer later on and see how it affects our output.

# We also need to pick an *activation function* for our hidden layer. The activation function transforms the inputs of the layer into its outputs. A nonlinear activation function is what allows us to fit nonlinear hypotheses. Common choices for activation functions are [tanh](https://reference.wolfram.com/language/ref/Tanh.html), the [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function), or [ReLUs](https://en.wikipedia.org/wiki/Rectifier_neural_networks). We will use `tanh`, which performs quite well in many scenarios. A nice property of these functions is that their derivative can be computed using the original function value. For example, the derivative of $\tanh x$ is $1-\tanh^2 x$. This is useful because it allows us to compute $\tanh x$ once and re-use its value later on to get the derivative.

# Because we want our network to output probabilities the activation function for the output layer will be the [softmax](https://en.wikipedia.org/wiki/Softmax_function), which is simply a way to convert raw scores to probabilities. If you're familiar with the logistic function you can think of softmax as its generalization to multiple classes.

# ### How our network makes predictions
#
# Our network makes predictions using *forward propagation*, which is just a bunch of matrix multiplications and the application of the activation function(s) we defined above. If $x$ is the 2-dimensional input to our network then we calculate our prediction $\hat{y}$ (also two-dimensional) as follows:

# $$
# \begin{aligned}
# z_1 & = xW_1 + b_1 \\
# a_1 & = \tanh(z_1) \\
# z_2 & = a_1W_2 + b_2 \\
# a_2 & = \hat{y} = \mathrm{softmax}(z_2)
# \end{aligned}
# $$

# $z_i$ is the weighted sum of inputs of layer $i$ (bias included) and $a_i$ is the output of layer $i$ after applying the activation function. $W_1, b_1, W_2, b_2$ are parameters of our network, which we need to learn from our training data. You can think of them as matrices transforming data between layers of the network. Looking at the matrix multiplications above we can figure out the dimensionality of these matrices. If we use 500 nodes for our hidden layer then $W_1 \in \mathbb{R}^{2\times500}$, $b_1 \in \mathbb{R}^{500}$, $W_2 \in \mathbb{R}^{500\times2}$, $b_2 \in \mathbb{R}^{2}$. Now you see why we have more parameters if we increase the size of the hidden layer.

# ### Learning the Parameters
#
# Learning the parameters for our network means finding parameters ($W_1, b_1, W_2, b_2$) that minimize the error on our training data. But how do we define the error? We call the function that measures our error the *loss function*. A common choice with the softmax output is the [cross-entropy loss](https://en.wikipedia.org/wiki/Cross_entropy#Cross-entropy_error_function_and_logistic_regression). If we have $N$ training examples and $C$ classes then the loss for our prediction $\hat{y}$ with respect to the true labels $y$ is given by:
#
# $$
# \begin{aligned}
# L(y,\hat{y}) = - \frac{1}{N} \sum_{n \in N} \sum_{i \in C} y_{n,i} \log\hat{y}_{n,i}
# \end{aligned}
# $$
#
#

# The formula looks complicated, but all it really does is sum over our training examples and add to the loss if we predicted the incorrect class. So, the further away $y$ (the correct labels) and $\hat{y}$ (our predictions) are, the greater our loss will be.

# Remember that our goal is to find the parameters that minimize our loss function. We can use [gradient descent](http://cs231n.github.io/optimization-1/) to find its minimum. I will implement the most vanilla version of gradient descent, also called batch gradient descent with a fixed learning rate. Variations such as SGD (stochastic gradient descent) or minibatch gradient descent typically perform better in practice. So if you are serious you'll want to use one of these, and ideally you would also [decay the learning rate over time](http://cs231n.github.io/neural-networks-3/#anneal).
#
# As an input, gradient descent needs the gradients (vector of derivatives) of the loss function with respect to our parameters: $\frac{\partial{L}}{\partial{W_1}}$, $\frac{\partial{L}}{\partial{b_1}}$, $\frac{\partial{L}}{\partial{W_2}}$, $\frac{\partial{L}}{\partial{b_2}}$. To calculate these gradients we use the famous *backpropagation algorithm*, which is a way to efficiently calculate the gradients starting from the output. I won't go into detail how backpropagation works, but there are many excellent explanations ([here](http://colah.github.io/posts/2015-08-Backprop/) or [here](http://cs231n.github.io/optimization-2/)) floating around the web.
#
# Applying the backpropagation formula we find the following (trust me on this):

# $$
# \begin{aligned}
# & \delta_3 = \hat{y} - y \\
# & \delta_2 = (1 - \tanh^2z_1) \circ \delta_3W_2^T \\
# & \frac{\partial{L}}{\partial{W_2}} = a_1^T \delta_3 \\
# & \frac{\partial{L}}{\partial{b_2}} = \delta_3\\
# & \frac{\partial{L}}{\partial{W_1}} = x^T \delta_2\\
# & \frac{\partial{L}}{\partial{b_1}} = \delta_2 \\
# \end{aligned}
# $$

# ### Implementation
#
# Now we are ready for our implementation. We start by defining some useful variables and parameters for gradient descent:

# +
num_examples = len(X) # training set size
nn_input_dim = 2 # input layer dimensionality
nn_output_dim = 2 # output layer dimensionality

# Gradient descent parameters (I picked these by hand)
epsilon = 0.01 # learning rate for gradient descent
reg_lambda = 0.01 # regularization strength
# -

# First let's implement the loss function we defined above. We use this to evaluate how well our model is doing:

# Helper function to evaluate the total loss on the dataset
def calculate_loss(model):
W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
# Forward propagation to calculate our predictions
z1 = X.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
# Calculating the loss
corect_logprobs = -np.log(probs[range(num_examples), y])
data_loss = np.sum(corect_logprobs)
# Add regularization term to loss (optional)
data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
return 1./num_examples * data_loss

# We also implement a helper function to calculate the output of the network. It does forward propagation as defined above and returns the class with the highest probability.

# Helper function to predict an output (0 or 1)
def predict(model, x):
W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
# Forward propagation
z1 = x.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
return np.argmax(probs, axis=1)

# Finally, here comes the function to train our Neural Network. It implements batch gradient descent using the backpropagation derivatives we found above.

# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(nn_hdim, num_passes=20000, print_loss=False):
# Initialize the parameters to random values. We need to learn these.
np.random.seed(0)
W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
b1 = np.zeros((1, nn_hdim))
W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
b2 = np.zeros((1, nn_output_dim))

# This is what we return at the end
model = {}
# Gradient descent. For each batch...
for i in range(0, num_passes):

# Forward propagation
z1 = X.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

# Backpropagation
delta3 = probs
delta3[range(num_examples), y] -= 1
dW2 = (a1.T).dot(delta3)
db2 = np.sum(delta3, axis=0, keepdims=True)
delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
dW1 = np.dot(X.T, delta2)
db1 = np.sum(delta2, axis=0)

# Add regularization terms (b1 and b2 don't have regularization terms)
dW2 += reg_lambda * W2
dW1 += reg_lambda * W1

# Gradient descent parameter update
W1 += -epsilon * dW1
b1 += -epsilon * db1
W2 += -epsilon * dW2
b2 += -epsilon * db2
# Assign new parameters to the model
model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
# Optionally print the loss.
# This is expensive because it uses the whole dataset, so we don't want to do it too often.
if print_loss and i % 1000 == 0:
print("Loss after iteration %i: %f" %(i, calculate_loss(model)))
return model

# ### A network with a hidden layer of size 3
#
# Let's see what happens if we train a network with a hidden layer size of 3.
#

# +
# Build a model with a 3-dimensional hidden layer
model = build_model(3, print_loss=True)

# Plot the decision boundary
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3")
# -

# Yay! This looks pretty good. Our neural network was able to find a decision boundary that successfully separates the classes.

# # Varying the hidden layer size
#
# In the example above we picked a hidden layer size of 3. Let's now get a sense of how varying the hidden layer size affects the result.
#

plt.figure(figsize=(16, 32))
hidden_layer_dimensions = [1, 2, 3, 4, 5, 20, 50]
for i, nn_hdim in enumerate(hidden_layer_dimensions):
plt.subplot(5, 2, i+1)
plt.title('Hidden Layer size %d' % nn_hdim)
model = build_model(nn_hdim)
plot_decision_boundary(lambda x: predict(model, x))
plt.show()

# We can see that while a hidden layer of low dimensionality nicely captures the general trend of our data, higher dimensionalities are prone to overfitting. They are "memorizing" the data as opposed to fitting the general shape. If we were to evaluate our model on a separate test set (and you should!) the model with a smaller hidden layer size would likely perform better because it generalizes better. We could counteract overfitting with stronger regularization, but picking the correct size for the hidden layer is a much more "economical" solution.

# # Exercises
#
# Here are some things you can try to become more familiar with the code:
#
# 1. Instead of batch gradient descent, use minibatch gradient descent ([more info](http://cs231n.github.io/optimization-1/#gd)) to train the network. Minibatch gradient descent typically performs better in practice.
# 2. We used a fixed learning rate $\epsilon$ for gradient descent. Implement an annealing schedule for the gradient descent learning rate ([more info](http://cs231n.github.io/neural-networks-3/#anneal)).
# 3. We used a $\tanh$ activation function for our hidden layer. Experiment with other activation functions (some are mentioned above). Note that changing the activation function also means changing the backpropagation derivative.
# 4. Extend the network from two to three classes. You will need to generate an appropriate dataset for this.
# 5. Extend the network to four layers. Experiment with the layer size. Adding another hidden layer means you will need to adjust both the forward propagation as well as the backpropagation code.
#



references_tips/nn/nn-from-scratch/nn_from_scratch.py (+165, -0)

@@ -0,0 +1,165 @@
# ---
# jupyter:
# jupytext_format_version: '1.2'
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---

# %% 1
# Package imports
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib

# Display plots inline and change default figure size
# %matplotlib inline
matplotlib.rcParams['figure.figsize'] = (10.0, 8.0)

# %% 2
np.random.seed(3)
X, y = sklearn.datasets.make_moons(200, noise=0.20)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)

# %% 3
# Train the logistic regression classifier
clf = sklearn.linear_model.LogisticRegressionCV()
clf.fit(X, y)

# %% 4
# Helper function to plot a decision boundary.
# If you don't fully understand this function don't worry, it just generates the contour plot below.
def plot_decision_boundary(pred_func):
# Set min and max values and give it some padding
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.01
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the function value for the whole grid
Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot the contour and training examples
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)

# %% 12
# Plot the decision boundary
plot_decision_boundary(lambda x: clf.predict(x))
plt.title("Logistic Regression")

# %% 15
num_examples = len(X) # training set size
nn_input_dim = 2 # input layer dimensionality
nn_output_dim = 2 # output layer dimensionality

# Gradient descent parameters (I picked these by hand)
epsilon = 0.01 # learning rate for gradient descent
reg_lambda = 0.01 # regularization strength

# %% 7
# Helper function to evaluate the total loss on the dataset
def calculate_loss(model):
W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
# Forward propagation to calculate our predictions
z1 = X.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
# Calculating the loss
corect_logprobs = -np.log(probs[range(num_examples), y])
data_loss = np.sum(corect_logprobs)
# Add regularization term to loss (optional)
data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
return 1./num_examples * data_loss

# %% 8
# Helper function to predict an output (0 or 1)
def predict(model, x):
W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
# Forward propagation
z1 = x.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
return np.argmax(probs, axis=1)

# %% 16
# This function learns parameters for the neural network and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations
def build_model(nn_hdim, num_passes=20000, print_loss=False):
# Initialize the parameters to random values. We need to learn these.
np.random.seed(0)
W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
b1 = np.zeros((1, nn_hdim))
W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
b2 = np.zeros((1, nn_output_dim))
# This is what we return at the end
model = {}
# Gradient descent. For each batch...
for i in range(0, num_passes):
# Forward propagation
z1 = X.dot(W1) + b1
a1 = np.tanh(z1)
z2 = a1.dot(W2) + b2
exp_scores = np.exp(z2)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
# Backpropagation
delta3 = probs
delta3[range(num_examples), y] -= 1
dW2 = (a1.T).dot(delta3)
db2 = np.sum(delta3, axis=0, keepdims=True)
delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
dW1 = np.dot(X.T, delta2)
db1 = np.sum(delta2, axis=0)
# Add regularization terms (b1 and b2 don't have regularization terms)
dW2 += reg_lambda * W2
dW1 += reg_lambda * W1
# Gradient descent parameter update
W1 += -epsilon * dW1
b1 += -epsilon * db1
W2 += -epsilon * dW2
b2 += -epsilon * db2
# Assign new parameters to the model
model = { 'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
# Optionally print the loss.
# This is expensive because it uses the whole dataset, so we don't want to do it too often.
if print_loss and i % 1000 == 0:
print("Loss after iteration %i: %f" %(i, calculate_loss(model)))
return model

# %% 17
# Build a model with a 3-dimensional hidden layer
model = build_model(3, print_loss=True)

# Plot the decision boundary
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3")

# %% 14
plt.figure(figsize=(16, 32))
hidden_layer_dimensions = [1, 2, 3, 4, 5, 20, 50]
for i, nn_hdim in enumerate(hidden_layer_dimensions):
plt.subplot(5, 2, i+1)
plt.title('Hidden Layer size %d' % nn_hdim)
model = build_model(nn_hdim)
plot_decision_boundary(lambda x: predict(model, x))
plt.show()

references_tips/nn/nn-from-scratch/requirements.txt (+47, -0)

@@ -0,0 +1,47 @@
appnope==0.1.0
backports.ssl-match-hostname==3.4.0.2
certifi==2015.4.28
decorator==4.0.2
funcsigs==0.4
functools32==3.2.3.post2
gnureadline==6.3.3
ipykernel==4.0.3
ipython==4.0.0
ipython-genutils==0.1.0
ipywidgets==4.0.2
Jinja2==2.8
jsonschema==2.5.1
jupyter==1.0.0
jupyter-client==4.0.0
jupyter-console==4.0.1
jupyter-core==4.0.4
MarkupSafe==0.23
matplotlib==1.4.3
mistune==0.7.1
mock==1.3.0
nbconvert==4.0.0
nbformat==4.0.0
nose==1.3.7
notebook==4.0.4
numpy==1.9.2
path.py==8.1
pbr==1.6.0
pexpect==3.3
pickleshare==0.5
ptyprocess==0.5
PyBrain==0.3
Pygments==2.0.2
pyparsing==2.0.3
python-dateutil==2.4.2
pytz==2015.4
pyzmq==14.7.0
qtconsole==4.0.1
scikit-learn==0.16.1
scipy==0.16.0
simplegeneric==0.8.1
six==1.9.0
sklearn==0.0
terminado==0.5
tornado==4.2.1
traitlets==4.0.0
wheel==0.24.0

references_tips/nn/nn-from-scratch/simple_classification.py (+59, -0)

@@ -0,0 +1,59 @@
# ---
# jupyter:
# jupytext_format_version: '1.2'
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---

__author__ = 'm.bashari'
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt


def generate_data():
np.random.seed(0)
X, y = datasets.make_moons(200, noise=0.20)
return X, y


def visualize(X, y, clf):
# plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap=plt.cm.Spectral)
# plt.show()
plot_decision_boundary(lambda x: clf.predict(x), X, y)
plt.title("Logistic Regression")


def plot_decision_boundary(pred_func, X, y):
# Set min and max values and give it some padding
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.01
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Predict the function value for the whole grid
Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Plot the contour and training examples
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.show()


def classify(X, y):
clf = linear_model.LogisticRegressionCV()
clf.fit(X, y)
return clf


def main():
X, y = generate_data()
# visualize(X, y)
clf = classify(X, y)
visualize(X, y, clf)


if __name__ == "__main__":
main()

references/nn/nn_1.py → references_tips/nn/nn_1.py
references/nn/nn_2.py → references_tips/nn/nn_2.py
references/nn/nn_3.py → references_tips/nn/nn_3.py
references/nn/nn_4.py → references_tips/nn/nn_4.py
tips/notebook_tips.ipynb → references_tips/notebook_tips.ipynb
tips/python/README.md → references_tips/python/README.md
tips/python/pip.md → references_tips/python/pip.md
tips/python/virtualenv.md → references_tips/python/virtualenv.md
tips/python/virtualenv_wrapper.md → references_tips/python/virtualenv_wrapper.md
tips/pytorch/tensor_divide_int.py → references_tips/pytorch/tensor_divide_int.py
references/supervised_learning/Recognizing hand-written digits - SVM.ipynb → references_tips/supervised_learning/Recognizing hand-written digits - SVM.ipynb
references/supervised_learning/supervised learning.ipynb → references_tips/supervised_learning/supervised learning.ipynb
tips/构建深度神经网络的一些实战建议.md → references_tips/构建深度神经网络的一些实战建议.md

