
Add some notes

fetches/feikei/master
Shuhui Bu 6 years ago
parent
commit
9ef7b43480
13 changed files with 2065 additions and 322 deletions
  1. +2 -0  .gitignore
  2. +10 -3  exercise/exercise.ipynb
  3. +1 -0  kmeans/k-means.ipynb
  4. +68 -39  kmeans/k-means.py
  5. +186 -0  logistic_regression/Least_squares.ipynb
  6. +113 -0  logistic_regression/Least_squares.py
  7. +66 -0  logistic_regression/linear_regression.py
  8. +575 -0  matplotlib/matplotlib_ani1.ipynb
  9. +63 -0  matplotlib/matplotlib_ani1.py
  10. +844 -0  matplotlib/matplotlib_ani2.ipynb
  11. +37 -0  matplotlib/matplotlib_ani2.py
  12. +96 -276  numpy_scipy_sympy/Sympy.ipynb
  13. +4 -4  pytorch/PyTorch快速入门.ipynb

+2 -0  .gitignore

@@ -1 +1,3 @@
.ipynb_checkpoints
.idea


exercise.ipynb → exercise/exercise.ipynb

@@ -25,10 +25,17 @@
"\n", "\n",
"\n", "\n",
"### (3) 判断\n", "### (3) 判断\n",
"企业发放的奖金根据利润提成。利润(I): 低于或等于 10 万元时,奖金可提 10%; 高于 10 万元,低于 20 万元时,低于 10 万元的部分按 10%提成,高于 10 万元的部分,可提成 7.5%; 20 万到 40 万之间时,高于 20 万元的部分,可提成 5%; 40 万到 60 万之间时,高于 40 万元的部分,可提成 3%; 60 万到 100 万之间时,高于 60 万元的部分,可提成 1.5%, 高于 100 万元时, 超过 100 万元的部分按 1%提成, 从键盘输入当月利润 I,求应发放奖金总数?\n",
"企业发放的奖金根据利润提成。利润(I): \n",
"* 低于或等于 10 万元时,奖金可提 10%; \n",
"* 高于 10 万元,低于 20 万元时,低于 10 万元的部分按 10%提成,高于 10 万元的部分,可提成 7.5%; \n",
"* 20 万到 40 万之间时,高于 20 万元的部分,可提成 5%; \n",
"* 40 万到 60 万之间时,高于 40 万元的部分,可提成 3%; \n",
"* 60 万到 100 万之间时,高于 60 万元的部分,可提成 1.5%, \n",
"* 高于 100 万元时, 超过 100 万元的部分按 1%提成, \n",
"从键盘输入当月利润 I,求应发放奖金总数?\n",
"\n", "\n",
"### (4)循环\n", "### (4)循环\n",
"输出9x9的口诀表\n",
"输出9x9的乘法口诀表\n",
"\n", "\n",
"### (5)算法\n", "### (5)算法\n",
"给一个数字列表,将其按照由大到小的顺序排列\n", "给一个数字列表,将其按照由大到小的顺序排列\n",
@@ -48,7 +55,7 @@
"例如把`c:`下面所有的`.dll`文件找到\n", "例如把`c:`下面所有的`.dll`文件找到\n",
"\n", "\n",
"### (8)应用3\n", "### (8)应用3\n",
"你有个目录,里面是程序(假如是C或者是Python),统计一下你写过多少行代码。包括空行和注释,但是要分别列出来。\n",
"你有个目录,里面是程序(假如是C或者是Python),统计一下你写过多少行代码。包括空行和注释,但是要分别列出来。\n",
"\n" "\n"
] ]
}, },

+1 -0  kmeans/k-means.ipynb

@@ -414,6 +414,7 @@
}
],
"metadata": {
"jupytext_formats": "ipynb,py",
"kernelspec": {
"display_name": "Python 3",
"language": "python",


+68 -39  kmeans/k-means.py

@@ -1,3 +1,33 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   jupytext_formats: ipynb,py
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# # k-means demo

# +
# This line configures matplotlib to show figures embedded in the notebook,
# instead of opening a new window for each figure. More about that later.
# If you are using an old version of IPython, try using '%pylab inline' instead.
# %matplotlib inline

# import libraries
from numpy import *
import matplotlib.pyplot as plt
import pandas as pd
@@ -7,12 +37,36 @@ names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv("iris.csv", header=0, index_col=0)
dataset.head()


# -

# Encode the classes: the 3 classes are assigned 0, 1, 2
dataset['class'][dataset['class']=='Iris-setosa']=0
dataset['class'][dataset['class']=='Iris-versicolor']=1
dataset['class'][dataset['class']=='Iris-virginica']=2




def originalDatashow(dataSet):
    # plot the original sample points
    num, dim = shape(dataSet)
    marksamples = ['ob']  # marker style for the samples
    for i in range(num):
        plt.plot(datamat.iat[i, 0], datamat.iat[i, 1], marksamples[0], markersize=5)
    plt.title('original dataset')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.show()

# + {"scrolled": true}
# get the sample data
datamat = dataset.loc[:, ['sepal-length', 'sepal-width']]
# the ground-truth labels
labels = dataset.loc[:, ['class']]
# show the original data
originalDatashow(datamat)


# -


def randChosenCent(dataSet,k):
    """Initialize the cluster centers: use values randomly generated within the data range as the new centers"""
@@ -34,6 +88,8 @@ def randChosenCent(dataSet,k):
    centroids = dataSet.iloc[centroidsIndex]
    return mat(centroids)


# +

def distEclud(vecA, vecB):
    """Compute distance: the Euclidean distance between two vectors"""
    return sqrt(sum(power(vecA - vecB, 2)))  # la.norm(vecA-vecB)
@@ -91,9 +147,13 @@ def kMeans(dataSet, k):
            centroids[cent, :] = mean(ptsInClust, axis=0)
    return centroids, clusterAssment


# -


# run k-means clustering
k = 3  # user-defined number of clusters
mycentroids, clusterAssment = kMeans(datamat, k)


# show the clustering result on the 2-D data
# +
def datashow(dataSet, k, centroids, clusterAssment):  # show the clustering result in 2-D space
    from matplotlib import pyplot as plt
    num, dim = shape(dataSet)  # number of samples num, dimension dim
@@ -123,8 +183,8 @@ def datashow(dataSet, k, centroids, clusterAssment): # 二维空间显示聚类


    plt.title('k-means cluster result')  # title
    plt.show()
# plot the ground-truth classes
def trgartshow(dataSet, k, labels):
    from matplotlib import pyplot as plt
@@ -139,48 +199,17 @@ def trgartshow(dataSet, k, labels):
        plt.plot(datamat.iat[i, 0], datamat.iat[i, 1], marksamples[int(labels.iat[i, 0])], markersize=6,
                 label=label[int(labels.iat[i, 0])])
    plt.legend(loc='upper left')
    # add axis labels and a title

    plt.xlabel('sepal length')
    plt.ylabel('sepal width')

    plt.title('iris true result')  # title


    # show the figure
    plt.show()
    # label=labels.iat[i,0]
# -



def originalDatashow(dataSet):
    """Before clustering: plot the original sample points"""
    # number of samples and feature dimensions
    num, dim = shape(dataSet)
    marksamples = ['ob']  # marker style for the samples
    for i in range(num):
        plt.plot(datamat.iat[i, 0], datamat.iat[i, 1], marksamples[0], markersize=5)
    plt.title('original dataset')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')  # title
    plt.show()


if __name__ == '__main__':
    # get the sample data
    datamat = dataset.loc[:, ['sepal-length', 'sepal-width']]
    # the ground-truth labels
    labels = dataset.loc[:, ['class']]
    # show the original data
    originalDatashow(datamat)

    # k-means clustering
    k = 3  # user-defined number of clusters
    mycentroids, clusterAssment = kMeans(datamat, k)

    # plot the results
    datashow(datamat, k, mycentroids, clusterAssment)
    trgartshow(datamat, 3, labels)
    # plt.show()



# plot the results
datashow(datamat, k, mycentroids, clusterAssment)
trgartshow(datamat, 3, labels)
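For comparison only (not part of this commit), the same two-feature clustering can be cross-checked against scikit-learn; a minimal sketch, assuming scikit-learn is installed and `datamat` is the DataFrame built above:

# Sketch: cross-check the hand-written kMeans against scikit-learn (assumed installed).
from sklearn.cluster import KMeans

sk_model = KMeans(n_clusters=3, n_init=10, random_state=0)
sk_labels = sk_model.fit_predict(datamat.values)

plt.scatter(datamat.iloc[:, 0], datamat.iloc[:, 1], c=sk_labels, s=20)
plt.scatter(sk_model.cluster_centers_[:, 0], sk_model.cluster_centers_[:, 1],
            c='red', marker='+', s=200)
plt.title('scikit-learn k-means (comparison)')
plt.show()

Cluster labels may be permuted relative to the hand-written version, so compare assignments only up to relabeling.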

+186 -0  logistic_regression/Least_squares.ipynb  (file diff suppressed because it is too large)


+113 -0  logistic_regression/Least_squares.py

@@ -0,0 +1,113 @@
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# # Linear regression
#
#

# ## Least squares
#
# A mathematical procedure for finding the best-fitting curve to a given set of points by minimizing the sum of the squares of the offsets ("the residuals") of the points from the curve. The sum of the squares of the offsets is used instead of the offset absolute values because this allows the residuals to be treated as a continuous differentiable quantity. However, because squares of the offsets are used, outlying points can have a disproportionate effect on the fit, a property which may or may not be desirable depending on the problem at hand.
#

# ### Show the data
#

# +
# %matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets

# load data
d = datasets.load_diabetes()

X = d.data[:, 2]
Y = d.target

# draw original data
plt.scatter(X, Y)
plt.show()
# -

# ### Theory
# For $N$ observation data:
# $$
# \mathbf{X} = \{x_1, x_2, ..., x_N \} \\
# \mathbf{Y} = \{y_1, y_2, ..., y_N \}
# $$
#
# We want to find a model that can predict the data. The simplest model is a linear model, of the form
# $$
# y = ax + b
# $$
#
# The goal is to find the parameters $a, b$ that best fit the model to the observed data.
#
# We use the sum of squared differences between the model's predictions and the observed data as the loss function:
# $$
# L = \sum_{i=1}^{N} (y_i - a x_i - b)^2
# $$
#
# To minimize the loss function, we take the partial derivatives with respect to the parameters:
# $$
# \frac{\partial L}{\partial a} = -2 \sum_{i=1}^{N} (y_i - a x_i - b) x_i \\
# \frac{\partial L}{\partial b} = -2 \sum_{i=1}^{N} (y_i - a x_i - b)
# $$
# At the minimum the partial derivatives are zero, so:
# $$
# -2 \sum_{i=1}^{N} (y_i - a x_i - b) x_i = 0 \\
# -2 \sum_{i=1}^{N} (y_i - a x_i - b) = 0 \\
# $$
#
# Rearranging the terms gives:
# $$
# a \sum x_i^2 + b \sum x_i = \sum y_i x_i \\
# a \sum x_i + b N = \sum y_i
# $$
# By solving the linear equation we can obtain the model parameters.
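Solving this 2x2 system explicitly is a short extra step, added here for clarity; it follows directly from the two equations above:

$$
a = \frac{N \sum x_i y_i - \sum x_i \sum y_i}{N \sum x_i^2 - \left(\sum x_i\right)^2}, \qquad
b = \frac{\sum y_i - a \sum x_i}{N}
$$

The program below instead builds the 2x2 matrix and inverts it, which gives the same result.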

# ### Program

# +
N = X.shape[0]

S_X2 = np.sum(X*X)
S_X = np.sum(X)
S_XY = np.sum(X*Y)
S_Y = np.sum(Y)

A1 = np.array([[S_X2, S_X],
[S_X, N]])
B1 = np.array([S_XY, S_Y])

coeff = np.linalg.inv(A1).dot(B1)

print('a = %f, b = %f' % (coeff[0], coeff[1]))

x_min = np.min(X)
x_max = np.max(X)
y_min = coeff[0] * x_min + coeff[1]
y_max = coeff[0] * x_max + coeff[1]

plt.scatter(X, Y)
plt.plot([x_min, x_max], [y_min, y_max], 'r')
plt.show()
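As a quick sanity check (an editorial aside, not part of the committed file), the same slope and intercept can be recovered with numpy's built-in least-squares polynomial fit:

# Sketch: verify the hand-derived coefficients with numpy's degree-1 fit.
a_np, b_np = np.polyfit(X, Y, 1)  # returns [slope, intercept]
print('np.polyfit: a = %f, b = %f' % (a_np, b_np))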

+66 -0  logistic_regression/linear_regression.py

@@ -0,0 +1,66 @@

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets

# load data
d = datasets.load_diabetes()

X = d.data[:, 2]
Y = d.target

# draw original data
plt.scatter(X, Y)
plt.show()


###############################################################################
# Least squares
###############################################################################

# L = \sum_{i=1, N} (y_i - a*x_i - b)^2
N = X.shape[0]

S_X2 = np.sum(X*X)
S_X = np.sum(X)
S_XY = np.sum(X*Y)
S_Y = np.sum(Y)

A1 = np.array([[S_X2, S_X], [S_X, N]])
B1 = np.array([S_XY, S_Y])

coeff = np.linalg.inv(A1).dot(B1)

x_min = np.min(X)
x_max = np.max(X)
y_min = coeff[0] * x_min + coeff[1]
y_max = coeff[0] * x_max + coeff[1]

plt.scatter(X, Y)
plt.plot([x_min, x_max], [y_min, y_max], 'r')
plt.show()


###############################################################################
# Linear regression
###############################################################################
# the loss function
# L = \sum_{i=1, N} (y_i - a*x_i - b)^2

n_train = 1000


a, b = 1, 1
epsilon = 0.001

for i in range(n_train):
    for j in range(N):
        a = a + epsilon*2*(Y[j] - a*X[j] - b)*X[j]
        b = b + epsilon*2*(Y[j] - a*X[j] - b)

    L = 0
    for j in range(N):
        L = L + (Y[j] - a*X[j] - b)**2
    print("epoch %4d: loss = %f" % (i, L))


+575 -0  matplotlib/matplotlib_ani1.ipynb  (file diff suppressed because it is too large)


+63 -0  matplotlib/matplotlib_ani1.py

@@ -0,0 +1,63 @@
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# ## Animation
#

# +
# %matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

from matplotlib import animation, rc
from IPython.display import HTML


# First set up the figure, the axis, and the plot element we want to animate
fig, ax = plt.subplots()

ax.set_xlim(( 0, 2))
ax.set_ylim((-2, 2))

line, = ax.plot([], [], lw=2)





# +

# initialization function: plot the background of each frame
def init():
    line.set_data([], [])
    return (line,)

# animation function. This is called sequentially
def animate(i):
    x = np.linspace(0, 2, 1000)
    y = np.sin(2 * np.pi * (x - 0.01 * i))
    line.set_data(x, y)
    return (line,)

# call the animator. blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, init_func=init,
frames=100, interval=20, blit=True)

HTML(anim.to_html5_video())
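The HTML5 embedding above relies on a video encoder such as ffmpeg; if a file on disk is preferred instead, the same animation object can be saved directly. A minimal sketch, with a hypothetical output name and assuming ffmpeg is on the PATH:

# Sketch: write the animation to a video file instead of embedding it.
anim.save('sine_wave.mp4', writer='ffmpeg', fps=30)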

+844 -0  matplotlib/matplotlib_ani2.ipynb  (file diff suppressed because it is too large)


+37 -0  matplotlib/matplotlib_ani2.py

@@ -0,0 +1,37 @@
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# +
# %matplotlib nbagg

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

fig = plt.figure()
x = np.arange(0, 10, 0.1)

ims = []
for a in range(50):
    y = np.sin(x - a)
    im = plt.plot(x, y, "r")
    ims.append(im)

ani = animation.ArtistAnimation(fig, ims)
plt.show()
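With the nbagg backend the animation plays interactively; to embed it in the rendered notebook instead, the ArtistAnimation can be converted to JavaScript. A minimal sketch, assuming it runs inside Jupyter:

# Sketch: render the ArtistAnimation inline as a JavaScript widget (Jupyter only).
from IPython.display import HTML
HTML(ani.to_jshtml())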

+96 -276  numpy_scipy_sympy/Sympy.ipynb  (file diff suppressed because it is too large)


+4 -4  pytorch/PyTorch快速入门.ipynb

@@ -25,23 +25,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "ImportError", "ename": "ImportError",
"evalue": "No module named 'torch'", "evalue": "No module named 'torch'",
"output_type": "error",
"traceback": [ "traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-93bdd78e6769>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mprint_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m<ipython-input-1-93bdd78e6769>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mprint_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mImportError\u001b[0m: No module named 'torch'" "\u001b[0;31mImportError\u001b[0m: No module named 'torch'"
]
],
"output_type": "error"
}
],
"source": [
" from __future__ import print_function\n",
"from __future__ import print_function\n",
"import torch as t" "import torch as t"
] ]
}, },

