
Add some notes

fetches/feikei/master
Shuhui Bu 6 years ago
parent
commit
9ef7b43480
13 changed files with 2065 additions and 322 deletions
  1. +2 -0  .gitignore
  2. +10 -3  exercise/exercise.ipynb
  3. +1 -0  kmeans/k-means.ipynb
  4. +68 -39  kmeans/k-means.py
  5. +186 -0  logistic_regression/Least_squares.ipynb
  6. +113 -0  logistic_regression/Least_squares.py
  7. +66 -0  logistic_regression/linear_regression.py
  8. +575 -0  matplotlib/matplotlib_ani1.ipynb
  9. +63 -0  matplotlib/matplotlib_ani1.py
  10. +844 -0  matplotlib/matplotlib_ani2.ipynb
  11. +37 -0  matplotlib/matplotlib_ani2.py
  12. +96 -276  numpy_scipy_sympy/Sympy.ipynb
  13. +4 -4  pytorch/PyTorch快速入门.ipynb

+2 -0  .gitignore

@@ -1 +1,3 @@
.ipynb_checkpoints
.idea


exercise.ipynb → exercise/exercise.ipynb

@@ -25,10 +25,17 @@
"\n", "\n",
"\n", "\n",
"### (3) 判断\n", "### (3) 判断\n",
"企业发放的奖金根据利润提成。利润(I): 低于或等于 10 万元时,奖金可提 10%; 高于 10 万元,低于 20 万元时,低于 10 万元的部分按 10%提成,高于 10 万元的部分,可提成 7.5%; 20 万到 40 万之间时,高于 20 万元的部分,可提成 5%; 40 万到 60 万之间时,高于 40 万元的部分,可提成 3%; 60 万到 100 万之间时,高于 60 万元的部分,可提成 1.5%, 高于 100 万元时, 超过 100 万元的部分按 1%提成, 从键盘输入当月利润 I,求应发放奖金总数?\n",
"企业发放的奖金根据利润提成。利润(I): \n",
"* 低于或等于 10 万元时,奖金可提 10%; \n",
"* 高于 10 万元,低于 20 万元时,低于 10 万元的部分按 10%提成,高于 10 万元的部分,可提成 7.5%; \n",
"* 20 万到 40 万之间时,高于 20 万元的部分,可提成 5%; \n",
"* 40 万到 60 万之间时,高于 40 万元的部分,可提成 3%; \n",
"* 60 万到 100 万之间时,高于 60 万元的部分,可提成 1.5%, \n",
"* 高于 100 万元时, 超过 100 万元的部分按 1%提成, \n",
"从键盘输入当月利润 I,求应发放奖金总数?\n",
"\n", "\n",
"### (4)循环\n", "### (4)循环\n",
"输出9x9的口诀表\n",
"输出9x9的乘法口诀表\n",
"\n", "\n",
"### (5)算法\n", "### (5)算法\n",
"给一个数字列表,将其按照由大到小的顺序排列\n", "给一个数字列表,将其按照由大到小的顺序排列\n",
@@ -48,7 +55,7 @@
"例如把`c:`下面所有的`.dll`文件找到\n", "例如把`c:`下面所有的`.dll`文件找到\n",
"\n", "\n",
"### (8)应用3\n", "### (8)应用3\n",
"你有个目录,里面是程序(假如是C或者是Python),统计一下你写过多少行代码。包括空行和注释,但是要分别列出来。\n",
"你有个目录,里面是程序(假如是C或者是Python),统计一下你写过多少行代码。包括空行和注释,但是要分别列出来。\n",
"\n" "\n"
] ]
}, },

+1 -0  kmeans/k-means.ipynb

@@ -414,6 +414,7 @@
}
],
"metadata": {
"jupytext_formats": "ipynb,py",
"kernelspec": {
"display_name": "Python 3",
"language": "python",


+68 -39  kmeans/k-means.py

@@ -1,3 +1,33 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   jupytext_formats: ipynb,py
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# # k-means demo

# +
# This line configures matplotlib to show figures embedded in the notebook,
# instead of opening a new window for each figure. More about that later.
# If you are using an old version of IPython, try using '%pylab inline' instead.
# %matplotlib inline

# import libraries
from numpy import *
import matplotlib.pyplot as plt
import pandas as pd
@@ -7,12 +37,36 @@ names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv("iris.csv", header=0, index_col=0)
dataset.head()


# -

# Encode the classes: the 3 classes are assigned 0, 1, 2
dataset['class'][dataset['class']=='Iris-setosa']=0
dataset['class'][dataset['class']=='Iris-versicolor']=1
dataset['class'][dataset['class']=='Iris-virginica']=2




def originalDatashow(dataSet):
    # plot the original sample points
    num, dim = shape(dataSet)
    marksamples = ['ob']  # marker style for the samples
    for i in range(num):
        plt.plot(datamat.iat[i, 0], datamat.iat[i, 1], marksamples[0], markersize=5)
    plt.title('original dataset')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.show()

# + {"scrolled": true}
# get the sample data
datamat = dataset.loc[:, ['sepal-length', 'sepal-width']]
# the ground-truth labels
labels = dataset.loc[:, ['class']]
# show the original data
originalDatashow(datamat)


# -


def randChosenCent(dataSet,k):
    """Initialize the cluster centers: use values randomly generated within the data range as the new centers"""
@@ -34,6 +88,8 @@ def randChosenCent(dataSet,k):
    centroids = dataSet.iloc[centroidsIndex]
    return mat(centroids)


# +

def distEclud(vecA, vecB):
    """Compute distance: the Euclidean distance between two vectors"""
    return sqrt(sum(power(vecA - vecB, 2)))  # la.norm(vecA-vecB)
@@ -91,9 +147,13 @@ def kMeans(dataSet, k):
            centroids[cent, :] = mean(ptsInClust, axis=0)
    return centroids, clusterAssment


# -


# run k-means clustering
k = 3  # user-defined number of clusters
mycentroids, clusterAssment = kMeans(datamat, k)


# show the clustering result on the 2-D data
# +
def datashow(dataSet, k, centroids, clusterAssment):  # show the clustering result in 2-D space
    from matplotlib import pyplot as plt
    num, dim = shape(dataSet)  # number of samples num, dimension dim
@@ -123,8 +183,8 @@ def datashow(dataSet, k, centroids, clusterAssment): # 二维空间显示聚类


    plt.title('k-means cluster result')  # title
    plt.show()
# plot the ground-truth classes
def trgartshow(dataSet, k, labels):
    from matplotlib import pyplot as plt
@@ -139,48 +199,17 @@ def trgartshow(dataSet, k, labels):
        plt.plot(datamat.iat[i, 0], datamat.iat[i, 1], marksamples[int(labels.iat[i, 0])], markersize=6,
                 label=label[int(labels.iat[i, 0])])
    plt.legend(loc='upper left')
    # add axis labels and a title

    plt.xlabel('sepal length')
    plt.ylabel('sepal width')

    plt.title('iris true result')  # title


    # show the figure
    plt.show()
    # label=labels.iat[i,0]
# -



def originalDatashow(dataSet):
    """Before clustering: plot the original sample points"""
    # number of samples and feature dimensions
    num, dim = shape(dataSet)
    marksamples = ['ob']  # marker style for the samples
    for i in range(num):
        plt.plot(datamat.iat[i, 0], datamat.iat[i, 1], marksamples[0], markersize=5)
    plt.title('original dataset')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')  # title
    plt.show()


if __name__ == '__main__':
    # get the sample data
    datamat = dataset.loc[:, ['sepal-length', 'sepal-width']]
    # the ground-truth labels
    labels = dataset.loc[:, ['class']]
    # show the original data
    originalDatashow(datamat)

    # k-means clustering
    k = 3  # user-defined number of clusters
    mycentroids, clusterAssment = kMeans(datamat, k)

    # plot the results
    datashow(datamat, k, mycentroids, clusterAssment)
    trgartshow(datamat, 3, labels)
    # plt.show()



# plot the results
datashow(datamat, k, mycentroids, clusterAssment)
trgartshow(datamat, 3, labels)
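For comparison only (not part of this commit), the same two-feature clustering can be cross-checked against scikit-learn; a minimal sketch, assuming scikit-learn is installed and `datamat` is the DataFrame built above:

# Sketch: cross-check the hand-written kMeans against scikit-learn (assumed installed).
from sklearn.cluster import KMeans

sk_model = KMeans(n_clusters=3, n_init=10, random_state=0)
sk_labels = sk_model.fit_predict(datamat.values)

plt.scatter(datamat.iloc[:, 0], datamat.iloc[:, 1], c=sk_labels, s=20)
plt.scatter(sk_model.cluster_centers_[:, 0], sk_model.cluster_centers_[:, 1],
            c='red', marker='+', s=200)
plt.title('scikit-learn k-means (comparison)')
plt.show()

Cluster labels may be permuted relative to the hand-written version, so compare assignments only up to relabeling.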

+186 -0  logistic_regression/Least_squares.ipynb  (file diff suppressed because it is too large)


+113 -0  logistic_regression/Least_squares.py

@@ -0,0 +1,113 @@
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# # Linear regression
#
#

# ## Least squares
#
# A mathematical procedure for finding the best-fitting curve to a given set of points by minimizing the sum of the squares of the offsets ("the residuals") of the points from the curve. The sum of the squares of the offsets is used instead of the offset absolute values because this allows the residuals to be treated as a continuous differentiable quantity. However, because squares of the offsets are used, outlying points can have a disproportionate effect on the fit, a property which may or may not be desirable depending on the problem at hand.
#

# ### Show the data
#

# +
# %matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets

# load data
d = datasets.load_diabetes()

X = d.data[:, 2]
Y = d.target

# draw original data
plt.scatter(X, Y)
plt.show()
# -

# ### Theory
# For $N$ observation data:
# $$
# \mathbf{X} = \{x_1, x_2, ..., x_N \} \\
# \mathbf{Y} = \{y_1, y_2, ..., y_N \}
# $$
#
# We want to find a model that can predict the data. The simplest model is a linear model, of the form
# $$
# y = ax + b
# $$
#
# The goal is to find the parameters $a, b$ that best fit the model to the observed data.
#
# We use the sum of squared differences between the model's predictions and the observed data as the loss function:
# $$
# L = \sum_{i=1}^{N} (y_i - a x_i - b)^2
# $$
#
# To minimize the loss function, we take the partial derivatives with respect to the parameters:
# $$
# \frac{\partial L}{\partial a} = -2 \sum_{i=1}^{N} (y_i - a x_i - b) x_i \\
# \frac{\partial L}{\partial b} = -2 \sum_{i=1}^{N} (y_i - a x_i - b)
# $$
# At the minimum the partial derivatives are zero, so:
# $$
# -2 \sum_{i=1}^{N} (y_i - a x_i - b) x_i = 0 \\
# -2 \sum_{i=1}^{N} (y_i - a x_i - b) = 0 \\
# $$
#
# Rearranging the terms gives:
# $$
# a \sum x_i^2 + b \sum x_i = \sum y_i x_i \\
# a \sum x_i + b N = \sum y_i
# $$
# By solving the linear equation we can obtain the model parameters.
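Solving this 2x2 system explicitly is a short extra step, added here for clarity; it follows directly from the two equations above:

$$
a = \frac{N \sum x_i y_i - \sum x_i \sum y_i}{N \sum x_i^2 - \left(\sum x_i\right)^2}, \qquad
b = \frac{\sum y_i - a \sum x_i}{N}
$$

The program below instead builds the 2x2 matrix and inverts it, which gives the same result.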

# ### Program

# +
N = X.shape[0]

S_X2 = np.sum(X*X)
S_X = np.sum(X)
S_XY = np.sum(X*Y)
S_Y = np.sum(Y)

A1 = np.array([[S_X2, S_X],
[S_X, N]])
B1 = np.array([S_XY, S_Y])

coeff = np.linalg.inv(A1).dot(B1)

print('a = %f, b = %f' % (coeff[0], coeff[1]))

x_min = np.min(X)
x_max = np.max(X)
y_min = coeff[0] * x_min + coeff[1]
y_max = coeff[0] * x_max + coeff[1]

plt.scatter(X, Y)
plt.plot([x_min, x_max], [y_min, y_max], 'r')
plt.show()
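As a quick sanity check (an editorial aside, not part of the committed file), the same slope and intercept can be recovered with numpy's built-in least-squares polynomial fit:

# Sketch: verify the hand-derived coefficients with numpy's degree-1 fit.
a_np, b_np = np.polyfit(X, Y, 1)  # returns [slope, intercept]
print('np.polyfit: a = %f, b = %f' % (a_np, b_np))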

+66 -0  logistic_regression/linear_regression.py

@@ -0,0 +1,66 @@

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets

# load data
d = datasets.load_diabetes()

X = d.data[:, 2]
Y = d.target

# draw original data
plt.scatter(X, Y)
plt.show()


###############################################################################
# Least squares
###############################################################################

# L = \sum_{i=1, N} (y_i - a*x_i - b)^2
N = X.shape[0]

S_X2 = np.sum(X*X)
S_X = np.sum(X)
S_XY = np.sum(X*Y)
S_Y = np.sum(Y)

A1 = np.array([[S_X2, S_X], [S_X, N]])
B1 = np.array([S_XY, S_Y])

coeff = np.linalg.inv(A1).dot(B1)

x_min = np.min(X)
x_max = np.max(X)
y_min = coeff[0] * x_min + coeff[1]
y_max = coeff[0] * x_max + coeff[1]

plt.scatter(X, Y)
plt.plot([x_min, x_max], [y_min, y_max], 'r')
plt.show()


###############################################################################
# Linear regression
###############################################################################
# the loss function
# L = \sum_{i=1, N} (y_i - a*x_i - b)^2

n_train = 1000


a, b = 1, 1
epsilon = 0.001

for i in range(n_train):
    for j in range(N):
        a = a + epsilon*2*(Y[j] - a*X[j] - b)*X[j]
        b = b + epsilon*2*(Y[j] - a*X[j] - b)

    L = 0
    for j in range(N):
        L = L + (Y[j] - a*X[j] - b)**2
    print("epoch %4d: loss = %f" % (i, L))


+575 -0  matplotlib/matplotlib_ani1.ipynb  (file diff suppressed because it is too large)


+63 -0  matplotlib/matplotlib_ani1.py

@@ -0,0 +1,63 @@
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# ## Animation
#

# +
# %matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

from matplotlib import animation, rc
from IPython.display import HTML


# First set up the figure, the axis, and the plot element we want to animate
fig, ax = plt.subplots()

ax.set_xlim(( 0, 2))
ax.set_ylim((-2, 2))

line, = ax.plot([], [], lw=2)





# +

# initialization function: plot the background of each frame
def init():
    line.set_data([], [])
    return (line,)

# animation function. This is called sequentially
def animate(i):
    x = np.linspace(0, 2, 1000)
    y = np.sin(2 * np.pi * (x - 0.01 * i))
    line.set_data(x, y)
    return (line,)

# call the animator. blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, init_func=init,
frames=100, interval=20, blit=True)

HTML(anim.to_html5_video())
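The HTML5 embedding above relies on a video encoder such as ffmpeg; if a file on disk is preferred instead, the same animation object can be saved directly. A minimal sketch, with a hypothetical output name and assuming ffmpeg is on the PATH:

# Sketch: write the animation to a video file instead of embedding it.
anim.save('sine_wave.mp4', writer='ffmpeg', fps=30)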

+844 -0  matplotlib/matplotlib_ani2.ipynb  (file diff suppressed because it is too large)


+37 -0  matplotlib/matplotlib_ani2.py

@@ -0,0 +1,37 @@
# ---
# jupyter:
#   jupytext_format_version: '1.2'
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
#   language_info:
#     codemirror_mode:
#       name: ipython
#       version: 3
#     file_extension: .py
#     mimetype: text/x-python
#     name: python
#     nbconvert_exporter: python
#     pygments_lexer: ipython3
#     version: 3.5.2
# ---

# +
# %matplotlib nbagg

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

fig = plt.figure()
x = np.arange(0, 10, 0.1)

ims = []
for a in range(50):
    y = np.sin(x - a)
    im = plt.plot(x, y, "r")
    ims.append(im)

ani = animation.ArtistAnimation(fig, ims)
plt.show()
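With the nbagg backend the animation plays interactively; to embed it in the rendered notebook instead, the ArtistAnimation can be converted to JavaScript. A minimal sketch, assuming it runs inside Jupyter:

# Sketch: render the ArtistAnimation inline as a JavaScript widget (Jupyter only).
from IPython.display import HTML
HTML(ani.to_jshtml())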

+96 -276  numpy_scipy_sympy/Sympy.ipynb  (file diff suppressed because it is too large)


+4 -4  pytorch/PyTorch快速入门.ipynb

@@ -25,23 +25,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "ImportError", "ename": "ImportError",
"evalue": "No module named 'torch'", "evalue": "No module named 'torch'",
"output_type": "error",
"traceback": [ "traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-93bdd78e6769>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mprint_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m<ipython-input-1-93bdd78e6769>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mprint_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mImportError\u001b[0m: No module named 'torch'" "\u001b[0;31mImportError\u001b[0m: No module named 'torch'"
]
],
"output_type": "error"
}
],
"source": [
" from __future__ import print_function\n",
"from __future__ import print_function\n",
"import torch as t" "import torch as t"
] ]
}, },

