import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

# load the data: use a single feature (column 2, body mass index) of the
# diabetes dataset as X and the disease progression target as Y
d = datasets.load_diabetes()

X = d.data[:, 2]
Y = d.target

# draw the original data
plt.scatter(X, Y)
plt.show()

###############################################################################
# Least squares
###############################################################################

# loss function: L = \sum_{i=1}^{N} (y_i - a*x_i - b)^2
N = X.shape[0]

S_X2 = np.sum(X*X)
S_X = np.sum(X)
S_XY = np.sum(X*Y)
S_Y = np.sum(Y)

# setting dL/da = 0 and dL/db = 0 gives the normal equations:
#   a*S_X2 + b*S_X = S_XY
#   a*S_X  + b*N   = S_Y
A1 = np.array([[S_X2, S_X], [S_X, N]])
B1 = np.array([S_XY, S_Y])

# solve the 2x2 system (np.linalg.solve is more stable than inverting A1)
coeff = np.linalg.solve(A1, B1)

# draw the fitted line over the data
x_min = np.min(X)
x_max = np.max(X)
y_min = coeff[0] * x_min + coeff[1]
y_max = coeff[0] * x_max + coeff[1]

plt.scatter(X, Y)
plt.plot([x_min, x_max], [y_min, y_max], 'r')
plt.show()
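
# Sanity check (added sketch, not in the original script): the closed-form
# coefficients should agree with NumPy's built-in degree-1 polynomial fit.
p = np.polyfit(X, Y, 1)  # returns [slope, intercept]
print("normal equations: a=%.3f, b=%.3f" % (coeff[0], coeff[1]))
print("np.polyfit:       a=%.3f, b=%.3f" % (p[0], p[1]))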


###############################################################################
# Linear regression (stochastic gradient descent)
###############################################################################
# the loss function:
#   L = \sum_{i=1}^{N} (y_i - a*x_i - b)^2
# per-sample gradients used in the updates below:
#   dL_j/da = -2*(y_j - a*x_j - b)*x_j
#   dL_j/db = -2*(y_j - a*x_j - b)

n_epochs = 1000   # number of passes over the data
a, b = 1, 1       # initial guess for slope and intercept
epsilon = 0.001   # learning rate

for i in range(n_epochs):
    # one stochastic update per sample
    for j in range(N):
        a = a + epsilon*2*(Y[j] - a*X[j] - b)*X[j]
        b = b + epsilon*2*(Y[j] - a*X[j] - b)

    # report the full loss after each epoch
    L = 0
    for j in range(N):
        L = L + (Y[j] - a*X[j] - b)**2
    print("epoch %4d: loss = %f" % (i, L))
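
# Added sketch: compare the SGD estimate with the closed-form solution from
# the least-squares section above, and draw the fitted line (green) over the
# data. With this learning rate the two need not match exactly after the
# epochs above.
print("SGD estimate: a=%.3f, b=%.3f" % (a, b))
print("closed form:  a=%.3f, b=%.3f" % (coeff[0], coeff[1]))

plt.scatter(X, Y)
plt.plot([x_min, x_max], [a * x_min + b, a * x_max + b], 'g')
plt.show()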