"""Simple linear regression on the diabetes BMI feature.

Fits y = a*x + b two ways — the closed-form normal equations and
per-sample gradient descent on the squared-error loss — and plots the
raw data and the fitted line.
"""
import numpy as np


def fit_least_squares(X, Y):
    """Solve the normal equations for the line y = a*x + b.

    Minimizes L = sum_i (y_i - a*x_i - b)^2 in closed form.

    Args:
        X: 1-D array of inputs.
        Y: 1-D array of targets, same length as X.

    Returns:
        Array [a, b] with the fitted slope and intercept.
    """
    N = X.shape[0]
    S_X2 = np.sum(X * X)
    S_X = np.sum(X)
    S_XY = np.sum(X * Y)
    S_Y = np.sum(Y)
    A = np.array([[S_X2, S_X], [S_X, N]])
    B = np.array([S_XY, S_Y])
    # solve() is more accurate and cheaper than inv(A).dot(B).
    return np.linalg.solve(A, B)


def fit_sgd(X, Y, n_epochs=1000, lr=0.001, a=1.0, b=1.0, verbose=True):
    """Fit y = a*x + b by per-sample gradient descent on squared error.

    Args:
        X: 1-D array of inputs.
        Y: 1-D array of targets, same length as X.
        n_epochs: number of full passes over the data.
        lr: learning rate (called epsilon in the original script).
        a: initial slope.
        b: initial intercept.
        verbose: if True, print the loss after each epoch.

    Returns:
        Tuple (a, b) with the fitted slope and intercept.
    """
    N = X.shape[0]
    for epoch in range(n_epochs):
        for j in range(N):
            # Compute the residual once so both parameter updates use the
            # same gradient point; the original updated `a` first and then
            # evaluated the residual again with the new `a` for `b`.
            r = Y[j] - a * X[j] - b
            a = a + lr * 2 * r * X[j]
            b = b + lr * 2 * r
        if verbose:
            # Vectorized epoch loss (replaces the per-element Python loop).
            L = np.sum((Y - a * X - b) ** 2)
            print("epoch %4d: loss = %f" % (epoch, L))
    return a, b


def main():
    """Load the diabetes data, plot it, and run both fitting procedures."""
    # Plotting and data loading are imported lazily so the fitting
    # functions above stay importable without matplotlib/sklearn installed.
    import matplotlib.pyplot as plt
    import sklearn  # kept from the original top-level imports
    from sklearn import datasets

    d = datasets.load_diabetes()
    X = d.data[:, 2]  # third feature column of the dataset
    Y = d.target

    # Draw the original data.
    plt.scatter(X, Y)
    plt.show()

    ###########################################################################
    # Least squares: L = \sum_{i=1, N} (y_i - a*x_i - b)^2
    ###########################################################################
    coeff = fit_least_squares(X, Y)
    x_min = np.min(X)
    x_max = np.max(X)
    y_min = coeff[0] * x_min + coeff[1]
    y_max = coeff[0] * x_max + coeff[1]
    plt.scatter(X, Y)
    plt.plot([x_min, x_max], [y_min, y_max], 'r')
    plt.show()

    ###########################################################################
    # Linear regression via gradient descent, same loss function.
    ###########################################################################
    fit_sgd(X, Y)


if __name__ == "__main__":
    main()