|
@@ -0,0 +1,81 @@ |
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
from numpy.ma.core import set_fill_value
|
|
|
|
|
|
from sklearn.datasets import load_digits
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
class LogReg:
    """One-vs-rest L2-regularised logistic regression on the scikit-learn digits set.

    One weight vector per class is trained with batch gradient descent on the
    first 90% of the samples; the remaining 10% are held out for ``predict``.
    The model has no bias term: each hypothesis is ``sigmoid(x . theta_k)``.
    """

    def __init__(self, alpha, lam, iter):
        """Load the digits data, split it, and initialise the parameters.

        Args:
            alpha: Gradient-descent learning rate.
            lam: L2 regularisation strength.
            iter: Number of gradient-descent steps per class.  (The name
                shadows the builtin but is kept for caller compatibility.)
        """
        digits = load_digits()
        self.image = digits.images

        # The first 90% of the samples are used for training, the rest for testing.
        self.train_size = int(digits.data.shape[0] * 0.9)
        self.train = digits.data[:self.train_size].astype(np.float64)
        self.test = digits.data[self.train_size:]
        self.target = digits.target[:self.train_size]
        self.test_target = digits.target[self.train_size:]

        self.theta = np.zeros((10, 64))  # (classes, 8 * 8 pixels)
        self.alpha = alpha
        self.lam = lam
        self.iter = iter

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        Only ``exp(-|x|)`` is ever evaluated, so arguments of large magnitude
        saturate to 0 or 1 without triggering a floating-point overflow.
        """
        x = np.asarray(x, dtype=np.float64)
        decay = np.exp(-np.abs(x))  # always in (0, 1], cannot overflow
        out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Hand scalars back as scalars rather than 0-d arrays.
        return out if out.ndim else out[()]

    def OneHot(self, data):
        """Return a length-10 vector that is 1 at index ``data`` and 0 elsewhere."""
        tmp = np.zeros((10,))
        tmp[data] = 1
        return tmp

    @staticmethod
    def _scale_rows(data):
        """Min-max scale every row of a 2-D array to [0, 1].

        A row whose values are all equal has a zero range; it is mapped to all
        zeros instead of dividing 0 by 0 and producing NaNs.
        """
        data = np.asarray(data, dtype=np.float64)
        if data.size == 0:
            return data.copy()
        low = data.min(axis=1, keepdims=True)
        span = data.max(axis=1, keepdims=True) - low
        safe_span = np.where(span == 0, 1.0, span)
        return (data - low) / safe_span

    def preprocessing(self):
        """Normalise the train/test samples and one-hot encode the train labels.

        Calling this more than once is harmless: scaled rows are left as they
        are and labels that are already one-hot are not encoded again.
        """
        # Normalisation (per sample).
        self.train = self._scale_rows(self.train)
        self.test = self._scale_rows(self.test)

        # One-hot encode the labels, unless that has already been done.
        labels = np.asarray(self.target)
        if labels.ndim == 1:
            self.target = np.array(
                [self.OneHot(i) for i in labels]
            ).astype(np.float64)

    def Costfunction(self, i):
        """Compute the regularised cost and gradient for the ``i``-th classifier.

        Args:
            i: Index of the class (row of ``self.theta``) to evaluate.

        Returns:
            A ``(cost, grad)`` pair: ``cost`` is a (1, 1) array and ``grad`` is
            a column vector with one entry per feature.
        """
        theta = self.theta[i, :].reshape(self.theta.shape[1], 1)
        label = self.target[:, i]
        data = self.train
        m = self.train_size

        h = self.sigmoid(data.dot(theta))

        # Cost function.  The hypothesis is clipped away from exactly 0 and 1
        # for the logarithms only, so a saturated prediction cannot turn the
        # cost into NaN/inf; the gradient below uses the unclipped values.
        eps = 1e-15
        h_safe = np.clip(h, eps, 1.0 - eps)
        cost = (-1 / m) * (
            label.T.dot(np.log(h_safe)) + (1 - label).T.dot(np.log(1 - h_safe))
        ) + (self.lam / (2 * m)) * theta.T.dot(theta)

        # Gradient of the data term plus the L2 penalty.
        grad = (1 / m) * data.T.dot(h - label.reshape(-1, 1))
        grad = grad + (self.lam / m * theta)
        return cost, grad

    def regression(self):
        """Train every one-vs-rest classifier with batch gradient descent.

        Returns:
            The trained parameter matrix ``self.theta`` (updated in place).
        """
        for i in range(self.theta.shape[0]):
            J = None
            for _ in range(self.iter):
                J, grad = self.Costfunction(i)
                self.theta[i, :] = self.theta[i, :] - self.alpha * grad.ravel()
            # NOTE(review): the cost is printed once per class (not per
            # iteration) -- confirm this matches the intended logging frequency.
            if J is not None:  # nothing to report when iter == 0
                print(J)
        return self.theta

    def predict(self):
        """Classify the held-out samples, print and return the accuracy.

        All hypothesis functions are evaluated for every test sample and the
        class with the largest response is chosen.

        Returns:
            The accuracy on the test split as a percentage (float).
        """
        # argmax(1) yields the index of the maximum value of each row.
        pred = self.sigmoid(self.test.dot(self.theta.T)).argmax(1)
        num = int(np.count_nonzero(pred == self.test_target))
        accuracy = float(num) / float(len(self.test_target)) * 100.0
        print(accuracy, "%")
        return accuracy
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Build the classifier, preprocess the data, train it and print the test accuracy."""
    model = LogReg(alpha=1, lam=0.1, iter=3000)
    model.preprocessing()
    model.regression()
    model.predict()


if __name__ == "__main__":
    main()