|
- # -*- coding=utf8 -*-
- from __future__ import division
- import numpy as np
- import sklearn.datasets
- import matplotlib.pyplot as plt
-
# Seed NumPy's global random state so the dataset generated below
# (no explicit random_state is passed) is the same on every run.
np.random.seed(0)
# 200 two-class "moons" samples; `noise` is the standard deviation of the
# Gaussian noise added to the points.
data, label = sklearn.datasets.make_moons(n_samples=200, noise=0.30)
-
def plot_decision_boundary(predict_func, data, label):
    """Draw the regions predicted by a classifier with the samples on top.

    Args:
        predict_func (callable): Prediction function. It is called once
            with an array holding one 2-D grid point per row and must
            return one value per row.
        data (numpy.ndarray): Training samples; column 0 is drawn on the
            x axis and column 1 on the y axis.
        label (numpy.ndarray): Training labels, used to colour the
            scatter points.
    """
    margin = .5  # padding around the data so border points stay visible
    step = 0.01  # spacing of the evaluation grid

    xs = data[:, 0]
    ys = data[:, 1]
    x_min, x_max = xs.min() - margin, xs.max() + margin
    y_min, y_max = ys.min() - margin, ys.max() + margin

    grid_x, grid_y = np.meshgrid(
        np.arange(x_min, x_max, step),
        np.arange(y_min, y_max, step),
    )

    # Evaluate the classifier on every grid point, then fold the flat
    # predictions back into the grid's shape for the filled contour plot.
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    regions = predict_func(grid_points).reshape(grid_x.shape)

    plt.contourf(grid_x, grid_y, regions, cmap=plt.cm.Spectral)
    plt.scatter(xs, ys, c=label, cmap=plt.cm.Spectral)
    plt.show()
-
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The naive formula evaluates exp(-x), which overflows (and makes NumPy
    emit a RuntimeWarning) for large negative x. Only exp(-|x|) is
    evaluated here; it never exceeds 1, so it cannot overflow.

    Args:
        x (float or numpy.ndarray): Input value(s).

    Returns:
        numpy.float64 or numpy.ndarray: Values in [0, 1] with the same
        shape as ``x``; a scalar input gives a scalar result.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + exp(-x)).  For x < 0 the algebraically equal
    # form exp(x) / (1 + exp(x)) is used, where exp(x) == decay.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar
    # and leaves arrays of higher rank untouched.
    return result[()]
-
class Logistic(object):
    """Binary logistic-regression model trained by stochastic gradient ascent.

    Attributes:
        data (numpy.ndarray): Training samples, one row per sample.
        label (numpy.ndarray): Training labels, one per sample.
        data_num (int): Number of training samples.
        weights (numpy.ndarray): One weight per feature, initialised to 1.
        b (float): Bias term, initialised to 1.
    """

    def __init__(self, data, label):
        """Store the training set and initialise every parameter to one.

        Args:
            data (numpy.ndarray): Training samples with shape
                (number of samples, number of features).
            label (numpy.ndarray): Training labels, one per sample;
                expected to be 0 or 1 to match what ``predict`` returns.
        """
        self.data = np.asarray(data)
        self.label = np.asarray(label)

        self.data_num, n = np.shape(self.data)
        self.weights = np.ones(n)
        self.b = 1.0

    def train(self, num_iteration=150, learning_rate=0.01):
        """Fit the weights and the bias by stochastic gradient ascent.

        Every pass visits each training sample exactly once, in a fresh
        random order drawn from NumPy's global random state.

        Args:
            num_iteration (int): Number of passes over the training set.
            learning_rate (float): Step size applied to every update.
        """
        for _ in range(num_iteration):
            # A permutation gives sampling without replacement: the drawn
            # value is the sample index itself, so no sample is skipped or
            # visited twice within a pass.
            for idx in np.random.permutation(self.data_num):
                sample = self.data[idx]
                # The bias enters the linear score once, not once per
                # feature.
                score = np.dot(sample, self.weights) + self.b
                error = self.label[idx] - sigmoid(score)
                self.weights += learning_rate * error * sample
                self.b += learning_rate * error

    def predict(self, predict_data):
        """Classify samples by the sign of the linear score.

        Args:
            predict_data (numpy.ndarray): Samples to classify, one row per
                sample, with as many columns as there are weights.

        Returns:
            numpy.ndarray: Integer array with one entry per row: 1 where
            ``dot(weights, x) + b > 0``, otherwise 0. Empty input gives an
            empty array.
        """
        samples = np.asarray(predict_data, dtype=float)
        if samples.size == 0:
            return np.zeros(0, dtype=int)
        scores = np.dot(samples, self.weights) + self.b
        return (scores > 0).astype(int)
-
if __name__ == '__main__':
    # Script entry point: fit the model on the module-level dataset, then
    # plot its decision regions together with the training samples.
    model = Logistic(data, label)
    model.train(num_iteration=200)
    plot_decision_boundary(model.predict, data, label)
|