# -*- coding=utf8 -*-
"""Logistic regression trained with stochastic gradient ascent.

When run as a script, the model is fitted to a noisy two-moons data set and
its decision boundary is plotted.
"""
from __future__ import division

import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt

# Fix the global NumPy seed so repeated runs are reproducible; the training
# shuffle in Logistic.train draws from this global generator.
np.random.seed(0)
data, label = sklearn.datasets.make_moons(200, noise=0.30)


def plot_decision_boundary(predict_func, data, label):
    """Plot a classifier's decision regions together with the samples.

    Args:
        predict_func (callable): called with an array of shape (m, 2) holding
            grid points; must return m values that can be reshaped onto the
            grid.
        data (numpy.ndarray): samples; columns 0 and 1 are used as the x and
            y coordinates.
        label (numpy.ndarray): sample labels, used to colour the scatter
            points.
    """
    # Pad the plotted area by half a unit on every side of the data.
    x_min, x_max = data[:, 0].min() - .5, data[:, 0].max() + .5
    y_min, y_max = data[:, 1].min() - .5, data[:, 1].max() + .5
    h = 0.01  # grid step
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    # Classify every grid point, then fold the flat result back onto the grid.
    Z = predict_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(data[:, 0], data[:, 1], c=label, cmap=plt.cm.Spectral)
    plt.show()


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of a scalar or array."""
    return 1.0 / (1 + np.exp(-x))


class Logistic(object):
    """Binary logistic regression model: p(y=1 | x) = sigmoid(w . x + b)."""

    def __init__(self, data, label):
        """Store the training set and initialise the parameters.

        Args:
            data (numpy.ndarray): training samples, shape (m, n).
            label (numpy.ndarray): one 0/1 label per training sample.

        Raises:
            ValueError: if ``data`` and ``label`` differ in length.
        """
        self.data = np.asarray(data, dtype=float)
        self.label = np.asarray(label)
        self.data_num, n = np.shape(self.data)
        if len(self.label) != self.data_num:
            raise ValueError(
                "data and label must contain the same number of samples")
        # All weights and the bias start at one.
        self.weights = np.ones(n)
        self.b = 1.0

    def train(self, num_iteration=150, alpha=0.01):
        """Fit the weights and bias by stochastic gradient ascent.

        Args:
            num_iteration (int): number of passes over the training set.
            alpha (float): learning rate applied to every update.
        """
        for _ in range(num_iteration):
            # A fresh random order per pass; every sample is visited exactly
            # once per pass.
            for idx in np.random.permutation(self.data_num):
                sample = self.data[idx]
                # The bias is added once to the dot product, not once per
                # feature.
                activation = np.dot(sample, self.weights) + self.b
                error = self.label[idx] - sigmoid(activation)
                self.weights += alpha * error * sample
                self.b += alpha * error

    def predict(self, predict_data):
        """Predict class labels for a batch of samples.

        Args:
            predict_data (numpy.ndarray): samples, one row per sample, with
                as many columns as the training data.

        Returns:
            numpy.ndarray: one integer per sample, 1 where w . x + b > 0
            (equivalently sigmoid(w . x + b) > 0.5) and 0 otherwise.
        """
        # Reshape so that an empty input yields an empty result and a single
        # flat sample is treated as one row.
        points = np.asarray(predict_data, dtype=float)
        points = points.reshape(-1, self.weights.shape[0])
        scores = np.dot(points, self.weights) + self.b
        return (scores > 0).astype(int)


if __name__ == '__main__':
    logistic = Logistic(data, label)
    logistic.train(200)
    plot_decision_boundary(logistic.predict, data, label)