From efb6fc6aca8c9f6180e1b4ec7e9c45e5d9300a0e Mon Sep 17 00:00:00 2001
From: Shuhui Bu
Date: Tue, 2 Oct 2018 22:15:19 +0800
Subject: [PATCH] Add pytorch logistic regression codes

---
 demo_code/2_linear_regression_0.py                 |  92 ------------------
 ...near_regression.py => 2_linear_regression_1.py} |   3 +-
 ...near_Regression.py => 2_linear_regression_2.py} |  39 +++++---
 demo_code/2_logistic_regression_1.py               |  90 ++++++++++++++++++
 ...ic_Regression.py => 2_logistic_regression_2.py} |   0
 demo_code/2_poly_fitting_0.py                      | 105 ---------------------
 6 files changed, 118 insertions(+), 211 deletions(-)
 delete mode 100644 demo_code/2_linear_regression_0.py
 rename demo_code/{2_linear_regression.py => 2_linear_regression_1.py} (96%)
 rename demo_code/{Linear_Regression.py => 2_linear_regression_2.py} (65%)
 create mode 100644 demo_code/2_logistic_regression_1.py
 rename demo_code/{Logistic_Regression.py => 2_logistic_regression_2.py} (100%)
 delete mode 100644 demo_code/2_poly_fitting_0.py

diff --git a/demo_code/2_linear_regression_0.py b/demo_code/2_linear_regression_0.py
deleted file mode 100644
index 2fede4e..0000000
--- a/demo_code/2_linear_regression_0.py
+++ /dev/null
@@ -1,92 +0,0 @@
-
-import numpy as np
-
-import torch
-from torch.autograd import Variable
-
-import matplotlib.pyplot as plt
-
-"""
-Using pytorch to do linear regression
-"""
-
-torch.manual_seed(2018)
-
-# generate data
-x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
-                    [9.779], [6.182], [7.59], [2.167], [7.042],
-                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)
-
-y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
-                    [3.366], [2.596], [2.53], [1.221], [2.827],
-                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)
-
-
-# draw the data
-plt.plot(x_train, y_train, 'bo')
-plt.show()
-
-
-# convert to tensor
-x_train = torch.from_numpy(x_train)
-y_train = torch.from_numpy(y_train)
-
-# define model parameters
-w = Variable(torch.randn(1), requires_grad=True)
-b = Variable(torch.zeros(1), requires_grad=True)
-
-# construct the linear model
-x_train = Variable(x_train)
-y_train = Variable(y_train)
-
-def linear_model(x):
-    return x*w + b
-
-# first predictive
-y_pred = linear_model(x_train)
-
-# draw the real & predictived data
-plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label="Real")
-plt.plot(x_train.data.numpy(), y_pred.data.numpy(), 'ro', label="Estimated")
-plt.legend()
-plt.show()
-
-# define the loss function
-def get_loss(y_pred, y):
-    return torch.mean((y_pred - y)**2)
-
-loss = get_loss(y_pred, y_train)
-print("loss = %f" % float(loss))
-
-
-# auto-grad
-loss.backward()
-print("w.grad = %f" % float(w.grad))
-print("b.grad = %f" % float(b.grad))
-
-# upgrade parameters
-eta = 1e-2
-
-w.data = w.data - eta*w.grad.data
-b.data = b.data - eta*w.grad.data
-
-y_pred = linear_model(x_train)
-plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label="Real")
-plt.plot(x_train.data.numpy(), y_pred.data.numpy(), 'ro', label="Estimated")
-plt.legend()
-plt.show()
-
-
-for i in range(10):
-    y_pred = linear_model(x_train)
-    loss = get_loss(y_pred, y_train)
-
-    w.grad.zero_()
-    b.grad.zero_()
-    loss.backward()
-
-    w.data = w.data - eta*w.grad.data
-    b.data = b.data - eta*b.grad.data
-
-    print("epoch: %3d, loss: %f" % (i, loss.data[0]))
-
diff --git a/demo_code/2_linear_regression.py b/demo_code/2_linear_regression_1.py
similarity index 96%
rename from demo_code/2_linear_regression.py
rename to demo_code/2_linear_regression_1.py
index d1cd8f0..605dcde 100644
--- a/demo_code/2_linear_regression.py
+++ b/demo_code/2_linear_regression_1.py
@@ -49,8 +49,9 @@ def get_loss(y_pred, y):
 
 # upgrade parameters
 eta = 1e-2
+n_epoch = 100
 
-for i in range(100):
+for i in range(n_epoch):
     y_pred = linear_model(x_train)
     loss = get_loss(y_pred, y_train)
 
diff --git a/demo_code/Linear_Regression.py b/demo_code/2_linear_regression_2.py
similarity index 65%
rename from demo_code/Linear_Regression.py
rename to demo_code/2_linear_regression_2.py
index e14ede5..4e97f31 100644
--- a/demo_code/Linear_Regression.py
+++ b/demo_code/2_linear_regression_2.py
@@ -1,17 +1,31 @@
-import torch as t
+import torch
 from torch import nn, optim
 from torch.autograd import Variable
 import numpy as np
 import matplotlib.pyplot as plt
 
-# create numpy data
-x_train = np.linspace(0, 10, 100)
-y_train = 10*x_train + 4.5
 
-# convert to tensor (need to change nx1, float32 dtype)
-x_train = t.from_numpy(x_train.reshape(-1, 1).astype("float32"))
-y_train = t.from_numpy(y_train.reshape(-1, 1).astype("float32"))
+torch.manual_seed(2018)
+
+# model's real parameters
+w_target = 3
+b_target = 10
+
+# generate data
+n_data = 100
+x_train = np.random.rand(n_data, 1)*20 - 10
+y_train = w_target*x_train + b_target + (np.random.randn(n_data, 1)*10-5.0)
+
+# draw the data
+plt.plot(x_train, y_train, 'bo')
+plt.show()
+
+
+# convert to tensor
+x_train = torch.from_numpy(x_train).float()
+y_train = torch.from_numpy(y_train).float()
+
 
 # Linear Regression Model
@@ -50,15 +64,14 @@ for epoch in range(num_epochs):
         print('Epoch[{}/{}], loss: {:.6f}'
               .format(epoch+1, num_epochs, loss.data[0]))
 
+
+# do evaluation & plot
 model.eval()
 predict = model(Variable(x_train))
 predict = predict.data.numpy()
 
-plt.plot(x_train.numpy(), y_train.numpy(), 'ro', label='Original data')
-plt.plot(x_train.numpy(), predict, label='Fitting Line')
+plt.plot(x_train.numpy(), y_train.numpy(), 'bo', label='Real')
+plt.plot(x_train.numpy(), predict, 'ro', label='Estimated')
 
-# show the legend
-plt.legend()
+plt.legend()
 plt.show()
-
-# save the model
-t.save(model.state_dict(), './model_LinearRegression.pth')
diff --git a/demo_code/2_logistic_regression_1.py b/demo_code/2_logistic_regression_1.py
new file mode 100644
index 0000000..cdbd10a
--- /dev/null
+++ b/demo_code/2_logistic_regression_1.py
@@ -0,0 +1,90 @@
+import numpy as np
+from sklearn import datasets
+import matplotlib.pyplot as plt
+
+import torch
+from torch.autograd import Variable
+import torch.nn.functional as F
+
+
+# generate sample data
+centers = [(0, 0), (5, 5)]
+n_samples = 200
+
+x_train, y_train = datasets.make_blobs(n_samples=n_samples, n_features=2, cluster_std=1.0,
+                                       centers=centers, shuffle=False, random_state=42)
+y_label = y_train
+
+# plot data
+plt.scatter(x_train[:, 0], x_train[:, 1], c=y_label, label="Real", cmap=plt.cm.Spectral)
+plt.show()
+
+# convert to tensor
+x_train = torch.from_numpy(x_train).float()
+y_train = torch.from_numpy(y_train).float()
+y_train.unsqueeze_(1)
+
+# define model parameters
+w = Variable(torch.randn(2, 1).float(), requires_grad=True)
+b = Variable(torch.zeros(1).float(), requires_grad=True)
+
+# wrap the tensors in Variables
+x_train = Variable(x_train)
+y_train = Variable(y_train)
+
+# define logistic regression function
+def logistic_regression(x):
+    return torch.sigmoid(torch.mm(x, w) + b)
+
+# define loss function
+def binary_loss(y_pred, y):
+    logits = (y * y_pred.clamp(1e-12).log() + (1 - y) * (1 - y_pred).clamp(1e-12).log()).mean()
+    return -logits
+
+# set the learning rate and number of epochs
+eta = 1e-2
+n_epoch = 1000
+
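+# training loop: forward pass, binary cross-entropy loss, backprop,
+# one manual gradient-descent step, then reset the accumulated gradients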
+for i in range(n_epoch):
+    y_pred = logistic_regression(x_train)
+
+    loss = binary_loss(y_pred, y_train)
+    loss.backward()
+
+    w.data = w.data - eta*w.grad.data
+    b.data = b.data - eta*b.grad.data
+
+    w.grad.zero_()
+    b.grad.zero_()
+
+    y_est = y_pred.ge(0.5).float()
+    acc = float((y_est == y_train).sum().item()) / y_train.shape[0]
+    if i % 10 == 0:
+        print("epoch: %3d, loss: %f, acc: %f" % (i, loss.item(), acc))
+
+
+# plot decision boundary
+w0 = float(w[0].data[0])
+w1 = float(w[1].data[0])
+b0 = float(b.data[0])
+print("w: %f %f, b = %f" % (w0, w1, b0))
+
+x_min = float(x_train[:, 0].min())
+x_max = float(x_train[:, 0].max())
+plot_x = np.arange(x_min, x_max, 0.1)
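+# on the boundary w0*x + w1*y + b = 0 holds, hence y = -(w0*x + b)/w1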
+plot_y = (-w0*plot_x - b0)/w1
+
+plt.scatter(x_train[:, 0], x_train[:, 1], c=y_label, label="Real", cmap=plt.cm.Spectral)
+plt.plot(plot_x, plot_y, 'g-', label="Decision boundary")
+plt.legend()
+plt.show()
+
+y_pred = logistic_regression(x_train)
+y_est = torch.Tensor(y_pred.size())
+y_est[y_pred > 0.5] = 1
+y_est[y_pred < 0.5] = 0
+
+y_est = y_est.numpy().flatten()
+err = np.sum((y_est - y_label)**2)
+print("err = %f" % err)
+
diff --git a/demo_code/Logistic_Regression.py b/demo_code/2_logistic_regression_2.py
similarity index 100%
rename from demo_code/Logistic_Regression.py
rename to demo_code/2_logistic_regression_2.py
diff --git a/demo_code/2_poly_fitting_0.py b/demo_code/2_poly_fitting_0.py
deleted file mode 100644
index 3c17607..0000000
--- a/demo_code/2_poly_fitting_0.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import numpy as np
-
-import torch
-from torch.autograd import Variable
-
-import matplotlib.pyplot as plt
-
-
-"""
-Polynomial fitting by pytorch
-"""
-
-# define the model's parameters
-w_target = np.array([0.5, 3, 2.4])
-b_target = np.array([0.9])
-
-f_des = "y = %f + %f * x + %f * x^2 + %f * x^3" % (
-    b_target[0],
-    w_target[0], w_target[1], w_target[2])
-print(f_des)
-
-# draw the data
-x_sample = np.arange(-3, 3.1, 0.1)
-y_sample = b_target[0] + w_target[0]*x_sample + w_target[1]*x_sample**2 + w_target[2]*x_sample**3
-
-plt.plot(x_sample, y_sample, label="Real")
-plt.legend()
-plt.show()
-
-
-# construct variabels
-x_train = np.stack([x_sample**i for i in range(1, 4)], axis=1)
-x_train = torch.from_numpy(x_train).float()
-
-y_train = torch.from_numpy(y_sample).float().unsqueeze(1)
-
-# define model parameters
-w = Variable(torch.randn(3, 1).float(), requires_grad=True)
-b = Variable(torch.zeros(1).float(), requires_grad=True)
-
-x_train = Variable(x_train)
-y_train = Variable(y_train)
-
-print(w.shape)
-print(b.shape)
-print(x_train.shape)
-print(y_train.shape)
-
-def polynomial(x):
-    return torch.mm(x, w) + b
-
-def get_loss(y_pred, y):
-    return torch.mean((y_pred-y)**2)
-
-# draw initial graph
-y_pred = polynomial(x_train)
-
-plt.plot(x_train.data.numpy()[:, 0], y_sample, label="Real", color='b')
-plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label="Fitting", color='r')
-plt.legend()
-plt.show()
-
-# compute loss
-loss = get_loss(y_pred, y_train)
-print("Loss = %f" % loss)
-
-loss.backward()
-print(w.grad)
-print(b.grad)
-
-eta = 0.001
-
-w.data = w.data - eta*w.grad.data
-b.data = b.data - eta*b.grad.data
-
-# second draw
-y_pred = polynomial(x_train)
-
-plt.plot(x_train.data.numpy()[:, 0], y_sample, label="Real", color='b')
-plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label="Fitting", color='r')
-plt.legend()
-plt.show()
-
-
-for i in range(100):
-    y_pred = polynomial(x_train)
-
-    loss = get_loss(y_pred, y_train)
-
-    w.grad.data.zero_()
-    b.grad.data.zero_()
-    loss.backward()
-
-    w.data = w.data - eta*w.grad.data
-    b.data = b.data - eta*b.grad.data
-
-    print("epoch: %4d, loss: %f" % (i, loss.data[0]))
-
-# second draw
-y_pred = polynomial(x_train)
-
-plt.plot(x_train.data.numpy()[:, 0], y_sample, label="Real", color='b')
-plt.plot(x_train.data.numpy()[:, 0], y_pred.data.numpy(), label="Fitting", color='r')
-plt.legend()
-plt.show()
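
Note: the renamed demo_code/2_logistic_regression_2.py is carried over unchanged (100% similarity), so its contents do not appear in this patch. For comparison with the manual loop in 2_logistic_regression_1.py above, a minimal sketch of the same classifier written against torch.nn and torch.optim could look like the following; the class name LogisticRegression and the hyper-parameters here are illustrative assumptions, not the actual contents of that file.

import torch
from torch import nn, optim

# hypothetical nn-based version of the manual loop above (assumed, not from this patch)
class LogisticRegression(nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(2, 1)      # two input features -> one logit

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

model = LogisticRegression()
criterion = nn.BCELoss()                   # same binary cross-entropy as binary_loss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)

for epoch in range(1000):
    y_pred = model(x_train)                # x_train, y_train prepared as in the script above
    loss = criterion(y_pred, y_train)
    optimizer.zero_grad()                  # reset gradients, backprop, take one SGD step
    loss.backward()
    optimizer.step()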