@@ -14,109 +14,6 @@ def relu(z):
    '''
    return np.maximum(0, z)
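
### Note (added): sigmoid() is called in the forward passes below but is defined
### outside this diff hunk; the assumed definition is the standard logistic
### function:
###     def sigmoid(z):
###         return 1 / (1 + np.exp(-z))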


L = 0


##### neural network model
def neural_network(X, Y, learning_rate=0.01, num_iterations=2000, lambd=0):
    n0, m = X.shape
    n1 = 20
    n2 = 7
    n3 = 5
    n4 = 1
    layers_dims = [n0, n1, n2, n3, n4]  # [10000, 20, 7, 5, 1] for the vectorized IMDB input
    L = len(layers_dims) - 1  # 4-layer network, not counting the input layer

    m = X.shape[1]

    ### initialize parameters
    param_w = [i for i in range(L+1)]
    param_b = [i for i in range(L+1)]
    np.random.seed(10)
    for l in range(1, L+1):
        if l < L:
            param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / layers_dims[l - 1])
        if l == L:
            param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
        param_b[l] = np.zeros((layers_dims[l], 1))
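
    ### Added note: hidden ReLU layers use He initialization (Gaussian weights
    ### scaled by sqrt(2 / n_prev)), which keeps activation variance roughly
    ### stable through ReLU layers; the sigmoid output layer uses a plain small
    ### scale of 0.01.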

    ### layer caches: prev_activations holds the pre-activation values z[l],
    ### activations holds a[l] = g(z[l]); dA/dz/dw/db hold the gradients
    activations = [X, ] + [i for i in range(L)]
    prev_activations = [i for i in range(L+1)]

    dA = [i for i in range(L+1)]
    dz = [i for i in range(L+1)]
    dw = [i for i in range(L+1)]
    db = [i for i in range(L+1)]

    for i in range(num_iterations):
        ### forward propagation
        for l in range(1, L+1):
            prev_activations[l] = np.dot(param_w[l], activations[l-1]) + param_b[l]
            if l < L:
                activations[l] = relu(prev_activations[l])
            else:
                activations[l] = sigmoid(prev_activations[l])

        ### cross-entropy loss
        cross_entropy_cost = -1/m * (np.dot(np.log(activations[L]), Y.T)
                                     + np.dot(np.log(1-activations[L]), 1-Y.T))

        ### L2 regularization term
        regularization_cost = 0
        for l in range(1, L+1):
            regularization_cost += np.sum(np.square(param_w[l])) * lambd/(2*m)

        cost = cross_entropy_cost + regularization_cost
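
        ### Added note: the regularized cost being minimized is
        ###     J = -(1/m) * sum_i [y_i * log(a_i) + (1 - y_i) * log(1 - a_i)]
        ###         + (lambd / (2*m)) * sum_l ||W[l]||_F^2
        ### where a_i is the sigmoid output for example i.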

        ### initialize backward propagation
        dA[L] = np.divide(1-Y, 1-activations[L]) - np.divide(Y, activations[L])
        assert dA[L].shape == (1, m)
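
        ### Added note: dA[L] is the derivative of the cross-entropy loss with
        ### respect to the output activation, dJ/da = -(y/a) + (1-y)/(1-a),
        ### applied element-wise.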

        ### backpropagation (gradient descent is the general optimization
        ### algorithm; backpropagation is how its gradients are computed in a
        ### deep neural network)
        for l in reversed(range(1, L+1)):
            if l == L:
                dz[l] = dA[l] * activations[l] * (1-activations[l])
            else:
                dz[l] = dA[l].copy()
                dz[l][prev_activations[l] <= 0] = 0
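
            ### Added note: for the sigmoid output layer dz = dA * a * (1 - a)
            ### (the sigmoid derivative); for ReLU layers the upstream gradient
            ### is passed through where z > 0 and zeroed where z <= 0.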

            dw[l] = 1/m * np.dot(dz[l], activations[l-1].T) + param_w[l] * lambd/m
            db[l] = 1/m * np.sum(dz[l], axis=1, keepdims=True)
            dA[l-1] = np.dot(param_w[l].T, dz[l])

            assert dz[l].shape == prev_activations[l].shape
            assert dw[l].shape == param_w[l].shape
            assert db[l].shape == param_b[l].shape
            assert dA[l-1].shape == activations[l-1].shape

            param_w[l] = param_w[l] - learning_rate * dw[l]
            param_b[l] = param_b[l] - learning_rate * db[l]

        if i % 100 == 0:
            print("cost after iteration {}: {}".format(i, cost))

    parameters = {}
    parameters["param_w"] = param_w
    parameters["param_b"] = param_b
    return parameters


def predict(X_new, parameters, threshold=0.5):
    param_w = parameters["param_w"]
    param_b = parameters["param_b"]
    L = len(param_w) - 1  # number of layers, excluding the input layer

    activations = [X_new, ] + [i for i in range(L)]
    prev_activations = [i for i in range(L + 1)]
    m = X_new.shape[1]

    for l in range(1, L + 1):
        prev_activations[l] = np.dot(param_w[l], activations[l - 1]) + param_b[l]
        if l < L:
            activations[l] = relu(prev_activations[l])
        else:
            activations[l] = sigmoid(prev_activations[l])
    prediction = (activations[L] > threshold).astype("int")
    return prediction
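

### Hypothetical helper (added, not part of the original file): score predict()
### output against ground-truth labels; assumes both arguments are 0/1 arrays
### of shape (1, m).
def accuracy(predictions, Y):
    return float(np.mean(predictions == Y))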


def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
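    ### The rest of this function lies outside the hunk shown here; presumably
    ### it is the standard multi-hot encoding, roughly:
    ###     for i, sequence in enumerate(sequences):
    ###         results[i, sequence] = 1.
    ###     return results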

@@ -132,26 +29,173 @@ def save(weigths, path="./weigths.npz"):
    np.savez(path, param_w=weigths["param_w"], param_b=weigths["param_b"])
    ### np.savetxt("./weigths.out", weigths["param_w"], fmt='%s')
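
### Note (added): a module-level load() is called near the bottom of this file
### but is defined outside the hunks shown here; presumably it mirrors
### FFNNModel.load(), wrapping np.load(path, allow_pickle=True) and returning
### the saved weights.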


class FFNNModel():
    def __init__(self, x, y, batchsize, epoch) -> None:
        n0 = 10000
        n1 = 20
        n2 = 7
        n3 = 5
        n4 = 1
        layers_dims = [n0, n1, n2, n3, n4]  # [10000, 20, 7, 5, 1]
        L = len(layers_dims) - 1  # 4-layer network, not counting the input layer

        ### initialize the weights
        param_w = [i for i in range(L + 1)]
        param_b = [i for i in range(L + 1)]

        for l in range(1, L+1):
            if l < L:
                param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / layers_dims[l - 1])
            if l == L:
                param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
            param_b[l] = np.zeros((layers_dims[l], 1))

        self.layers_dims = layers_dims
        self.L = L

        ### store the initialized weights
        self.param_w = param_w
        self.param_b = param_b

        self.x = x
        self.y = y
        self.batchsize = batchsize
        self.epoch = epoch

    def load(self, path="./weigths.npz"):
        weigths = np.load(path, allow_pickle=True)

        self.param_w = weigths["param_w"]
        self.param_b = weigths["param_b"]

    def train(self) -> None:
        for i in range(0, self.epoch):
            cost = self.neural_network()
            print("###### cost after epoch {}: {} \n".format(i + 1, cost))
            save({"param_w": self.param_w, "param_b": self.param_b})

    def predict(self, x_test, threshold=0.5):
        X_new = vectorize_sequences(x_test).T
        L = self.L
        param_w = self.param_w
        param_b = self.param_b

        activations = [X_new, ] + [i for i in range(L)]
        prev_activations = [i for i in range(L + 1)]
        m = X_new.shape[1]

        for l in range(1, L + 1):
            prev_activations[l] = np.dot(param_w[l], activations[l - 1]) + param_b[l]
            if l < L:
                activations[l] = relu(prev_activations[l])
            else:
                activations[l] = sigmoid(prev_activations[l])
        prediction = (activations[L] > threshold).astype("int")
        return prediction

    ##### neural network model
    def neural_network(self, learning_rate=0.001, lambd=0):
        totalDataLen = len(self.x)
        loopSize = int(totalDataLen / self.batchsize)

        rcost = 1.0
        for i in range(0, loopSize):
            dataStartIndex = i * self.batchsize
            dataEndIndex = (i + 1) * self.batchsize

            X = vectorize_sequences(self.x[dataStartIndex:dataEndIndex]).T
            Y = np.asarray(self.y[dataStartIndex:dataEndIndex]).astype('float32')

            param_w = self.param_w
            param_b = self.param_b

            layers_dims = self.layers_dims
            L = self.L

            m = X.shape[1]
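
            ### Added note: each pass through this loop takes one gradient step
            ### on a single mini-batch of `batchsize` reviews, vectorized on the
            ### fly into a (10000, batchsize) multi-hot matrix X.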

            activations = [X, ] + [i for i in range(L)]
            prev_activations = [i for i in range(L+1)]

            dA = [i for i in range(L+1)]
            dz = [i for i in range(L+1)]
            dw = [i for i in range(L+1)]
            db = [i for i in range(L+1)]

            ### forward propagation
            for l in range(1, L+1):
                prev_activations[l] = np.dot(param_w[l], activations[l-1]) + param_b[l]
                if l < L:
                    activations[l] = relu(prev_activations[l])
                else:
                    activations[l] = sigmoid(prev_activations[l])

            ### cross-entropy loss
            cross_entropy_cost = -1/m * (np.dot(np.log(activations[L]), Y.T)
                                         + np.dot(np.log(1-activations[L]), 1-Y.T))

            ### L2 regularization term
            regularization_cost = 0
            for l in range(1, L+1):
                regularization_cost += np.sum(np.square(param_w[l])) * lambd/(2*m)

            cost = cross_entropy_cost + regularization_cost

            ### initialize backward propagation
            dA[L] = np.divide(1-Y, 1-activations[L]) - np.divide(Y, activations[L])
            assert dA[L].shape == (1, m)

            ### backpropagation (gradient descent is the general optimization
            ### algorithm; backpropagation is how its gradients are computed in
            ### a deep neural network)
            for l in reversed(range(1, L+1)):
                if l == L:
                    dz[l] = dA[l] * activations[l] * (1-activations[l])
                else:
                    dz[l] = dA[l].copy()
                    dz[l][prev_activations[l] <= 0] = 0

                dw[l] = 1/m * np.dot(dz[l], activations[l-1].T) + param_w[l] * lambd/m
                db[l] = 1/m * np.sum(dz[l], axis=1, keepdims=True)
                dA[l-1] = np.dot(param_w[l].T, dz[l])

                assert dz[l].shape == prev_activations[l].shape
                assert dw[l].shape == param_w[l].shape
                assert db[l].shape == param_b[l].shape
                assert dA[l-1].shape == activations[l-1].shape

                param_w[l] = param_w[l] - learning_rate * dw[l]
                param_b[l] = param_b[l] - learning_rate * db[l]

            if i % 5000 == 4999:
                print("cost for batch {}: {}".format(i + 1, cost))
                rcost = cost

            if i == loopSize - 1:
                rcost = cost

            self.param_w = param_w
            self.param_b = param_b

        return rcost


(train_data, train_labels), (test_data, test_labels) = imdb.load_data(path="imdb/imdb.npz", num_words=10000)
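
### Note (added): np, sys, and imdb are imported outside the hunks shown here;
### imdb is presumably keras.datasets.imdb, so with num_words=10000 each review
### is a list of word indices below 10000.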

# Our vectorized training data (multi-hot arrays for the function-style
# neural_network/predict path)
x_train = vectorize_sequences(train_data[:100])
# Our vectorized test data
x_test = vectorize_sequences(test_data[:5])
# Raw index sequences for the FFNNModel path, which vectorizes each mini-batch
# on the fly; note these assignments overwrite the vectorized arrays above.
x_train = train_data[:2000]
x_test = test_data[:10]

y_train = np.asarray(train_labels[:100]).astype('float32')
y_test = np.asarray(test_labels[:5]).astype('float32')
y_train = train_labels[:2000]
y_test = test_labels[:10]

# Function-style pipeline (expects the vectorized arrays above; with the raw
# index sequences assigned last, these calls will not run as written).
parameters = neural_network(x_train.T, y_train, num_iterations=1000)
save(parameters)
weigths = load()

# Class-based pipeline: mini-batch training on the raw index sequences.
model = FFNNModel(x_train, y_train, 16, 100)
model.load()
model.train()

y_p = predict(x_test.T, weigths)
print(y_p)
y_pre = model.predict(x_test)
print(y_test)
print(y_pre)

y_p2 = predict(x_train[:5].T, weigths)
print(y_p2)
print(y_train[:5])
y_pre2 = model.predict(x_train[:10])
print(y_train[:10])
print(y_pre2)

sys.exit()