diff --git a/深度学习笔记/前馈神经网络/feedforward_neural_network.py b/深度学习笔记/前馈神经网络/feedforward_neural_network.py
index 58a669f..6cdf9c6 100755
--- a/深度学习笔记/前馈神经网络/feedforward_neural_network.py
+++ b/深度学习笔记/前馈神经网络/feedforward_neural_network.py
@@ -14,109 +14,6 @@ def relu(z):
     '''
     return np.maximum(0, z)
 
-L = 0
-
-##### neural network model
-def neural_network(X, Y, learning_rate=0.01, num_iterations=2000, lambd=0):
-    n0, m = X.shape
-    n1 = 20
-    n2 = 7
-    n3 = 5
-    n4 = 1
-    layers_dims = [n0, n1, n2, n3, n4]    # [12288, 20, 7, 5, 1]
-    L = len(layers_dims) - 1    # 4层神经网络,不计输入层
-
-    m = X.shape[1]
-    ### initialize forward propagation
-    param_w = [i for i in range(L+1)]
-    param_b = [i for i in range(L+1)]
-    np.random.seed(10)
-    for l in range(1, L+1):
-        if l < L:
-            param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / layers_dims[l - 1])
-        if l == L:
-            param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
-        param_b[l] = np.zeros((layers_dims[l], 1))
-
-    activations = [X, ] + [i for i in range(L)]
-    prev_activations = [i for i in range(L+1)]
-
-    dA = [i for i in range(L+1)]
-    dz = [i for i in range(L+1)]
-    dw = [i for i in range(L+1)]
-    db = [i for i in range(L+1)]
-
-    for i in range(num_iterations):
-        ### forward propagation
-        for l in range(1, L+1):
-            prev_activations[l] = np.dot(param_w[l], activations[l-1]) + param_b[l]
-            if l < L:
-                activations[l] = relu(prev_activations[l])
-            else:
-                activations[l] = sigmoid(prev_activations[l])
-
-        ### 交叉熵损失函数
-        cross_entropy_cost = -1/m * (np.dot(np.log(activations[L]), Y.T) \
-                                     + np.dot(np.log(1-activations[L]), 1-Y.T))
-
-        ### 正则化
-        regularization_cost = 0
-        for l in range(1, L+1):
-            regularization_cost += np.sum(np.square(param_w[l])) * lambd/(2*m)
-
-        cost = cross_entropy_cost + regularization_cost
-
-        ### initialize backward propagation
-        dA[L] = np.divide(1-Y, 1-activations[L]) - np.divide(Y, activations[L])
-        assert dA[L].shape == (1, m)
-
-        ### 反向传播算法(梯度下降法是通用的优化算法,反向传播法是梯度下降法在深度神经网络上的具体实现方式。)
-        ### backward propagation
-        for l in reversed(range(1, L+1)):
-            if l == L:
-                dz[l] = dA[l] * activations[l] * (1-activations[l])
-            else:
-                dz[l] = dA[l].copy()
-                dz[l][prev_activations[l] <= 0] = 0
-
-            dw[l] = 1/m * np.dot(dz[l], activations[l-1].T) + param_w[l] * lambd/m
-            db[l] = 1/m * np.sum(dz[l], axis=1, keepdims=True)
-            dA[l-1] = np.dot(param_w[l].T, dz[l])
-
-            assert dz[l].shape == prev_activations[l].shape
-            assert dw[l].shape == param_w[l].shape
-            assert db[l].shape == param_b[l].shape
-            assert dA[l-1].shape == activations[l-1].shape
-
-            param_w[l] = param_w[l] - learning_rate * dw[l]
-            param_b[l] = param_b[l] - learning_rate * db[l]
-
-        if i % 100 == 0:
-            print("cost after iteration {}: {}".format(i, cost))
-
-    parameters = {}
-    parameters["param_w"] = param_w
-    parameters["param_b"] = param_b
-    return parameters
-
-def predict(X_new, parameters, threshold=0.5):
-    L = 4
-    param_w = parameters["param_w"]
-    param_b = parameters["param_b"]
-
-    activations = [X_new, ] + [i for i in range(L)]
-    prev_activations = [i for i in range(L + 1)]
-    m = X_new.shape[1]
-
-    for l in range(1, L + 1):
-        prev_activations[l] = np.dot(param_w[l], activations[l - 1]) + param_b[l]
-        if l < L:
-            activations[l] = relu(prev_activations[l])
-        else:
-            activations[l] = sigmoid(prev_activations[l])
-    prediction = (activations[L] > threshold).astype("int")
-    return prediction
-
 def vectorize_sequences(sequences, dimension=10000):
     results = np.zeros((len(sequences), dimension))
@@ -132,26 +29,173 @@ def save(weigths, path="./weigths.npz"):
     np.savez(path, param_w=weigths["param_w"], param_b=weigths["param_b"])
     ### np.savetxt("./weigths.out", weigths["param_w"], fmt='%s')
 
+class FFNNModel():
+    def __init__(self, x, y, batchsize, epoch) -> None:
+        n0 = 10000
+        n1 = 20
+        n2 = 7
+        n3 = 5
+        n4 = 1
+        layers_dims = [n0, n1, n2, n3, n4]    # [10000, 20, 7, 5, 1]
+        L = len(layers_dims) - 1    # 4-layer network, not counting the input layer
+
+        ### initialize weights
+        param_w = [i for i in range(L + 1)]
+        param_b = [i for i in range(L + 1)]
+
+        for l in range(1, L+1):
+            if l < L:
+                param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2 / layers_dims[l - 1])
+            if l == L:
+                param_w[l] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * 0.01
+            param_b[l] = np.zeros((layers_dims[l], 1))
+
+        self.layers_dims = layers_dims
+        self.L = L
+
+        ### store the initialized weights
+        self.param_w = param_w
+        self.param_b = param_b
+
+        self.x = x
+        self.y = y
+        self.batchsize = batchsize
+        self.epoch = epoch
+
+    def load(self, path="./weigths.npz"):
+        weigths = np.load(path, allow_pickle=True)
+
+        self.param_w = weigths["param_w"]
+        self.param_b = weigths["param_b"]
+
+    def train(self) -> None:
+        for i in range(0, self.epoch):
+            cost = self.neural_network()
+            print("###### cost after epoch {}: {} \n".format(i + 1, cost))
+            save({"param_w": self.param_w, "param_b": self.param_b})
+
+    def predict(self, x_test, threshold=0.5):
+        X_new = vectorize_sequences(x_test).T
+        L = self.L
+        param_w = self.param_w
+        param_b = self.param_b
+
+        activations = [X_new, ] + [i for i in range(L)]
+        prev_activations = [i for i in range(L + 1)]
+        m = X_new.shape[1]
+
+        for l in range(1, L + 1):
+            prev_activations[l] = np.dot(param_w[l], activations[l - 1]) + param_b[l]
+            if l < L:
+                activations[l] = relu(prev_activations[l])
+            else:
+                activations[l] = sigmoid(prev_activations[l])
+        prediction = (activations[L] > threshold).astype("int")
+        return prediction
+
+    ##### neural network model
+    def neural_network(self, learning_rate=0.001, lambd=0):
+        totalDataLen = len(self.x)
+        loopSize = int(totalDataLen / self.batchsize)
+
+        rcost = 1.0
+        for i in range(0, loopSize):
+            dataStartIndex = i * self.batchsize
+            dataEndIndex = (i + 1) * self.batchsize
+
+            X = vectorize_sequences(self.x[dataStartIndex:dataEndIndex]).T
+            Y = np.asarray(self.y[dataStartIndex:dataEndIndex]).astype('float32')
+
+            param_w = self.param_w
+            param_b = self.param_b
+
+            layers_dims = self.layers_dims
+            L = self.L
+
+            m = X.shape[1]
+
+            activations = [X, ] + [i for i in range(L)]
+            prev_activations = [i for i in range(L+1)]
+
+            dA = [i for i in range(L+1)]
+            dz = [i for i in range(L+1)]
+            dw = [i for i in range(L+1)]
+            db = [i for i in range(L+1)]
+
+            ### forward propagation
+            for l in range(1, L+1):
+                prev_activations[l] = np.dot(param_w[l], activations[l-1]) + param_b[l]
+                if l < L:
+                    activations[l] = relu(prev_activations[l])
+                else:
+                    activations[l] = sigmoid(prev_activations[l])
+
+            ### cross-entropy cost
+            cross_entropy_cost = -1/m * (np.dot(np.log(activations[L]), Y.T) \
+                                         + np.dot(np.log(1-activations[L]), 1-Y.T))
+
+            ### L2 regularization
+            regularization_cost = 0
+            for l in range(1, L+1):
+                regularization_cost += np.sum(np.square(param_w[l])) * lambd/(2*m)
+
+            cost = cross_entropy_cost + regularization_cost
+
+            ### initialize backward propagation
+            dA[L] = np.divide(1-Y, 1-activations[L]) - np.divide(Y, activations[L])
+            assert dA[L].shape == (1, m)
+
+            ### backpropagation (gradient descent is the general optimization algorithm; backpropagation is how it is carried out on a deep neural network)
+            ### backward propagation
+            for l in reversed(range(1, L+1)):
+                if l == L:
+                    dz[l] = dA[l] * activations[l] * (1-activations[l])
+                else:
+                    dz[l] = dA[l].copy()
+                    dz[l][prev_activations[l] <= 0] = 0
+
+                dw[l] = 1/m * np.dot(dz[l], activations[l-1].T) + param_w[l] * lambd/m
+                db[l] = 1/m * np.sum(dz[l], axis=1, keepdims=True)
+                dA[l-1] = np.dot(param_w[l].T, dz[l])
+
+                assert dz[l].shape == prev_activations[l].shape
+                assert dw[l].shape == param_w[l].shape
+                assert db[l].shape == param_b[l].shape
+                assert dA[l-1].shape == activations[l-1].shape
+
+                param_w[l] = param_w[l] - learning_rate * dw[l]
+                param_b[l] = param_b[l] - learning_rate * db[l]
+
+            if i % 5000 == 4999:
+                print("cost on batch {}: {}".format(i + 1, cost))
+                rcost = cost
+
+            if i == loopSize - 1:
+                rcost = cost
+
+            self.param_w = param_w
+            self.param_b = param_b
+
+        return rcost
+
 (train_data, train_labels), (test_data, test_labels) = imdb.load_data(path="imdb/imdb.npz",num_words=10000)
 
-# Our vectorized training data
-x_train = vectorize_sequences(train_data[:100])
-# Our vectorized test data
-x_test = vectorize_sequences(test_data[:5])
+x_train = train_data[:2000]
+x_test = test_data[:10]
 
-y_train = np.asarray(train_labels[:100]).astype('float32')
-y_test = np.asarray(test_labels[:5]).astype('float32')
+y_train = train_labels[:2000]
+y_test = test_labels[:10]
 
-parameters = neural_network(x_train.T, y_train, num_iterations=1000)
-save(parameters)
-weigths = load()
+model = FFNNModel(x_train, y_train, 16, 100)
+model.load()
+model.train()
 
-y_p = predict(x_test.T, weigths)
-print(y_p)
+y_pre = model.predict(x_test)
 print(y_test)
+print(y_pre)
 
-y_p2 = predict(x_train[:5].T, weigths)
-print(y_p2)
-print(y_train[:5])
+y_pre2 = model.predict(x_train[:10])
+print(y_train[:10])
+print(y_pre2)
 
 sys.exit()
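Note on the backward pass above: the code seeds backprop with dA[L] = (1-Y)/(1-activations[L]) - Y/activations[L] and then multiplies by the sigmoid derivative activations[L] * (1-activations[L]). A minimal numerical sketch of why this equals the usual shortcut activations[L] - Y; the names a and y below are illustrative stand-ins, not identifiers from the module:

import numpy as np

np.random.seed(0)
a = np.random.rand(1, 8) * 0.98 + 0.01    # stand-in sigmoid outputs, kept inside (0, 1)
y = np.random.randint(0, 2, size=(1, 8))  # stand-in binary labels

# the two steps used in neural_network()
dA = np.divide(1 - y, 1 - a) - np.divide(y, a)
dz = dA * a * (1 - a)

# algebraically, ((1-y)/(1-a) - y/a) * a * (1-a) = a - y
assert np.allclose(dz, a - y)
print(dz - (a - y))    # ~0 everywhere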
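The diff only shows the first line of vectorize_sequences, yet FFNNModel hard-codes n0 = 10000 to match its dimension argument. A hedged sketch of the multi-hot encoding this implies; vectorize_sequences_sketch and its loop body are assumptions for illustration, not the module's actual implementation:

import numpy as np

def vectorize_sequences_sketch(sequences, dimension=10000):
    # one row per review, one column per word index; set occurring indices to 1
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.0
    return results

# two toy "reviews" given as lists of word indices, encoded against an 8-word vocabulary
batch = [[0, 3, 5], [1, 3]]
x = vectorize_sequences_sketch(batch, dimension=8)
print(x.shape)      # (2, 8): samples are rows here
print(x.T.shape)    # (8, 2): FFNNModel transposes so each column is one sample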