BackPropagation

Zhao Cong

图就不给了,自己跑一下吧

Back Propagation

Import

1
2
3
4
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

Load Data

1
2
3
# Generate a reproducible 1000-sample "two moons" toy dataset with noise 0.2.
X, y = datasets.make_moons(n_samples=1000, noise=0.2, random_state=100)

# Hold out 30% of the samples for evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Report the feature-matrix and label-vector shapes.
print(X.shape, y.shape)
(1000, 2) (1000,)
1
2
3
4
5
6
7
def make_plot(X, y, plot_name):
    """Scatter-plot the samples of classes 0 and 1 on a new 12x8 figure.

    :param X: feature array; columns 0 and 1 are used as the plot axes
    :param y: label array aligned with the rows of ``X``
    :param str plot_name: figure title
    """
    plt.figure(figsize=(12, 8))
    plt.title(plot_name, fontsize=30)
    # One scatter call per class so each class gets its own default colour.
    for label in (0, 1):
        members = y == label
        plt.scatter(X[members, 0], X[members, 1])

# Visualize the full dataset (both classes) before any training.
make_plot(X, y, "Classification Dataset Visualization ")

Network

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Dense Layer
class Layer:
    """Fully connected layer computing ``activation(X @ weights + bias)``.

    Supported activation names are ``None`` (identity), ``'relu'``,
    ``'sigmoid'`` and ``'tanh'``.
    """

    def __init__(self, n_input, n_output, activation=None, weights=None, bias=None):
        """
        :param int n_input: number of input nodes
        :param int n_output: number of output nodes
        :param str activation: activation function name, or None for identity
        :param weights: weight array of shape (n_input, n_output); generated
            randomly inside the class when omitted
        :param bias: bias array of shape (n_output,); generated randomly
            inside the class when omitted
        """
        # Draw weights before bias so the random stream is consumed in a
        # fixed order when both are generated.
        if weights is None:
            weights = np.random.randn(n_input, n_output) * np.sqrt(1 / n_output)
        if bias is None:
            bias = np.random.rand(n_output) * 0.1
        self.weights = weights
        self.bias = bias
        self.activation = activation
        self.activation_output = None  # output of the most recent forward pass
        self.error = None  # filled in by the network during back-propagation
        self.delta = None  # filled in by the network during back-propagation

    def activate(self, X):
        """Run the forward pass, cache the activated output and return it."""
        r = np.dot(X, self.weights) + self.bias
        self.activation_output = self.apply_activation(r)
        return self.activation_output

    def apply_activation(self, r):
        """Apply the configured activation function to pre-activation ``r``.

        :raises ValueError: if ``self.activation`` is not a supported name
        """
        if self.activation is None:
            return r
        if self.activation == 'relu':
            return np.maximum(r, 0)
        if self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-r))
        if self.activation == 'tanh':
            return np.tanh(r)
        # Fail loudly instead of silently returning None, which would only
        # surface later as an unrelated TypeError.
        raise ValueError(f"Unsupported activation: {self.activation!r}")

    def apply_activation_derivative(self, r):
        """Return the activation derivative given the ACTIVATED output ``r``.

        The sigmoid and tanh formulas below are expressed in terms of the
        activation's output, so callers must pass the post-activation value.

        :raises ValueError: if ``self.activation`` is not a supported name
        """
        if self.activation is None:
            return 1
        if self.activation == 'relu':
            # 1 where the unit was active, 0 elsewhere (including exactly 0).
            outputs = np.asarray(r)
            return (outputs > 0).astype(outputs.dtype)
        if self.activation == 'sigmoid':
            return r * (1 - r)
        if self.activation == 'tanh':
            return 1 - r ** 2
        raise ValueError(f"Unsupported activation: {self.activation!r}")

Model

1
# Notebook sanity check: shape of the flattened test-label array.
y_test.flatten().shape
(300,)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class NeuralNetwork:
    """Sequential stack of dense layers trained by per-sample gradient descent.

    Each layer object must provide ``weights``, ``bias``,
    ``activation_output``, ``error`` and ``delta`` attributes plus the
    ``activate`` and ``apply_activation_derivative`` methods.
    """

    def __init__(self):
        self.layers = []

    def add_layer(self, layer):
        """Append ``layer`` as the new last (output-side) layer."""
        self.layers.append(layer)

    def feed_forward(self, X):
        """Forward pass: feed ``X`` through every layer in order."""
        for layer in self.layers:
            X = layer.activate(X)
        return X

    def back_propagation(self, X, y, learning_rate):
        """Run one gradient-descent step on a single sample.

        :param X: feature vector of one sample (1-D)
        :param y: target vector of that sample (same length as the output)
        :param float learning_rate: step size for the parameter update
        """
        output = self.feed_forward(X)
        last_index = len(self.layers) - 1

        # Walk the layers from output to input so every delta is ready
        # before the layer in front of it needs it.
        for i in reversed(range(len(self.layers))):
            layer = self.layers[i]
            if i == last_index:
                # Output layer: derivative of the squared error w.r.t. the
                # activated output.
                layer.error = output - y
                layer.delta = layer.error * layer.apply_activation_derivative(output)
            else:
                # Hidden layer: the loss derivative is a sum over the next
                # layer's units, hence the dot product.
                next_layer = self.layers[i + 1]
                layer.error = np.dot(next_layer.weights, next_layer.delta)
                layer.delta = layer.error * layer.apply_activation_derivative(
                    layer.activation_output
                )

        # Apply the updates only after all deltas are known, so the backward
        # pass above always sees the pre-update weights.
        layer_input = X
        for layer in self.layers:
            o_i = np.atleast_2d(layer_input)  # activated output of the previous layer
            layer.weights -= layer.delta * o_i.T * learning_rate
            # The bias gradient is the delta itself. Rebinding (not in-place)
            # keeps caller-supplied scalar or integer biases working.
            layer.bias = layer.bias - layer.delta * learning_rate
            layer_input = layer.activation_output

    @staticmethod
    def _one_hot(labels, n_classes):
        """Return a (len(labels), n_classes) one-hot matrix for integer labels."""
        encoded = np.zeros((labels.shape[0], n_classes))
        encoded[np.arange(labels.shape[0]), labels] = 1
        return encoded

    def _evaluate(self, X, y_onehot, labels):
        """Return (MSE, accuracy in percent) for one dataset using one forward pass."""
        outputs = self.feed_forward(X)
        mse = np.mean(np.square(y_onehot - outputs))
        acc = self.accuracy(np.argmax(outputs, axis=1), labels) * 100
        return mse, acc

    def train(self, X_train, X_test, y_train, y_test, learning_rate, max_epochs):
        """Train for ``max_epochs`` epochs and record metrics after each one.

        :param X_train: training features, one sample per row
        :param X_test: validation features, one sample per row
        :param y_train: integer class labels for ``X_train``
        :param y_test: integer class labels for ``X_test``
        :param float learning_rate: step size passed to ``back_propagation``
        :param int max_epochs: number of passes over the training set
        :return: ``(train_mses, test_mses, train_accs, test_accs)``, four
            lists with one entry per epoch (accuracies are percentages)
        """
        # One-hot width follows the output layer (2 when no layer exists yet).
        n_classes = self.layers[-1].weights.shape[1] if self.layers else 2
        y_train_onehot = self._one_hot(y_train, n_classes)
        y_test_onehot = self._one_hot(y_test, n_classes)
        train_labels = y_train.flatten()
        test_labels = y_test.flatten()

        train_mses = []
        test_mses = []
        train_accs = []
        test_accs = []

        for epoch in range(max_epochs):
            # One update per training sample, in dataset order.
            for sample, target in zip(X_train, y_train_onehot):
                self.back_propagation(sample, target, learning_rate)

            train_mse, train_acc = self._evaluate(X_train, y_train_onehot, train_labels)
            train_mses.append(train_mse)
            train_accs.append(train_acc)

            test_mse, test_acc = self._evaluate(X_test, y_test_onehot, test_labels)
            test_mses.append(test_mse)
            test_accs.append(test_acc)

            print(f'Epoch #{epoch}:')
            print(f' Train MSE: {train_mse:.4f}, Accuracy: {train_acc:.2f}%')
            print(f' Valid MSE: {test_mse:.4f}, Accuracy: {test_acc:.2f}%')

        return train_mses, test_mses, train_accs, test_accs

    @staticmethod
    def accuracy(y_predict, y_test):
        """Return the fraction of predictions equal to the true labels."""
        return np.sum(y_predict == y_test) / len(y_test)

    def predict(self, X_predict):
        """Return the index of the largest output unit for every input row."""
        y_predict = self.feed_forward(X_predict)
        y_predict = np.argmax(y_predict, axis=1)
        return y_predict

Train

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def plot_metrics(train_mses, test_mses, train_accs, test_accs):
    """Plot per-epoch loss and accuracy curves side by side and show them.

    :param train_mses: training MSE values, one per epoch
    :param test_mses: validation MSE values, one per epoch
    :param train_accs: training accuracies, one per epoch
    :param test_accs: validation accuracies, one per epoch
    """
    # (subplot index, train curve/label/colour, valid curve/label/colour,
    #  y-axis label, title) -- left panel is the loss, right is the accuracy.
    panels = (
        (1, train_mses, 'Train MSE', 'royalblue',
         test_mses, 'Valid MSE', 'crimson',
         'MSE', 'Training vs Validation Loss'),
        (2, train_accs, 'Train Acc', 'darkgreen',
         test_accs, 'Valid Acc', 'orange',
         'Accuracy (%)', 'Training vs Validation Accuracy'),
    )

    plt.figure(figsize=(12, 5))
    for (position, train_curve, train_label, train_color,
         valid_curve, valid_label, valid_color, y_label, title) in panels:
        plt.subplot(1, 2, position)
        plt.plot(train_curve, label=train_label, c=train_color, lw=1.5)
        plt.plot(valid_curve, label=valid_label, c=valid_color, ls='--', lw=1.5)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.grid(True, alpha=0.3)
        plt.legend()

    plt.tight_layout()
    plt.show()
1
2
3
4
5
6
7
8
9
10
# Build a 2 -> 25 -> 50 -> 25 -> 2 network with sigmoid activations throughout.
nn = NeuralNetwork()
nn.add_layer(Layer(n_input=2, n_output=25, activation='sigmoid'))   # hidden layer 1
nn.add_layer(Layer(n_input=25, n_output=50, activation='sigmoid'))  # hidden layer 2
nn.add_layer(Layer(n_input=50, n_output=25, activation='sigmoid'))  # hidden layer 3
nn.add_layer(Layer(n_input=25, n_output=2, activation='sigmoid'))   # output layer

# Train for 50 epochs at learning rate 0.1, then plot the recorded curves.
train_mses, test_mses, train_accs, test_accs = nn.train(
    X_train,
    X_test,
    y_train,
    y_test,
    learning_rate=0.1,
    max_epochs=50,
)
plot_metrics(train_mses, test_mses, train_accs, test_accs)

Epoch #0:
  Train MSE: 0.2485, Accuracy: 49.29%
  Valid MSE: 0.2480, Accuracy: 51.67%
Epoch #1:
  Train MSE: 0.2433, Accuracy: 73.71%
  Valid MSE: 0.2430, Accuracy: 76.00%
Epoch #2:
  Train MSE: 0.2237, Accuracy: 76.14%
  Valid MSE: 0.2239, Accuracy: 77.67%
Epoch #3:
  Train MSE: 0.1498, Accuracy: 78.71%
  Valid MSE: 0.1534, Accuracy: 76.67%
Epoch #4:
  Train MSE: 0.1120, Accuracy: 83.29%
  Valid MSE: 0.1223, Accuracy: 81.67%
Epoch #5:
  Train MSE: 0.0966, Accuracy: 85.71%
  Valid MSE: 0.1103, Accuracy: 84.00%
Epoch #6:
  Train MSE: 0.0902, Accuracy: 87.00%
  Valid MSE: 0.1056, Accuracy: 85.00%
Epoch #7:
  Train MSE: 0.0886, Accuracy: 87.29%
  Valid MSE: 0.1051, Accuracy: 85.00%
Epoch #8:
  Train MSE: 0.0882, Accuracy: 87.29%
  Valid MSE: 0.1052, Accuracy: 85.00%
Epoch #9:
  Train MSE: 0.0880, Accuracy: 87.14%
  Valid MSE: 0.1053, Accuracy: 84.67%
Epoch #10:
  Train MSE: 0.0879, Accuracy: 87.14%
  Valid MSE: 0.1052, Accuracy: 84.67%
Epoch #11:
  Train MSE: 0.0877, Accuracy: 87.14%
  Valid MSE: 0.1051, Accuracy: 84.67%
Epoch #12:
  Train MSE: 0.0876, Accuracy: 87.14%
  Valid MSE: 0.1048, Accuracy: 85.00%
Epoch #13:
  Train MSE: 0.0874, Accuracy: 87.29%
  Valid MSE: 0.1046, Accuracy: 85.00%
Epoch #14:
  Train MSE: 0.0872, Accuracy: 87.29%
  Valid MSE: 0.1043, Accuracy: 85.00%
Epoch #15:
  Train MSE: 0.0870, Accuracy: 87.43%
  Valid MSE: 0.1040, Accuracy: 84.67%
Epoch #16:
  Train MSE: 0.0869, Accuracy: 87.57%
  Valid MSE: 0.1037, Accuracy: 85.00%
Epoch #17:
  Train MSE: 0.0867, Accuracy: 87.71%
  Valid MSE: 0.1034, Accuracy: 85.33%
Epoch #18:
  Train MSE: 0.0866, Accuracy: 88.14%
  Valid MSE: 0.1031, Accuracy: 85.67%
Epoch #19:
  Train MSE: 0.0865, Accuracy: 88.14%
  Valid MSE: 0.1029, Accuracy: 85.67%
Epoch #20:
  Train MSE: 0.0864, Accuracy: 88.00%
  Valid MSE: 0.1026, Accuracy: 85.67%
Epoch #21:
  Train MSE: 0.0863, Accuracy: 88.00%
  Valid MSE: 0.1024, Accuracy: 85.67%
Epoch #22:
  Train MSE: 0.0862, Accuracy: 88.00%
  Valid MSE: 0.1022, Accuracy: 85.67%
Epoch #23:
  Train MSE: 0.0861, Accuracy: 88.14%
  Valid MSE: 0.1020, Accuracy: 85.67%
Epoch #24:
  Train MSE: 0.0861, Accuracy: 88.14%
  Valid MSE: 0.1018, Accuracy: 85.67%
Epoch #25:
  Train MSE: 0.0860, Accuracy: 88.00%
  Valid MSE: 0.1017, Accuracy: 85.67%
Epoch #26:
  Train MSE: 0.0860, Accuracy: 88.00%
  Valid MSE: 0.1015, Accuracy: 85.67%
Epoch #27:
  Train MSE: 0.0859, Accuracy: 88.00%
  Valid MSE: 0.1014, Accuracy: 86.00%
Epoch #28:
  Train MSE: 0.0859, Accuracy: 88.14%
  Valid MSE: 0.1013, Accuracy: 86.00%
Epoch #29:
  Train MSE: 0.0859, Accuracy: 88.14%
  Valid MSE: 0.1012, Accuracy: 86.33%
Epoch #30:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1011, Accuracy: 86.33%
Epoch #31:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1010, Accuracy: 86.33%
Epoch #32:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1009, Accuracy: 86.00%
Epoch #33:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1008, Accuracy: 86.00%
Epoch #34:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1007, Accuracy: 86.00%
Epoch #35:
  Train MSE: 0.0857, Accuracy: 88.00%
  Valid MSE: 0.1006, Accuracy: 86.00%
Epoch #36:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1006, Accuracy: 86.00%
Epoch #37:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1005, Accuracy: 86.00%
Epoch #38:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1004, Accuracy: 86.00%
Epoch #39:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1004, Accuracy: 86.00%
Epoch #40:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1003, Accuracy: 86.00%
Epoch #41:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1002, Accuracy: 86.00%
Epoch #42:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1002, Accuracy: 86.00%
Epoch #43:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1001, Accuracy: 86.00%
Epoch #44:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1001, Accuracy: 86.00%
Epoch #45:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1000, Accuracy: 86.00%
Epoch #46:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1000, Accuracy: 85.67%
Epoch #47:
  Train MSE: 0.0855, Accuracy: 88.00%
  Valid MSE: 0.0999, Accuracy: 85.67%
Epoch #48:
  Train MSE: 0.0855, Accuracy: 88.00%
  Valid MSE: 0.0999, Accuracy: 85.67%
Epoch #49:
  Train MSE: 0.0855, Accuracy: 88.00%
  Valid MSE: 0.0998, Accuracy: 85.67%
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def plot_decision_boundary(model, axis):
    """Shade the regions of the plane that ``model`` assigns to each class.

    Draws on the current matplotlib figure; it neither creates nor shows one.

    :param model: object whose ``predict`` accepts an (n_points, 2) array and
        returns one class label per row
    :param axis: sequence ``[x_min, x_max, y_min, y_max]`` giving the region
        to cover; the grid uses 100 points per unit along each axis
    """
    from matplotlib.colors import ListedColormap

    x_min, x_max, y_min, y_max = axis[0], axis[1], axis[2], axis[3]

    # np.meshgrid takes the 1-D coordinate vectors directly.
    x0, x1 = np.meshgrid(
        np.linspace(x_min, x_max, int((x_max - x_min) * 100)),
        np.linspace(y_min, y_max, int((y_max - y_min) * 100)),
    )
    X_new = np.c_[x0.ravel(), x1.ravel()]

    # Classify every grid point, then fold the labels back onto the grid.
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF590', '#90CAF9'])

    # contourf does not use a ``linewidth`` keyword (it only triggered an
    # "unused kwargs" warning), so it is not passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
1
2
3
4
# Draw the trained network's decision regions over x in [-2, 2.5] and
# y in [-1, 2], then overlay the samples of class 0 and class 1.
plt.figure(figsize=(12, 8))
plot_decision_boundary(nn, [-2, 2.5, -1, 2])
plt.scatter(X[y==0, 0], X[y==0, 1])
plt.scatter(X[y==1, 0], X[y==1, 1])