ZC's Blog

BackPropagation

Zhao Cong

Created 2025-04-17 22:25:52 · Updated 2025-04-17 22:34:53

DeepLearning

DL

1.9k Words 11 Mins

图就不给了，自己跑一下吧

Back Propagation

Import

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

Load Data

1
2
3

# Generate the two-moons dataset and hold out 30% of it for evaluation.
X, y = datasets.make_moons(n_samples=1000, noise=0.2, random_state=100)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
print(X.shape, y.shape)

(1000, 2) (1000,)

def make_plot(X, y, plot_name):
    """
    Scatter-plot the samples labelled 0 and 1 on a new 12x8 figure.

    :param X: sample array; column 0 is drawn on the x axis, column 1 on the y axis
    :param y: label array used to build the boolean masks ``y == 0`` and ``y == 1``
    :param str plot_name: text used as the figure title
    """
    plt.figure(figsize=(12, 8))
    plt.title(plot_name, fontsize=30)
    # One scatter call per class, class 0 first.
    for label in (0, 1):
        members = y == label
        plt.scatter(X[members, 0], X[members, 1])

# Visualise the full dataset before training.
make_plot(X, y, plot_name="Classification Dataset Visualization ")

Network

# Dense Layer
class Layer:
    """
    Fully connected layer computing ``activation(X @ weights + bias)``.

    Supported activation names are ``None`` (identity), ``'relu'``,
    ``'sigmoid'`` and ``'tanh'``.
    """

    def __init__(self, n_input, n_output, activation=None, weights=None, bias=None):
        """
        :param int n_input: number of input nodes
        :param int n_output: number of output nodes
        :param str activation: activation name (None, 'relu', 'sigmoid' or 'tanh')
        :param weights: weight array; generated internally when None
        :param bias: bias array; generated internally when None
        """
        # Default weights: standard normal draws of shape (n_input, n_output),
        # scaled by sqrt(1 / n_output).
        self.weights = weights if weights is not None else np.random.randn(n_input, n_output) * np.sqrt(1 / n_output)
        # Default bias: uniform draws in [0, 0.1).
        self.bias = bias if bias is not None else np.random.rand(n_output) * 0.1
        self.activation = activation
        # Output of the most recent activate() call; read during back-propagation.
        self.activation_output = None
        # Scratch slots for gradients; this class never writes them itself.
        self.error = None
        self.delta = None

    def activate(self, X):
        """
        Run the forward pass for this layer and cache the result.

        :param X: input array whose last dimension matches the rows of ``weights``
        :return: the activated output, also stored in ``activation_output``
        """
        r = np.dot(X, self.weights) + self.bias
        self.activation_output = self.apply_activation(r)
        return self.activation_output

    def apply_activation(self, r):
        """
        Apply the configured activation function to the pre-activation ``r``.

        :param r: pre-activation values
        :return: activated values
        :raises ValueError: if ``self.activation`` is not a supported name
        """
        if self.activation is None:
            return r
        if self.activation == 'relu':
            return np.maximum(r, 0)
        if self.activation == 'sigmoid':
            # Equivalent to 1 / (1 + exp(-r)) but written through logaddexp so
            # that large negative inputs do not overflow inside exp().
            return np.exp(-np.logaddexp(0, -r))
        if self.activation == 'tanh':
            return np.tanh(r)
        # Previously an unknown name fell through and returned None silently.
        raise ValueError(f"Unsupported activation: {self.activation!r}")

    def apply_activation_derivative(self, r):
        """
        Derivative of the activation, expressed in terms of the activated output.

        :param r: the layer's activated output (not the pre-activation)
        :return: derivative values; the scalar 1 for the identity activation
        :raises ValueError: if ``self.activation`` is not a supported name
        """
        if self.activation is None:
            return 1
        if self.activation == 'relu':
            grad = np.array(r, copy=True)
            grad[r < 0] = 0
            grad[r > 0] = 1
            return grad
        if self.activation == 'sigmoid':
            # sigmoid'(z) = s * (1 - s) where s is the sigmoid output.
            return r * (1 - r)
        if self.activation == 'tanh':
            # tanh'(z) = 1 - t**2 where t is the tanh output.
            return 1 - r ** 2
        raise ValueError(f"Unsupported activation: {self.activation!r}")

Model

y_test.flatten().shape

(300,)

class NeuralNetwork:
    """
    Sequential stack of layers trained one sample at a time by gradient descent.

    Each layer is expected to provide ``activate`` and
    ``apply_activation_derivative`` methods plus ``weights``, ``bias``,
    ``activation_output``, ``error`` and ``delta`` attributes.
    """

    def __init__(self):
        self.layers = []

    def add_layer(self, layer):
        """Append ``layer`` to the end of the stack (it becomes the output layer)."""
        self.layers.append(layer)

    def feed_forward(self, X):
        """
        Forward pass: feed ``X`` through every layer in order.

        :param X: input sample (1-D) or batch of samples (2-D)
        :return: output of the last layer
        """
        for layer in self.layers:
            X = layer.activate(X)
        return X

    def back_propagation(self, X, y, learning_rate):
        """
        Run one gradient-descent step on a single sample.

        The output error ``-(y - output)`` is the gradient of
        ``0.5 * sum((y - output) ** 2)`` with respect to the network output.

        :param X: one input sample as a 1-D array
        :param y: target vector with the same shape as the network output
        :param learning_rate: step size applied to both weights and biases
        """
        output = self.feed_forward(X)
        last = len(self.layers) - 1
        # Walk the layers backwards, starting from the output layer.
        for i in reversed(range(len(self.layers))):
            layer = self.layers[i]
            if i == last:
                # Gradient of the loss w.r.t. the activated output.
                layer.error = -(y - output)
                # Gradient of the loss w.r.t. the pre-activation output.
                layer.delta = layer.error * layer.apply_activation_derivative(output)
            else:
                next_layer = self.layers[i + 1]
                # Sum of the next layer's deltas weighted by the connecting
                # weights, hence the dot product.
                layer.error = np.dot(next_layer.weights, next_layer.delta)
                layer.delta = layer.error * layer.apply_activation_derivative(layer.activation_output)
        # Parameters are updated only after every delta has been computed, so
        # the backward pass above always sees the pre-update weights.
        for i, layer in enumerate(self.layers):
            # Activated output of the previous layer (the raw input for layer 0).
            o_i = np.atleast_2d(X if i == 0 else self.layers[i - 1].activation_output)
            layer.weights -= layer.delta * o_i.T * learning_rate
            # The gradient w.r.t. the bias is the delta itself; without this
            # step the biases would stay at their initial values forever.
            layer.bias = layer.bias - layer.delta * learning_rate

    @staticmethod
    def _one_hot(labels, n_classes):
        """Return an ``(n_samples, n_classes)`` one-hot encoding of integer ``labels``."""
        labels = np.asarray(labels).flatten()
        encoded = np.zeros((labels.shape[0], n_classes))
        encoded[np.arange(labels.shape[0]), labels] = 1
        return encoded

    def _evaluate(self, X, y, y_onehot):
        """Return ``(mse, accuracy_percent)`` for ``X`` using a single forward pass."""
        pred = self.feed_forward(X)
        mse = np.mean(np.square(y_onehot - pred))
        acc = self.accuracy(np.argmax(pred, axis=1), np.asarray(y).flatten()) * 100
        return mse, acc

    def train(self, X_train, X_test, y_train, y_test, learning_rate, max_epochs):
        """
        Train with per-sample updates and report metrics after every epoch.

        :param X_train: training samples, one per row
        :param X_test: validation samples, one per row
        :param y_train: integer class labels for ``X_train``
        :param y_test: integer class labels for ``X_test``
        :param learning_rate: step size forwarded to ``back_propagation``
        :param max_epochs: number of passes over the training set
        :return: ``(train_mses, test_mses, train_accs, test_accs)``, one entry per epoch
        """
        # One-hot width follows the output layer's width; an empty network
        # keeps the historical default of two classes.
        n_classes = self.layers[-1].weights.shape[1] if self.layers else 2
        y_train_onehot = self._one_hot(y_train, n_classes)
        y_test_onehot = self._one_hot(y_test, n_classes)

        train_mses, test_mses = [], []
        train_accs, test_accs = [], []

        for epoch in range(max_epochs):
            for sample, target in zip(X_train, y_train_onehot):
                self.back_propagation(sample, target, learning_rate)

            train_mse, train_acc = self._evaluate(X_train, y_train, y_train_onehot)
            train_mses.append(train_mse)
            train_accs.append(train_acc)

            test_mse, test_acc = self._evaluate(X_test, y_test, y_test_onehot)
            test_mses.append(test_mse)
            test_accs.append(test_acc)

            print(f'Epoch #{epoch}:')
            print(f'  Train MSE: {train_mse:.4f}, Accuracy: {train_acc:.2f}%')
            print(f'  Valid MSE: {test_mse:.4f}, Accuracy: {test_acc:.2f}%')

        return train_mses, test_mses, train_accs, test_accs

    @staticmethod
    def accuracy(y_predict, y_test):
        """Return the fraction of entries in ``y_predict`` equal to ``y_test``."""
        return np.sum(y_predict == y_test) / len(y_test)

    def predict(self, X_predict):
        """Return, for each row of ``X_predict``, the index of the largest network output."""
        y_predict = self.feed_forward(X_predict)
        y_predict = np.argmax(y_predict, axis=1)
        return y_predict

Train

def plot_metrics(train_mses, test_mses, train_accs, test_accs):
    """
    Draw the loss and accuracy histories side by side and show the figure.

    :param train_mses: per-epoch training MSE values
    :param test_mses: per-epoch validation MSE values
    :param train_accs: per-epoch training accuracy values (percent)
    :param test_accs: per-epoch validation accuracy values (percent)
    """
    # (subplot index, train series, valid series, train label, valid label,
    #  train colour, valid colour, y-axis label, title)
    panels = (
        (1, train_mses, test_mses, 'Train MSE', 'Valid MSE',
         'royalblue', 'crimson', 'MSE', 'Training vs Validation Loss'),
        (2, train_accs, test_accs, 'Train Acc', 'Valid Acc',
         'darkgreen', 'orange', 'Accuracy (%)', 'Training vs Validation Accuracy'),
    )

    plt.figure(figsize=(12, 5))
    for (position, train_series, valid_series, train_label, valid_label,
         train_color, valid_color, y_label, title) in panels:
        plt.subplot(1, 2, position)
        plt.plot(train_series, label=train_label, c=train_color, lw=1.5)
        plt.plot(valid_series, label=valid_label, c=valid_color, ls='--', lw=1.5)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.grid(True, alpha=0.3)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Build a 2 => 25 => 50 => 25 => 2 sigmoid network, train it, and plot the history.
nn = NeuralNetwork()
nn.add_layer(Layer(n_input=2, n_output=25, activation='sigmoid'))   # hidden layer 1
nn.add_layer(Layer(n_input=25, n_output=50, activation='sigmoid'))  # hidden layer 2
nn.add_layer(Layer(n_input=50, n_output=25, activation='sigmoid'))  # hidden layer 3
nn.add_layer(Layer(n_input=25, n_output=2, activation='sigmoid'))   # output layer
train_mses, test_mses, train_accs, test_accs = nn.train(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    learning_rate=0.1,
    max_epochs=50,
)
plot_metrics(train_mses, test_mses, train_accs, test_accs)

Epoch #0:
  Train MSE: 0.2485, Accuracy: 49.29%
  Valid MSE: 0.2480, Accuracy: 51.67%
Epoch #1:
  Train MSE: 0.2433, Accuracy: 73.71%
  Valid MSE: 0.2430, Accuracy: 76.00%
Epoch #2:
  Train MSE: 0.2237, Accuracy: 76.14%
  Valid MSE: 0.2239, Accuracy: 77.67%
Epoch #3:
  Train MSE: 0.1498, Accuracy: 78.71%
  Valid MSE: 0.1534, Accuracy: 76.67%
Epoch #4:
  Train MSE: 0.1120, Accuracy: 83.29%
  Valid MSE: 0.1223, Accuracy: 81.67%
Epoch #5:
  Train MSE: 0.0966, Accuracy: 85.71%
  Valid MSE: 0.1103, Accuracy: 84.00%
Epoch #6:
  Train MSE: 0.0902, Accuracy: 87.00%
  Valid MSE: 0.1056, Accuracy: 85.00%
Epoch #7:
  Train MSE: 0.0886, Accuracy: 87.29%
  Valid MSE: 0.1051, Accuracy: 85.00%
Epoch #8:
  Train MSE: 0.0882, Accuracy: 87.29%
  Valid MSE: 0.1052, Accuracy: 85.00%
Epoch #9:
  Train MSE: 0.0880, Accuracy: 87.14%
  Valid MSE: 0.1053, Accuracy: 84.67%
Epoch #10:
  Train MSE: 0.0879, Accuracy: 87.14%
  Valid MSE: 0.1052, Accuracy: 84.67%
Epoch #11:
  Train MSE: 0.0877, Accuracy: 87.14%
  Valid MSE: 0.1051, Accuracy: 84.67%
Epoch #12:
  Train MSE: 0.0876, Accuracy: 87.14%
  Valid MSE: 0.1048, Accuracy: 85.00%
Epoch #13:
  Train MSE: 0.0874, Accuracy: 87.29%
  Valid MSE: 0.1046, Accuracy: 85.00%
Epoch #14:
  Train MSE: 0.0872, Accuracy: 87.29%
  Valid MSE: 0.1043, Accuracy: 85.00%
Epoch #15:
  Train MSE: 0.0870, Accuracy: 87.43%
  Valid MSE: 0.1040, Accuracy: 84.67%
Epoch #16:
  Train MSE: 0.0869, Accuracy: 87.57%
  Valid MSE: 0.1037, Accuracy: 85.00%
Epoch #17:
  Train MSE: 0.0867, Accuracy: 87.71%
  Valid MSE: 0.1034, Accuracy: 85.33%
Epoch #18:
  Train MSE: 0.0866, Accuracy: 88.14%
  Valid MSE: 0.1031, Accuracy: 85.67%
Epoch #19:
  Train MSE: 0.0865, Accuracy: 88.14%
  Valid MSE: 0.1029, Accuracy: 85.67%
Epoch #20:
  Train MSE: 0.0864, Accuracy: 88.00%
  Valid MSE: 0.1026, Accuracy: 85.67%
Epoch #21:
  Train MSE: 0.0863, Accuracy: 88.00%
  Valid MSE: 0.1024, Accuracy: 85.67%
Epoch #22:
  Train MSE: 0.0862, Accuracy: 88.00%
  Valid MSE: 0.1022, Accuracy: 85.67%
Epoch #23:
  Train MSE: 0.0861, Accuracy: 88.14%
  Valid MSE: 0.1020, Accuracy: 85.67%
Epoch #24:
  Train MSE: 0.0861, Accuracy: 88.14%
  Valid MSE: 0.1018, Accuracy: 85.67%
Epoch #25:
  Train MSE: 0.0860, Accuracy: 88.00%
  Valid MSE: 0.1017, Accuracy: 85.67%
Epoch #26:
  Train MSE: 0.0860, Accuracy: 88.00%
  Valid MSE: 0.1015, Accuracy: 85.67%
Epoch #27:
  Train MSE: 0.0859, Accuracy: 88.00%
  Valid MSE: 0.1014, Accuracy: 86.00%
Epoch #28:
  Train MSE: 0.0859, Accuracy: 88.14%
  Valid MSE: 0.1013, Accuracy: 86.00%
Epoch #29:
  Train MSE: 0.0859, Accuracy: 88.14%
  Valid MSE: 0.1012, Accuracy: 86.33%
Epoch #30:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1011, Accuracy: 86.33%
Epoch #31:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1010, Accuracy: 86.33%
Epoch #32:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1009, Accuracy: 86.00%
Epoch #33:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1008, Accuracy: 86.00%
Epoch #34:
  Train MSE: 0.0858, Accuracy: 88.14%
  Valid MSE: 0.1007, Accuracy: 86.00%
Epoch #35:
  Train MSE: 0.0857, Accuracy: 88.00%
  Valid MSE: 0.1006, Accuracy: 86.00%
Epoch #36:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1006, Accuracy: 86.00%
Epoch #37:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1005, Accuracy: 86.00%
Epoch #38:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1004, Accuracy: 86.00%
Epoch #39:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1004, Accuracy: 86.00%
Epoch #40:
  Train MSE: 0.0857, Accuracy: 87.86%
  Valid MSE: 0.1003, Accuracy: 86.00%
Epoch #41:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1002, Accuracy: 86.00%
Epoch #42:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1002, Accuracy: 86.00%
Epoch #43:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1001, Accuracy: 86.00%
Epoch #44:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1001, Accuracy: 86.00%
Epoch #45:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1000, Accuracy: 86.00%
Epoch #46:
  Train MSE: 0.0856, Accuracy: 88.00%
  Valid MSE: 0.1000, Accuracy: 85.67%
Epoch #47:
  Train MSE: 0.0855, Accuracy: 88.00%
  Valid MSE: 0.0999, Accuracy: 85.67%
Epoch #48:
  Train MSE: 0.0855, Accuracy: 88.00%
  Valid MSE: 0.0999, Accuracy: 85.67%
Epoch #49:
  Train MSE: 0.0855, Accuracy: 88.00%
  Valid MSE: 0.0998, Accuracy: 85.67%

def plot_decision_boundary(model, axis):
    """
    Fill the current axes with the class regions predicted by ``model``.

    :param model: object whose ``predict`` accepts an (n, 2) array of points
        and returns one label per point
    :param axis: ``[x_min, x_max, y_min, y_max]``; the grid has 100 samples per
        unit of length along each direction
    """
    from matplotlib.colors import ListedColormap

    x_min, x_max, y_min, y_max = axis[:4]
    # meshgrid takes 1-D coordinate vectors directly; no reshape is needed.
    x0, x1 = np.meshgrid(
        np.linspace(x_min, x_max, int((x_max - x_min) * 100)),
        np.linspace(y_min, y_max, int((y_max - y_min) * 100)),
    )
    X_new = np.c_[x0.ravel(), x1.ravel()]

    # Predict every grid point, then fold the flat labels back into grid shape.
    zz = model.predict(X_new).reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF590', '#90CAF9'])

    # `linewidth` is not a contourf option (filled contours draw no lines),
    # so it is no longer passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)

# Draw the learned decision regions, then overlay both classes of the dataset.
plt.figure(figsize=(12, 8))
plot_decision_boundary(nn, axis=[-2, 2.5, -1, 2])
plt.scatter(*X[y == 0].T)
plt.scatter(*X[y == 1].T)

#DL

推荐阅读

TEST

Multivariate distributions and independence

Multivariate distributions and independence

Multivariate discrete distributions and independence

Multivariate discrete distributions and independence

推荐阅读

TEST

Multivariate distributions and independence

Multivariate distributions and independence

On this page

BackPropagation

Back Propagation