每日 Python 训练：三层 BP 算法

原创 Zixin Haw 2023-03-22

466

每日 Python 训练：三层 BP 算法

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import OneHotEncoder
from pandas.plotting import radviz

######################### Plot the raw data for a first look #######################
iris = pd.read_csv(r"Iris.csv")
# Drop the Id column; it is not a feature.
iris.drop("Id", axis=1, inplace=True)
# Remove duplicated rows.
iris.drop_duplicates(inplace=True)
# Encode the three species as 0, 1, 2 in the order setosa, versicolor,
# virginica -- the same order as the `labels` list in result_visualization.
iris['Species'] = iris["Species"].map({"Iris-setosa": 0, 'Iris-versicolor': 1, "Iris-virginica": 2})

# read_csv has already consumed the header line, so every remaining row is a
# sample; slicing from row 1 would silently drop the first one.
X = iris.iloc[:, 0:4]
Y = iris.iloc[:, 4]

# Use feature columns 0 and 2 as the x and y axes; any other pair works too.
x_axis = X.iloc[:, 0]
y_axis = X.iloc[:, 2]

# Passing the numeric class codes as `c` colours the points per class.
plt.scatter(x_axis, y_axis, c=Y)
plt.show()

def initialize_parameters(n_x, n_h, n_y, seed=2, scale=0.01):
    """Create the initial weights and biases of the three-layer network.

    Weights are drawn from a standard normal distribution and multiplied by
    ``scale``; biases start at zero.

    Parameters
    ----------
    n_x : int
        Number of input-layer nodes.
    n_h : int
        Number of hidden-layer nodes.
    n_y : int
        Number of output-layer nodes.
    seed : int or None, optional
        Value passed to ``np.random.seed`` before sampling. Defaults to 2,
        the value that used to be hard-coded. ``None`` skips the reseeding.
    scale : float, optional
        Multiplier applied to the sampled weights (default 0.01).

    Returns
    -------
    dict
        ``w1`` with shape (n_h, n_x), ``b1`` with shape (n_h, 1),
        ``w2`` with shape (n_y, n_h) and ``b2`` with shape (n_y, 1).
    """
    if seed is not None:
        # Reseeds NumPy's global generator so runs are reproducible.
        np.random.seed(seed)
    w1 = np.random.randn(n_h, n_x) * scale
    b1 = np.zeros(shape=(n_h, 1))
    w2 = np.random.randn(n_y, n_h) * scale
    b2 = np.zeros(shape=(n_y, 1))

    # All parameters travel together in one dictionary.
    return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}

def tanh_(X, w, b):
    """Apply the affine map ``w . X + b`` followed by the tanh activation.

    Parameters
    ----------
    X : array-like
        Layer input; converted with ``np.asarray``.
    w : array-like
        Weight matrix, used as the left operand of ``np.dot(w, X)``.
    b : array-like
        Bias added to the product (NumPy broadcasting applies).

    Returns
    -------
    numpy.ndarray
        ``tanh(w . X + b)``.
    """
    X = np.asarray(X)
    return np.tanh(np.dot(w, X) + b)

def sigmoid_(X, w, b):
    """Apply the affine map ``w . X + b`` followed by the logistic sigmoid.

    Parameters
    ----------
    X : array-like
        Layer input; converted with ``np.asarray``.
    w : array-like
        Weight matrix, used as the left operand of ``np.dot(w, X)``.
    b : array-like
        Bias added to the product (NumPy broadcasting applies).

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-(w . X + b)))`` evaluated without overflow.
    """
    X = np.asarray(X)
    z = np.dot(w, X) + b
    # exp is only ever taken of a non-positive number, so it cannot overflow;
    # the two branches are the same function written for z >= 0 and z < 0.
    e = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

# 1
def forward_propagation(X, parameters):
    """Run one forward pass: tanh hidden layer, sigmoid output layer.

    Parameters
    ----------
    X : array-like
        Network input, used as the right operand of ``np.dot(w1, X)``.
    parameters : dict
        Must contain ``w1``, ``b1``, ``w2`` and ``b2``.

    Returns
    -------
    a2 : numpy.ndarray
        Output-layer activations.
    cache : dict
        ``z1``, ``a1``, ``z2`` and ``a2``, kept for back-propagation.
    """
    w1, b1 = parameters['w1'], parameters['b1']
    w2, b2 = parameters['w2'], parameters['b2']

    # Hidden layer. The bias has a different shape from w1 . X but NumPy
    # broadcasting makes the addition valid.
    z1 = np.dot(w1, X) + b1
    a1 = np.tanh(z1)

    # Output layer: sigmoid written so that exp never receives a positive
    # argument, which avoids overflow for strongly negative z2.
    z2 = np.dot(w2, a1) + b2
    e = np.exp(-np.abs(z2))
    a2 = np.where(z2 >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}

    return a2, cache

# 2
def compute_cost(a2, Y):
    """Compute the mean cross-entropy cost between predictions and targets.

    Parameters
    ----------
    a2 : array-like
        Output-layer activations, same shape as ``Y``.
    Y : array-like
        0/1 targets; ``Y.shape[1]`` (the number of columns) is taken as the
        sample count ``m``.

    Returns
    -------
    float
        ``-sum(Y*log(a2) + (1-Y)*log(1-a2)) / m``.
    """
    # Work in float so ``1 - Y`` is safe even for unsigned integer targets.
    Y = np.asarray(Y, dtype=float)
    m = Y.shape[1]

    # Keep activations strictly inside (0, 1): a saturated output would give
    # log(0) = -inf and 0 * -inf = nan, poisoning the whole cost.
    eps = np.finfo(float).eps
    a2 = np.clip(a2, eps, 1.0 - eps)

    logprobs = np.multiply(np.log(a2), Y) + np.multiply(1.0 - Y, np.log(1.0 - a2))
    cost = -np.sum(logprobs) / m

    return cost

# 3
def backward_propagation(parameters, cache, X, Y):
    """Back-propagate the cost to obtain the gradients of all parameters.

    Parameters
    ----------
    parameters : dict
        Only ``w2`` is read.
    cache : dict
        Only ``a1`` and ``a2`` (from the forward pass) are read.
    X : numpy.ndarray
        Network input that produced ``cache``; ``X.T`` is used.
    Y : numpy.ndarray
        Targets; ``Y.shape[1]`` is taken as the sample count ``m``.

    Returns
    -------
    dict
        ``dw1``, ``db1``, ``dw2`` and ``db2``, each averaged over ``m``.
    """
    m = Y.shape[1]
    w2 = parameters['w2']
    a1 = cache['a1']
    a2 = cache['a2']

    # Output layer error.
    dz2 = a2 - Y
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)

    # Hidden layer error: (1 - a1**2) is the derivative of tanh.
    dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2))
    dw1 = (1 / m) * np.dot(dz1, X.T)
    db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)

    return {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2}

# 4
def update_parameters(parameters, grads, learning_rate=0.4):
    """Take one gradient-descent step.

    Parameters
    ----------
    parameters : dict
        Current ``w1``, ``b1``, ``w2`` and ``b2``.
    grads : dict
        Matching gradients ``dw1``, ``db1``, ``dw2`` and ``db2``.
    learning_rate : float, optional
        Step size (default 0.4).

    Returns
    -------
    dict
        A new dictionary with the updated ``w1``, ``b1``, ``w2`` and ``b2``;
        the input dictionary is not modified.
    """
    # Each gradient is stored under the parameter's name prefixed with 'd'.
    return {
        name: parameters[name] - grads['d' + name] * learning_rate
        for name in ('w1', 'b1', 'w2', 'b2')
    }

# 5
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False,
             learning_rate=0.4):
    """Train the three-layer BP network with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Sample features, passed unchanged to ``forward_propagation``.
    Y : numpy.ndarray
        Sample targets, passed unchanged to the cost and gradient functions.
    n_h : int
        Number of hidden-layer nodes.
    n_input : int
        Number of input-layer nodes.
    n_output : int
        Number of output-layer nodes.
    num_iterations : int, optional
        Number of gradient-descent iterations (default 10000).
    print_cost : bool, optional
        When true, print the cost every 1000 iterations.
    learning_rate : float, optional
        Step size forwarded to ``update_parameters`` (default 0.4).

    Returns
    -------
    dict
        The trained ``w1``, ``b1``, ``w2`` and ``b2``.
    """
    # NOTE: initialize_parameters seeds NumPy again before sampling, so this
    # call does not influence the initial weights; it is kept so the function
    # touches the global random state exactly as before.
    np.random.seed(3)

    # 1. Initialise the parameters.
    parameters = initialize_parameters(n_input, n_h, n_output)

    # Gradient-descent loop.
    for i in range(num_iterations):
        # 2. Forward pass.
        a2, cache = forward_propagation(X, parameters)
        # 3. Back-propagation.
        grads = backward_propagation(parameters, cache, X, Y)
        # 4. Parameter update.
        parameters = update_parameters(parameters, grads, learning_rate)
        # 5. The cost is only needed for reporting, so compute it on demand.
        if print_cost and i % 1000 == 0:
            cost = compute_cost(a2, Y)
            print('迭代第%i次，代价函数为：%f' % (i, cost))

    return parameters

# 6
def predict(parameters, x_test, y_test, label_encoder=None):
    """Classify the test samples, print a report and return the predictions.

    Parameters
    ----------
    parameters : dict
        Trained ``w1``, ``b1``, ``w2`` and ``b2``.
    x_test : numpy.ndarray
        Test features, used as the right operand of ``np.dot(w1, x_test)``.
    y_test : array-like
        True labels, one per column of the network output.
    label_encoder : object, optional
        Fitted one-hot encoder whose ``categories_[0]`` maps output index to
        label. When omitted, a module-level ``encoder`` is used if one exists;
        otherwise the output index itself is returned as the label.

    Returns
    -------
    numpy.ndarray
        Predicted label for every test sample.
    """
    w1, b1 = parameters['w1'], parameters['b1']
    w2, b2 = parameters['w2'], parameters['b2']

    # Same forward pass as in training (tanh hidden layer, sigmoid output),
    # with the sigmoid written so that exp cannot overflow.
    a1 = np.tanh(np.dot(w1, x_test) + b1)
    z2 = np.dot(w2, a1) + b2
    e = np.exp(-np.abs(z2))
    a2 = np.where(z2 >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # Choose the strongest output per sample. Thresholding every output at 0.5
    # can leave a sample with no active class (which the encoder cannot
    # invert) or with several; argmax always yields exactly one class.
    winners = np.argmax(a2, axis=0)

    # Turn output indices back into the original labels.
    if label_encoder is None:
        label_encoder = globals().get('encoder')
    if label_encoder is None:
        output = winners
    else:
        output = np.asarray(label_encoder.categories_[0])[winners]

    print('预测结果：', output)
    print('真实结果：', y_test)

    truth = np.asarray(y_test)
    n_samples = a2.shape[1]
    count = 0
    for k in range(n_samples):
        if output[k] == truth[k]:
            count = count + 1
        else:
            print('错误分类样本的序号：', k + 1)

    # Guard against an empty test set instead of dividing by zero.
    acc = count / n_samples * 100 if n_samples else 0.0
    print('准确率：%.2f%%' % acc)

    return output

# 7
def result_visualization(x_test, y_test, result):
    """Show the true and the predicted classes as two RadViz plots.

    There are four feature dimensions plus the class, five in total, which is
    why a RadViz plot is used.

    Parameters
    ----------
    x_test : numpy.ndarray
        Test features with one sample per column (it is transposed before
        being turned into a table with four feature columns).
    y_test : numpy.ndarray
        True class codes, used as indices into the species names.
    result : sequence
        Predicted class codes, one per entry of ``y_test``.
    """
    feature_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
    labels = ['setosa', 'versicolor', 'virginica']

    # Translate the codes 0, 1, 2 into setosa, versicolor, virginica.
    n_samples = y_test.shape[0]
    y = [labels[y_test[i]] for i in range(n_samples)]
    pre = [labels[result[i]] for i in range(n_samples)]

    # Build the feature table once as floats (radviz needs numeric feature
    # columns) and attach the true / predicted species to separate copies.
    features = pd.DataFrame(np.asarray(x_test, dtype=float).T, columns=feature_names)
    df_real = features.assign(Species=y)
    df_prediction = features.assign(Species=pre)

    # Draw both figures, then display them together.
    plt.figure('真实分类')
    radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.figure('预测分类')
    radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.show()

if __name__ == "__main__":

    # Load the data: the first 120 rows are used for training.
    iris = pd.read_csv('iris_training.csv')
    X = iris.iloc[:120, 0:4].values.T
    Y = iris.iloc[:120, 4].values

    # One-hot encode the labels. `encoder` must stay a module-level name
    # because predict() looks it up globally to decode its output.
    encoder = OneHotEncoder()
    Y = encoder.fit_transform(Y.reshape(Y.shape[0], 1))
    Y = Y.toarray().T
    Y = Y.astype('uint8')

    # Train: 4 input nodes, 10 hidden nodes, 3 output nodes, 10000 iterations.
    start_time = datetime.datetime.now()
    parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    # total_seconds() covers the whole duration; `.microseconds` is only the
    # sub-second component and under-reports any run longer than one second.
    elapsed_ms = round((end_time - start_time).total_seconds() * 1000)
    print("用时：" + str(elapsed_ms) + 'ms')

    # Evaluate the model.
    # NOTE(review): inside np.r_ the open-ended slice `120:` appears to be
    # handed to np.arange with stop=None, which yields 0..119; the selection
    # would then be row 0 plus the 120 training rows rather than rows 120 and
    # later. Left unchanged -- confirm the intended test split.
    x_test = iris.iloc[np.r_[0,120:],0:4].values.T
    y_test = iris.iloc[np.r_[0,120:],4].values

    result = predict(parameters, x_test, y_test)

    # Visualise the classification result.
    result_visualization(x_test, y_test, result)

import matplotlib as mpl

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import OneHotEncoder
from pandas.plotting import radviz


######################### Plot the raw data for a first look #######################
iris = pd.read_csv(r"Iris.csv")
# Drop the Id column; it is not a feature.
iris.drop("Id", axis=1, inplace=True)
# Remove duplicated rows.
iris.drop_duplicates(inplace=True)
# Encode the three species as 0, 1, 2 in the order setosa, versicolor,
# virginica -- the same order as the `labels` list in result_visualization.
iris['Species'] = iris["Species"].map({"Iris-setosa": 0, 'Iris-versicolor': 1, "Iris-virginica": 2})

# read_csv has already consumed the header line, so every remaining row is a
# sample; slicing from row 1 would silently drop the first one.
X = iris.iloc[:, 0:4]
Y = iris.iloc[:, 4]

# Use feature columns 0 and 2 as the x and y axes; any other pair works too.
x_axis = X.iloc[:, 0]
y_axis = X.iloc[:, 2]

# Passing the numeric class codes as `c` colours the points per class.
plt.scatter(x_axis, y_axis, c=Y)
plt.show()

def initialize_parameters(n_x, n_h, n_y, seed=2, scale=0.01):
    """Create the initial weights and biases of the three-layer network.

    Weights are drawn from a standard normal distribution and multiplied by
    ``scale``; biases start at zero.

    Parameters
    ----------
    n_x : int
        Number of input-layer nodes.
    n_h : int
        Number of hidden-layer nodes.
    n_y : int
        Number of output-layer nodes.
    seed : int or None, optional
        Value passed to ``np.random.seed`` before sampling. Defaults to 2,
        the value that used to be hard-coded. ``None`` skips the reseeding.
    scale : float, optional
        Multiplier applied to the sampled weights (default 0.01).

    Returns
    -------
    dict
        ``w1`` with shape (n_h, n_x), ``b1`` with shape (n_h, 1),
        ``w2`` with shape (n_y, n_h) and ``b2`` with shape (n_y, 1).
    """
    if seed is not None:
        # Reseeds NumPy's global generator so runs are reproducible.
        np.random.seed(seed)
    w1 = np.random.randn(n_h, n_x) * scale
    b1 = np.zeros(shape=(n_h, 1))
    w2 = np.random.randn(n_y, n_h) * scale
    b2 = np.zeros(shape=(n_y, 1))

    # All parameters travel together in one dictionary.
    return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}

def tanh_(X, w, b):
    """Apply the affine map ``w . X + b`` followed by the tanh activation.

    Parameters
    ----------
    X : array-like
        Layer input; converted with ``np.asarray``.
    w : array-like
        Weight matrix, used as the left operand of ``np.dot(w, X)``.
    b : array-like
        Bias added to the product (NumPy broadcasting applies).

    Returns
    -------
    numpy.ndarray
        ``tanh(w . X + b)``.
    """
    X = np.asarray(X)
    return np.tanh(np.dot(w, X) + b)


def sigmoid_(X, w, b):
    """Apply the affine map ``w . X + b`` followed by the logistic sigmoid.

    Parameters
    ----------
    X : array-like
        Layer input; converted with ``np.asarray``.
    w : array-like
        Weight matrix, used as the left operand of ``np.dot(w, X)``.
    b : array-like
        Bias added to the product (NumPy broadcasting applies).

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-(w . X + b)))`` evaluated without overflow.
    """
    X = np.asarray(X)
    z = np.dot(w, X) + b
    # exp is only ever taken of a non-positive number, so it cannot overflow;
    # the two branches are the same function written for z >= 0 and z < 0.
    e = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))


# 1
def forward_propagation(X, parameters):
    """Run one forward pass: tanh hidden layer, sigmoid output layer.

    Parameters
    ----------
    X : array-like
        Network input, used as the right operand of ``np.dot(w1, X)``.
    parameters : dict
        Must contain ``w1``, ``b1``, ``w2`` and ``b2``.

    Returns
    -------
    a2 : numpy.ndarray
        Output-layer activations.
    cache : dict
        ``z1``, ``a1``, ``z2`` and ``a2``, kept for back-propagation.
    """
    w1, b1 = parameters['w1'], parameters['b1']
    w2, b2 = parameters['w2'], parameters['b2']

    # Hidden layer. The bias has a different shape from w1 . X but NumPy
    # broadcasting makes the addition valid.
    z1 = np.dot(w1, X) + b1
    a1 = np.tanh(z1)

    # Output layer: sigmoid written so that exp never receives a positive
    # argument, which avoids overflow for strongly negative z2.
    z2 = np.dot(w2, a1) + b2
    e = np.exp(-np.abs(z2))
    a2 = np.where(z2 >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    cache = {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}

    return a2, cache

# 2
def compute_cost(a2, Y):
    """Compute the mean cross-entropy cost between predictions and targets.

    Parameters
    ----------
    a2 : array-like
        Output-layer activations, same shape as ``Y``.
    Y : array-like
        0/1 targets; ``Y.shape[1]`` (the number of columns) is taken as the
        sample count ``m``.

    Returns
    -------
    float
        ``-sum(Y*log(a2) + (1-Y)*log(1-a2)) / m``.
    """
    # Work in float so ``1 - Y`` is safe even for unsigned integer targets.
    Y = np.asarray(Y, dtype=float)
    m = Y.shape[1]

    # Keep activations strictly inside (0, 1): a saturated output would give
    # log(0) = -inf and 0 * -inf = nan, poisoning the whole cost.
    eps = np.finfo(float).eps
    a2 = np.clip(a2, eps, 1.0 - eps)

    logprobs = np.multiply(np.log(a2), Y) + np.multiply(1.0 - Y, np.log(1.0 - a2))
    cost = -np.sum(logprobs) / m

    return cost

# 3
def backward_propagation(parameters, cache, X, Y):
    """Back-propagate the cost to obtain the gradients of all parameters.

    Parameters
    ----------
    parameters : dict
        Only ``w2`` is read.
    cache : dict
        Only ``a1`` and ``a2`` (from the forward pass) are read.
    X : numpy.ndarray
        Network input that produced ``cache``; ``X.T`` is used.
    Y : numpy.ndarray
        Targets; ``Y.shape[1]`` is taken as the sample count ``m``.

    Returns
    -------
    dict
        ``dw1``, ``db1``, ``dw2`` and ``db2``, each averaged over ``m``.
    """
    m = Y.shape[1]
    w2 = parameters['w2']
    a1 = cache['a1']
    a2 = cache['a2']

    # Output layer error.
    dz2 = a2 - Y
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)

    # Hidden layer error: (1 - a1**2) is the derivative of tanh.
    dz1 = np.multiply(np.dot(w2.T, dz2), 1 - np.power(a1, 2))
    dw1 = (1 / m) * np.dot(dz1, X.T)
    db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)

    return {'dw1': dw1, 'db1': db1, 'dw2': dw2, 'db2': db2}


# 4
def update_parameters(parameters, grads, learning_rate=0.4):
    """Take one gradient-descent step.

    Parameters
    ----------
    parameters : dict
        Current ``w1``, ``b1``, ``w2`` and ``b2``.
    grads : dict
        Matching gradients ``dw1``, ``db1``, ``dw2`` and ``db2``.
    learning_rate : float, optional
        Step size (default 0.4).

    Returns
    -------
    dict
        A new dictionary with the updated ``w1``, ``b1``, ``w2`` and ``b2``;
        the input dictionary is not modified.
    """
    # Each gradient is stored under the parameter's name prefixed with 'd'.
    return {
        name: parameters[name] - grads['d' + name] * learning_rate
        for name in ('w1', 'b1', 'w2', 'b2')
    }


# 5
def nn_model(X, Y, n_h, n_input, n_output, num_iterations=10000, print_cost=False,
             learning_rate=0.4):
    """Train the three-layer BP network with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Sample features, passed unchanged to ``forward_propagation``.
    Y : numpy.ndarray
        Sample targets, passed unchanged to the cost and gradient functions.
    n_h : int
        Number of hidden-layer nodes.
    n_input : int
        Number of input-layer nodes.
    n_output : int
        Number of output-layer nodes.
    num_iterations : int, optional
        Number of gradient-descent iterations (default 10000).
    print_cost : bool, optional
        When true, print the cost every 1000 iterations.
    learning_rate : float, optional
        Step size forwarded to ``update_parameters`` (default 0.4).

    Returns
    -------
    dict
        The trained ``w1``, ``b1``, ``w2`` and ``b2``.
    """
    # NOTE: initialize_parameters seeds NumPy again before sampling, so this
    # call does not influence the initial weights; it is kept so the function
    # touches the global random state exactly as before.
    np.random.seed(3)

    # 1. Initialise the parameters.
    parameters = initialize_parameters(n_input, n_h, n_output)

    # Gradient-descent loop.
    for i in range(num_iterations):
        # 2. Forward pass.
        a2, cache = forward_propagation(X, parameters)
        # 3. Back-propagation.
        grads = backward_propagation(parameters, cache, X, Y)
        # 4. Parameter update.
        parameters = update_parameters(parameters, grads, learning_rate)
        # 5. The cost is only needed for reporting, so compute it on demand.
        if print_cost and i % 1000 == 0:
            cost = compute_cost(a2, Y)
            print('迭代第%i次，代价函数为：%f' % (i, cost))

    return parameters


# 6
def predict(parameters, x_test, y_test, label_encoder=None):
    """Classify the test samples, print a report and return the predictions.

    Parameters
    ----------
    parameters : dict
        Trained ``w1``, ``b1``, ``w2`` and ``b2``.
    x_test : numpy.ndarray
        Test features, used as the right operand of ``np.dot(w1, x_test)``.
    y_test : array-like
        True labels, one per column of the network output.
    label_encoder : object, optional
        Fitted one-hot encoder whose ``categories_[0]`` maps output index to
        label. When omitted, a module-level ``encoder`` is used if one exists;
        otherwise the output index itself is returned as the label.

    Returns
    -------
    numpy.ndarray
        Predicted label for every test sample.
    """
    w1, b1 = parameters['w1'], parameters['b1']
    w2, b2 = parameters['w2'], parameters['b2']

    # Same forward pass as in training (tanh hidden layer, sigmoid output),
    # with the sigmoid written so that exp cannot overflow.
    a1 = np.tanh(np.dot(w1, x_test) + b1)
    z2 = np.dot(w2, a1) + b2
    e = np.exp(-np.abs(z2))
    a2 = np.where(z2 >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # Choose the strongest output per sample. Thresholding every output at 0.5
    # can leave a sample with no active class (which the encoder cannot
    # invert) or with several; argmax always yields exactly one class.
    winners = np.argmax(a2, axis=0)

    # Turn output indices back into the original labels.
    if label_encoder is None:
        label_encoder = globals().get('encoder')
    if label_encoder is None:
        output = winners
    else:
        output = np.asarray(label_encoder.categories_[0])[winners]

    print('预测结果：', output)
    print('真实结果：', y_test)

    truth = np.asarray(y_test)
    n_samples = a2.shape[1]
    count = 0
    for k in range(n_samples):
        if output[k] == truth[k]:
            count = count + 1
        else:
            print('错误分类样本的序号：', k + 1)

    # Guard against an empty test set instead of dividing by zero.
    acc = count / n_samples * 100 if n_samples else 0.0
    print('准确率：%.2f%%' % acc)

    return output

# 7
def result_visualization(x_test, y_test, result):
    """Show the true and the predicted classes as two RadViz plots.

    There are four feature dimensions plus the class, five in total, which is
    why a RadViz plot is used.

    Parameters
    ----------
    x_test : numpy.ndarray
        Test features with one sample per column (it is transposed before
        being turned into a table with four feature columns).
    y_test : numpy.ndarray
        True class codes, used as indices into the species names.
    result : sequence
        Predicted class codes, one per entry of ``y_test``.
    """
    feature_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
    labels = ['setosa', 'versicolor', 'virginica']

    # Translate the codes 0, 1, 2 into setosa, versicolor, virginica.
    n_samples = y_test.shape[0]
    y = [labels[y_test[i]] for i in range(n_samples)]
    pre = [labels[result[i]] for i in range(n_samples)]

    # Build the feature table once as floats (radviz needs numeric feature
    # columns) and attach the true / predicted species to separate copies.
    features = pd.DataFrame(np.asarray(x_test, dtype=float).T, columns=feature_names)
    df_real = features.assign(Species=y)
    df_prediction = features.assign(Species=pre)

    # Draw both figures, then display them together.
    plt.figure('真实分类')
    radviz(df_real, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.figure('预测分类')
    radviz(df_prediction, 'Species', color=['blue', 'green', 'red', 'yellow'])
    plt.show()

if __name__ == "__main__":

    # Load the data: the first 120 rows are used for training.
    iris = pd.read_csv('iris_training.csv')
    X = iris.iloc[:120, 0:4].values.T
    Y = iris.iloc[:120, 4].values

    # One-hot encode the labels. `encoder` must stay a module-level name
    # because predict() looks it up globally to decode its output.
    encoder = OneHotEncoder()
    Y = encoder.fit_transform(Y.reshape(Y.shape[0], 1))
    Y = Y.toarray().T
    Y = Y.astype('uint8')

    # Train: 4 input nodes, 10 hidden nodes, 3 output nodes, 10000 iterations.
    start_time = datetime.datetime.now()
    parameters = nn_model(X, Y, n_h=10, n_input=4, n_output=3, num_iterations=10000, print_cost=True)
    end_time = datetime.datetime.now()
    # total_seconds() covers the whole duration; `.microseconds` is only the
    # sub-second component and under-reports any run longer than one second.
    elapsed_ms = round((end_time - start_time).total_seconds() * 1000)
    print("用时：" + str(elapsed_ms) + 'ms')

    # Evaluate the model.
    # NOTE(review): inside np.r_ the open-ended slice `120:` appears to be
    # handed to np.arange with stop=None, which yields 0..119; the selection
    # would then be row 0 plus the 120 training rows rather than rows 120 and
    # later. Left unchanged -- confirm the intended test split.
    x_test = iris.iloc[np.r_[0,120:],0:4].values.T
    y_test = iris.iloc[np.r_[0,120:],4].values

    result = predict(parameters, x_test, y_test)

    # Visualise the classification result.
    result_visualization(x_test, y_test, result)

sigmoid iris

最后修改时间：2023-03-27 21:42:16

「喜欢这篇文章，您的关注和赞赏是给作者最好的鼓励」

关注作者

文章被以下合辑收录

墨力计划（共16篇）

每日积累一点的

每日 Python 训练：三层 BP 算法

文章被以下合辑收录

评论