第五章 全连接神经网络
1.1 全连接神经网络
人工神经网络(Artificial Neural Network)可以对一组输入信号和一组输出信号之间的关系进行建模,是机器学习和认知科学中的一种模仿生物神经网络的结构和功能的数学模型。其灵感源于动物的神经中枢,由大量的人工神经元连接而成,能够根据外界环境改变内部结构,是一种自适应的系统。
全连接神经网络(Multi-Layer Perceptron)也称多层感知机,是一种连接方式较为简单的人工神经网络结构,属于前馈神经网络的一种。主要由输入层、隐藏层、输出层构成,并且在每个隐藏层中可以有多个神经元。
神经网络的学习能力主要来源于网络结构,根据层级数量的不同,每层神经元数量的多少,以及信息在层之间的传播方式,可以组合成多种神经网络模型。一般在隐层和输出层对信号进行加工处理,根据隐层的数量,MLP可分为单层或是多(隐)层。
针对单层MLP和多层MLP,每个隐层的神经元数量是可以变化的。通常没有一个很好的标准用于确定每层神经元的数量和隐层的个数。根据经验,更多的神经元会有更强的表示能力,同时也更容易造成网络的过拟合。
下面我们尝试用PyTorch搭建MLP回归与分类模型。
先导入需要的模块
1 2 3 4 5 6 7 8 9 10 11 12 13 import numpy as npimport pandas as pdfrom sklearn.preprocessing import StandardScaler,MinMaxScalerfrom sklearn.model_selection import train_test_splitfrom sklearn.metrics import accuracy_score,confusion_matrix,classification_reportfrom sklearn.manifold import TSNEimport torchimport torch.nn as nnfrom torch.optim import SGD,Adamimport matplotlib.pyplot as pltimport seaborn as snsimport torch.utils.data as Dataimport hiddenlayer as hl
1.2 MLP分类模型
数据准备与探查
数据采用UCI机器学习库的垃圾邮件数据,下载网址为:https://archive.ics.uci.edu/ml/datasets/Spambase
# Load the spambase data (57 numeric features plus a "tar" label column)
# and hold out 25% of the samples as a test set.
spam = pd.read_csv(r"C:\Users\lenovo\Desktop\spambase.csv")
x = spam.iloc[:, 0:57].values
y = spam.tar.values
# Fixed random_state so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.25, random_state=123
)
数据预处理
# Rescale every feature into [0, 1]. The scaler is fitted on the
# training split only, then applied to the test split, so no test-set
# information leaks into the preprocessing.
scales = MinMaxScaler(feature_range=(0, 1))
X_train_s = scales.fit_transform(X_train)
X_test_s = scales.transform(X_test)
数据可视化
# One box plot per feature, grouped by class label, to see which
# features visually separate spam from non-spam.
colname = spam.columns.values[:-1]
plt.figure(figsize=(20, 14))
for i, name in enumerate(colname):
    plt.subplot(7, 9, i + 1)
    sns.boxplot(x=Y_train, y=X_train_s[:, i])
    plt.title(name)
plt.subplots_adjust(hspace=0.4)
plt.show()
创建一个多层感知机
class MLP(nn.Module):
    """Fully connected classifier for the 57-feature spambase data.

    Architecture: 57 -> 30 -> 10 -> 2, ReLU after each hidden layer.
    forward() returns the two hidden activations along with the final
    scores so intermediate representations can be visualised later.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.hidden1 = nn.Sequential(
            nn.Linear(57, 30, bias=True),
            nn.ReLU(),
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(30, 10, bias=True),
            nn.ReLU(),
        )
        # FIX: the original appended nn.Sigmoid() here, but this model is
        # trained with nn.CrossEntropyLoss, which expects raw logits (it
        # applies log-softmax internally). Squashing the scores through a
        # sigmoid first compresses the logits into (0, 1) and degrades
        # training, so the activation is removed. Since Sigmoid has no
        # parameters, the state_dict layout is unchanged.
        self.classifica = nn.Sequential(
            nn.Linear(10, 2, bias=True),
        )

    def forward(self, x):
        """Return (hidden1 activations, hidden2 activations, class logits)."""
        fc1 = self.hidden1(x)
        fc2 = self.hidden2(fc1)
        output = self.classifica(fc2)
        return fc1, fc2, output
数据载入与转换
# Convert the scaled NumPy arrays into tensors. CrossEntropyLoss needs
# float32 inputs and int64 class labels.
x_train_nots = torch.from_numpy(X_train_s.astype(np.float32))
y_train_tar = torch.from_numpy(Y_train.astype(np.int64))
x_test_nots = torch.from_numpy(X_test_s.astype(np.float32))
y_test_tar = torch.from_numpy(Y_test.astype(np.int64))

# Wrap the training tensors in a shuffled mini-batch loader.
train_data = Data.TensorDataset(x_train_nots, y_train_tar)
train_data_loader = Data.DataLoader(
    dataset=train_data,
    shuffle=True,
    batch_size=64,
)
优化器和损失函数
# Model, optimiser and loss function, plus a hiddenlayer History/Canvas
# pair used for live plotting of the training curves.
mlp = MLP()
opt = Adam(mlp.parameters(), lr=0.0003)
loss_fn = nn.CrossEntropyLoss()

history1 = hl.History()
canvas = hl.Canvas()
print_step = 30  # evaluate and log every 30 mini-batches
训练过程
# Train for 50 epochs; every `print_step` mini-batches, evaluate on the
# held-out test set and update the live training-curve plots.
for epoch in range(50):
    for step, (bx, by) in enumerate(train_data_loader):
        _, _, out = mlp(bx)
        loss = loss_fn(out, by)
        opt.zero_grad()
        loss.backward()
        opt.step()

        # Global iteration counter across all epochs.
        niter = epoch * len(train_data_loader) + step + 1
        if niter % print_step == 0:
            # FIX: run the evaluation forward pass without autograd; the
            # original tracked gradients here, wasting memory.
            with torch.no_grad():
                _, _, output = mlp(x_test_nots)
            _, pre_lab = torch.max(output, 1)
            test_accuracy = accuracy_score(y_test_tar, pre_lab)
            # FIX: log the Python scalar (loss.item()) instead of the
            # live tensor so the history does not retain the graph.
            history1.log(niter, train_loss=loss.item(),
                         test_accuracy=test_accuracy)
            with canvas:
                canvas.draw_plot(history1['train_loss'])
                canvas.draw_plot(history1['test_accuracy'])

# Final accuracy on the full test set.
with torch.no_grad():
    _, _, output = mlp(x_test_nots)
_, pre_lab = torch.max(output, 1)
test_accuracy = accuracy_score(y_test_tar, pre_lab)
print("Test_Accuracy", test_accuracy)
获取中间层输出
# Project the 10-D second-hidden-layer activations of the test set down
# to 2-D with t-SNE and colour the points by their true class.
_, test_fc2, _ = mlp(x_test_nots)
test_fc2_tsne = TSNE(n_components=2).fit_transform(test_fc2.data.numpy())

plt.figure(figsize=(8, 6))
plt.xlim([test_fc2_tsne[:, 0].min() - 1, test_fc2_tsne[:, 0].max() + 1])
plt.ylim([test_fc2_tsne[:, 1].min() - 1, test_fc2_tsne[:, 1].max() + 1])
for lab, marker in ((0, "bo"), (1, "rd")):
    pts = test_fc2_tsne[Y_test == lab]
    plt.plot(pts[:, 0], pts[:, 1], marker, label=str(lab))
plt.legend()
plt.title("test_fcc2_tsne")
plt.show()
通过Hook调用中间层信息
# Alternative way to read intermediate outputs: register a forward hook
# on the classifier layer instead of returning the values from forward().
activation = {}


def get_activation(name):
    """Build a hook that stores the layer's detached output under *name*."""
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook


mlp.classifica.register_forward_hook(get_activation('classifica'))
_, _, _ = mlp(x_test_nots)  # forward pass fills activation['classifica']
classfica = activation['classifica'].data.numpy()

# Scatter the two classifier scores against each other, by true class.
plt.figure(figsize=(8, 6))
for lab, marker in ((0, "bo"), (1, "rd")):
    plt.plot(classfica[Y_test == lab, 0], classfica[Y_test == lab, 1],
             marker, label=str(lab))
plt.legend()
plt.show()
1.3 MLP回归模型
回归模型的数据选择sklearn.datasets中的加利福尼亚房价数据。
模块准备
1 2 3 4 5 6 7 8 9 10 11 12 13 14 import numpy as npimport pandas as pdfrom sklearn.preprocessing import StandardScalerfrom sklearn.model_selection import train_test_splitfrom sklearn.metrics import mean_squared_error,mean_absolute_errorfrom sklearn.datasets import fetch_california_housingimport torchimport torch.nn as nnimport torch.nn.functional as Ffrom torch.optim import SGDimport torch.utils.data as Dataimport matplotlib.pyplot as pltimport seaborn as sns
数据处理
# Fetch the California housing data, split 70/30, then standardise the
# features (the scaler is fitted on the training split only).
housedata = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    housedata.data,
    housedata.target,
    test_size=0.3,
    random_state=42,
)
scale = StandardScaler()
X_train_s = scale.fit_transform(X_train)
X_test_s = scale.transform(X_test)
数据探查
# Heatmap of pairwise correlations between the standardised features
# and the regression target.
housedatadf = pd.DataFrame(data=X_train_s, columns=housedata.feature_names)
housedatadf['target'] = y_train
datacor = np.corrcoef(housedatadf.values, rowvar=0)
datacor = pd.DataFrame(data=datacor,
                       columns=housedatadf.columns,
                       index=housedatadf.columns)

plt.figure(figsize=(8, 6))
ax = sns.heatmap(datacor, square=True, annot=True, fmt=".3f",
                 linewidths=.5, cmap="YlGnBu",
                 cbar_kws={"fraction": 0.046, "pad": 0.03})
plt.show()
数据转化
# Convert the standardised arrays into float32 tensors (both features
# and the regression target are continuous values).
train_xt = torch.from_numpy(X_train_s.astype(np.float32))
train_yt = torch.from_numpy(y_train.astype(np.float32))
test_xt = torch.from_numpy(X_test_s.astype(np.float32))
test_yt = torch.from_numpy(y_test.astype(np.float32))

train_data = Data.TensorDataset(train_xt, train_yt)
test_data = Data.TensorDataset(test_xt, test_yt)
# Shuffled mini-batch loader for the training split.
train_loader = Data.DataLoader(
    dataset=train_data,
    shuffle=True,
    batch_size=64,
)
定义一个多层感知机
class MLP(nn.Module):
    """Fully connected regressor: 8 inputs -> 100 -> 100 -> 50 -> 1."""

    def __init__(self):
        super(MLP, self).__init__()
        self.hidden1 = nn.Linear(8, 100, bias=True)
        self.hidden2 = nn.Linear(100, 100)
        self.hidden3 = nn.Linear(100, 50)
        self.predict = nn.Linear(50, 1)

    def forward(self, x):
        """Return a 1-D tensor with one predicted value per sample."""
        h = F.relu(self.hidden1(x))
        h = F.relu(self.hidden2(h))
        h = F.relu(self.hidden3(h))
        out = self.predict(h)
        # Drop the trailing singleton dimension: (N, 1) -> (N,)
        return out[:, 0]
训练参数定义
# Plain SGD with MSE loss; per-epoch mean training losses are collected
# in train_loss_all for plotting afterwards.
mlp = MLP()
optimizer = torch.optim.SGD(mlp.parameters(), lr=0.003)
loss_func = nn.MSELoss()
train_loss_all = []
训练过程
# 60 epochs of mini-batch SGD; record the sample-weighted mean loss of
# each epoch.
for epoch in range(60):
    epoch_loss = 0.0
    epoch_count = 0
    for bx, by in train_loader:
        pred = mlp(bx)
        loss = loss_func(pred, by)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight each batch's loss by its size so partial final batches
        # don't skew the epoch average.
        n = bx.size(0)
        epoch_loss += loss.item() * n
        epoch_count += n
    train_loss_all.append(epoch_loss / epoch_count)
结果可视化
# Training-loss curve over the 60 epochs.
plt.figure(figsize=(10, 6))
plt.plot(train_loss_all, 'ro-', label="Train Loss")
plt.legend()
plt.grid()
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()
预测测试集
# Predict on the test set and report mean absolute error.
# FIX: the original ran inference with autograd enabled; wrap the
# forward pass in torch.no_grad() so no gradient graph is built.
with torch.no_grad():
    pre_y = mlp(test_xt)
pre_y = pre_y.numpy()
mae = mean_absolute_error(y_test, pre_y)
print("在测试集上的绝对值误差为: ", mae)
可视化差异
# Plot the true targets in ascending order with the corresponding
# predictions overlaid, to visualise where the model errs.
index = np.argsort(y_test)
positions = np.arange(len(y_test))

plt.figure(figsize=(12, 5))
plt.plot(positions, y_test[index], 'r', label="original Y")
plt.scatter(positions, pre_y[index], s=3, c="b", label="Prediction")
plt.legend(loc="upper left")
plt.grid()
plt.xlabel("Index")
plt.ylabel("Y")
plt.show()