神经网络的FPGA实现：卷积层操作(二)

tech2024-10-16 77

前文：《神经网络的FPGA实现：基础卷积操作(一)》。本文在前文的基础上实现 RGB 三通道的卷积层，使用 Verilog HDL 编写，开发环境为 Xilinx Vivado。卷积层中例化的 conv_pe 模块（conv_pe.v 文件）见前文链接。

`timescale 1ns / 1ps
// conv_layer
//
// Loads 3x3 kernel weights and a bias one byte per clock, then feeds one
// 3x3 window per input channel to three conv_pe instances and combines their
// results (plus bias) into a single 8-bit output.
//
// NOTE(review): although CHANNEL_IN / CHANNEL_OUT are parameters, the body
// below instantiates exactly three conv_pe units, reads weight[0..26] and
// only uses bias[0]; it is therefore only consistent with CHANNEL_IN=3,
// CHANNEL_OUT=1.
//
// Parameters:
//   CHANNEL_IN  - number of input channels (R, G, B).
//   CHANNEL_OUT - number of output channels.
// Ports:
//   clk, rst               - clock and synchronous active-high reset.
//   input_weight_en        - when high, weight_ab is stored on each clock.
//   input_bias_en          - when high, bias_ab is stored on each clock.
//   weight_ab, bias_ab     - the byte written in the current cycle.
//   write_done_weight_bias - high once all weights AND all biases are loaded.
//   input_fmap_en          - forwarded to every conv_pe as its input enable.
//   fmap                   - 9 bytes per channel; channel k occupies bits
//                            [72*k+71 : 72*k], byte 0 of a channel is fmap_00.
//   output_en              - AND of the three conv_pe valid_out signals.
//   end_data               - low 8 bits of the combined sum.
module conv_layer#(
    parameter CHANNEL_IN  = 3, // input channels: R, G, B
    parameter CHANNEL_OUT = 1  // output channels
)(
    input                       clk,                    // clock
    input                       rst,                    // synchronous reset, active high
    input                       input_weight_en,        // weight write enable
    input                       input_bias_en,          // bias write enable
    // kernel parameters
    input  [7:0]                weight_ab,              // weight byte written this cycle
    input  [7:0]                bias_ab,                // bias byte written this cycle
    output                      write_done_weight_bias, // all weights and biases written
    // input feature map
    input                       input_fmap_en,          // feature-map input enable
    input  [9*8*CHANNEL_IN-1:0] fmap,                   // one 3x3 window of 8-bit values per channel
    output                      output_en,              // output valid
    output [8*CHANNEL_OUT-1:0]  end_data                // 8-bit result
);

    // Number of parameter words expected by the loaders.
    localparam WEIGHT_NUM = 9*CHANNEL_IN*CHANNEL_OUT;
    localparam BIAS_NUM   = CHANNEL_OUT;

    // Author's note: these memory arrays need to be replaced by RAM if the
    // design is to be synthesized.
    reg [7:0] weight [9*CHANNEL_IN*CHANNEL_OUT-1:0]; // WEIGHT_NUM 8-bit weights
    reg [7:0] bias   [CHANNEL_OUT-1:0];              // BIAS_NUM 8-bit biases

    // ---------------------------------------------------------------
    // Weight loader
    // ---------------------------------------------------------------
    reg [15:0] weight_count;      // number of weights written so far
    reg        weight_write_done; // latched once weight_count reaches WEIGHT_NUM

    always @(posedge clk) begin
        if (rst) begin
            weight_count      <= 0;
            weight_write_done <= 0;
        end
        else begin
            // Only accept a write while the array still has room. Without the
            // bound check an over-long enable burst would index past the end
            // of the array and, after the counter wraps, overwrite weights
            // that were already loaded.
            if (input_weight_en && (weight_count < WEIGHT_NUM)) begin
                weight[weight_count] <= weight_ab;
                weight_count         <= weight_count + 1;
            end
            if (weight_count == WEIGHT_NUM) begin
                weight_write_done <= 1;
            end
        end
    end

    // ---------------------------------------------------------------
    // Bias loader (same scheme as the weight loader)
    // ---------------------------------------------------------------
    reg [7:0] bias_count;      // number of biases written so far
    reg       bias_write_done; // latched once bias_count reaches BIAS_NUM

    always @(posedge clk) begin
        if (rst) begin
            bias_count      <= 0;
            bias_write_done <= 0;
        end
        else begin
            // Bounded for the same reason as the weight write above.
            if (input_bias_en && (bias_count < BIAS_NUM)) begin
                bias[bias_count] <= bias_ab;
                bias_count       <= bias_count + 1;
            end
            if (bias_count == BIAS_NUM) begin
                bias_write_done <= 1;
            end
        end
    end

    // Parameter transfer is complete when both loaders are done.
    assign write_done_weight_bias = weight_write_done & bias_write_done;

    // ---------------------------------------------------------------
    // Feature-map computation
    //
    // Author's note: using one conv_pe per (input channel, output channel)
    // pair gives the fastest computation; in a real design the number of
    // conv_pe units is a resource/speed trade-off. Here: 3 inputs x 1 output
    // = 3 conv_pe units.
    // ---------------------------------------------------------------
    wire        valid_out_1, valid_out_2, valid_out_3; // per-channel output valid
    wire [31:0] sum_data_1,  sum_data_2,  sum_data_3;  // per-channel result

    // Channel 0: weight[0..8], fmap[71:0]
    conv_pe uut_conv_pe_1(
        .clk(clk),
        .rst(rst),
        .input_en(input_fmap_en),
        .kernel_00(weight[0]),
        .kernel_01(weight[1]),
        .kernel_02(weight[2]),
        .kernel_10(weight[3]),
        .kernel_11(weight[4]),
        .kernel_12(weight[5]),
        .kernel_20(weight[6]),
        .kernel_21(weight[7]),
        .kernel_22(weight[8]),
        .fmap_00(fmap[7:0]),
        .fmap_01(fmap[15:8]),
        .fmap_02(fmap[23:16]),
        .fmap_10(fmap[31:24]),
        .fmap_11(fmap[39:32]),
        .fmap_12(fmap[47:40]),
        .fmap_20(fmap[55:48]),
        .fmap_21(fmap[63:56]),
        .fmap_22(fmap[71:64]),
        .valid_out(valid_out_1),
        .sum_data(sum_data_1)
    );

    // Channel 1: weight[9..17], fmap[143:72]
    conv_pe uut_conv_pe_2(
        .clk(clk),
        .rst(rst),
        .input_en(input_fmap_en),
        .kernel_00(weight[9]),
        .kernel_01(weight[10]),
        .kernel_02(weight[11]),
        .kernel_10(weight[12]),
        .kernel_11(weight[13]),
        .kernel_12(weight[14]),
        .kernel_20(weight[15]),
        .kernel_21(weight[16]),
        .kernel_22(weight[17]),
        .fmap_00(fmap[79:72]),
        .fmap_01(fmap[87:80]),
        .fmap_02(fmap[95:88]),
        .fmap_10(fmap[103:96]),
        .fmap_11(fmap[111:104]),
        .fmap_12(fmap[119:112]),
        .fmap_20(fmap[127:120]),
        .fmap_21(fmap[135:128]),
        .fmap_22(fmap[143:136]),
        .valid_out(valid_out_2),
        .sum_data(sum_data_2)
    );

    // Channel 2: weight[18..26], fmap[215:144]
    conv_pe uut_conv_pe_3(
        .clk(clk),
        .rst(rst),
        .input_en(input_fmap_en),
        .kernel_00(weight[18]),
        .kernel_01(weight[19]),
        .kernel_02(weight[20]),
        .kernel_10(weight[21]),
        .kernel_11(weight[22]),
        .kernel_12(weight[23]),
        .kernel_20(weight[24]),
        .kernel_21(weight[25]),
        .kernel_22(weight[26]),
        .fmap_00(fmap[151:144]),
        .fmap_01(fmap[159:152]),
        .fmap_02(fmap[167:160]),
        .fmap_10(fmap[175:168]),
        .fmap_11(fmap[183:176]),
        .fmap_12(fmap[191:184]),
        .fmap_20(fmap[199:192]),
        .fmap_21(fmap[207:200]),
        .fmap_22(fmap[215:208]),
        .valid_out(valid_out_3),
        .sum_data(sum_data_3)
    );

    // ---------------------------------------------------------------
    // Cross-channel accumulation
    // ---------------------------------------------------------------
    wire        [31:0] sum_data_32; // sum over the three channels plus bias
    wire signed [7:0]  bias_temp;   // the single bias that is actually used

    assign bias_temp = bias[0];

    // The output is valid only when all three processing elements are valid.
    assign output_en = valid_out_1 & valid_out_2 & valid_out_3;

    // Per-channel "quantisation": keep only the low 8 bits of each result and
    // zero-extend to 32 bits (this is truncation, not saturation).
    wire [31:0] sum_data_1_Q, sum_data_2_Q, sum_data_3_Q;
    assign sum_data_1_Q = {24'd0, sum_data_1[7:0]};
    assign sum_data_2_Q = {24'd0, sum_data_2[7:0]};
    assign sum_data_3_Q = {24'd0, sum_data_3[7:0]};

    // Add the three channel values and the sign-extended bias while the
    // output is valid; drive zero otherwise.
    assign sum_data_32 = (output_en == 1)
                       ? sum_data_1_Q + sum_data_2_Q + sum_data_3_Q + {{24{bias_temp[7]}}, bias_temp}
                       : 32'd0;

    // Final result: low 8 bits of the accumulated sum.
    assign end_data = sum_data_32[7:0];

    // Counts the valid outputs produced so far. It drives nothing in this
    // module; presumably kept for waveform inspection.
    reg [31:0] conv_pe_count;
    always @(posedge clk) begin
        if (rst) begin
            conv_pe_count <= 0;
        end
        else begin
            if (output_en) begin
                conv_pe_count <= conv_pe_count + 1;
            end
        end
    end

endmodule

仿真需要 imageBlueChannels.txt、imageGreenChannels.txt、imageRedChannels.txt 三个图像通道数据文件，以及 bias.txt、weight.txt 两个参数文件；请将它们放入工程文件夹下，并把下面 testbench 中 $readmemh 和 $fopen 使用的绝对路径改成自己机器上的实际路径。

`timescale 1ns / 1ps
// tb_conv_layer
//
// Testbench for conv_layer. It
//   1. streams weight.txt and bias.txt into the DUT one byte per clock,
//   2. once the DUT reports write_done_weight_bias, slides a 3x3 window over
//      a 482x322 three-channel image (one window every second clock), and
//   3. writes every valid DUT output to conv_layer_result.txt as hex, one
//      value per line.
// After the last window has been sent it waits DRAIN_CYCLES clocks, closes
// the result file and ends the simulation.
//
// Author's note: in a real system the data would reach the device through an
// external interface (UART, PCIe, optical/Ethernet port), or the weights would
// be kept on-chip; with larger models an external memory such as DDR is
// usually needed to buffer the weights.
module tb_conv_layer;

    localparam CHANNEL_IN   = 3;
    localparam CHANNEL_OUT  = 1;
    localparam IMAGE_WIDTH  = 482;
    localparam IMAGE_HIGH   = 322;

    // Clocks to wait after the last window before closing the result file.
    // NOTE(review): conv_pe is not visible here; this assumes its latency is
    // well below 100 cycles - increase if the result file comes out short.
    localparam DRAIN_CYCLES = 100;

    // Window-feeder states.
    localparam [2:0] ST_SEND = 3'd0, // present one window to the DUT
                     ST_GAP  = 3'd1, // idle cycle, advance row/column
                     ST_DONE = 3'd2; // whole image has been sent

    reg clk;
    reg rst;

    // Hold reset for the first 1000 ns.
    initial begin
        rst = 1;
        #1000 rst = 0;
    end

    // 10 ns clock period.
    initial begin
        clk = 0;
        forever #5 clk = ~clk;
    end

    reg input_weight_en, input_bias_en, input_fmap_en; // weight / bias / feature-map enables
    reg [7:0] weight_ab, bias_ab;                      // byte written to the DUT this cycle
    reg [9*8*CHANNEL_IN-1:0] fmap_ab;                  // 3x3 window x 8 bit x CHANNEL_IN
    wire write_done_weight_bias, output_en;            // parameters loaded / output valid
    wire [8*CHANNEL_OUT-1:0] end_data;                 // DUT result

    conv_layer#(
        .CHANNEL_IN(CHANNEL_IN),
        .CHANNEL_OUT(CHANNEL_OUT)
    )uut_conv_layer(
        .clk(clk),
        .rst(rst),
        .input_weight_en(input_weight_en),
        .input_bias_en(input_bias_en),
        .weight_ab(weight_ab),
        .bias_ab(bias_ab),
        .write_done_weight_bias(write_done_weight_bias),
        .input_fmap_en(input_fmap_en),
        .fmap(fmap_ab),
        .output_en(output_en),
        .end_data(end_data)
    );

    // ---------------------------------------------------------------
    // Weight feeder: 9*CHANNEL_IN*CHANNEL_OUT bytes, one per clock
    // ---------------------------------------------------------------
    reg [7:0] weight[9*CHANNEL_IN*CHANNEL_OUT-1:0];
    initial begin
        $readmemh("C://Users//mayn//Desktop//nn//conv//conv_layer//weight.txt",weight);
    end

    integer weight_count;
    always @(posedge clk) begin
        if (rst) begin
            weight_count    <= 0;
            input_weight_en <= 0;
            weight_ab       <= 0;
        end
        else begin
            if (weight_count < 9*CHANNEL_IN*CHANNEL_OUT) begin
                // still weights left to send
                input_weight_en <= 1;
                weight_count    <= weight_count + 1;
                weight_ab       <= weight[weight_count];
            end
            else begin
                input_weight_en <= 0;
                weight_ab       <= 0;
            end
        end
    end

    // ---------------------------------------------------------------
    // Bias feeder: CHANNEL_OUT bytes, one per clock
    // ---------------------------------------------------------------
    reg [7:0] bias[CHANNEL_OUT-1:0];
    initial begin
        $readmemh("C://Users//mayn//Desktop//nn//conv//conv_layer//bias.txt",bias);
    end

    integer bias_count;
    always @(posedge clk) begin
        if (rst) begin
            bias_count    <= 0;
            input_bias_en <= 0;
            bias_ab       <= 0;
        end
        else begin
            if (bias_count < CHANNEL_OUT) begin
                input_bias_en <= 1;
                bias_count    <= bias_count + 1;
                bias_ab       <= bias[bias_count];
            end
            else begin
                input_bias_en <= 0;
                bias_ab       <= 0;
            end
        end
    end

    // ---------------------------------------------------------------
    // Image data (author's note: normally fetched from external memory).
    // Pixels are stored row-major: index = column + IMAGE_WIDTH * row.
    // ---------------------------------------------------------------
    reg [7:0] fmap_R[IMAGE_WIDTH*IMAGE_HIGH-1:0];
    reg [7:0] fmap_G[IMAGE_WIDTH*IMAGE_HIGH-1:0];
    reg [7:0] fmap_B[IMAGE_WIDTH*IMAGE_HIGH-1:0];
    initial begin
        $readmemh("C://Users//mayn//Desktop//nn//conv//conv_layer//imageBlueChannels.txt",fmap_B);
        $readmemh("C://Users//mayn//Desktop//nn//conv//conv_layer//imageGreenChannels.txt",fmap_G);
        $readmemh("C://Users//mayn//Desktop//nn//conv//conv_layer//imageRedChannels.txt",fmap_R);
    end

    // ---------------------------------------------------------------
    // Window feeder
    //
    // i = column of the window's top-left pixel (0 .. IMAGE_WIDTH-3)
    // j = row    of the window's top-left pixel (0 .. IMAGE_HIGH-3)
    //
    // fmap_ab byte layout (byte 0 = bits [7:0]):
    //   bytes  0.. 8 : blue  window
    //   bytes  9..17 : green window
    //   bytes 18..26 : red   window
    // and inside a channel, byte (3*r + c) is pixel (row j+r, column i+c).
    // NOTE(review): the DUT pairs bytes 0..8 with weight[0..8], so blue is
    // processed with the first kernel - confirm weight.txt is ordered B, G, R.
    // ---------------------------------------------------------------
    integer i, j;
    integer r, c; // row / column offset inside the 3x3 window
    reg [2:0] states;

    always @(posedge clk) begin
        if (rst) begin
            i             <= 0;
            j             <= 0;
            states        <= ST_SEND;
            input_fmap_en <= 0;
            fmap_ab       <= 0;
        end
        else if (write_done_weight_bias) begin // start only after parameters are loaded
            case (states)
                ST_SEND: begin
                    if (i < IMAGE_WIDTH-2) begin // output width = 482 - 3 + 1 = 480
                        input_fmap_en <= 1;
                        // Nonblocking, like every other signal driven from this
                        // block, so the DUT never races against the update on
                        // the same clock edge.
                        for (r = 0; r < 3; r = r + 1) begin
                            for (c = 0; c < 3; c = c + 1) begin
                                fmap_ab[8*( 0 + 3*r + c) +: 8] <= fmap_B[(i+c) + IMAGE_WIDTH*(j+r)];
                                fmap_ab[8*( 9 + 3*r + c) +: 8] <= fmap_G[(i+c) + IMAGE_WIDTH*(j+r)];
                                fmap_ab[8*(18 + 3*r + c) +: 8] <= fmap_R[(i+c) + IMAGE_WIDTH*(j+r)];
                            end
                        end
                        i      <= i + 1; // slide the window one column to the right
                        states <= ST_GAP;
                    end
                    else begin
                        input_fmap_en <= 0;
                    end
                end

                ST_GAP: begin
                    input_fmap_en <= 0;
                    if (i == IMAGE_WIDTH-2) begin // row finished: next row, first column
                        i <= 0;
                        j <= j + 1;
                    end
                    if (j < IMAGE_HIGH-2) begin // output height = 322 - 3 + 1 = 320
                        states <= ST_SEND;
                    end
                    // Last column of the last row: this later assignment
                    // overrides the one above and stops the feeder.
                    if ((j == IMAGE_HIGH-3) && (i == IMAGE_WIDTH-2)) begin
                        states <= ST_DONE;
                    end
                end

                ST_DONE: begin // whole image sent, drive idle values
                    input_fmap_en <= 0;
                    fmap_ab       <= 0;
                end
            endcase
        end
    end

    // ---------------------------------------------------------------
    // Result capture and end of simulation
    // ---------------------------------------------------------------
    integer end_temp;    // file handle of the result file
    integer drain_count; // clocks spent in ST_DONE
    initial begin
        end_temp=$fopen("C://Users//mayn//Desktop//nn//conv//conv_layer//conv_layer_result.txt","w");
    end

    always @(posedge clk) begin
        // Record every valid DUT output.
        if (output_en) begin
            $fwrite(end_temp,"%h\n",$signed(end_data));
        end

        // Once the feeder is done, give the DUT time to flush its remaining
        // outputs, then close the file so it is complete on disk and stop.
        if (rst) begin
            drain_count <= 0;
        end
        else if (states == ST_DONE) begin
            if (drain_count == DRAIN_CYCLES) begin
                $fclose(end_temp);
                $finish;
            end
            else begin
                drain_count <= drain_count + 1;
            end
        end
    end

endmodule

最新回复(0)