基本算法
与分时复用的移位相加类似,取消分时复用,使用面积换时间,使用流水线设计,流水线填满后可以一个时钟周期计算出一个结果
- 分别计算乘数的移位结果,并与被乘数对应位相与
- 使用加法树将结果相加
RTL代码
移位部分
固定移位单元代码如下,当被乘数第n位为1时,输出乘数移位向左移位n位的结果
module shift_unit #(
parameter WIDTH = 4,
parameter SHIFT_NUM = 0
)(
input clk, // Clock
input rst_n, // Asynchronous reset active low
input shift_valid,
input shift_mask,
input [WIDTH - 1:0]shift_din,
output reg [2 * WIDTH - 1:0]shift_dout
);
wire [2 * WIDTH - 1:0]shift_din_ext;
assign shift_din_ext = {(WIDTH)'(0),shift_din};
always @ (posedge clk or negedge rst_n) begin
if(~rst_n) begin
shift_dout <= 'b0;
end else if((shift_valid == 1'b1) && (shift_mask == 1'b1)) begin
shift_dout <= shift_din_ext << SHIFT_NUM;
end else begin
shift_dout <= 'b0;
end
end
endmodule
移位器代码如下,使用生成语句生成位宽个移位器
module parallel_shifter #(
parameter WIDTH = 4
)(
input clk, // Clock
input rst_n, // Asynchronous reset active low
input mult_valid,
input [WIDTH - 1:0]mult1,mult2,
output [(WIDTH ** 2) * 2 - 1:0]shift_dout
);
genvar a;
generate
for (a = 0; a < WIDTH; a = a + 1) begin:shifter_layer
shift_unit #(
.WIDTH(WIDTH),
.SHIFT_NUM(a)
) u_shift_unit (
.clk(clk), // Clock
.rst_n(rst_n), // Asynchronous reset active low
.shift_valid(mult_valid),
.shift_mask(mult2[a]),
.shift_din(mult1),
.shift_dout(shift_dout[a * 2 * WIDTH +: 2 * WIDTH])
);
end
endgenerate
endmodule
加法部分
加法部分使用加法树,可以实现流水线操作,以下为加法数单层代码
module adder_layer #(
parameter ADDER_NUM = 4,
parameter ADDER_WIDTH = 8
)(
input clk, // Clock
input rst_n, // Asynchronous reset active low
input [ADDER_NUM * ADDER_WIDTH * 2 - 1:0]adder_din,
output [ADDER_NUM * (ADDER_WIDTH + 1) - 1:0]adder_dout
);
genvar i;
generate
for(i = 0;i < ADDER_NUM;i = i + 1) begin:adder_layer_gen
wire [ADDER_WIDTH - 1:0]add1 = adder_din[2 * i * ADDER_WIDTH +: ADDER_WIDTH];
wire [ADDER_WIDTH - 1:0]add2 = adder_din[(2 * i + 1) * ADDER_WIDTH +: ADDER_WIDTH];
wire [ADDER_WIDTH:0]sum = add1 + add2;
reg [ADDER_WIDTH:0]sum_reg;
always @ (posedge clk or negedge rst_n) begin
if(~rst_n) begin
sum_reg <= 'b0;
end else begin
sum_reg <= sum;
end
end
assign adder_dout[i * (ADDER_WIDTH + 1) +: ADDER_WIDTH + 1] = sum_reg;
end
endgenerate
endmodule
以下为加法树代码
module adder_tree #(
parameter LAYER_NUM = 4,
parameter MIN_ADDER_WIDTH = 8
)(
input clk, // Clock
input rst_n, // Asynchronous reset active low
input [(2 ** LAYER_NUM) * MIN_ADDER_WIDTH - 1:0]adder_din,
output [LAYER_NUM + MIN_ADDER_WIDTH - 1:0]adder_dout
);
genvar i;
generate
for(i = LAYER_NUM;i > 0;i = i - 1)begin:adder_layer_def
wire [(2 ** i) * (MIN_ADDER_WIDTH + LAYER_NUM - i) - 1:0]layer_din;
wire [2 ** (i - 1) * (MIN_ADDER_WIDTH + LAYER_NUM - i + 1) - 1:0]layer_dout;
if(i == LAYER_NUM) begin
assign layer_din = adder_din;
end else begin
assign layer_din = adder_layer_def[i + 1].layer_dout;
end
adder_layer # (
.ADDER_NUM(2 ** (i - 1)),
.ADDER_WIDTH(MIN_ADDER_WIDTH + LAYER_NUM - i)
) u_adder_layer (
.clk(clk), // Clock
.rst_n(rst_n), // Asynchronous reset active low
.adder_din(layer_din),
.adder_dout(layer_dout)
);
end
endgenerate
assign adder_dout = adder_layer_def[1].layer_dout;
endmodule
顶层
顶层组合了加法器和移位器,代码如下
module shift_adder #(
parameter LOG2_WIDTH = 2
)(
input clk, // Clock
input rst_n, // Asynchronous reset active low
input [2 ** LOG2_WIDTH - 1:0]mult1,mult2,
input din_valid,
output [(2 ** LOG2_WIDTH) * 2 - 1:0]dout
);
parameter WIDTH = 2 ** LOG2_WIDTH;
wire [(WIDTH ** 2) * 2 - 1:0]shift_dout;
parallel_shifter #(
.WIDTH(WIDTH)
) u_parallel_shifter (
.clk(clk), // Clock
.rst_n(rst_n), // Asynchronous reset active low
.mult_valid(din_valid),
.mult1(mult1),
.mult2(mult2),
.shift_dout(shift_dout)
);
wire [LOG2_WIDTH + 2 * WIDTH:0]adder_dout;
adder_tree #(
.LAYER_NUM(LOG2_WIDTH),
.MIN_ADDER_WIDTH(2 * WIDTH)
) u_adder_tree (
.clk(clk), // Clock
.rst_n(rst_n), // Asynchronous reset active low
.adder_din(shift_dout),
.adder_dout(adder_dout)
);
assign dout = adder_dout[WIDTH * 2 - 1:0];
endmodule
测试
测试平台使用sv语法完成,因该乘法器完成一次运算的时间固定因此无输出有效信号,找到固定延迟后与使用*
计算出的结果比较即可
module mult_tb (
);
parameter LOG2_WIDTH = 2;
parameter WIDTH = 2 ** LOG2_WIDTH;
logic clk,rst_n;
logic multiplier_valid;
logic [WIDTH - 1:0]multiplier1;
logic [WIDTH - 1:0]multiplier2;
logic [2 * WIDTH - 1:0]product;
shift_adder #(
.LOG2_WIDTH(LOG2_WIDTH)
) dut (
.clk(clk), // Clock
.rst_n(rst_n), // Asynchronous reset active low
.mult1(multiplier1),
.mult2(multiplier2),
.din_valid(multiplier_valid),
.dout(product)
);
initial begin
clk = 1'b0;
forever begin
#50 clk = ~clk;
end
end
initial begin
rst_n = 1'b1;
#5 rst_n = 1'b0;
#10 rst_n = 1'b1;
end
initial begin
{multiplier_valid,multiplier1,multiplier2} = 'b0;
repeat(100) begin
@(negedge clk);
multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH));
multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH));
multiplier_valid = 1'b1;
end
$stop();
end
reg [WIDTH - 1:0]mult11,mult12,mult13;
reg [WIDTH - 1:0]mult21,mult22,mult23;
reg [2 * WIDTH - 1:0]exp;
always @ (posedge clk or negedge rst_n) begin
if(~rst_n) begin
{mult11,mult12,mult13,mult21,mult22,mult23} <= 'b0;
end else begin
mult13 <= mult12;
mult12 <= mult11;
mult11 <= multiplier1;
mult23 <= mult22;
mult22 <= mult21;
mult21 <= multiplier2;
end
end
initial begin
exp = 'b0;
forever begin
@(negedge clk);
exp = mult13 * mult23;
if(exp == product) begin
$display("successful");
end else begin
$display("fail");
end
end
end
endmodule