RISC-V MCU中文社区

【分享】 修改蜂鸟E203 SoC乘法和除法指令实现单元的方法

发表于 开源蜂鸟E203 2023-05-26 22:10:01
0
1342
3

报名编号:CICC1014    团队名称:华芯极客

    原蜂鸟E203 SoC中,乘法指令采用基4 Booth编码的循环迭代方式实现,完成一次乘法运算需要17个时钟周期;除法指令采用不恢复余数法实现,完成一次除法运算需要36个时钟周期。如果能够修改乘法和除法指令的实现方式、减少运算所需的时钟周期,将会明显提高跑分程序的结果。本文将对乘除法指令实现单元的修改方法进行分享。

    e203_exu_alu_muldiv.v文件中的e203_exu_alu_muldiv模块包含了乘法和除法指令的实现过程,模块的输入和输出接口均采用valid/ready握手信号控制。原文件中的乘除法运算过程复用了ALU中的加减法器和共享缓冲寄存器(Shared-Buffer);如果不进行复用而采用独立的运算单元,整个运算单元的实现并不复杂。下面给出本文的参考实现:

`include "e203_defines.v"

// Multiply/divide unit that uses its own arithmetic instead of sharing the
// ALU adder and the ALU shared buffers.
//
// Normal (non back-to-back, non special-case) operations walk the state
// machine IDLE -> MUL/DIV -> DONE: the operation is accepted in IDLE, the
// result is registered in MUL/DIV once the arithmetic reports done, and it is
// offered on the write-back interface while in DONE.
// Back-to-back operations and divide special cases (divide-by-zero, signed
// overflow) bypass the state machine and are answered combinationally.
//
// The `*`, `/` and `%` operators below are a functional reference for
// simulation; replace them with a real multiplier/divider implementation
// and drive mul32_done/div32_done accordingly.
module e203_exu_alu_muldiv(
  input  mdv_nob2b,
  // The Issue Handshake Interface to MULDIV
  input  muldiv_i_valid, // Handshake valid
  output muldiv_i_ready, // Handshake ready

  input  [`E203_XLEN-1:0] muldiv_i_rs1,
  input  [`E203_XLEN-1:0] muldiv_i_rs2,
  input  [`E203_XLEN-1:0] muldiv_i_imm,
  input  [`E203_DECINFO_MULDIV_WIDTH-1:0] muldiv_i_info,
  input  [`E203_ITAG_WIDTH-1:0] muldiv_i_itag,
  output muldiv_i_longpipe,
  input  flush_pulse,
  // The MULDIV Write-Back/Commit Interface
  output muldiv_o_valid, // Handshake valid
  input  muldiv_o_ready, // Handshake ready
  output [`E203_XLEN-1:0] muldiv_o_wbck_wdat,
  output muldiv_o_wbck_err,
  /* When the ALU adder and shared buffers are not reused, these ports can be
     commented out; e203_exu_alu.v then needs a matching small change.
  // The operands and info to ALU
  output [`E203_MULDIV_ADDER_WIDTH-1:0] muldiv_req_alu_op1,
  output [`E203_MULDIV_ADDER_WIDTH-1:0] muldiv_req_alu_op2,
  output                                muldiv_req_alu_add ,
  output                                muldiv_req_alu_sub ,
  input  [`E203_MULDIV_ADDER_WIDTH-1:0] muldiv_req_alu_res,
  // The Shared-Buffer interface to ALU-Shared-Buffer
  output          muldiv_sbf_0_ena,
  output [33-1:0] muldiv_sbf_0_nxt,
  input  [33-1:0] muldiv_sbf_0_r,
  output          muldiv_sbf_1_ena,
  output [33-1:0] muldiv_sbf_1_nxt,
  input  [33-1:0] muldiv_sbf_1_r,
  */
  input  clk,
  input  rst_n
  );
  wire muldiv_i_hsked = muldiv_i_valid & muldiv_i_ready;
  wire muldiv_o_hsked = muldiv_o_valid & muldiv_o_ready;

  // Pipeline-flush tracking: set by a flush, cleared by the next write-back
  // handshake that does not coincide with another flush.
  wire flushed_r;
  wire flushed_set = flush_pulse;
  wire flushed_clr = muldiv_o_hsked & (~flush_pulse);
  wire flushed_ena = flushed_set | flushed_clr;
  wire flushed_nxt = flushed_set | (~flushed_clr);
  sirv_gnrl_dfflr #(1) flushed_dfflr (flushed_ena, flushed_nxt, flushed_r, clk, rst_n);

  // Decoded operation selects
  wire i_mul    = muldiv_i_info[`E203_DECINFO_MULDIV_MUL   ];
  wire i_mulh   = muldiv_i_info[`E203_DECINFO_MULDIV_MULH  ];
  wire i_mulhsu = muldiv_i_info[`E203_DECINFO_MULDIV_MULHSU];
  wire i_mulhu  = muldiv_i_info[`E203_DECINFO_MULDIV_MULHU ];
  wire i_div    = muldiv_i_info[`E203_DECINFO_MULDIV_DIV   ];
  wire i_divu   = muldiv_i_info[`E203_DECINFO_MULDIV_DIVU  ];
  wire i_rem    = muldiv_i_info[`E203_DECINFO_MULDIV_REM   ];
  wire i_remu   = muldiv_i_info[`E203_DECINFO_MULDIV_REMU  ];
  // If it is flushed then it is not back2back real case
  wire i_b2b    = muldiv_i_info[`E203_DECINFO_MULDIV_B2B   ] & (~flushed_r) & (~mdv_nob2b);
  wire back2back_seq = i_b2b;

  wire i_op_mul = i_mul | i_mulh | i_mulhsu | i_mulhu;
  wire i_op_div = i_div | i_divu | i_rem    | i_remu;

  // Multiplier operands: sign-magnitude form. rs1 is unsigned only for MULHU,
  // rs2 is unsigned for MULHSU and MULHU.
  wire mul_rs1_sign = (i_mulhu)            ? 1'b0 : muldiv_i_rs1[`E203_XLEN-1];
  wire mul_rs2_sign = (i_mulhsu | i_mulhu) ? 1'b0 : muldiv_i_rs2[`E203_XLEN-1];
  wire [31:0] mul_op1 = mul_rs1_sign ? (~muldiv_i_rs1 + 1'b1) : muldiv_i_rs1;
  wire [31:0] mul_op2 = mul_rs2_sign ? (~muldiv_i_rs2 + 1'b1) : muldiv_i_rs2;
  wire mul_res_sign = mul_rs1_sign ^ mul_rs2_sign;

  // Divider operands: sign-magnitude form. Both operands are unsigned for
  // DIVU/REMU. These must NOT reuse the multiplier sign selects, otherwise
  // DIVU/REMU operands with bit 31 set would be negated.
  wire div_rs1_sign = (i_divu | i_remu) ? 1'b0 : muldiv_i_rs1[`E203_XLEN-1];
  wire div_rs2_sign = (i_divu | i_remu) ? 1'b0 : muldiv_i_rs2[`E203_XLEN-1];
  wire [31:0] div_op1 = div_rs1_sign ? (~muldiv_i_rs1 + 1'b1) : muldiv_i_rs1;
  wire [31:0] div_op2 = div_rs2_sign ? (~muldiv_i_rs2 + 1'b1) : muldiv_i_rs2;
  // Quotient is negative when the operand signs differ; the remainder always
  // takes the sign of the dividend.
  wire div_quot_sign = div_rs1_sign ^ div_rs2_sign;
  wire div_remd_sign = div_rs1_sign;

  ///////////////////////////// Finite state machine /////////////////////////////
  localparam IDLE = 0;
  localparam MUL  = 1;
  localparam DIV  = 2;
  localparam DONE = 3;

  reg [1:0] state_r;
  wire special_cases, mul32_done, div32_done;
  // A new operation starts only from IDLE, and only when it is neither a
  // back-to-back sequence nor a special case nor being flushed.
  wire op_start = (state_r==IDLE) & muldiv_i_valid & (~back2back_seq) & (~special_cases) & (~flush_pulse);
  always @(posedge clk, negedge rst_n) begin
    if(~rst_n) state_r <= IDLE;
    else begin
      case (state_r)
      IDLE: begin
        if(op_start) begin
          if(i_op_mul) state_r <= MUL;
          if(i_op_div) state_r <= DIV;
        end
      end
      MUL: begin
        if(flush_pulse)     state_r <= IDLE;
        else if(mul32_done) state_r <= DONE;
      end
      DIV: begin
        if(flush_pulse)     state_r <= IDLE;
        else if(div32_done) state_r <= DONE;
      end
      DONE: begin
        // Leave once the result has been written back, or on a flush.
        if(muldiv_o_hsked | flush_pulse) state_r <= IDLE;
      end
      default: state_r <= IDLE;
      endcase
    end
  end

  ////////////////////////////////// Multiplier //////////////////////////////////
  wire [63:0] ref_mul_res;
  wire mul32_start = op_start & i_op_mul;  // needed by a multi-cycle implementation
  // Direct multiplication as a reference; replace with a real implementation
  // (shift-add, array, Booth, Wallace, ...).
  assign ref_mul_res = mul_op1 * mul_op2;
  assign mul32_done  = 1'b1;
  // Convert the unsigned magnitude product back to two's complement.
  wire [63:0] mul_res_true = mul_res_sign ? (~ref_mul_res + 1'b1) : ref_mul_res;
  reg [63:0] mul_res_r;
  always @(posedge clk, negedge rst_n) begin
    if(~rst_n) mul_res_r <= 64'd0;
    else if((state_r==MUL) & mul32_done) mul_res_r <= mul_res_true;
  end
  // MUL returns the low word, the MULH* variants return the high word.
  wire[`E203_XLEN-1:0] mul_res = i_mul ? mul_res_r[31:0] : mul_res_r[63:32];

  /////////////////////////////////// Divider ////////////////////////////////////
  // Divide-by-zero and signed-overflow detection.
  wire div_by_0 = ~(|muldiv_i_rs2);// Divisor is all zeros
  wire div_ovf  = (i_div | i_rem) & (&muldiv_i_rs2)  // Divisor is all ones, means -1
                        //Dividend is 10000...000, means -2^(xlen-1)
                & muldiv_i_rs1[`E203_XLEN-1] & (~(|muldiv_i_rs1[`E203_XLEN-2:0]));

  // Divide-by-zero: quotient is all ones, remainder is the dividend itself.
  wire[`E203_XLEN-1:0] div_by_0_res_quot = ~`E203_XLEN'b0;
  wire[`E203_XLEN-1:0] div_by_0_res_remd = muldiv_i_rs1;
  wire[`E203_XLEN-1:0] div_by_0_res = (i_div | i_divu) ? div_by_0_res_quot : div_by_0_res_remd;

  // Signed overflow: quotient is the dividend (100...0), remainder is zero.
  wire[`E203_XLEN-1:0] div_ovf_res_quot  = {1'b1,{`E203_XLEN-1{1'b0}}};
  wire[`E203_XLEN-1:0] div_ovf_res_remd  = `E203_XLEN'b0;
  wire[`E203_XLEN-1:0] div_ovf_res = (i_div | i_divu) ? div_ovf_res_quot : div_ovf_res_remd;

  wire div_special_cases = i_op_div & (div_by_0 | div_ovf);
  wire [`E203_XLEN-1:0] div_special_res = div_by_0 ? div_by_0_res : div_ovf_res;

  wire [31:0] quotient, reminder;
  wire div32_start = op_start & i_op_div;  // needed by a multi-cycle implementation
  // Direct divide/modulo as a reference for simulation only; replace with a
  // real implementation (restoring, non-restoring, SRT, Newton-Raphson,
  // Goldschmidt, ...).
  assign quotient   = div_op1 / div_op2;
  assign reminder   = div_op1 % div_op2;
  assign div32_done = 1'b1;
  // Convert the unsigned magnitudes back to two's complement.
  wire [31:0] quotient_true = div_quot_sign ? (~quotient + 1'b1) : quotient;
  wire [31:0] reminder_true = div_remd_sign ? (~reminder + 1'b1) : reminder;
  // Quotient in the high word, remainder in the low word.
  reg [63:0] div_res_r;
  always @(posedge clk, negedge rst_n) begin
    if(~rst_n) div_res_r <= 64'd0;
    else if((state_r==DIV) & div32_done) div_res_r <= {quotient_true, reminder_true};
  end
  wire[`E203_XLEN-1:0] div_res = (i_div | i_divu) ? div_res_r[63:32] : div_res_r[31:0];

  ///////////////////////////// Output generation /////////////////////////////////
  assign special_cases = div_special_cases;
  wire[`E203_XLEN-1:0] special_res = div_special_res;
  // Back-to-back results are served from the registers written by the
  // preceding operation.
  wire [`E203_XLEN-1:0] back2back_mul_res = mul_res_r[31:0];
  wire [`E203_XLEN-1:0] back2back_mul_rem = div_res_r[31:0];
  wire [`E203_XLEN-1:0] back2back_mul_div = div_res_r[63:32];
  wire [`E203_XLEN-1:0] back2back_res = (
             ({`E203_XLEN{i_mul         }} & back2back_mul_res)
           | ({`E203_XLEN{i_rem | i_remu}} & back2back_mul_rem)
           | ({`E203_XLEN{i_div | i_divu}} & back2back_mul_div)
     );
  // Back-to-back and special cases can write back immediately; everything
  // else waits for the DONE state.
  wire wbck_condi = (back2back_seq | special_cases) ? 1'b1 : (state_r==DONE);
  assign muldiv_o_valid = wbck_condi & muldiv_i_valid;
  assign muldiv_i_ready = wbck_condi & muldiv_o_ready;
  // Result selects; a special case takes priority over back-to-back.
  wire res_sel_spl = special_cases;
  wire res_sel_b2b = back2back_seq & (~special_cases);
  wire res_sel_div = (~back2back_seq) & (~special_cases) & i_op_div;
  wire res_sel_mul = (~back2back_seq) & (~special_cases) & i_op_mul;
  assign muldiv_o_wbck_wdat =
               ({`E203_XLEN{res_sel_b2b}} & back2back_res)
             | ({`E203_XLEN{res_sel_spl}} & special_res)
             | ({`E203_XLEN{res_sel_div}} & div_res)
             | ({`E203_XLEN{res_sel_mul}} & mul_res);
  assign muldiv_o_wbck_err = 1'b0;
  assign muldiv_i_longpipe = 1'b0;

endmodule


喜欢3
用户评论
wwww

wwww 实名认证

懒的都不写签名

积分
问答
粉丝
关注
  • RV-STAR 开发板
  • RISC-V处理器设计系列课程
  • 培养RISC-V大学土壤 共建RISC-V教育生态
RV-STAR 开发板