From 82cbaba7e5c6fe2a62a4903b8b1a7b673c5b55b2 Mon Sep 17 00:00:00 2001 From: Brendan Haines Date: Sat, 7 Nov 2020 00:47:20 -0700 Subject: [PATCH] I think this properly stalls for all implemented instructions so I don't need nops --- hdl/core.v | 110 ++++++++++++++++++++++++++++++----------------- sim/core_tb.wcfg | 20 +++++++++ test/test.S | 58 ++++++++++++++++++++++++- 3 files changed, 147 insertions(+), 41 deletions(-) diff --git a/hdl/core.v b/hdl/core.v index 7ffaf0d..0985e09 100644 --- a/hdl/core.v +++ b/hdl/core.v @@ -90,6 +90,8 @@ reg r_ex_store, r_mem_store; reg r_ex_load, r_mem_load; reg [31:0] r_mem_wdata, r_wb_wdata; reg r_id_valid=0, r_ex_valid=0, r_mem_valid=0, r_wb_valid=0; +reg r_ex_branch_pol; +reg r_ex_branch; // IF reg s_if_stall = 0; @@ -97,9 +99,9 @@ reg [31:0] s_if_next_pc; reg [31:0] s_if_inst; always @(*) begin - s_if_stall = s_id_stall || 0; + s_if_stall = s_id_stall; - if (r_ex_jump && r_ex_valid) begin + if (s_ex_take_branch && r_ex_valid) begin s_if_next_pc = s_ex_alu_out; // s_if_stall = 1'b1; end else begin @@ -123,6 +125,7 @@ reg [3:0] s_id_aluop; reg s_id_invalid; reg s_id_jump, s_id_branch; reg s_id_store, s_id_load; +reg s_id_branch_pol; // RV32I / RV64I / RV32M localparam OP_LUI = 7'b0110111, @@ -156,7 +159,6 @@ localparam ALUOP_ADD = 4'b0000, always @(*) begin - s_id_stall = s_ex_stall || 0; s_id_invalid = 0; s_id_store = 0; s_id_load = 0; @@ -174,38 +176,68 @@ always @(*) begin s_id_immed_btype = {{19{r_id_inst[31]}}, r_id_inst[31], r_id_inst[7], r_id_inst[30:25], r_id_inst[11:8], 1'b0}; s_id_immed_jtype = {{11{r_id_inst[31]}}, r_id_inst[31], r_id_inst[19:12], r_id_inst[20], r_id_inst[30:21], 1'b0}; + // default values + s_id_s1 = 32'hxxxxxxxx; + s_id_s2 = 32'hxxxxxxxx; + s_id_jump = 0; + s_id_branch = 0; + s_id_branch_pol = 1'bx; + case (s_id_opcode) OP_LUI: begin // LUI s_id_s1 = 32'h00000000; s_id_s2 = s_id_immed_utype; s_id_aluop = ALUOP_ADD; - s_id_jump = 0; - s_id_branch = 0; end OP_AUIPC: begin // AUIPC s_id_s1 = r_id_pc; 
s_id_s2 = s_id_immed_utype; s_id_aluop = ALUOP_ADD; - s_id_jump = 0; - s_id_branch = 0; end OP_JAL: begin // JAL s_id_s1 = r_id_pc; s_id_s2 = s_id_immed_jtype; s_id_aluop = ALUOP_ADD; s_id_jump = 1; - s_id_branch = 0; end OP_JALR: begin // JALR s_id_s1 = regfile[s_id_rs1]; s_id_s2 = s_id_immed_itype; s_id_aluop = ALUOP_ADD; s_id_jump = 1; - s_id_branch = 0; end - // OP_BRANCH: begin - - // end + OP_BRANCH: begin + s_id_s1 = regfile[s_id_rs1]; + s_id_s2 = regfile[s_id_rs2]; + s_id_branch = 1; + case (s_id_funct3) + 3'b000: begin // BEQ + s_id_aluop = ALUOP_SUB; + s_id_branch_pol = 0; + end + 3'b001: begin // BNE + s_id_aluop = ALUOP_SUB; + s_id_branch_pol = 1; + end + 3'b100: begin // BLT + s_id_aluop = ALUOP_SLT; + s_id_branch_pol = 1; + end + 3'b101: begin // BGE + s_id_aluop = ALUOP_SLT; + s_id_branch_pol = 0; + end + 3'b110: begin // BLTU + s_id_aluop = ALUOP_SLTU; + s_id_branch_pol = 1; + end + 3'b111: begin // BGEU + s_id_aluop = ALUOP_SLTU; + s_id_branch_pol = 0; + end + default: s_id_invalid = 1; + endcase + end // OP_LOAD: begin // end @@ -215,8 +247,6 @@ always @(*) begin OP_IMM: begin s_id_s1 = regfile[s_id_rs1]; s_id_s2 = s_id_immed_itype; - s_id_jump = 0; - s_id_branch = 0; casex ({s_id_funct3, s_id_funct7}) 10'b000xxxxxxx: s_id_aluop = ALUOP_ADD; // ADDI 10'b010xxxxxxx: s_id_aluop = ALUOP_SLT; // SLTI @@ -227,18 +257,12 @@ always @(*) begin 10'b001000000x: s_id_aluop = ALUOP_SL; // SLLI // NOTE: technically s_id_funct7[0] must be 0 however GCC allows shifts of up to 63b despite assembling for 32b. I can tolerate this deviation from ISA spec at essentially no cost 10'b101000000x: s_id_aluop = ALUOP_SRL; // SRLI // NOTE: technically s_id_funct7[0] must be 0 however GCC allows shifts of up to 63b despite assembling for 32b. I can tolerate this deviation from ISA spec at essentially no cost 10'b101010000x: s_id_aluop = ALUOP_SRA; // SRAI // NOTE: technically s_id_funct7[0] must be 0 however GCC allows shifts of up to 63b despite assembling for 32b. 
I can tolerate this deviation from ISA spec at essentially no cost - default: begin - s_id_s1 = 32'hxxxxxxxx; - s_id_s2 = 32'hxxxxxxxx; - s_id_invalid = 1; - end + default: s_id_invalid = 1; endcase end OP_ALU: begin s_id_s1 = regfile[s_id_rs1]; s_id_s2 = regfile[s_id_rs2]; - s_id_jump = 0; - s_id_branch = 0; case ({s_id_funct3, s_id_funct7}) 10'b0000000000: s_id_aluop = ALUOP_ADD; // ADD 10'b0000100000: s_id_aluop = ALUOP_SUB; // SUB @@ -250,11 +274,7 @@ always @(*) begin 10'b1110000000: s_id_aluop = ALUOP_AND; // AND 10'b1010000000: s_id_aluop = ALUOP_SRL; // SRL 10'b1010100000: s_id_aluop = ALUOP_SRA; // SRA - default: begin - s_id_s1 = 32'hxxxxxxxx; - s_id_s2 = 32'hxxxxxxxx; - s_id_invalid = 1; - end + default: s_id_invalid = 1; endcase end // OP_FENCE: begin @@ -264,14 +284,21 @@ always @(*) begin // end default: begin - s_id_jump = 0; - s_id_branch = 0; - s_id_s1 = 32'hxxxxxxxx; - s_id_s2 = 32'hxxxxxxxx; s_id_invalid = 1; end endcase + s_id_stall = s_ex_stall || + (r_ex_valid && + (((r_ex_rd == s_id_rs1) && (s_id_rs1 != 0)) || + ((r_ex_rd == s_id_rs2) && (s_id_rs2 != 0)))) || + (r_mem_valid && + (((r_mem_rd == s_id_rs1) && (s_id_rs1 != 0)) || + ((r_mem_rd == s_id_rs2) && (s_id_rs2 != 0)))) || + (r_wb_valid && + (((r_wb_rd == s_id_rs1) && (s_id_rs1 != 0)) || + ((r_wb_rd == s_id_rs2) && (s_id_rs2 != 0)))); + if (s_id_invalid) begin $display("%0t:\tInvalid instruction at PC=0x%h", $time, r_id_pc); s_id_aluop = 3'hx; @@ -279,15 +306,16 @@ always @(*) begin end // EX -reg s_ex_stall; +reg s_ex_stall = 0; reg [31:0] s_ex_data1, s_ex_data2; reg [31:0] s_ex_alu_out; reg s_ex_alu_zero; reg [31:0] s_ex_ra; reg [31:0] s_ex_wdata; +reg s_ex_take_branch; always @(*) begin - s_ex_stall = s_mem_stall || 0; + s_ex_stall = s_mem_stall; // NOTE: s_ex_data* exist for adding data paths bypassing regfile in the future s_ex_data1 = r_ex_s1; @@ -331,6 +359,9 @@ always @(*) begin s_ex_alu_zero = (s_ex_alu_out == 0); s_ex_ra = r_ex_pc + 4; + // s_ex_branch_addr = r_ex_pc + + // TODO: 
determine and go to branch address (pc+offset) + s_ex_take_branch = r_ex_jump || (r_ex_branch && (s_ex_alu_zero ^ r_ex_branch_pol)); if (r_ex_jump) begin s_ex_wdata = s_ex_ra; end else begin @@ -339,11 +370,11 @@ always @(*) begin end // MEM -reg s_mem_stall; +reg s_mem_stall = 0; reg s_mem_bp; always @(*) begin - s_mem_stall = s_wb_stall || 0; + s_mem_stall = 0; // TODO: add stall logic when actually reading/writing s_mem_bp = 0; // if (r_mem_store) begin @@ -362,12 +393,10 @@ always @(*) begin end // WB -reg s_wb_stall; reg [31:0] s_wb_data; reg s_wb_write; always @(*) begin - s_wb_stall = 1'b0; // load instructions do not use output of alu in wb s_wb_data = r_wb_wdata; @@ -428,12 +457,11 @@ always @(posedge clk) begin: pipeline_update if (!s_id_stall) begin r_id_pc <= r_if_pc; r_id_inst <= s_if_inst; - r_id_valid <= ~(r_ex_jump && r_ex_valid); + r_id_valid <= ~(s_ex_take_branch && r_ex_valid); end // EX if (!s_ex_stall) begin - // TODO: also stall EX if taking branch r_ex_pc <= r_id_pc; r_ex_inst <= r_id_inst; r_ex_rd <= s_id_rd; @@ -441,9 +469,11 @@ always @(posedge clk) begin: pipeline_update r_ex_s2 <= s_id_s2; r_ex_aluop <= s_id_aluop; r_ex_jump <= s_id_jump; + r_ex_branch <= s_id_branch; r_ex_store <= s_id_store; r_ex_load <= s_id_load; - r_ex_valid <= r_id_valid && ~(r_ex_jump && r_ex_valid); + r_ex_valid <= r_id_valid && ~(s_ex_take_branch && r_ex_valid) && ~(s_id_stall && r_id_valid); + r_ex_branch_pol <= s_id_branch_pol; end @@ -463,7 +493,7 @@ always @(posedge clk) begin: pipeline_update end // WB - if (!s_wb_stall) begin + if (1) begin r_wb_pc <= r_mem_pc; r_wb_inst <= r_mem_inst; r_wb_rd <= r_mem_rd; diff --git a/sim/core_tb.wcfg b/sim/core_tb.wcfg index 8fe6078..3c93a60 100644 --- a/sim/core_tb.wcfg +++ b/sim/core_tb.wcfg @@ -273,6 +273,14 @@ s_id_jump s_id_jump + + s_id_branch + s_id_branch + + + s_id_branch_pol + s_id_branch_pol + s_id_invalid s_id_invalid @@ -341,6 +349,18 @@ r_ex_jump r_ex_jump + + r_ex_branch + r_ex_branch + + + r_ex_branch_pol 
+ r_ex_branch_pol + + + s_ex_take_branch + s_ex_take_branch + MEM diff --git a/test/test.S b/test/test.S index cc4075d..b528b64 100644 --- a/test/test.S +++ b/test/test.S @@ -154,7 +154,7 @@ _start: nop sub x6, x0, x3 # x6 = 0xffffffff sub x7, x0, x4 # x7 = 0xfffffff0 - sub x8, x0, x5 # x7 = 0xffffff00 + sub x8, x0, x5 # x8 = 0xffffff00 nop slti x9, x3, 0 # x9 = 0x00000000 slti x10, x3, 1 # x10 = 0x00000000 @@ -206,6 +206,45 @@ _start: nop nop + # jal + jal x1, test_jalr + nop + nop + nop + + # can I remove nops now? + addi x6, x0, 0 # x6 = 0 + nop + nop + nop + addi x6, x0, 1 # x6 = 1 + addi x7, x6, 1 # x7 = 2 + addi x8, x6, 1 # x8 = 2 + addi x9, x6, 1 # x9 = 2 + +test1: + # TODO: redo these tests because my nop test messed it up + # bne + addi x30, x0, 1 # x30 = 1 + addi x9, x8, 0 # x9 == x8 + nop + nop + nop + bne x0, x0, fail # 0 == 0 + bne x9, x8, fail # x9 == x8 + bne x7, x8, test2 # x7 != x8 + j fail + +test2: + # beq + addi x30, x0, 2 # x30 = 2 + beq x0, x8, fail # 0 != x8 + beq x7, x8, fail # x7 != x8 + beq x8, x9, test3 # x8 == x9 + j fail + +test3: + # counter and infinite loop nop @@ -234,4 +273,21 @@ loop: nop nop + # jalr +test_jalr: + addi x2, x0, 0x12 # x2 = 0x12 + jalr x0, x1, 0 # return + +fail: + nop + nop + nop + jal x0, fail # loop forever + nop + nop + nop + nop + nop + nop + .data