I think this properly stalls for all implemented instructions so I don't need nops

This commit is contained in:
Brendan Haines 2020-11-07 00:47:20 -07:00
parent 1290418aa3
commit 82cbaba7e5
3 changed files with 147 additions and 41 deletions

View File

@ -90,6 +90,8 @@ reg r_ex_store, r_mem_store;
reg r_ex_load, r_mem_load;
reg [31:0] r_mem_wdata, r_wb_wdata;
reg r_id_valid=0, r_ex_valid=0, r_mem_valid=0, r_wb_valid=0;
reg r_ex_branch_pol;
reg r_ex_branch;
// IF
reg s_if_stall = 0;
@ -97,9 +99,9 @@ reg [31:0] s_if_next_pc;
reg [31:0] s_if_inst;
always @(*) begin
s_if_stall = s_id_stall || 0;
s_if_stall = s_id_stall;
if (r_ex_jump && r_ex_valid) begin
if (s_ex_take_branch && r_ex_valid) begin
s_if_next_pc = s_ex_alu_out;
// s_if_stall = 1'b1;
end else begin
@ -123,6 +125,7 @@ reg [3:0] s_id_aluop;
reg s_id_invalid;
reg s_id_jump, s_id_branch;
reg s_id_store, s_id_load;
reg s_id_branch_pol;
// RV32I / RV64I / RV32M
localparam OP_LUI = 7'b0110111,
@ -156,7 +159,6 @@ localparam ALUOP_ADD = 4'b0000,
always @(*) begin
s_id_stall = s_ex_stall || 0;
s_id_invalid = 0;
s_id_store = 0;
s_id_load = 0;
@ -174,38 +176,68 @@ always @(*) begin
s_id_immed_btype = {{19{r_id_inst[31]}}, r_id_inst[31], r_id_inst[7], r_id_inst[30:25], r_id_inst[11:8], 1'b0};
s_id_immed_jtype = {{11{r_id_inst[31]}}, r_id_inst[31], r_id_inst[19:12], r_id_inst[20], r_id_inst[30:21], 1'b0};
// default values
s_id_s1 = 32'hxxxxxxxx;
s_id_s2 = 32'hxxxxxxxx;
s_id_jump = 0;
s_id_branch = 0;
s_id_branch_pol = 1'bx;
case (s_id_opcode)
OP_LUI: begin // LUI
s_id_s1 = 32'h00000000;
s_id_s2 = s_id_immed_utype;
s_id_aluop = ALUOP_ADD;
s_id_jump = 0;
s_id_branch = 0;
end
OP_AUIPC: begin // AUIPC
s_id_s1 = r_id_pc;
s_id_s2 = s_id_immed_utype;
s_id_aluop = ALUOP_ADD;
s_id_jump = 0;
s_id_branch = 0;
end
OP_JAL: begin // JAL
s_id_s1 = r_id_pc;
s_id_s2 = s_id_immed_jtype;
s_id_aluop = ALUOP_ADD;
s_id_jump = 1;
s_id_branch = 0;
end
OP_JALR: begin // JALR
s_id_s1 = regfile[s_id_rs1];
s_id_s2 = s_id_immed_itype;
s_id_aluop = ALUOP_ADD;
s_id_jump = 1;
s_id_branch = 0;
end
// OP_BRANCH: begin
// end
OP_BRANCH: begin
s_id_s1 = regfile[s_id_rs1];
s_id_s2 = regfile[s_id_rs2];
s_id_branch = 1;
case (s_id_funct3)
3'b000: begin // BEQ
s_id_aluop = ALUOP_SUB;
s_id_branch_pol = 0;
end
3'b001: begin // BNE
s_id_aluop = ALUOP_SUB;
s_id_branch_pol = 1;
end
3'b100: begin // BLT
s_id_aluop = ALUOP_SLT;
s_id_branch_pol = 1;
end
3'b101: begin // BGE
s_id_aluop = ALUOP_SLT;
s_id_branch_pol = 0;
end
3'b110: begin // BLTU
s_id_aluop = ALUOP_SLTU;
s_id_branch_pol = 1;
end
3'b111: begin // BGEU
s_id_aluop = ALUOP_SLTU;
s_id_branch_pol = 0;
end
default: s_id_invalid = 1;
endcase
end
// OP_LOAD: begin
// end
@ -215,8 +247,6 @@ always @(*) begin
OP_IMM: begin
s_id_s1 = regfile[s_id_rs1];
s_id_s2 = s_id_immed_itype;
s_id_jump = 0;
s_id_branch = 0;
casex ({s_id_funct3, s_id_funct7})
10'b000xxxxxxx: s_id_aluop = ALUOP_ADD; // ADDI
10'b010xxxxxxx: s_id_aluop = ALUOP_SLT; // SLTI
@ -227,18 +257,12 @@ always @(*) begin
10'b001000000x: s_id_aluop = ALUOP_SL; // SLLI // NOTE: technically s_id_funct7[0] must be 0 however GCC allows shifts of up to 63b despite assembling for 32b. I can tolerate this deviation from ISA spec at essentially no cost
10'b101000000x: s_id_aluop = ALUOP_SRL; // SRLI // NOTE: technically s_id_funct7[0] must be 0 however GCC allows shifts of up to 63b despite assembling for 32b. I can tolerate this deviation from ISA spec at essentially no cost
10'b101010000x: s_id_aluop = ALUOP_SRA; // SRAI // NOTE: technically s_id_funct7[0] must be 0 however GCC allows shifts of up to 63b despite assembling for 32b. I can tolerate this deviation from ISA spec at essentially no cost
default: begin
s_id_s1 = 32'hxxxxxxxx;
s_id_s2 = 32'hxxxxxxxx;
s_id_invalid = 1;
end
default: s_id_invalid = 1;
endcase
end
OP_ALU: begin
s_id_s1 = regfile[s_id_rs1];
s_id_s2 = regfile[s_id_rs2];
s_id_jump = 0;
s_id_branch = 0;
case ({s_id_funct3, s_id_funct7})
10'b0000000000: s_id_aluop = ALUOP_ADD; // ADD
10'b0000100000: s_id_aluop = ALUOP_SUB; // SUB
@ -250,11 +274,7 @@ always @(*) begin
10'b1110000000: s_id_aluop = ALUOP_AND; // AND
10'b1010000000: s_id_aluop = ALUOP_SRL; // SRL
10'b1010100000: s_id_aluop = ALUOP_SRA; // SRA
default: begin
s_id_s1 = 32'hxxxxxxxx;
s_id_s2 = 32'hxxxxxxxx;
s_id_invalid = 1;
end
default: s_id_invalid = 1;
endcase
end
// OP_FENCE: begin
@ -264,14 +284,21 @@ always @(*) begin
// end
default: begin
s_id_jump = 0;
s_id_branch = 0;
s_id_s1 = 32'hxxxxxxxx;
s_id_s2 = 32'hxxxxxxxx;
s_id_invalid = 1;
end
endcase
s_id_stall = s_ex_stall ||
(r_ex_valid &&
(((r_ex_rd == s_id_rs1) && (s_id_rs1 != 0)) ||
((r_ex_rd == s_id_rs2) && (s_id_rs2 != 0)))) ||
(r_mem_valid &&
(((r_mem_rd == s_id_rs1) && (s_id_rs1 != 0)) ||
((r_mem_rd == s_id_rs2) && (s_id_rs2 != 0)))) ||
(r_wb_valid &&
(((r_wb_rd == s_id_rs1) && (s_id_rs1 != 0)) ||
((r_wb_rd == s_id_rs2) && (s_id_rs2 != 0))));
if (s_id_invalid) begin
$display("%0t:\tInvalid instruction at PC=0x%h", $time, r_id_pc);
s_id_aluop = 3'hx;
@ -279,15 +306,16 @@ always @(*) begin
end
// EX
reg s_ex_stall;
reg s_ex_stall = 0;
reg [31:0] s_ex_data1, s_ex_data2;
reg [31:0] s_ex_alu_out;
reg s_ex_alu_zero;
reg [31:0] s_ex_ra;
reg [31:0] s_ex_wdata;
reg s_ex_take_branch;
always @(*) begin
s_ex_stall = s_mem_stall || 0;
s_ex_stall = s_mem_stall;
// NOTE: s_ex_data* exist for adding data paths bypassing regfile in the future
s_ex_data1 = r_ex_s1;
@ -331,6 +359,9 @@ always @(*) begin
s_ex_alu_zero = (s_ex_alu_out == 0);
s_ex_ra = r_ex_pc + 4;
// s_ex_branch_addr = r_ex_pc +
// TODO: determine and go to branch address (pc+offset)
s_ex_take_branch = r_ex_jump || (r_ex_branch && (s_ex_alu_zero ^ r_ex_branch_pol));
if (r_ex_jump) begin
s_ex_wdata = s_ex_ra;
end else begin
@ -339,11 +370,11 @@ always @(*) begin
end
// MEM
reg s_mem_stall;
reg s_mem_stall = 0;
reg s_mem_bp;
always @(*) begin
s_mem_stall = s_wb_stall || 0;
s_mem_stall = 0; // TODO: add stall logic when actually reading/writing
s_mem_bp = 0;
// if (r_mem_store) begin
@ -362,12 +393,10 @@ always @(*) begin
end
// WB
reg s_wb_stall;
reg [31:0] s_wb_data;
reg s_wb_write;
always @(*) begin
s_wb_stall = 1'b0;
// load instructions do not use output of alu in wb
s_wb_data = r_wb_wdata;
@ -428,12 +457,11 @@ always @(posedge clk) begin: pipeline_update
if (!s_id_stall) begin
r_id_pc <= r_if_pc;
r_id_inst <= s_if_inst;
r_id_valid <= ~(r_ex_jump && r_ex_valid);
r_id_valid <= ~(s_ex_take_branch && r_ex_valid);
end
// EX
if (!s_ex_stall) begin
// TODO: also stall EX if taking branch
r_ex_pc <= r_id_pc;
r_ex_inst <= r_id_inst;
r_ex_rd <= s_id_rd;
@ -441,9 +469,11 @@ always @(posedge clk) begin: pipeline_update
r_ex_s2 <= s_id_s2;
r_ex_aluop <= s_id_aluop;
r_ex_jump <= s_id_jump;
r_ex_branch <= s_id_branch;
r_ex_store <= s_id_store;
r_ex_load <= s_id_load;
r_ex_valid <= r_id_valid && ~(r_ex_jump && r_ex_valid);
r_ex_valid <= r_id_valid && ~(s_ex_take_branch && r_ex_valid) && ~(s_id_stall && r_id_valid);
r_ex_branch_pol <= s_id_branch_pol;
end
@ -463,7 +493,7 @@ always @(posedge clk) begin: pipeline_update
end
// WB
if (!s_wb_stall) begin
if (1) begin
r_wb_pc <= r_mem_pc;
r_wb_inst <= r_mem_inst;
r_wb_rd <= r_mem_rd;

View File

@ -273,6 +273,14 @@
<obj_property name="ElementShortName">s_id_jump</obj_property>
<obj_property name="ObjectShortName">s_id_jump</obj_property>
</wvobject>
<wvobject fp_name="/core_tb/dut/s_id_branch" type="logic" db_ref_id="1">
<obj_property name="ElementShortName">s_id_branch</obj_property>
<obj_property name="ObjectShortName">s_id_branch</obj_property>
</wvobject>
<wvobject fp_name="/core_tb/dut/s_id_branch_pol" type="logic" db_ref_id="1">
<obj_property name="ElementShortName">s_id_branch_pol</obj_property>
<obj_property name="ObjectShortName">s_id_branch_pol</obj_property>
</wvobject>
<wvobject fp_name="/core_tb/dut/s_id_invalid" type="logic" db_ref_id="1">
<obj_property name="ElementShortName">s_id_invalid</obj_property>
<obj_property name="ObjectShortName">s_id_invalid</obj_property>
@ -341,6 +349,18 @@
<obj_property name="ElementShortName">r_ex_jump</obj_property>
<obj_property name="ObjectShortName">r_ex_jump</obj_property>
</wvobject>
<wvobject fp_name="/core_tb/dut/r_ex_branch" type="logic" db_ref_id="1">
<obj_property name="ElementShortName">r_ex_branch</obj_property>
<obj_property name="ObjectShortName">r_ex_branch</obj_property>
</wvobject>
<wvobject fp_name="/core_tb/dut/r_ex_branch_pol" type="logic" db_ref_id="1">
<obj_property name="ElementShortName">r_ex_branch_pol</obj_property>
<obj_property name="ObjectShortName">r_ex_branch_pol</obj_property>
</wvobject>
<wvobject fp_name="/core_tb/dut/s_ex_take_branch" type="logic" db_ref_id="1">
<obj_property name="ElementShortName">s_ex_take_branch</obj_property>
<obj_property name="ObjectShortName">s_ex_take_branch</obj_property>
</wvobject>
</wvobject>
<wvobject fp_name="group7" type="group">
<obj_property name="label">MEM</obj_property>

View File

@ -154,7 +154,7 @@ _start:
nop
sub x6, x0, x3 # x6 = 0xffffffff
sub x7, x0, x4 # x7 = 0xfffffff0
sub x8, x0, x5 # x7 = 0xffffff00
sub x8, x0, x5 # x8 = 0xffffff00
nop
slti x9, x3, 0 # x9 = 0x00000000
slti x10, x3, 1 # x10 = 0x00000000
@ -206,6 +206,45 @@ _start:
nop
nop
# jal
jal x1, test_jalr
nop
nop
nop
# can I remove nops now?
addi x6, x0, 0 # x6 = 0
nop
nop
nop
addi x6, x0, 1 # x6 = 1
addi x7, x6, 1 # x7 = 2
addi x8, x6, 1 # x8 = 2
addi x9, x6, 1 # x9 = 2
test1:
# bne: the first two branches compare equal values and must fall through;
# only the last one compares different values and must be taken.
# On entry the preceding no-nop test has left x7 = x8 = x9 = 2, so x7 is
# reloaded here; without that, x7 == x8 and the last bne could never be taken.
addi x30, x0, 1 # x30 = 1 (presumably a marker for the running test number)
addi x7, x0, 1 # x7 = 1, forced to differ from x8 (= 2)
addi x9, x8, 0 # x9 == x8
nop
nop
nop
bne x0, x0, fail # 0 == 0, must not branch
bne x9, x8, fail # x9 == x8, must not branch
bne x7, x8, test2 # x7 != x8, must branch
j fail # only reached if the bne above was wrongly not taken
test2:
# beq: the first two branches compare different values and must fall through;
# only the last one compares equal values and must be taken.
# Requires x8 != 0, x7 != x8 and x8 == x9 to hold on entry; none of these
# registers is written in this block, so the values come from earlier code.
addi x30, x0, 2 # x30 = 2
beq x0, x8, fail # 0 != x8, must not branch
beq x7, x8, fail # x7 != x8, must not branch
beq x8, x9, test3 # x8 == x9, must branch
j fail # only reached if the beq above was wrongly not taken
test3:
# counter and infinite loop
nop
@ -234,4 +273,21 @@ loop:
nop
nop
# jalr
test_jalr:
# Target of the `jal x1, test_jalr` in _start: writes a known value to x2 and
# returns through the link register x1 without writing a new link (rd = x0).
addi x2, x0, 0x12 # x2 = 0x12
jalr x0, x1, 0 # return
fail:
# Failure trap: the branch tests jump here when a branch behaves incorrectly.
# Execution stays in this loop, since the jal below always jumps back to fail.
nop
nop
nop
jal x0, fail # loop forever
# NOTE(review): the nops below are never reached architecturally; presumably
# padding so instructions fetched past the jal are harmless - confirm.
nop
nop
nop
nop
nop
nop
.data