// core: in-order pipelined RISC-V integer core with five stages
// (IF -> ID -> EX -> MEM -> WB).
//
// Data hazards are resolved purely by interlock: an instruction waits in ID
// until no valid instruction in EX, MEM or WB still has to write one of its
// source registers (there are no forwarding paths). Branches and jumps are
// resolved in EX; a taken branch redirects the PC and squashes the two
// younger instructions that are in IF and ID at that time.
//
// Limitations visible in this file:
//   * loads and stores are always full 32-bit words (funct3 is ignored),
//   * FENCE and SYSTEM opcodes are decoded as invalid instructions,
//   * an invalid instruction is only reported through $display.
//
// Ports:
//   clk, reset        - clock and synchronous, active-high reset
//   mem_inst_addr     - instruction fetch address (the current PC)
//   mem_inst_data     - instruction word returned for mem_inst_addr
//   mem_data_addr     - data address (ALU result of the instruction in MEM)
//   mem_data_rdata    - load data
//   mem_data_wdata    - store data
//   mem_data_wmask    - byte write mask (currently always 4'b1111)
//   mem_data_we       - high while a valid store is in MEM
//   mem_data_rvalid   - load data is valid; a valid load waits in MEM for it
//   mem_data_wready   - store accepted; a valid store waits in MEM for it
//   dummy_out         - bit 0 of the write-back data
module core(
    input clk,
    input reset,

    // Memory - instruction
    output reg [31:0] mem_inst_addr,
    input [31:0] mem_inst_data,

    // Memory - data
    output reg [31:0] mem_data_addr,
    input [31:0] mem_data_rdata,
    output reg [31:0] mem_data_wdata,
    output reg [(32/8)-1:0] mem_data_wmask,
    output reg mem_data_we,
    input mem_data_rvalid,
    input mem_data_wready,

    // Disabled sketch of a split read/write data interface:
    // output reg [31:0] mem_data_waddr,
    // output reg [31:0] mem_data_wdata,
    // output reg [3:0] mem_data_wmask,
    // output reg mem_data_wvalid,
    // input wire mem_data_wready,
    // output reg [31:0] mem_data_raddr,
    // output reg mem_data_raddr_valid,
    // input wire mem_data_raddr_ready,
    // input wire [31:0] mem_data_rdata,
    // input wire mem_data_rdata_valid,
    // output reg mem_data_rdata_ready,

    // Disabled sketch of AXI-style ports:
    // // instruction memory
    // output axi_inst_ACLK,
    // output axi_inst_ARESETn,
    // output axi_inst_AWVALID,
    // output axi_inst_AWADDR,
    // output [2:0] axi_inst_AWPROT,
    // input axi_inst_AWREADY,
    // output axi_inst_WVALID,
    // output [DATA_WIDTH-1:0] axi_inst_WDATA,
    // output [(DATA_WIDTH/8)-1:0] axi_inst_WSTRB,
    // input axi_inst_WREADY,
    // input axi_inst_BVALID,
    // output axi_inst_BREADY,
    // input [1:0] axi_inst_BRESP,
    // output axi_inst_ARVALID,
    // output axi_inst_ARADDR,
    // output [2:0] axi_inst_ARPROT,
    // input axi_inst_ARREADY,
    // input axi_inst_RVALID,
    // input [DATA_WIDTH-1:0] axi_inst_RDATA,
    // input [1:0] axi_inst_RRESP,
    // output axi_inst_RREADY,
    // // data memory
    // output axi_data_ACLK,
    // output axi_data_ARESETn,
    // output axi_data_AWVALID,
    // output axi_data_AWADDR,
    // output [2:0] axi_data_AWPROT,
    // input axi_data_AWREADY,
    // output axi_data_WVALID,
    // output [DATA_WIDTH-1:0] axi_data_WDATA,
    // output [(DATA_WIDTH/8)-1:0] axi_data_WSTRB,
    // input axi_data_WREADY,
    // input axi_data_BVALID,
    // output axi_data_BREADY,
    // input [1:0] axi_data_BRESP,
    // output axi_data_ARVALID,
    // output axi_data_ARADDR,
    // output [2:0] axi_data_ARPROT,
    // input axi_data_ARREADY,
    // input axi_data_RVALID,
    // input [DATA_WIDTH-1:0] axi_data_RDATA,
    // input [1:0] axi_data_RRESP,
    // output axi_data_RREADY,

    output dummy_out
);

    parameter PIPELINED = 1;

    localparam INST_NOP = 32'h00000013; // nop

    // RV32I / RV64I / RV32M
    localparam OP_LUI    = 7'b0110111,
               OP_AUIPC  = 7'b0010111,
               OP_JAL    = 7'b1101111,
               OP_JALR   = 7'b1100111,
               OP_BRANCH = 7'b1100011,
               OP_LOAD   = 7'b0000011,
               OP_STORE  = 7'b0100011,
               OP_IMM    = 7'b0010011,
               OP_ALU    = 7'b0110011,
               OP_FENCE  = 7'b0001111,
               OP_SYSTEM = 7'b1110011;
    // RV64M
    // localparam OP_???????? = 7'b0111011;
    // RV32A / RV64A
    // localparam OP_ATOMIC = 7'b0101111;
    // TODO: add opcodes for other extensions

    // ALU OPCODES
    localparam ALUOP_ADD  = 4'b0000,
               ALUOP_SUB  = 4'b0001,
               ALUOP_XOR  = 4'b0010,
               ALUOP_OR   = 4'b0011,
               ALUOP_AND  = 4'b0100,
               ALUOP_SL   = 4'b0101,
               ALUOP_SRL  = 4'b0110,
               ALUOP_SRA  = 4'b0111,
               ALUOP_SLT  = 4'b1000,
               ALUOP_SLTU = 4'b1001;

    // ------------------------------------------------------------------
    // Register file (x0 is never written, so it always reads as zero)
    // ------------------------------------------------------------------
    reg [31:0] regfile [0:31];
    initial begin : init_regfile
        integer i;
        for (i=0; i<32; i=i+1) begin
            regfile[i] = 32'h00000000;
        end
    end

    // ------------------------------------------------------------------
    // Pipeline registers. Prefix r_<stage>_ = value held for that stage.
    // ------------------------------------------------------------------
    reg [31:0] r_if_pc = 0, r_id_pc, r_ex_pc, r_mem_pc, r_wb_pc;
    reg [31:0] r_id_inst, r_ex_inst, r_mem_inst, r_wb_inst;
    reg r_id_valid=0, r_ex_valid=0, r_mem_valid=0, r_wb_valid=0;
    reg [4:0] r_ex_rs1, r_mem_rs1, r_wb_rs1;
    reg [4:0] r_ex_rs2, r_mem_rs2, r_wb_rs2;
    reg [4:0] r_ex_rd, r_mem_rd, r_wb_rd;
    reg [3:0] r_ex_aluop;
    reg [31:0] r_ex_s1, r_mem_s1;
    reg [31:0] r_ex_s2, r_mem_s2;
    reg [31:0] r_mem_alu_out, r_wb_alu_out;
    reg r_mem_alu_zero;
    reg r_ex_jump, r_mem_jump, r_wb_jump;
    reg r_ex_store, r_mem_store;
    reg r_ex_load, r_mem_load, r_wb_load;
    reg [31:0] r_mem_wdata, r_wb_wdata;
    reg r_ex_branch_pol;
    reg r_ex_branch, r_mem_branch, r_wb_branch;
    reg [31:0] r_ex_immed_btype;
    reg [31:0] r_mem_ra, r_wb_ra;
    reg [31:0] r_wb_load_data;

    // ------------------------------------------------------------------
    // Combinational signals. Prefix s_<stage>_. All of them are declared
    // here, ahead of the always blocks, so that no block refers to a name
    // before its declaration.
    // ------------------------------------------------------------------
    // IF
    reg s_if_stall = 0;
    reg [31:0] s_if_next_pc;
    reg [31:0] s_if_inst;

    // ID
    reg s_id_stall;
    reg [6:0] s_id_opcode;
    reg [2:0] s_id_funct3;
    reg [6:0] s_id_funct7;
    reg [4:0] s_id_rd, s_id_rs1, s_id_rs2;
    reg [31:0] s_id_immed_itype, s_id_immed_stype, s_id_immed_utype, s_id_immed_btype, s_id_immed_jtype;
    reg [31:0] s_id_s1, s_id_s2;
    reg [3:0] s_id_aluop;
    reg s_id_invalid;
    reg s_id_jump, s_id_branch;
    reg s_id_store, s_id_load;
    reg s_id_branch_pol;

    // EX
    reg s_ex_stall = 0;
    reg [31:0] s_ex_data1, s_ex_data2;
    reg [31:0] s_ex_alu_out;
    reg s_ex_alu_zero;
    reg s_ex_take_branch;
    reg [31:0] s_ex_branch_addr;
    reg [31:0] s_ex_ra;

    // MEM
    reg s_mem_stall = 0;
    reg s_mem_bp;
    reg [31:0] s_mem_load_data;

    // WB
    reg [31:0] s_wb_data;
    reg s_wb_write;

    // ------------------------------------------------------------------
    // IF: select the next PC and fetch the instruction at the current PC
    // ------------------------------------------------------------------
    always @(*) begin
        s_if_stall = s_id_stall;

        if (s_ex_take_branch) begin
            s_if_next_pc = s_ex_branch_addr;
        end else begin
            s_if_next_pc = r_if_pc + 4;
        end

        mem_inst_addr = r_if_pc;
        s_if_inst = mem_inst_data;
    end

    // ------------------------------------------------------------------
    // ID: decode, read operands, detect data hazards
    // ------------------------------------------------------------------
    always @(*) begin
        s_id_invalid = 0;
        s_id_store = 0;
        s_id_load = 0;

        s_id_opcode = r_id_inst[6:0];
        s_id_rd = r_id_inst[11:7];
        s_id_rs1 = r_id_inst[19:15];
        s_id_rs2 = r_id_inst[24:20];
        s_id_funct3 = r_id_inst[14:12];
        s_id_funct7 = r_id_inst[31:25];

        s_id_immed_itype = {{20{r_id_inst[31]}}, r_id_inst[31:20]};
        s_id_immed_stype = {{20{r_id_inst[31]}}, r_id_inst[31:25], r_id_inst[11:7]};
        s_id_immed_utype = {r_id_inst[31:12], 12'b0};
        s_id_immed_btype = {{19{r_id_inst[31]}}, r_id_inst[31], r_id_inst[7], r_id_inst[30:25], r_id_inst[11:8], 1'b0};
        s_id_immed_jtype = {{11{r_id_inst[31]}}, r_id_inst[31], r_id_inst[19:12], r_id_inst[20], r_id_inst[30:21], 1'b0};

        // default values
        s_id_s1 = 32'hxxxxxxxx;
        s_id_s2 = 32'hxxxxxxxx;
        s_id_jump = 0;
        s_id_branch = 0;
        s_id_branch_pol = 1'bx;
        // Defaulting the ALU op keeps this block purely combinational (no
        // inferred latch) and makes loads/stores compute rs1 + immediate
        // instead of reusing whatever operation was decoded previously.
        s_id_aluop = ALUOP_ADD;

        case (s_id_opcode)
            OP_LUI: begin // LUI
                s_id_s1 = 32'h00000000;
                s_id_s2 = s_id_immed_utype;
                s_id_aluop = ALUOP_ADD;
            end
            OP_AUIPC: begin // AUIPC
                s_id_s1 = r_id_pc;
                s_id_s2 = s_id_immed_utype;
                s_id_aluop = ALUOP_ADD;
            end
            OP_JAL: begin // JAL
                s_id_s1 = r_id_pc;
                s_id_s2 = s_id_immed_jtype;
                s_id_aluop = ALUOP_ADD;
                s_id_jump = 1;
            end
            OP_JALR: begin // JALR
                s_id_s1 = regfile[s_id_rs1];
                s_id_s2 = s_id_immed_itype;
                s_id_aluop = ALUOP_ADD;
                s_id_jump = 1;
            end
            OP_BRANCH: begin
                s_id_s1 = regfile[s_id_rs1];
                s_id_s2 = regfile[s_id_rs2];
                s_id_branch = 1;
                // Bits [11:7] of a branch are immediate bits, not a
                // destination register.
                s_id_rd = 5'd0;
                // The branch is taken when (ALU result == 0) XOR branch_pol.
                case (s_id_funct3)
                    3'b000: begin // BEQ
                        s_id_aluop = ALUOP_SUB;
                        s_id_branch_pol = 0;
                    end
                    3'b001: begin // BNE
                        s_id_aluop = ALUOP_SUB;
                        s_id_branch_pol = 1;
                    end
                    3'b100: begin // BLT
                        s_id_aluop = ALUOP_SLT;
                        s_id_branch_pol = 1;
                    end
                    3'b101: begin // BGE
                        s_id_aluop = ALUOP_SLT;
                        s_id_branch_pol = 0;
                    end
                    3'b110: begin // BLTU
                        s_id_aluop = ALUOP_SLTU;
                        s_id_branch_pol = 1;
                    end
                    3'b111: begin // BGEU
                        s_id_aluop = ALUOP_SLTU;
                        s_id_branch_pol = 0;
                    end
                    default: s_id_invalid = 1;
                endcase
            end
            OP_LOAD: begin
                s_id_load = 1;
                s_id_s1 = regfile[s_id_rs1];
                s_id_s2 = s_id_immed_itype;
                s_id_aluop = ALUOP_ADD; // address = rs1 + immediate
                // TODO: finish parsing (byte vs word. For now always assume word)
            end
            OP_STORE: begin
                s_id_store = 1;
                s_id_s1 = regfile[s_id_rs1];
                s_id_s2 = s_id_immed_stype;
                s_id_aluop = ALUOP_ADD; // address = rs1 + immediate
                // Bits [11:7] of a store are immediate bits, not a
                // destination register; without this the store would write
                // its address into the register file in WB.
                s_id_rd = 5'd0;
                // TODO: finish parsing (byte vs word. For now always assume word)
            end
            OP_IMM: begin
                s_id_s1 = regfile[s_id_rs1];
                s_id_s2 = s_id_immed_itype;
                // NOTE (shifts): technically s_id_funct7[0] must be 0 however
                // GCC allows shifts of up to 63b despite assembling for 32b.
                // This deviation from the ISA spec is tolerated at
                // essentially no cost.
                casez ({s_id_funct3, s_id_funct7})
                    10'b000???????: s_id_aluop = ALUOP_ADD;  // ADDI
                    10'b010???????: s_id_aluop = ALUOP_SLT;  // SLTI
                    10'b011???????: s_id_aluop = ALUOP_SLTU; // SLTIU
                    10'b100???????: s_id_aluop = ALUOP_XOR;  // XORI
                    10'b110???????: s_id_aluop = ALUOP_OR;   // ORI
                    10'b111???????: s_id_aluop = ALUOP_AND;  // ANDI
                    10'b001000000?: s_id_aluop = ALUOP_SL;   // SLLI
                    10'b101000000?: s_id_aluop = ALUOP_SRL;  // SRLI
                    10'b101010000?: s_id_aluop = ALUOP_SRA;  // SRAI
                    default: s_id_invalid = 1;
                endcase
            end
            OP_ALU: begin
                s_id_s1 = regfile[s_id_rs1];
                s_id_s2 = regfile[s_id_rs2];
                case ({s_id_funct3, s_id_funct7})
                    10'b0000000000: s_id_aluop = ALUOP_ADD;  // ADD
                    10'b0000100000: s_id_aluop = ALUOP_SUB;  // SUB
                    10'b0010000000: s_id_aluop = ALUOP_SL;   // SLL
                    10'b0100000000: s_id_aluop = ALUOP_SLT;  // SLT
                    10'b0110000000: s_id_aluop = ALUOP_SLTU; // SLTU
                    10'b1000000000: s_id_aluop = ALUOP_XOR;  // XOR
                    10'b1100000000: s_id_aluop = ALUOP_OR;   // OR
                    10'b1110000000: s_id_aluop = ALUOP_AND;  // AND
                    10'b1010000000: s_id_aluop = ALUOP_SRL;  // SRL
                    10'b1010100000: s_id_aluop = ALUOP_SRA;  // SRA
                    default: s_id_invalid = 1;
                endcase
            end
            // OP_FENCE: begin
            // end
            // OP_SYSTEM: begin
            // end
            default: begin
                s_id_invalid = 1;
            end
        endcase

        // Interlock: wait while a valid older instruction still has to
        // write rs1 or rs2 (x0 never causes a hazard). The rs1/rs2 fields
        // are compared for every instruction format, which can only cause
        // extra stalls, never a missed hazard.
        //
        // While a taken branch sits in EX the instruction in ID is on the
        // wrong path and is about to be squashed, so its hazards must not
        // stall the front end: s_if_stall follows s_id_stall, and a stalled
        // IF would never load the branch target into the PC.
        s_id_stall = s_ex_stall ||
            (!s_ex_take_branch && (
                (r_ex_valid  && (((r_ex_rd  == s_id_rs1) && (s_id_rs1 != 0)) || ((r_ex_rd  == s_id_rs2) && (s_id_rs2 != 0)))) ||
                (r_mem_valid && (((r_mem_rd == s_id_rs1) && (s_id_rs1 != 0)) || ((r_mem_rd == s_id_rs2) && (s_id_rs2 != 0)))) ||
                (r_wb_valid  && (((r_wb_rd  == s_id_rs1) && (s_id_rs1 != 0)) || ((r_wb_rd  == s_id_rs2) && (s_id_rs2 != 0))))
            ));

        if (s_id_invalid & r_id_valid) begin
            $display("%0t:\tInvalid instruction at PC=0x%h", $time, r_id_pc);
            s_id_aluop = 4'hx;
        end
    end

    // ------------------------------------------------------------------
    // EX: ALU and branch/jump resolution
    // ------------------------------------------------------------------
    always @(*) begin
        s_ex_stall = s_mem_stall;

        // NOTE: s_ex_data* exist for adding data paths bypassing regfile in the future
        s_ex_data1 = r_ex_s1;
        s_ex_data2 = r_ex_s2;

        case (r_ex_aluop)
            ALUOP_ADD:  s_ex_alu_out = s_ex_data1 + s_ex_data2;
            ALUOP_SUB:  s_ex_alu_out = s_ex_data1 - s_ex_data2;
            ALUOP_XOR:  s_ex_alu_out = s_ex_data1 ^ s_ex_data2;
            ALUOP_OR:   s_ex_alu_out = s_ex_data1 | s_ex_data2;
            ALUOP_AND:  s_ex_alu_out = s_ex_data1 & s_ex_data2;
            ALUOP_SL:   s_ex_alu_out = s_ex_data1 << s_ex_data2[4:0];
            ALUOP_SRL:  s_ex_alu_out = s_ex_data1 >> s_ex_data2[4:0];
            ALUOP_SRA:  s_ex_alu_out = $signed(s_ex_data1) >>> s_ex_data2[4:0];
            ALUOP_SLT:  s_ex_alu_out = $signed(s_ex_data1) < $signed(s_ex_data2);
            ALUOP_SLTU: s_ex_alu_out = s_ex_data1 < s_ex_data2;
            default:    s_ex_alu_out = 32'hxxxxxxxx;
        endcase
        s_ex_alu_zero = (s_ex_alu_out == 0);

        s_ex_take_branch = r_ex_valid && (r_ex_jump || (r_ex_branch && (s_ex_alu_zero ^ r_ex_branch_pol)));
        s_ex_ra = r_ex_pc + 4; // link value written to rd by JAL/JALR
        if (r_ex_jump) begin
            // JAL/JALR target comes from the ALU. The ISA requires JALR to
            // clear bit 0 of rs1 + immediate; the JAL offset already has
            // bit 0 equal to zero.
            s_ex_branch_addr = {s_ex_alu_out[31:1], 1'b0};
        end else begin
            s_ex_branch_addr = r_ex_pc + r_ex_immed_btype;
        end
    end

    // ------------------------------------------------------------------
    // MEM: data memory access
    // ------------------------------------------------------------------
    always @(*) begin
        s_mem_stall = 0; // TODO: add stall logic when actually reading/writing
        s_mem_bp = 0;

        mem_data_addr = r_mem_alu_out;
        // Store data is read from the register file here; the ID interlock
        // also covers rs2, so every older writer has already retired.
        mem_data_wdata = regfile[r_mem_rs2];
        mem_data_wmask = 4'b1111; // TODO: implement smaller writes
        mem_data_we = r_mem_store && r_mem_valid;
        // Only a valid instruction may wait for the memory handshake; a
        // squashed load/store does not request anything (mem_data_we is
        // low for it), so it must not hold up the pipeline.
        if (r_mem_valid && r_mem_store) begin
            s_mem_stall = ~mem_data_wready;
        end

        s_mem_load_data = mem_data_rdata; // TODO: implement smaller reads
        if (r_mem_valid && r_mem_load) begin
            s_mem_stall = ~mem_data_rvalid;
        end
    end

    // ------------------------------------------------------------------
    // WB: select the value written back to the register file
    // ------------------------------------------------------------------
    always @(*) begin
        // load instructions do not use output of alu in wb
        if (r_wb_jump) begin
            s_wb_data = r_wb_ra;
        end else if (r_wb_load) begin
            s_wb_data = r_wb_load_data;
        end else begin
            s_wb_data = r_wb_alu_out;
        end

        // Every non-branch instruction is allowed to write; instructions
        // without a destination (branches, stores) carry rd = 0 from ID,
        // and writes to x0 are dropped at the register file.
        s_wb_write = (r_wb_branch == 0);
    end

    // SYS

    // ------------------------------------------------------------------
    // Register update
    // ------------------------------------------------------------------
    always @(posedge clk) begin: pipeline_update
        integer i;
        if (reset) begin
            r_if_pc <= 32'h00000000;

            r_id_pc <= 0;
            r_id_inst <= INST_NOP;
            r_id_valid <= 0;

            r_ex_pc <= 0;
            r_ex_inst <= INST_NOP;
            r_ex_rd <= 0;
            r_ex_s1 <= 0;
            r_ex_s2 <= 0;
            r_ex_aluop <= 0;
            r_ex_jump <= 0;
            r_ex_branch <= 0;
            r_ex_store <= 0;
            r_ex_load <= 0;
            r_ex_valid <= 0;

            r_mem_pc <= 0;
            r_mem_inst <= INST_NOP;
            r_mem_rd <= 0;
            r_mem_s1 <= 0;
            r_mem_s2 <= 0;
            r_mem_alu_out <= 0;
            r_mem_alu_zero <= 0;
            r_mem_jump <= 0;
            r_mem_branch <= 0;
            r_mem_store <= 0;
            r_mem_load <= 0;
            r_mem_valid <= 0;

            r_wb_pc <= 0;
            r_wb_inst <= INST_NOP;
            r_wb_rd <= 0;
            r_wb_jump <= 0;
            r_wb_branch <= 0;
            r_wb_load <= 0;
            r_wb_valid <= 0;

            // x0 is never written, so it keeps its initial value of zero.
            for (i=1; i<32; i=i+1) begin
                regfile[i] <= 0;
            end
        end else begin
            // IF
            if (!s_if_stall) begin
                r_if_pc <= s_if_next_pc;
            end

            // ID
            if (!s_id_stall) begin
                r_id_pc <= r_if_pc;
                r_id_inst <= s_if_inst;
                // The instruction fetched while a branch is taken is on the
                // wrong path (s_ex_take_branch already implies r_ex_valid).
                r_id_valid <= !s_ex_take_branch;
            end

            // EX
            if (!s_ex_stall) begin
                r_ex_pc <= r_id_pc;
                r_ex_inst <= r_id_inst;
                r_ex_rs1 <= s_id_rs1;
                r_ex_rs2 <= s_id_rs2;
                r_ex_rd <= s_id_rd;
                r_ex_s1 <= s_id_s1;
                r_ex_s2 <= s_id_s2;
                r_ex_aluop <= s_id_aluop;
                r_ex_jump <= s_id_jump;
                r_ex_branch <= s_id_branch;
                r_ex_store <= s_id_store;
                r_ex_load <= s_id_load;
                // A bubble enters EX when ID holds no valid instruction,
                // is being squashed by a taken branch, or is interlocked.
                r_ex_valid <= r_id_valid && !s_ex_take_branch && !s_id_stall;
                r_ex_branch_pol <= s_id_branch_pol;
                r_ex_immed_btype <= s_id_immed_btype;
            end

            // MEM
            if (!s_mem_stall) begin
                r_mem_pc <= r_ex_pc;
                r_mem_inst <= r_ex_inst;
                r_mem_rs1 <= r_ex_rs1;
                r_mem_rs2 <= r_ex_rs2;
                r_mem_rd <= r_ex_rd;
                r_mem_s1 <= r_ex_s1;
                r_mem_s2 <= r_ex_s2;
                r_mem_alu_out <= s_ex_alu_out;
                r_mem_alu_zero <= s_ex_alu_zero;
                r_mem_store <= r_ex_store;
                r_mem_load <= r_ex_load;
                r_mem_valid <= r_ex_valid;
                r_mem_branch <= r_ex_branch;
                r_mem_ra <= s_ex_ra;
                r_mem_jump <= r_ex_jump;
            end

            // WB (never stalls)
            r_wb_pc <= r_mem_pc;
            r_wb_inst <= r_mem_inst;
            r_wb_rs1 <= r_mem_rs1;
            r_wb_rs2 <= r_mem_rs2;
            r_wb_rd <= r_mem_rd;
            r_wb_alu_out <= r_mem_alu_out;
            // While MEM waits for the memory the instruction stays in MEM,
            // so WB receives a bubble instead of retiring it once per
            // stalled cycle (with not-yet-valid load data).
            r_wb_valid <= r_mem_valid && !s_mem_stall;
            r_wb_branch <= r_mem_branch;
            r_wb_ra <= r_mem_ra;
            r_wb_jump <= r_mem_jump;
            r_wb_load <= r_mem_load;
            r_wb_load_data <= s_mem_load_data;

            // Register File
            // TODO: should I write if s_wb_stall=1?
            if (r_wb_rd != 0 && s_wb_write && r_wb_valid) begin
                regfile[r_wb_rd] <= s_wb_data;
                // $display("%0t:\tPC=0x%h\tx%02d=0x%h", $time, r_id_pc, r_wb_rd, s_wb_data);
            end
        end
    end

    assign dummy_out = s_wb_data[0];

endmodule