diff options
-rw-r--r-- | src/rv2insn.sv | 2 | ||||
-rw-r--r-- | src/sched.sv | 174 | ||||
-rw-r--r-- | src/ttarv32.sv | 62 | ||||
-rw-r--r-- | tb/sched_tb.sv | 34 | ||||
-rw-r--r-- | tb/ttarv32_tb.sv | 1 |
5 files changed, 169 insertions, 104 deletions
diff --git a/src/rv2insn.sv b/src/rv2insn.sv index 8717d0f..c5c355b 100644 --- a/src/rv2insn.sv +++ b/src/rv2insn.sv @@ -1,5 +1,7 @@ `include "common.svh" +`default_nettype none + module rv2insn #( parameter QUE_DEPTH, parameter ALU_COUNT diff --git a/src/sched.sv b/src/sched.sv index 16a1e54..4165dc8 100644 --- a/src/sched.sv +++ b/src/sched.sv @@ -1,18 +1,24 @@ `include "common.svh" +`default_nettype none + module sched #( parameter QUE_DEPTH, parameter SLOT_COUNT, - parameter SLOT_DEPTH, type slot_t )( + input clk_i, + input rst_ni, input insn_t[QUE_DEPTH-1:0] que_i, output insn_t[QUE_DEPTH-1:0] que_o, input slot_t[SLOT_COUNT-1:0] slots_i, - output slot_t[SLOT_COUNT-1:0] slots_o + output slot_t[SLOT_COUNT-1:0] slots_o, + output bit[$clog2(QUE_DEPTH)-1:0] shift_o ); +bit[$clog2(QUE_DEPTH)-1:0] shift_r; + /* check_* don't always use whole signal they're given, so I think it's best to * just silence this */ /* verilator lint_off UNUSEDSIGNAL */ @@ -22,86 +28,136 @@ function automatic insn_t[QUE_DEPTH-1:0] next_insn(insn_t[QUE_DEPTH-1:0] que); endfunction /* check if a write is to the same destination port */ -function automatic logic check_dst(mov_t d1, mov_t[SLOT_DEPTH-1:0] d2); - logic[SLOT_DEPTH-1:0] dep = '0; - for (int i = 0; i < SLOT_DEPTH; ++i) begin - dep[i] = d1.dst != NOP & d1.dst == d2[i].dst; - end - return dep != 0; +function automatic logic check_dst(mov_t d1, mov_t d2); + return d1.dst != NOP & d1.dst == d2.dst; endfunction /* check if any read is from an output port that's not free */ -function automatic logic check_overlap(mov_t d1, mov_t[SLOT_DEPTH-1:0] d2); - logic[SLOT_DEPTH-1:0] dep = '0; - for (int i = 0; i < SLOT_DEPTH; ++i) begin - dep[i] = d1.src != NOP & d1.src == d2[i].dst; - end - return dep != 0; +function automatic logic check_overlap(mov_t d1, mov_t d2); + return d1.src != NOP & d1.src == d2.dst; endfunction -/* check if instruction depends on any existing slots */ -function automatic logic depends(insn_t i, slot_t[SLOT_COUNT-1:0] slots); - logic[SLOT_COUNT-1:0] dep = '0; - for (int ii = 0; ii < SLOT_COUNT; ++ii) begin - logic same_dst = check_dst(i.out, slots[ii].out); - logic overlap1 = check_overlap(i.in[0], slots[ii].out); - logic overlap2 = check_overlap(i.in[1], slots[ii].out); +function automatic bit depends_slot(insn_t i, slot_t slot); + bit same_dst = check_dst(i.out, slot.out); + bit overlap1 = check_overlap(i.in[0], slot.out); + bit overlap2 = check_overlap(i.in[1], slot.out); - logic same_src1 = i.in[0].dst != NOP - & i.in[0].dst == slots[ii].in[0].dst; + bit same_src1 = i.in[0].dst == slot.in[0].dst & i.in[0].dst != NOP; - /* op must have trigger so don't need to check against NOP */ - logic same_src2 = i.in[1].dst == slots[ii].in[1].dst; + /* op must have trigger so don't need to check against NOP */ + bit same_src2 = i.in[1].dst == slot.in[1].dst; + return same_dst | overlap1 | overlap2 | same_src1 | same_src2; +endfunction - dep[ii] = same_dst | overlap1 | overlap2 | same_src1 | same_src2; - end - return dep != 0; +/* convert instruction into equivalent slot */ +function automatic slot_t place(insn_t i); + slot_t slot; + slot.op = i.op; + slot.imm = i.imm; + slot.in = i.in; + slot.out = i.out; + return slot; +endfunction + +function automatic bit depends_insn(insn_t i, insn_t d); + slot_t slot = place(d); + return depends_slot(i, slot); endfunction function automatic logic is_noop(mov_t m); return m.src == NOP && m.dst == NOP; endfunction -function automatic mov_t[SLOT_DEPTH-1:0] place_out(int i, logic placed, - mov_t o, mov_t[SLOT_DEPTH-1:0] out); - if (i < SLOT_DEPTH) begin - logic ok = !placed & is_noop(out[i]); - out[i] = ok ? o : out[i]; - return place_out(i + 1, ok ? 1 : placed, o, out); - end else begin - return out; +function automatic bit[SLOT_COUNT-1:0] dependvec(int insn, insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots); + bit dep = '0; + bit[SLOT_COUNT-1:0] possible; + for (int i = 0; i < insn; ++i) begin + dep |= depends_insn(que[insn], que[i]); end + + for (int i = 0; i < SLOT_COUNT; ++i) + possible[i] = !depends_slot(que[insn], slots[i]); + + return dep ? '0 : possible; endfunction -/* place instruction into slot */ -function automatic slot_t place(insn_t i, slot_t src); - slot_t slot; - slot.op = i.op; - slot.imm = i.imm; - slot.in = i.in; - slot.out = place_out(0, 0, i.out, src.out); +function automatic bit[SLOT_COUNT-1:0] select_first(bit[SLOT_COUNT-1:0] vec); + bit found = 0; + bit[SLOT_COUNT-1:0] selected = '0; + + for (int i = 0; i < SLOT_COUNT; ++i) begin + selected[i] = found ? '0 : vec[i]; + found |= vec[i]; + end + + return selected; +endfunction + +task automatic select( + input int i, + input bit[SLOT_COUNT-1:0] possible[QUE_DEPTH], + input bit[SLOT_COUNT-1:0] reserved, + output bit[SLOT_COUNT-1:0] selected[QUE_DEPTH]); + + bit[SLOT_COUNT-1:0] s = select_first(~reserved & possible[i]); + + if (i < QUE_DEPTH - 1) + select(i + 1, possible, s | reserved, selected); + + /* write out last, otherwise the recursive task will zero everything out */ + selected[i] = s; +endtask + +function automatic slot_t maybe_populate( + bit[SLOT_COUNT-1:0] selected[QUE_DEPTH], + insn_t[QUE_DEPTH-1:0] que, + slot_t[SLOT_COUNT-1:0] slots, + int slot_idx); + + slot_t slot = slots[slot_idx]; + for (int i = 0; i < QUE_DEPTH; ++i) + if (selected[i][slot_idx]) + slot = place(que[i]); + return slot; endfunction -typedef struct packed { - insn_t[QUE_DEPTH-1:0] que; - slot_t[SLOT_COUNT-1:0] slots; -} stages_out_t; - -function automatic stages_out_t stages(int i, - insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots); - if (i < SLOT_COUNT) begin - logic ok = !depends(que[0], slots); - slots[i] = ok ? place(que[0], slots[i]) : slots[i]; - return stages(i + 1, ok ? next_insn(que) : que, slots); - end else begin - return {que, slots}; +function automatic void schedule(insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots); + bit[SLOT_COUNT-1:0] possible[QUE_DEPTH]; + bit[SLOT_COUNT-1:0] selected[QUE_DEPTH]; + bit[$clog2(QUE_DEPTH)-1:0] shift; + + shift = shift_r; + + for (int i = 0; i < QUE_DEPTH; ++i) + possible[i] = dependvec(i, que, slots); + + select(0, possible, '0, selected); + + for (int i = 0; i < QUE_DEPTH; ++i) begin + assert($onehot0(selected[i])); + if (|selected[i]) + ++shift; end + + for (int i = 0; i < SLOT_COUNT; ++i) + slots_o[i] = maybe_populate(selected, que, slots_i, i); + + /* should que be shifted somewhere else, I wonder? */ + shift_o = shift; + que_o = que_i >> (shift * $bits(insn_t)); endfunction always_comb begin - stages_out_t out = stages(0, que_i, slots_i); - que_o = out.que; - slots_o = out.slots; + schedule(que_i, slots_i); end + +always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + shift_r <= '0; + end else begin + shift_r <= '0; + end +end + endmodule diff --git a/src/ttarv32.sv b/src/ttarv32.sv index 0edf1cb..cb04824 100644 --- a/src/ttarv32.sv +++ b/src/ttarv32.sv @@ -1,8 +1,9 @@ `include "common.svh" +`default_nettype none + module ttarv32 #( parameter SLOT_COUNT, - parameter SLOT_DEPTH, parameter ALU_COUNT )( input clk_i, @@ -13,7 +14,7 @@ module ttarv32 #( typedef struct packed { mov_t[1:0] in; - mov_t[SLOT_DEPTH-1:0] out; + mov_t out; op_t op; imm_t imm; } slot_t; @@ -39,63 +40,44 @@ rv2insn #( .que_o(rv_que) ); -typedef struct packed { - insn_t[SLOT_COUNT-1:0] que; - xlen_t pc; -} merge_t; - -/* repetition from sched.sv, hmm */ -function automatic insn_t[SLOT_COUNT-1:0] next_insn(insn_t[SLOT_COUNT-1:0] que); - return que >> $bits(insn_t); -endfunction - -function automatic logic is_noop(mov_t m); - return m.src == NOP && m.dst == NOP; -endfunction - -/* not the best thing in the world but good enough for now */ -function automatic logic is_free(insn_t i); - return is_noop(i.in[0]) & is_noop(i.in[1]) & is_noop(i.out); +function automatic insn_t[SLOT_COUNT-1:0] merge(insn_t[SLOT_COUNT-1:0] prev_que, insn_t[SLOT_COUNT-1:0] decoded, bit[$clog2(SLOT_COUNT)-1:0] shift); + return prev_que | (decoded << (shift * $bits(insn_t))); endfunction -function automatic merge_t merge(int i, - insn_t[SLOT_COUNT-1:0] prev_que, insn_t[SLOT_COUNT-1:0] next_que, xlen_t pc); - if (i < SLOT_COUNT) begin - logic ok = is_free(prev_que[i]); - prev_que[i] = ok ? next_que[0] : prev_que[i]; - return merge(i + 1, - prev_que, - ok ? next_insn(next_que) : next_que, - ok ? pc + 1 : pc); - end else begin - return {prev_que, pc}; - end -endfunction +bit[$clog2(SLOT_COUNT)-1:0] shift_r, shift; -merge_t merged; -assign merged = merge(0, prev_que_r, rv_que, pc_r); +insn_t[SLOT_COUNT-1:0] merged_que; +assign merged_que = merge(prev_que_r, rv_que, shift_r); sched #( .QUE_DEPTH(SLOT_COUNT), .SLOT_COUNT(SLOT_COUNT), - .SLOT_DEPTH(SLOT_DEPTH), .slot_t(slot_t) ) sched ( - .que_i(merged.que), + .clk_i(clk_i), + .rst_ni(rst_ni), + .que_i(merged_que), .que_o(next_que), .slots_i(prev_slots_r), - .slots_o(next_slots) + .slots_o(next_slots), + .shift_o(shift) ); always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin prev_slots_r <= 0; - prev_que_r <= 0; + prev_que_r <= 0; + shift_r <= 0; + pc_r <= 0; + pc_o <= 0; end else begin prev_slots_r <= next_slots; - prev_que_r <= next_que; - pc_r <= merged.pc; + prev_que_r <= next_que; + shift_r <= shift; + + pc_o <= pc_r + xlen_t'(shift); + pc_r <= pc_o; end end diff --git a/tb/sched_tb.sv b/tb/sched_tb.sv index 036464e..13c032e 100644 --- a/tb/sched_tb.sv +++ b/tb/sched_tb.sv @@ -2,6 +2,8 @@ module sched_tb; +logic clk, rst_n; + typedef struct packed { mov_t[1:0] in; mov_t[0:0] out; @@ -13,17 +15,20 @@ slot_t[1:0] prev_slots; slot_t[1:0] next_slots; insn_t[2:0] prev_que; insn_t[2:0] next_que; +bit[2-1:0] shift; sched #( .QUE_DEPTH(3), .SLOT_COUNT(2), - .SLOT_DEPTH(1), .slot_t(slot_t) ) sched ( + .clk_i(clk), + .rst_ni(rst_n), .que_i(prev_que), .que_o(next_que), .slots_i(prev_slots), - .slots_o(next_slots) + .slots_o(next_slots), + .shift_o(shift) ); slot_t[1:0] first_check = {76'b0, @@ -39,6 +44,13 @@ initial begin $dumpfile("sched_tb.vcd"); $dumpvars(); + clk = 0; + rst_n = 0; + + #1 + + rst_n = 1; + /* pretend we've queued something like * add r1, r2, r3 * add r4, r2, r1 @@ -59,20 +71,34 @@ initial begin prev_slots = '0; - #10 + #1 + clk = 1; + + #1 + clk = 0; assert (next_slots == first_check) else $error("\nwanted:\t%h", first_check, "\ngot:\t%h", next_slots); + assert (shift == 1) + else $error("expected shift == 1, got %d", shift); + /* pretend all operations finished */ prev_slots = '0; prev_que = next_que; - #10 + #1 + clk = 1; + + #1 + clk = 0; assert (next_slots == second_check) else $error("\nwanted:\t%h", second_check, "\ngot:\t%h", next_slots); + assert (shift == 2) + else $error("expected shift == 2, got %d", shift); + #10 $finish; end endmodule diff --git a/tb/ttarv32_tb.sv b/tb/ttarv32_tb.sv index 6277f02..5b30f25 100644 --- a/tb/ttarv32_tb.sv +++ b/tb/ttarv32_tb.sv @@ -18,7 +18,6 @@ logic rst_n; ttarv32 #( .SLOT_COUNT(4), - .SLOT_DEPTH(3), .ALU_COUNT(2) ) ttarv32 ( .clk_i(clk), |