From b1ee83cd1ea325a5d353365ac35790a7c1d47845 Mon Sep 17 00:00:00 2001 From: Kimplul Date: Sun, 10 Aug 2025 20:16:09 +0300 Subject: decrease max path length + Not fully functional, but good enough for now --- src/sched.sv | 174 +++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 115 insertions(+), 59 deletions(-) (limited to 'src/sched.sv') diff --git a/src/sched.sv b/src/sched.sv index 16a1e54..4165dc8 100644 --- a/src/sched.sv +++ b/src/sched.sv @@ -1,18 +1,24 @@ `include "common.svh" +`default_nettype none + module sched #( parameter QUE_DEPTH, parameter SLOT_COUNT, - parameter SLOT_DEPTH, type slot_t )( + input clk_i, + input rst_ni, input insn_t[QUE_DEPTH-1:0] que_i, output insn_t[QUE_DEPTH-1:0] que_o, input slot_t[SLOT_COUNT-1:0] slots_i, - output slot_t[SLOT_COUNT-1:0] slots_o + output slot_t[SLOT_COUNT-1:0] slots_o, + output bit[$clog2(QUE_DEPTH)-1:0] shift_o ); +bit[$clog2(QUE_DEPTH)-1:0] shift_r; + /* check_* don't always use whole signal they're given, so I think it's best to * just silence this */ /* verilator lint_off UNUSEDSIGNAL */ @@ -22,86 +28,136 @@ function automatic insn_t[QUE_DEPTH-1:0] next_insn(insn_t[QUE_DEPTH-1:0] que); endfunction /* check if a write is to the same destination port */ -function automatic logic check_dst(mov_t d1, mov_t[SLOT_DEPTH-1:0] d2); - logic[SLOT_DEPTH-1:0] dep = '0; - for (int i = 0; i < SLOT_DEPTH; ++i) begin - dep[i] = d1.dst != NOP & d1.dst == d2[i].dst; - end - return dep != 0; +function automatic logic check_dst(mov_t d1, mov_t d2); + return d1.dst != NOP & d1.dst == d2.dst; endfunction /* check if any read is from an output port that's not free */ -function automatic logic check_overlap(mov_t d1, mov_t[SLOT_DEPTH-1:0] d2); - logic[SLOT_DEPTH-1:0] dep = '0; - for (int i = 0; i < SLOT_DEPTH; ++i) begin - dep[i] = d1.src != NOP & d1.src == d2[i].dst; - end - return dep != 0; +function automatic logic check_overlap(mov_t d1, mov_t d2); + return d1.src != NOP & d1.src == d2.dst; endfunction -/* check if instruction depends on any existing slots */ -function automatic logic depends(insn_t i, slot_t[SLOT_COUNT-1:0] slots); - logic[SLOT_COUNT-1:0] dep = '0; - for (int ii = 0; ii < SLOT_COUNT; ++ii) begin - logic same_dst = check_dst(i.out, slots[ii].out); - logic overlap1 = check_overlap(i.in[0], slots[ii].out); - logic overlap2 = check_overlap(i.in[1], slots[ii].out); +function automatic bit depends_slot(insn_t i, slot_t slot); + bit same_dst = check_dst(i.out, slot.out); + bit overlap1 = check_overlap(i.in[0], slot.out); + bit overlap2 = check_overlap(i.in[1], slot.out); - logic same_src1 = i.in[0].dst != NOP - & i.in[0].dst == slots[ii].in[0].dst; + bit same_src1 = i.in[0].dst == slot.in[0].dst & i.in[0].dst != NOP; - /* op must have trigger so don't need to check against NOP */ - logic same_src2 = i.in[1].dst == slots[ii].in[1].dst; + /* op must have trigger so don't need to check against NOP */ + bit same_src2 = i.in[1].dst == slot.in[1].dst; + return same_dst | overlap1 | overlap2 | same_src1 | same_src2; +endfunction - dep[ii] = same_dst | overlap1 | overlap2 | same_src1 | same_src2; - end - return dep != 0; +/* convert instruction into equivalent slot */ +function automatic slot_t place(insn_t i); + slot_t slot; + slot.op = i.op; + slot.imm = i.imm; + slot.in = i.in; + slot.out = i.out; + return slot; +endfunction + +function automatic bit depends_insn(insn_t i, insn_t d); + slot_t slot = place(d); + return depends_slot(i, slot); endfunction function automatic logic is_noop(mov_t m); return m.src == NOP && m.dst == NOP; endfunction -function automatic mov_t[SLOT_DEPTH-1:0] place_out(int i, logic placed, - mov_t o, mov_t[SLOT_DEPTH-1:0] out); - if (i < SLOT_DEPTH) begin - logic ok = !placed & is_noop(out[i]); - out[i] = ok ? o : out[i]; - return place_out(i + 1, ok ? 1 : placed, o, out); - end else begin - return out; +function automatic bit[SLOT_COUNT-1:0] dependvec(int insn, insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots); + bit dep = '0; + bit[SLOT_COUNT-1:0] possible; + for (int i = 0; i < insn; ++i) begin + dep |= depends_insn(que[insn], que[i]); end + + for (int i = 0; i < SLOT_COUNT; ++i) + possible[i] = !depends_slot(que[insn], slots[i]); + + return dep ? '0 : possible; endfunction -/* place instruction into slot */ -function automatic slot_t place(insn_t i, slot_t src); - slot_t slot; - slot.op = i.op; - slot.imm = i.imm; - slot.in = i.in; - slot.out = place_out(0, 0, i.out, src.out); +function automatic bit[SLOT_COUNT-1:0] select_first(bit[SLOT_COUNT-1:0] vec); + bit found = 0; + bit[SLOT_COUNT-1:0] selected = '0; + + for (int i = 0; i < SLOT_COUNT; ++i) begin + selected[i] = found ? '0 : vec[i]; + found |= vec[i]; + end + + return selected; +endfunction + +task automatic select( + input int i, + input bit[SLOT_COUNT-1:0] possible[QUE_DEPTH], + input bit[SLOT_COUNT-1:0] reserved, + output bit[SLOT_COUNT-1:0] selected[QUE_DEPTH]); + + bit[SLOT_COUNT-1:0] s = select_first(~reserved & possible[i]); + + if (i < QUE_DEPTH - 1) + select(i + 1, possible, s | reserved, selected); + + /* write out last, otherwise the recursive task will zero everything out */ + selected[i] = s; +endtask + +function automatic slot_t maybe_populate( + bit[SLOT_COUNT-1:0] selected[QUE_DEPTH], + insn_t[QUE_DEPTH-1:0] que, + slot_t[SLOT_COUNT-1:0] slots, + int slot_idx); + + slot_t slot = slots[slot_idx]; + for (int i = 0; i < QUE_DEPTH; ++i) + if (selected[i][slot_idx]) + slot = place(que[i]); + return slot; endfunction -typedef struct packed { - insn_t[QUE_DEPTH-1:0] que; - slot_t[SLOT_COUNT-1:0] slots; -} stages_out_t; - -function automatic stages_out_t stages(int i, - insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots); - if (i < SLOT_COUNT) begin - logic ok = !depends(que[0], slots); - slots[i] = ok ? place(que[0], slots[i]) : slots[i]; - return stages(i + 1, ok ? next_insn(que) : que, slots); - end else begin - return {que, slots}; +function automatic void schedule(insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots); + bit[SLOT_COUNT-1:0] possible[QUE_DEPTH]; + bit[SLOT_COUNT-1:0] selected[QUE_DEPTH]; + bit[$clog2(QUE_DEPTH)-1:0] shift; + + shift = shift_r; + + for (int i = 0; i < QUE_DEPTH; ++i) + possible[i] = dependvec(i, que, slots); + + select(0, possible, '0, selected); + + for (int i = 0; i < QUE_DEPTH; ++i) begin + assert($onehot0(selected[i])); + if (|selected[i]) + ++shift; end + + for (int i = 0; i < SLOT_COUNT; ++i) + slots_o[i] = maybe_populate(selected, que, slots_i, i); + + /* should que be shifted somewhere else, I wonder? */ + shift_o = shift; + que_o = que_i >> (shift * $bits(insn_t)); endfunction always_comb begin - stages_out_t out = stages(0, que_i, slots_i); - que_o = out.que; - slots_o = out.slots; + schedule(que_i, slots_i); end + +always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + shift_r <= '0; + end else begin + shift_r <= '0; + end +end + endmodule -- cgit v1.2.3