summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKimplul <kimi.h.kuparinen@gmail.com>2025-08-10 20:16:09 +0300
committerKimplul <kimi.h.kuparinen@gmail.com>2025-08-10 20:16:09 +0300
commitb1ee83cd1ea325a5d353365ac35790a7c1d47845 (patch)
treefcb4157129bbc3cb84a3990edb15fddda5f9806a
parent373edb42380875eafc8b9de54d80b939d263d041 (diff)
downloadttarv32-b1ee83cd1ea325a5d353365ac35790a7c1d47845.tar.gz
ttarv32-b1ee83cd1ea325a5d353365ac35790a7c1d47845.zip
decrease max path length (HEAD, master)
+ Not fully functional, but good enough for now
-rw-r--r--src/rv2insn.sv2
-rw-r--r--src/sched.sv174
-rw-r--r--src/ttarv32.sv62
-rw-r--r--tb/sched_tb.sv34
-rw-r--r--tb/ttarv32_tb.sv1
5 files changed, 169 insertions, 104 deletions
diff --git a/src/rv2insn.sv b/src/rv2insn.sv
index 8717d0f..c5c355b 100644
--- a/src/rv2insn.sv
+++ b/src/rv2insn.sv
@@ -1,5 +1,7 @@
`include "common.svh"
+`default_nettype none
+
module rv2insn #(
parameter QUE_DEPTH,
parameter ALU_COUNT
diff --git a/src/sched.sv b/src/sched.sv
index 16a1e54..4165dc8 100644
--- a/src/sched.sv
+++ b/src/sched.sv
@@ -1,18 +1,24 @@
`include "common.svh"
+`default_nettype none
+
module sched
#(
parameter QUE_DEPTH,
parameter SLOT_COUNT,
- parameter SLOT_DEPTH,
type slot_t
)(
+ input clk_i,
+ input rst_ni,
input insn_t[QUE_DEPTH-1:0] que_i,
output insn_t[QUE_DEPTH-1:0] que_o,
input slot_t[SLOT_COUNT-1:0] slots_i,
- output slot_t[SLOT_COUNT-1:0] slots_o
+ output slot_t[SLOT_COUNT-1:0] slots_o,
+ output bit[$clog2(QUE_DEPTH)-1:0] shift_o
);
+bit[$clog2(QUE_DEPTH)-1:0] shift_r;
+
/* check_* don't always use whole signal they're given, so I think it's best to
* just silence this */
/* verilator lint_off UNUSEDSIGNAL */
@@ -22,86 +28,136 @@ function automatic insn_t[QUE_DEPTH-1:0] next_insn(insn_t[QUE_DEPTH-1:0] que);
endfunction
/* check if a write is to the same destination port */
-function automatic logic check_dst(mov_t d1, mov_t[SLOT_DEPTH-1:0] d2);
- logic[SLOT_DEPTH-1:0] dep = '0;
- for (int i = 0; i < SLOT_DEPTH; ++i) begin
- dep[i] = d1.dst != NOP & d1.dst == d2[i].dst;
- end
- return dep != 0;
+function automatic logic check_dst(mov_t d1, mov_t d2);
+ return d1.dst != NOP & d1.dst == d2.dst;
endfunction
/* check if any read is from an output port that's not free */
-function automatic logic check_overlap(mov_t d1, mov_t[SLOT_DEPTH-1:0] d2);
- logic[SLOT_DEPTH-1:0] dep = '0;
- for (int i = 0; i < SLOT_DEPTH; ++i) begin
- dep[i] = d1.src != NOP & d1.src == d2[i].dst;
- end
- return dep != 0;
+function automatic logic check_overlap(mov_t d1, mov_t d2);
+ return d1.src != NOP & d1.src == d2.dst;
endfunction
-/* check if instruction depends on any existing slots */
-function automatic logic depends(insn_t i, slot_t[SLOT_COUNT-1:0] slots);
- logic[SLOT_COUNT-1:0] dep = '0;
- for (int ii = 0; ii < SLOT_COUNT; ++ii) begin
- logic same_dst = check_dst(i.out, slots[ii].out);
- logic overlap1 = check_overlap(i.in[0], slots[ii].out);
- logic overlap2 = check_overlap(i.in[1], slots[ii].out);
+function automatic bit depends_slot(insn_t i, slot_t slot);
+ bit same_dst = check_dst(i.out, slot.out);
+ bit overlap1 = check_overlap(i.in[0], slot.out);
+ bit overlap2 = check_overlap(i.in[1], slot.out);
- logic same_src1 = i.in[0].dst != NOP
- & i.in[0].dst == slots[ii].in[0].dst;
+ bit same_src1 = i.in[0].dst == slot.in[0].dst & i.in[0].dst != NOP;
- /* op must have trigger so don't need to check against NOP */
- logic same_src2 = i.in[1].dst == slots[ii].in[1].dst;
+ /* op must have trigger so don't need to check against NOP */
+ bit same_src2 = i.in[1].dst == slot.in[1].dst;
+ return same_dst | overlap1 | overlap2 | same_src1 | same_src2;
+endfunction
- dep[ii] = same_dst | overlap1 | overlap2 | same_src1 | same_src2;
- end
- return dep != 0;
+/* convert instruction into equivalent slot: op, imm, in and out are copied
+ * across one field at a time; any further fields slot_t might have are left
+ * unassigned here */
+function automatic slot_t place(insn_t i);
+ slot_t slot;
+ slot.op = i.op;
+ slot.imm = i.imm;
+ slot.in = i.in;
+ slot.out = i.out;
+ return slot;
+endfunction
+
+/* check if instruction i conflicts with instruction d, by treating d as if it
+ * had already been placed into a slot */
+function automatic bit depends_insn(insn_t i, insn_t d);
+	return depends_slot(i, place(d));
endfunction
function automatic logic is_noop(mov_t m);
return m.src == NOP && m.dst == NOP;
endfunction
-function automatic mov_t[SLOT_DEPTH-1:0] place_out(int i, logic placed,
- mov_t o, mov_t[SLOT_DEPTH-1:0] out);
- if (i < SLOT_DEPTH) begin
- logic ok = !placed & is_noop(out[i]);
- out[i] = ok ? o : out[i];
- return place_out(i + 1, ok ? 1 : placed, o, out);
- end else begin
- return out;
+/* build the mask of slots that queue entry `insn` could be placed into:
+ * bit s is set when the entry has no dependency on slots[s]. If the entry
+ * depends on any queue entry with a lower index, the whole mask is zero. */
+function automatic bit[SLOT_COUNT-1:0] dependvec(int insn, insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots);
+	bit blocked = '0;
+	bit[SLOT_COUNT-1:0] free_mask;
+
+	/* a conflict with any lower-indexed queue entry rules out every slot */
+	for (int prev = 0; prev < insn; ++prev) begin
+		blocked = blocked | depends_insn(que[insn], que[prev]);
end
+
+	for (int s = 0; s < SLOT_COUNT; ++s)
+		free_mask[s] = !depends_slot(que[insn], slots[s]);
+
+	if (blocked)
+		return '0;
+
+	return free_mask;
endfunction
-/* place instruction into slot */
-function automatic slot_t place(insn_t i, slot_t src);
- slot_t slot;
- slot.op = i.op;
- slot.imm = i.imm;
- slot.in = i.in;
- slot.out = place_out(0, 0, i.out, src.out);
+/* keep only the lowest-indexed set bit of vec; returns all zeroes when vec
+ * has no bits set */
+function automatic bit[SLOT_COUNT-1:0] select_first(bit[SLOT_COUNT-1:0] vec);
+	bit[SLOT_COUNT-1:0] picked = '0;
+	bit taken = 0;
+
+	for (int s = 0; s < SLOT_COUNT; ++s) begin
+		/* once a set bit has been passed, later bits stay cleared */
+		if (!taken)
+			picked[s] = vec[s];
+		taken = taken | vec[s];
+	end
+
+	return picked;
+endfunction
+
+task automatic select( /* recursively give each queue entry from i upwards at most one slot */
+ input int i, /* queue index handled by this call */
+ input bit[SLOT_COUNT-1:0] possible[QUE_DEPTH], /* per-entry mask of candidate slots */
+ input bit[SLOT_COUNT-1:0] reserved, /* slots already claimed by lower-indexed entries */
+ output bit[SLOT_COUNT-1:0] selected[QUE_DEPTH]); /* per-entry mask of the slot chosen, if any */
+
+ bit[SLOT_COUNT-1:0] s = select_first(~reserved & possible[i]); /* lowest candidate slot that isn't reserved */
+
+ if (i < QUE_DEPTH - 1)
+ select(i + 1, possible, s | reserved, selected); /* entries above i can't reuse the slot picked here */
+
+ /* write out last, otherwise the recursive task will zero everything out */
+ selected[i] = s;
+endtask
+
+/* compute the next contents of slot `slot_idx`: the slot keeps its current
+ * value from `slots` unless some queue entry selected it, in which case it
+ * takes that entry's placed form (the highest-indexed such entry wins) */
+function automatic slot_t maybe_populate(
+	bit[SLOT_COUNT-1:0] selected[QUE_DEPTH],
+	insn_t[QUE_DEPTH-1:0] que,
+	slot_t[SLOT_COUNT-1:0] slots,
+	int slot_idx);
+
+	slot_t slot = slots[slot_idx];
+	for (int q = 0; q < QUE_DEPTH; ++q) begin
+		slot = selected[q][slot_idx] ? place(que[q]) : slot;
+	end
+
return slot;
endfunction
-typedef struct packed {
- insn_t[QUE_DEPTH-1:0] que;
- slot_t[SLOT_COUNT-1:0] slots;
-} stages_out_t;
-
-function automatic stages_out_t stages(int i,
- insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots);
- if (i < SLOT_COUNT) begin
- logic ok = !depends(que[0], slots);
- slots[i] = ok ? place(que[0], slots[i]) : slots[i];
- return stages(i + 1, ok ? next_insn(que) : que, slots);
- end else begin
- return {que, slots};
+/* Combinational scheduling pass. Works out which queue entries can go into
+ * which slots and drives the module outputs slots_o, shift_o and que_o.
+ *
+ *   que   - pending instructions; lower indices are checked as earlier entries
+ *   slots - current slot contents
+ *
+ * Each entry that was given a slot bumps the shift count by one, starting from
+ * shift_r. All outputs are derived from the arguments only, so the result does
+ * not silently depend on que_i/slots_i being what the caller passed in.
+ *
+ * NOTE(review): shift is $clog2(QUE_DEPTH) bits wide, so if QUE_DEPTH is a
+ * power of two and every entry is selected the count wraps back to 0 --
+ * confirm whether that case can occur. */
+function automatic void schedule(insn_t[QUE_DEPTH-1:0] que, slot_t[SLOT_COUNT-1:0] slots);
+	bit[SLOT_COUNT-1:0] possible[QUE_DEPTH];
+	bit[SLOT_COUNT-1:0] selected[QUE_DEPTH];
+	bit[$clog2(QUE_DEPTH)-1:0] shift;
+
+	shift = shift_r;
+
+	/* per entry: which slots it could go into */
+	for (int i = 0; i < QUE_DEPTH; ++i)
+		possible[i] = dependvec(i, que, slots);
+
+	/* per entry: the single slot (if any) it actually gets */
+	select(0, possible, '0, selected);
+
+	for (int i = 0; i < QUE_DEPTH; ++i) begin
+		assert($onehot0(selected[i]));
+		if (|selected[i])
+			++shift;
end
+
+	for (int i = 0; i < SLOT_COUNT; ++i)
+		slots_o[i] = maybe_populate(selected, que, slots, i);
+
+	/* should que be shifted somewhere else, I wonder? */
+	shift_o = shift;
+	que_o = que >> (shift * $bits(insn_t));
endfunction
always_comb begin
- stages_out_t out = stages(0, que_i, slots_i);
- que_o = out.que;
- slots_o = out.slots;
+ schedule(que_i, slots_i);
end
+
+always_ff @(posedge clk_i or negedge rst_ni) begin /* shift_r register, asynchronous active-low reset */
+ if (!rst_ni) begin
+ shift_r <= '0;
+ end else begin
+ shift_r <= '0; /* NOTE(review): same value as the reset branch, so shift_r never leaves zero -- looks like a placeholder, confirm the intended update */
+ end
+end
+
endmodule
diff --git a/src/ttarv32.sv b/src/ttarv32.sv
index 0edf1cb..cb04824 100644
--- a/src/ttarv32.sv
+++ b/src/ttarv32.sv
@@ -1,8 +1,9 @@
`include "common.svh"
+`default_nettype none
+
module ttarv32 #(
parameter SLOT_COUNT,
- parameter SLOT_DEPTH,
parameter ALU_COUNT
)(
input clk_i,
@@ -13,7 +14,7 @@ module ttarv32 #(
typedef struct packed {
mov_t[1:0] in;
- mov_t[SLOT_DEPTH-1:0] out;
+ mov_t out;
op_t op;
imm_t imm;
} slot_t;
@@ -39,63 +40,44 @@ rv2insn #(
.que_o(rv_que)
);
-typedef struct packed {
- insn_t[SLOT_COUNT-1:0] que;
- xlen_t pc;
-} merge_t;
-
-/* repetition from sched.sv, hmm */
-function automatic insn_t[SLOT_COUNT-1:0] next_insn(insn_t[SLOT_COUNT-1:0] que);
- return que >> $bits(insn_t);
-endfunction
-
-function automatic logic is_noop(mov_t m);
- return m.src == NOP && m.dst == NOP;
-endfunction
-
-/* not the best thing in the world but good enough for now */
-function automatic logic is_free(insn_t i);
- return is_noop(i.in[0]) & is_noop(i.in[1]) & is_noop(i.out);
+function automatic insn_t[SLOT_COUNT-1:0] merge(insn_t[SLOT_COUNT-1:0] prev_que, insn_t[SLOT_COUNT-1:0] decoded, bit[$clog2(SLOT_COUNT)-1:0] shift); /* overlay decoded, moved up by `shift` entries, onto prev_que */
+ return prev_que | (decoded << (shift * $bits(insn_t))); /* NOTE(review): plain bitwise OR, so both inputs survive intact only where prev_que is zero under the shifted decoded entries -- confirm shift guarantees that */
endfunction
-function automatic merge_t merge(int i,
- insn_t[SLOT_COUNT-1:0] prev_que, insn_t[SLOT_COUNT-1:0] next_que, xlen_t pc);
- if (i < SLOT_COUNT) begin
- logic ok = is_free(prev_que[i]);
- prev_que[i] = ok ? next_que[0] : prev_que[i];
- return merge(i + 1,
- prev_que,
- ok ? next_insn(next_que) : next_que,
- ok ? pc + 1 : pc);
- end else begin
- return {prev_que, pc};
- end
-endfunction
+bit[$clog2(SLOT_COUNT)-1:0] shift_r, shift;
-merge_t merged;
-assign merged = merge(0, prev_que_r, rv_que, pc_r);
+insn_t[SLOT_COUNT-1:0] merged_que;
+assign merged_que = merge(prev_que_r, rv_que, shift_r);
sched #(
.QUE_DEPTH(SLOT_COUNT),
.SLOT_COUNT(SLOT_COUNT),
- .SLOT_DEPTH(SLOT_DEPTH),
.slot_t(slot_t)
) sched (
- .que_i(merged.que),
+ .clk_i(clk_i),
+ .rst_ni(rst_ni),
+ .que_i(merged_que),
.que_o(next_que),
.slots_i(prev_slots_r),
- .slots_o(next_slots)
+ .slots_o(next_slots),
+ .shift_o(shift)
);
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
prev_slots_r <= 0;
- prev_que_r <= 0;
+ prev_que_r <= 0;
+ shift_r <= 0;
+
pc_r <= 0;
+ pc_o <= 0;
end else begin
prev_slots_r <= next_slots;
- prev_que_r <= next_que;
- pc_r <= merged.pc;
+ prev_que_r <= next_que;
+ shift_r <= shift;
+
+ pc_o <= pc_r + xlen_t'(shift);
+ pc_r <= pc_o;
end
end
diff --git a/tb/sched_tb.sv b/tb/sched_tb.sv
index 036464e..13c032e 100644
--- a/tb/sched_tb.sv
+++ b/tb/sched_tb.sv
@@ -2,6 +2,8 @@
module sched_tb;
+logic clk, rst_n;
+
typedef struct packed {
mov_t[1:0] in;
mov_t[0:0] out;
@@ -13,17 +15,20 @@ slot_t[1:0] prev_slots;
slot_t[1:0] next_slots;
insn_t[2:0] prev_que;
insn_t[2:0] next_que;
+bit[2-1:0] shift;
sched #(
.QUE_DEPTH(3),
.SLOT_COUNT(2),
- .SLOT_DEPTH(1),
.slot_t(slot_t)
) sched (
+ .clk_i(clk),
+ .rst_ni(rst_n),
.que_i(prev_que),
.que_o(next_que),
.slots_i(prev_slots),
- .slots_o(next_slots)
+ .slots_o(next_slots),
+ .shift_o(shift)
);
slot_t[1:0] first_check = {76'b0,
@@ -39,6 +44,13 @@ initial begin
$dumpfile("sched_tb.vcd");
$dumpvars();
+ clk = 0;
+ rst_n = 0;
+
+ #1
+
+ rst_n = 1;
+
/* pretend we've queued something like
* add r1, r2, r3
* add r4, r2, r1
@@ -59,20 +71,34 @@ initial begin
prev_slots = '0;
- #10
+ #1
+ clk = 1;
+
+ #1
+ clk = 0;
assert (next_slots == first_check)
else $error("\nwanted:\t%h", first_check, "\ngot:\t%h", next_slots);
+ assert (shift == 1)
+ else $error("expected shift == 1, got %d", shift);
+
/* pretend all operations finished */
prev_slots = '0;
prev_que = next_que;
- #10
+ #1
+ clk = 1;
+
+ #1
+ clk = 0;
assert (next_slots == second_check)
else $error("\nwanted:\t%h", second_check, "\ngot:\t%h", next_slots);
+ assert (shift == 2)
+ else $error("expected shift == 2, got %d", shift);
+
#10 $finish;
end
endmodule
diff --git a/tb/ttarv32_tb.sv b/tb/ttarv32_tb.sv
index 6277f02..5b30f25 100644
--- a/tb/ttarv32_tb.sv
+++ b/tb/ttarv32_tb.sv
@@ -18,7 +18,6 @@ logic rst_n;
ttarv32 #(
.SLOT_COUNT(4),
- .SLOT_DEPTH(3),
.ALU_COUNT(2)
) ttarv32 (
.clk_i(clk),