use crate::program::{Instruction, Opcode};
use crate::register::{NUM_REGISTERS, RegisterId};
mod model {
use crate::program::Opcode;
pub(super) const TARGET_CYCLES: usize = 192;
pub(super) const SCHEDULE_SIZE: usize = TARGET_CYCLES + MAX_LATENCY;
pub(super) const NUM_EXECUTION_PORTS: usize = 3;
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(super) struct ExecPorts(pub(super) u8);
const P5: ExecPorts = ExecPorts(1 << 0);
const P0: ExecPorts = ExecPorts(1 << 1);
const P1: ExecPorts = ExecPorts(1 << 2);
const P01: ExecPorts = ExecPorts(P0.0 | P1.0);
const P05: ExecPorts = ExecPorts(P0.0 | P5.0);
const P015: ExecPorts = ExecPorts(P0.0 | P1.0 | P5.0);
const MAX_LATENCY: usize = 4;
#[inline(always)]
pub(super) fn instruction_latency_cycles(op: Opcode) -> usize {
match op {
Opcode::AddConst => 1,
Opcode::AddShift => 1,
Opcode::Branch => 1,
Opcode::Rotate => 1,
Opcode::Sub => 1,
Opcode::Target => 1,
Opcode::Xor => 1,
Opcode::XorConst => 1,
Opcode::Mul => 3,
Opcode::SMulH => 4,
Opcode::UMulH => 4,
}
}
#[inline(always)]
pub(super) fn micro_operations(op: Opcode) -> (ExecPorts, Option<ExecPorts>) {
match op {
Opcode::AddConst => (P015, None),
Opcode::Sub => (P015, None),
Opcode::Xor => (P015, None),
Opcode::XorConst => (P015, None),
Opcode::Mul => (P1, None),
Opcode::AddShift => (P01, None),
Opcode::Rotate => (P05, None),
Opcode::SMulH => (P1, Some(P5)),
Opcode::UMulH => (P1, Some(P5)),
Opcode::Branch => (P015, Some(P015)),
Opcode::Target => (P015, Some(P015)),
}
}
#[inline(always)]
pub(super) fn instruction_sub_cycle_count(op: Opcode) -> usize {
match micro_operations(op) {
(_, None) => 1,
(_, Some(_)) => 2,
}
}
}
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
struct ExecPortIndex {
index: u8,
}
#[derive(Debug, Default, Clone)]
pub(crate) struct Scheduler {
sub_cycle: SubCycle,
cycle: Cycle,
exec: ExecSchedule,
data: DataSchedule,
}
impl Scheduler {
#[inline(always)]
pub(crate) fn new() -> Self {
Default::default()
}
#[inline(always)]
pub(crate) fn stall(&mut self) -> Result<(), ()> {
self.advance(SubCycle::PER_CYCLE as usize)
}
#[inline(always)]
pub(crate) fn instruction_stream_sub_cycle(&self) -> SubCycle {
self.sub_cycle
}
#[inline(always)]
fn advance(&mut self, n: usize) -> Result<(), ()> {
let sub_cycle = self.sub_cycle.add_usize(n)?;
let cycle = sub_cycle.cycle();
if cycle < Cycle::target() {
self.sub_cycle = sub_cycle;
self.cycle = cycle;
Ok(())
} else {
Err(())
}
}
#[inline(always)]
pub(crate) fn advance_instruction_stream(&mut self, op: Opcode) -> Result<(), ()> {
self.advance(model::instruction_sub_cycle_count(op))
}
#[inline(always)]
pub(crate) fn instruction_plan(&self, op: Opcode) -> Result<InstructionPlan, ()> {
self.exec.instruction_plan(self.cycle, op)
}
#[inline(always)]
pub(crate) fn commit_instruction_plan(&mut self, plan: &InstructionPlan, inst: &Instruction) {
self.exec.mark_instruction_busy(plan);
if let Some(dst) = inst.destination() {
self.data
.plan_register_write(dst, plan.cycle_retired(inst.opcode()));
}
}
#[inline(always)]
pub(crate) fn register_available(&self, reg: RegisterId, cycle: Cycle) -> bool {
self.data.register_available(reg, cycle)
}
#[inline(always)]
pub(crate) fn overall_latency(&self) -> Cycle {
self.data.overall_latency()
}
}
#[derive(Debug, Default, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
pub(crate) struct Cycle(u8);
impl Cycle {
#[inline(always)]
fn target() -> Self {
Cycle(
model::TARGET_CYCLES
.try_into()
.expect("Cycle type wide enough for target count"),
)
}
#[inline(always)]
pub(crate) fn as_usize(&self) -> usize {
self.0.into()
}
#[inline(always)]
fn add_usize(&self, n: usize) -> Result<Self, ()> {
let result = self.as_usize() + n;
if result < model::SCHEDULE_SIZE {
Ok(Cycle(
result
.try_into()
.expect("Cycle type wide enough for full schedule size"),
))
} else {
Err(())
}
}
}
#[derive(Debug, Default, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
pub(crate) struct SubCycle(u16);
impl SubCycle {
const PER_CYCLE: u16 = 3;
const MAX: Self = SubCycle(model::SCHEDULE_SIZE as u16 * Self::PER_CYCLE - 1);
#[inline(always)]
pub(crate) fn as_usize(&self) -> usize {
self.0.into()
}
#[inline(always)]
fn cycle(&self) -> Cycle {
Cycle(
(self.0 / Self::PER_CYCLE)
.try_into()
.expect("Cycle type wide enough for conversion from SubCycle"),
)
}
#[inline(always)]
fn add_usize(&self, n: usize) -> Result<Self, ()> {
let result = self.as_usize() + n;
if result < Self::MAX.0.into() {
Ok(SubCycle(result.try_into().expect(
"SubCycle type wide enough for full schedule size",
)))
} else {
Err(())
}
}
}
#[derive(Debug, Default, Clone)]
struct PortSchedule {
busy: SimpleBitArray<
{ (usize::BITS as usize + model::SCHEDULE_SIZE - 1) / (usize::BITS as usize) },
>,
}
#[derive(Debug, Default, Clone)]
struct DataSchedule {
latencies: [Cycle; NUM_REGISTERS],
}
impl DataSchedule {
#[inline(always)]
fn plan_register_write(&mut self, dst: RegisterId, cycle: Cycle) {
self.latencies[dst.as_usize()] = cycle;
}
#[inline(always)]
fn register_available(&self, reg: RegisterId, cycle: Cycle) -> bool {
self.latencies[reg.as_usize()] <= cycle
}
#[inline(always)]
fn overall_latency(&self) -> Cycle {
match self.latencies.iter().max() {
Some(cycle) => *cycle,
None => Default::default(),
}
}
}
#[derive(Debug, Default, Clone)]
struct ExecSchedule {
ports: [PortSchedule; model::NUM_EXECUTION_PORTS],
}
impl ExecSchedule {
#[inline(always)]
fn micro_plan(&self, begin: Cycle, ports: model::ExecPorts) -> Result<MicroOpPlan, ()> {
let mut cycle = begin;
loop {
for index in 0..(model::NUM_EXECUTION_PORTS as u8) {
if (ports.0 & (1 << index)) != 0
&& !self.ports[index as usize].busy.get(cycle.as_usize())
{
return Ok(MicroOpPlan {
cycle,
port: ExecPortIndex { index },
});
}
}
cycle = cycle.add_usize(1)?;
}
}
#[inline(always)]
fn mark_micro_busy(&mut self, plan: MicroOpPlan) {
self.ports[plan.port.index as usize]
.busy
.set(plan.cycle.as_usize(), true);
}
#[inline(always)]
fn instruction_plan(&self, begin: Cycle, op: Opcode) -> Result<InstructionPlan, ()> {
match model::micro_operations(op) {
(single_port, None) => {
InstructionPlan::from_micro_plans(self.micro_plan(begin, single_port)?, None)
}
(first_port, Some(second_port)) => {
let mut cycle = begin;
loop {
if let (Ok(first_plan), Ok(second_plan)) = (
self.micro_plan(cycle, first_port),
self.micro_plan(cycle, second_port),
) {
if let Ok(joint_plan) =
InstructionPlan::from_micro_plans(first_plan, Some(second_plan))
{
return Ok(joint_plan);
}
}
cycle = cycle.add_usize(1)?;
}
}
}
}
#[inline(always)]
fn mark_instruction_busy(&mut self, plan: &InstructionPlan) {
let (first, second) = plan.as_micro_plans();
self.mark_micro_busy(first);
if let Some(second) = second {
self.mark_micro_busy(second);
}
}
}
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
struct MicroOpPlan {
cycle: Cycle,
port: ExecPortIndex,
}
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) struct InstructionPlan {
cycle: Cycle,
first_port: ExecPortIndex,
second_port: Option<ExecPortIndex>,
}
impl InstructionPlan {
#[inline(always)]
pub(crate) fn cycle_issued(&self) -> Cycle {
self.cycle
}
#[inline(always)]
pub(crate) fn cycle_retired(&self, op: Opcode) -> Cycle {
self.cycle
.add_usize(model::instruction_latency_cycles(op))
.expect("instruction retired prior to end of schedule")
}
#[inline(always)]
fn as_micro_plans(&self) -> (MicroOpPlan, Option<MicroOpPlan>) {
(
MicroOpPlan {
cycle: self.cycle,
port: self.first_port,
},
self.second_port.map(|port| MicroOpPlan {
cycle: self.cycle,
port,
}),
)
}
#[inline(always)]
fn from_micro_plans(first_op: MicroOpPlan, second_op: Option<MicroOpPlan>) -> Result<Self, ()> {
let second_port = match second_op {
None => None,
Some(second_op) => {
if first_op.cycle == second_op.cycle {
Some(second_op.port)
} else {
return Err(());
}
}
};
Ok(Self {
cycle: first_op.cycle,
first_port: first_op.port,
second_port,
})
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
struct SimpleBitArray<const N: usize> {
inner: [usize; N],
}
impl<const N: usize> Default for SimpleBitArray<N> {
#[inline(always)]
fn default() -> Self {
Self {
inner: [0_usize; N],
}
}
}
impl<const N: usize> SimpleBitArray<N> {
#[inline(always)]
fn set(&mut self, index: usize, value: bool) {
let word_size = usize::BITS as usize;
let word_index = index / word_size;
let bit_mask = 1 << (index % word_size);
if value {
self.inner[word_index] |= bit_mask;
} else {
self.inner[word_index] &= !bit_mask;
}
}
#[inline(always)]
fn get(&self, index: usize) -> bool {
let word_size = usize::BITS as usize;
let word_index = index / word_size;
let bit_mask = 1 << (index % word_size);
0 != (self.inner[word_index] & bit_mask)
}
}