// Filename: lib.rs
// Version: 0.15
// Date: 01-01-2021 (DD-MM-YYYY)
// Library: gpcas_cpu_model
//
// Copyright (c) 2021 Kai Rese
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program. If not, see
// <https://www.gnu.org/licenses/>.
//! Defines the structure of an abstract CPU model and some helpful functions for working with it,
//! as well as the corresponding file structure.
//!
//! Any application working with the model should use the structs and definitions in this crate
//! to ensure compatibility.
#![warn(missing_docs)]
pub mod config;
mod legacy;
mod validation;
use gpcas_base::file::{deserialize_upgrade_from, DeserializeFunction, GpcasFileStruct};
use gpcas_base::instruction_type;
use gpcas_isa::Isa;
use serde::{Deserialize, Serialize};
use std::iter::once;
pub use validation::{validate, BranchPredictionError, FailedValidation, ValidationError};
/// The configuration of an abstract CPU model.
///
/// The struct bundles a few global values (`max_vector_size`, `execution_latencies`,
/// `dispatch_cycle_count`) with the configuration of the individual components, most of which
/// are described by types from the [`config`] module.
///
/// It derives `Serialize` and `Deserialize`, so the field layout below is also the
/// layout that serde sees.
///
/// Note that the derived `Default` implementation leaves `max_vector_size` at `0`, whereas
/// [`CpuModel::new`] initializes it to `128`.
#[derive(Clone, Default, Deserialize, Serialize)]
pub struct CpuModel {
/// The maximum supported vector size of the model in bits.
pub max_vector_size: usize,
/// The ALU latency of each instruction class.
///
/// Only contains overrides; see [`InstructionLatencies`] for how unset values are resolved.
pub execution_latencies: InstructionLatencies,
/// Configuration for the fetch stage of the front end.
pub fetch: config::Fetch,
/// Configuration for the memory controller.
pub memory_controller: config::MemoryController,
/// Caches of the memory hierarchy.
pub caches: Vec<config::Cache>,
/// Configuration for the decode stage of the front end.
pub decoder: config::Decoder,
/// Configuration for the reorder buffer.
pub reorder_buffer: config::ReorderBuffer,
/// How many cycles the dispatch unit needs to dispatch an instruction.
pub dispatch_cycle_count: usize,
/// Configuration of the register file.
pub register_file: config::RegisterFile,
/// Configuration of schedulers.
pub schedulers: Vec<config::Scheduler>,
/// Configuration for each pipeline from just after the front end to the last stage.
pub pipelines: Vec<config::Pipeline>,
/// Configuration of the load/store unit.
pub load_store_unit: config::LoadStoreUnit,
}
/// Defines the amount of clock cycles each instruction type needs to execute.
///
/// The fields of this struct are used to selectively overwrite default values. If a field is
/// `None`, the default value is used.
///
/// The defaults live in [`default_latencies`]; [`InstructionLatencies::as_array`] performs the
/// actual substitution. Instruction types without a field in this struct are not configurable
/// here and are emitted by `as_array` as a fixed latency of one cycle.
#[derive(Clone, Default, Deserialize, Serialize)]
pub struct InstructionLatencies {
/// Typically takes multiple, but few clock cycles.
///
/// Falls back to [`default_latencies::INT_MUL`] when `None`.
pub integer_multiply: Option<u16>,
/// Takes many clock cycles, usually dependent on the operand size.
///
/// Falls back to [`default_latencies::INT_DIV`] when `None`.
pub integer_divide: Option<u16>,
/// Combination of add- and mul-operation.
///
/// Falls back to [`default_latencies::INT_MUL_ADD`] when `None`.
pub integer_multiply_add: Option<u16>,
/// Completes fast, but needs a shifter.
///
/// Falls back to [`default_latencies::INT_SHIFT`] when `None`.
pub integer_shift: Option<u16>,
/// Might take longer than floating point multiplication.
///
/// Falls back to [`default_latencies::FLOAT_ADD`] when `None`.
pub float_add: Option<u16>,
/// Usually as fast as integer multiplication.
///
/// Falls back to [`default_latencies::FLOAT_MUL`] when `None`.
pub float_multiply: Option<u16>,
/// Takes many clock cycles, usually dependent on the operand size.
///
/// Falls back to [`default_latencies::FLOAT_DIV`] when `None`.
pub float_divide: Option<u16>,
/// Might need multiple execution ports if a design has separate add- and mul-pipes.
///
/// Falls back to [`default_latencies::FLOAT_MUL_ADD`] when `None`.
pub float_multiply_add: Option<u16>,
}
impl CpuModel {
    /// Creates an empty model: no caches, schedulers or pipelines, and every component
    /// configuration at its default value.
    ///
    /// The only value that differs from the derived `Default` implementation is
    /// `max_vector_size`, which starts out at 128 bits.
    pub fn new() -> Self {
        Self {
            max_vector_size: 128,
            // Everything else matches the field-wise defaults: empty vectors, a dispatch
            // cycle count of zero and default-constructed component configurations.
            ..Self::default()
        }
    }

    /// Checks whether the model is configured correctly for the given ISA.
    ///
    /// This is a method-style convenience wrapper around the free function [`validate`] and
    /// returns exactly what that function returns.
    pub fn validate(&self, isa: &Isa) -> Result<(), FailedValidation> {
        validation::validate(self, isa)
    }
}
impl InstructionLatencies {
    /// Transforms the object into an array conforming to the [`gpcas_base::instruction_type`]
    /// definition as array indices.
    ///
    /// Every configurable entry uses the override stored in `self` if present and the matching
    /// constant from [`default_latencies`] otherwise. Entries without a corresponding field
    /// (register moves, moves, simple operations, integer additions and branches) are fixed at
    /// one clock cycle.
    pub fn as_array(&self) -> [u16; instruction_type::TYPE_COUNT] {
        [
            // register moves
            1,
            // moves
            1,
            // simple
            1,
            // integer add
            1,
            self.integer_multiply.unwrap_or(default_latencies::INT_MUL),
            self.integer_divide.unwrap_or(default_latencies::INT_DIV),
            self.integer_multiply_add
                .unwrap_or(default_latencies::INT_MUL_ADD),
            self.integer_shift.unwrap_or(default_latencies::INT_SHIFT),
            self.float_add.unwrap_or(default_latencies::FLOAT_ADD),
            self.float_multiply.unwrap_or(default_latencies::FLOAT_MUL),
            self.float_divide.unwrap_or(default_latencies::FLOAT_DIV),
            self.float_multiply_add
                .unwrap_or(default_latencies::FLOAT_MUL_ADD),
            // branches
            1,
        ]
    }

    /// Returns an iterator over every latency defined in the struct.
    ///
    /// The values are yielded unresolved (`None` means "use the default") and in field
    /// declaration order: integer multiply, integer divide, integer multiply-add, integer
    /// shift, float add, float multiply, float divide, float multiply-add.
    pub fn iter(&self) -> impl std::iter::Iterator<Item = Option<u16>> {
        once(self.integer_multiply)
            .chain(once(self.integer_divide))
            .chain(once(self.integer_multiply_add))
            .chain(once(self.integer_shift))
            .chain(once(self.float_add))
            .chain(once(self.float_multiply))
            // The seventh entry is the float divide latency; yielding the integer divide
            // latency a second time here would hide `float_divide` from every consumer.
            .chain(once(self.float_divide))
            .chain(once(self.float_multiply_add))
    }
}
/// Declares [`CpuModel`] as a versioned file structure by providing the file identifier, the
/// current file version and the list of older versions that can still be read.
impl GpcasFileStruct for CpuModel {
// Identifier string associated with CPU model files.
const FILE_IDENTIFIER: &'static str = "gpcas::cpu_model";
// File version that corresponds to the `CpuModel` definition in this file.
const CURRENT_FILE_VERSION: usize = 4;
// Older file versions (2 and 3) that are still accepted. Each one is paired with a function
// that is instantiated for the matching struct in the private `legacy` module.
// NOTE(review): presumably `deserialize_upgrade_from` deserializes the legacy struct and
// converts it into `Self` - confirm against `gpcas_base::file`.
const COMPATIBLE_VERSIONS: &'static [(usize, DeserializeFunction<Self>)] = &[
(2, deserialize_upgrade_from::<legacy::v2::CPUModel, Self>),
(3, deserialize_upgrade_from::<legacy::v3::CPUModel, Self>),
];
}
/// The default latency in clock cycles for all instruction types.
///
/// Types not mentioned in here either share their value with another one, or are hardcoded to one
/// clock cycle.
///
/// Each constant is the fallback for the `InstructionLatencies` field of the corresponding
/// name and is applied by `InstructionLatencies::as_array` when that field is `None`.
pub mod default_latencies {
/// Typically takes multiple, but few clock cycles.
///
/// Fallback for `InstructionLatencies::integer_multiply`.
pub const INT_MUL: u16 = 3;
/// Takes many clock cycles, usually dependent on the operand size.
///
/// Fallback for `InstructionLatencies::integer_divide`.
pub const INT_DIV: u16 = 22;
/// Combination of add- and mul-operation.
///
/// Fallback for `InstructionLatencies::integer_multiply_add`.
pub const INT_MUL_ADD: u16 = 3;
/// Completes fast, but needs a shifter.
///
/// Fallback for `InstructionLatencies::integer_shift`.
pub const INT_SHIFT: u16 = 1;
/// Might take longer than floating point multiplication.
///
/// Fallback for `InstructionLatencies::float_add`.
pub const FLOAT_ADD: u16 = 4;
/// Usually as fast as integer multiplication.
///
/// Fallback for `InstructionLatencies::float_multiply`.
pub const FLOAT_MUL: u16 = 3;
/// Takes many clock cycles, usually dependent on the operand size.
///
/// Fallback for `InstructionLatencies::float_divide`.
pub const FLOAT_DIV: u16 = 22;
/// Might take multiple ports if a design has separate add- and mul-pipes.
///
/// Fallback for `InstructionLatencies::float_multiply_add`.
pub const FLOAT_MUL_ADD: u16 = 5;
}
