// Filename: lib.rs
// Version:	 0.8
// Date:	 15-09-2021 (DD-MM-YYYY)
// Library:  gpcas_cpu_model
//
// Copyright (c) 2021 Kai Rese
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program. If not, see
// <https://www.gnu.org/licenses/>.

//! Defines the structure of an abstract CPU model and some helpful functions for working with it,
//! as well as the corresponding file structure.
//!
//! Any application working with the model should use the structs and definitions in this crate
//! to ensure compatibility.
#![warn(missing_docs)]

pub mod component_config;
mod legacy;

use gpcas_base::file::{deserialize_upgrade_from, DeserializeFunction, GpcasFileStruct};
use serde::{Deserialize, Serialize};

/// The configuration of an abstract CPU model.
///
/// The struct can be serialized and deserialized with `serde`, and it implements
/// [`GpcasFileStruct`] with the file identifier `gpcas::cpu_model`.
///
/// Note that the derived [`Default`] implementation sets `max_vector_size` and `decoder_count`
/// to zero, whereas [`CPUModel::new`] initializes them to 128 and 1 respectively.
#[derive(Default, Deserialize, Serialize)]
pub struct CPUModel {
    /// The maximum supported vector size of the model in bits.
    pub max_vector_size: usize,
    /// The ALU latency of each instruction class.
    ///
    /// Only overrides are stored here; see [`InstructionLatencies`] for how unset values are
    /// resolved.
    pub execution_latencies: InstructionLatencies,
    /// Configuration for the fetch stage of the front end.
    pub fetch_config: component_config::FetchConfig,
    /// Configuration for the memory controller.
    pub memory_controller_config: component_config::MemoryControllerConfig,
    /// Caches of the memory hierarchy, one entry per cache.
    pub caches: Vec<component_config::CacheConfig>,
    /// The number of parallel decoders.
    pub decoder_count: usize,
    /// Configuration of the register file.
    pub reg_file_config: component_config::RegisterFileConfig,
    /// Configuration of the schedulers, one entry per scheduler.
    pub schedulers: Vec<component_config::SchedulerConfig>,
    /// Configuration for each pipeline from front end to the last stage.
    pub pipelines: Vec<component_config::PipelineConfig>,
}

/// Defines the number of clock cycles each instruction type needs.
///
/// The fields of this struct are used to selectively overwrite default values. If a field is
/// `None`, the corresponding constant from [`default_latencies`] is used instead.
///
/// A value of zero means that the type isn't supported in a pipeline. By default, all types are
/// supported.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct InstructionLatencies {
    /// Latency override for integer multiplication, defaulting to
    /// [`default_latencies::INT_MUL`].
    ///
    /// Typically takes multiple, but few clock cycles.
    pub integer_multiply: Option<u16>,
    /// Latency override for integer division, defaulting to [`default_latencies::INT_DIV`].
    ///
    /// Takes many clock cycles, usually dependent on the operand size.
    pub integer_divide: Option<u16>,
    /// Latency override for integer multiply-add, defaulting to
    /// [`default_latencies::INT_MUL_ADD`].
    ///
    /// Combination of add- and mul-operation.
    pub integer_multiply_add: Option<u16>,
    /// Latency override for integer shifts, defaulting to [`default_latencies::INT_SHIFT`].
    ///
    /// Completes fast, but needs a shifter.
    pub integer_shift: Option<u16>,
    /// Latency override for floating point addition, defaulting to
    /// [`default_latencies::FLOAT_ADD`].
    ///
    /// Might take longer than floating point multiplication.
    pub float_add: Option<u16>,
    /// Latency override for floating point multiplication, defaulting to
    /// [`default_latencies::FLOAT_MUL`].
    ///
    /// Usually as fast as integer multiplication.
    pub float_multiply: Option<u16>,
    /// Latency override for floating point division, defaulting to
    /// [`default_latencies::FLOAT_DIV`].
    ///
    /// Takes many clock cycles, usually dependent on the operand size.
    pub float_divide: Option<u16>,
    /// Latency override for floating point multiply-add, defaulting to
    /// [`default_latencies::FLOAT_MUL_ADD`].
    ///
    /// Might need multiple execution ports if a design has separate add- and mul-pipes.
    pub float_multiply_add: Option<u16>,
}

impl CPUModel {
    /// Creates a new model without components and default global values.
    pub fn new() -> Self {
        CPUModel {
            max_vector_size: 128,
            execution_latencies: InstructionLatencies::default(),
            fetch_config: component_config::FetchConfig::default(),
            memory_controller_config: component_config::MemoryControllerConfig::default(),
            caches: Vec::new(),
            decoder_count: 1,
            reg_file_config: Default::default(),
            schedulers: Vec::new(),
            pipelines: Vec::new(),
        }
    }
}

impl InstructionLatencies {
    /// Resolves all latencies and returns them as an array whose indices conform to the
    /// [`gpcas_base::instruction_type`] definition.
    ///
    /// Every field that is `None` is replaced by the matching constant from
    /// [`default_latencies`]. The classes that have no field in this struct (register moves,
    /// moves, simple, integer add, branches and jumps) always get a latency of one clock cycle.
    pub fn as_array(&self) -> [u16; 14] {
        // Latency of the instruction classes that cannot be overridden.
        const FIXED: u16 = 1;

        let int_mul = self.integer_multiply.unwrap_or(default_latencies::INT_MUL);
        let int_div = self.integer_divide.unwrap_or(default_latencies::INT_DIV);
        let int_mul_add = self
            .integer_multiply_add
            .unwrap_or(default_latencies::INT_MUL_ADD);
        let int_shift = self.integer_shift.unwrap_or(default_latencies::INT_SHIFT);

        let float_add = self.float_add.unwrap_or(default_latencies::FLOAT_ADD);
        let float_mul = self.float_multiply.unwrap_or(default_latencies::FLOAT_MUL);
        let float_div = self.float_divide.unwrap_or(default_latencies::FLOAT_DIV);
        let float_mul_add = self
            .float_multiply_add
            .unwrap_or(default_latencies::FLOAT_MUL_ADD);

        // The element order is significant: it has to match the instruction type indices.
        [
            FIXED, // register moves
            FIXED, // moves
            FIXED, // simple
            FIXED, // integer add
            int_mul,
            int_div,
            int_mul_add,
            int_shift,
            float_add,
            float_mul,
            float_div,
            float_mul_add,
            FIXED, // branches
            FIXED, // jumps
        ]
    }
}

impl GpcasFileStruct for CPUModel {
    // Identifier string for CPU model files.
    const FILE_IDENTIFIER: &'static str = "gpcas::cpu_model";
    // File version belonging to the current `CPUModel` definition.
    const CURRENT_FILE_VERSION: usize = 1;
    // Older versions that are still accepted, each paired with its deserialize function.
    // Version 0 goes through `legacy::CPUModelV0`; presumably it is deserialized as that type
    // and then converted into `CPUModel` (see `deserialize_upgrade_from` in `gpcas_base`).
    const COMPATIBLE_VERSIONS: &'static [(usize, DeserializeFunction<Self>)] =
        &[(0, deserialize_upgrade_from::<legacy::CPUModelV0, Self>)];
}

/// The default latency in clock cycles for all instruction types.
///
/// These values are used by [`InstructionLatencies::as_array`] for every field that is `None`.
///
/// Types not mentioned in here either share their value with another one, or are hardcoded to one
/// clock cycle.
pub mod default_latencies {
    /// Default latency of integer multiplication.
    ///
    /// Typically takes multiple, but few clock cycles.
    pub const INT_MUL: u16 = 3;
    /// Default latency of integer division.
    ///
    /// Takes many clock cycles, usually dependent on the operand size.
    pub const INT_DIV: u16 = 22;
    /// Default latency of integer multiply-add.
    ///
    /// Combination of add- and mul-operation.
    pub const INT_MUL_ADD: u16 = 3;
    /// Default latency of integer shifts.
    ///
    /// Completes fast, but needs a shifter.
    pub const INT_SHIFT: u16 = 1;
    /// Default latency of floating point addition.
    ///
    /// Might take longer than floating point multiplication.
    pub const FLOAT_ADD: u16 = 4;
    /// Default latency of floating point multiplication.
    ///
    /// Usually as fast as integer multiplication.
    pub const FLOAT_MUL: u16 = 3;
    /// Default latency of floating point division.
    ///
    /// Takes many clock cycles, usually dependent on the operand size.
    pub const FLOAT_DIV: u16 = 22;
    /// Default latency of floating point multiply-add.
    ///
    /// Might take multiple ports if a design has separate add- and mul-pipes.
    pub const FLOAT_MUL_ADD: u16 = 5;
}