// Filename: lib.rs
// Version:	 0.15
// Date:	 01-01-2021 (DD-MM-YYYY)
// Library:  gpcas_cpu_model
//
// Copyright (c) 2021 Kai Rese
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program. If not, see
// <https://www.gnu.org/licenses/>.

//! Defines the structure of an abstract CPU model and some helpful functions for working with it,
//! as well as the corresponding file structure.
//!
//! Any application working with the model should use the structs and definitions in this crate
//! to ensure compatibility.
#![warn(missing_docs)]

pub mod config;
mod legacy;
mod validation;

use gpcas_base::file::{deserialize_upgrade_from, DeserializeFunction, GpcasFileStruct};
use gpcas_base::instruction_type;
use gpcas_isa::Isa;
use serde::{Deserialize, Serialize};
use std::iter::once;

pub use validation::{validate, BranchPredictionError, FailedValidation, ValidationError};

/// The configuration of an abstract CPU model.
///
/// The struct bundles the settings of every modelled component and can be serialized and
/// deserialized through `serde`.
///
/// Note that the derived [`Default`] implementation sets `max_vector_size` to `0`, whereas
/// [`CpuModel::new`] initializes it to `128`.
#[derive(Clone, Default, Deserialize, Serialize)]
pub struct CpuModel {
    /// The maximum supported vector size of the model in bits.
    pub max_vector_size: usize,
    /// The ALU latency of each instruction class.
    ///
    /// Only overrides are stored here; see [`InstructionLatencies`] for how unset values are
    /// resolved.
    pub execution_latencies: InstructionLatencies,
    /// Configuration for the fetch stage of the front end.
    pub fetch: config::Fetch,
    /// Configuration for the memory controller.
    pub memory_controller: config::MemoryController,
    /// Caches of the memory hierarchy.
    pub caches: Vec<config::Cache>,
    /// Configuration for the decode stage of the front end.
    pub decoder: config::Decoder,
    /// Configuration for the reorder buffer.
    pub reorder_buffer: config::ReorderBuffer,
    /// How many cycles the dispatch unit needs to dispatch an instruction.
    pub dispatch_cycle_count: usize,
    /// Configuration of the register file.
    pub register_file: config::RegisterFile,
    /// Configuration of schedulers.
    pub schedulers: Vec<config::Scheduler>,
    /// Configuration for each pipeline from just after the front end to the last stage.
    pub pipelines: Vec<config::Pipeline>,
    /// Configuration of the load/store unit.
    pub load_store_unit: config::LoadStoreUnit,
}

/// Defines the number of clock cycles each instruction type needs to execute.
///
/// The fields of this struct are used to selectively overwrite default values. If a field is
/// `None`, the matching constant from [`default_latencies`] is used when the latencies are
/// resolved with [`InstructionLatencies::as_array`].
#[derive(Clone, Default, Deserialize, Serialize)]
pub struct InstructionLatencies {
    /// Override for integer multiplication (default: [`default_latencies::INT_MUL`]).
    ///
    /// Typically takes multiple, but few clock cycles.
    pub integer_multiply: Option<u16>,
    /// Override for integer division (default: [`default_latencies::INT_DIV`]).
    ///
    /// Takes many clock cycles, usually dependent on the operand size.
    pub integer_divide: Option<u16>,
    /// Override for integer multiply-add (default: [`default_latencies::INT_MUL_ADD`]).
    ///
    /// Combination of add- and mul-operation.
    pub integer_multiply_add: Option<u16>,
    /// Override for integer shifts (default: [`default_latencies::INT_SHIFT`]).
    ///
    /// Completes fast, but needs a shifter.
    pub integer_shift: Option<u16>,
    /// Override for floating point addition (default: [`default_latencies::FLOAT_ADD`]).
    ///
    /// Might take longer than floating point multiplication.
    pub float_add: Option<u16>,
    /// Override for floating point multiplication (default: [`default_latencies::FLOAT_MUL`]).
    ///
    /// Usually as fast as integer multiplication.
    pub float_multiply: Option<u16>,
    /// Override for floating point division (default: [`default_latencies::FLOAT_DIV`]).
    ///
    /// Takes many clock cycles, usually dependent on the operand size.
    pub float_divide: Option<u16>,
    /// Override for floating point multiply-add (default: [`default_latencies::FLOAT_MUL_ADD`]).
    ///
    /// Might need multiple execution ports if a design has separate add- and mul-pipes.
    pub float_multiply_add: Option<u16>,
}

impl CpuModel {
    /// Creates an empty model.
    ///
    /// The returned model has no caches, schedulers or pipelines, a dispatch cycle count of
    /// zero and every component configuration at its default value. The maximum vector size is
    /// set to 128 bits.
    pub fn new() -> Self {
        // Everything except the vector size matches the derived `Default` implementation.
        Self {
            max_vector_size: 128,
            ..Self::default()
        }
    }

    /// Checks whether the model is configured correctly for the given ISA.
    ///
    /// This is the method form of [validation::validate], provided for callers who prefer an
    /// object-oriented interface; it forwards to that function unchanged.
    pub fn validate(&self, isa: &Isa) -> Result<(), FailedValidation> {
        let model = self;
        validation::validate(model, isa)
    }
}

impl InstructionLatencies {
    /// Transforms the object into an array conforming to the [`gpcas_base::instruction_type`]
    /// definition as array indices.
    ///
    /// Every field that is `None` is replaced by the matching constant from
    /// [`default_latencies`]. Instruction types without a configurable latency are fixed to one
    /// clock cycle.
    pub fn as_array(&self) -> [u16; instruction_type::TYPE_COUNT] {
        [
            // register moves
            1,
            // moves
            1,
            // simple
            1,
            // integer add
            1,
            self.integer_multiply.unwrap_or(default_latencies::INT_MUL),
            self.integer_divide.unwrap_or(default_latencies::INT_DIV),
            self.integer_multiply_add
                .unwrap_or(default_latencies::INT_MUL_ADD),
            self.integer_shift.unwrap_or(default_latencies::INT_SHIFT),
            self.float_add.unwrap_or(default_latencies::FLOAT_ADD),
            self.float_multiply.unwrap_or(default_latencies::FLOAT_MUL),
            self.float_divide.unwrap_or(default_latencies::FLOAT_DIV),
            self.float_multiply_add
                .unwrap_or(default_latencies::FLOAT_MUL_ADD),
            // branches
            1,
        ]
    }

    /// Returns an iterator over every latency defined in the struct.
    ///
    /// The values are yielded in field declaration order: integer multiply, integer divide,
    /// integer multiply-add, integer shift, float add, float multiply, float divide and float
    /// multiply-add. Overrides that are not set are yielded as `None`; no defaults are applied.
    pub fn iter(&self) -> impl std::iter::Iterator<Item = Option<u16>> {
        once(self.integer_multiply)
            .chain(once(self.integer_divide))
            .chain(once(self.integer_multiply_add))
            .chain(once(self.integer_shift))
            .chain(once(self.float_add))
            .chain(once(self.float_multiply))
            // The seventh entry is the float divide latency, not a repeat of integer divide.
            .chain(once(self.float_divide))
            .chain(once(self.float_multiply_add))
    }
}

// File format metadata for `CpuModel`.
impl GpcasFileStruct for CpuModel {
    // Version number of the current `CpuModel` file layout.
    const CURRENT_FILE_VERSION: usize = 4;
    // Identifier string for files containing a `CpuModel`.
    const FILE_IDENTIFIER: &'static str = "gpcas::cpu_model";
    // Older file versions, each paired with the function that deserializes the matching legacy
    // struct and converts it into the current `CpuModel`.
    const COMPATIBLE_VERSIONS: &'static [(usize, DeserializeFunction<CpuModel>)] = &[
        (2, deserialize_upgrade_from::<legacy::v2::CPUModel, CpuModel>),
        (3, deserialize_upgrade_from::<legacy::v3::CPUModel, CpuModel>),
    ];
}

/// Fallback latencies, in clock cycles, for the configurable instruction types.
///
/// Instruction types without a constant in this module either share their value with another
/// type or are hardcoded to a single clock cycle.
pub mod default_latencies {
    // Integer operations.

    /// Integer shift: completes fast, but needs a shifter.
    pub const INT_SHIFT: u16 = 1;
    /// Integer multiplication: typically takes multiple, but few clock cycles.
    pub const INT_MUL: u16 = 3;
    /// Integer multiply-add: combination of add- and mul-operation.
    pub const INT_MUL_ADD: u16 = 3;
    /// Integer division: takes many clock cycles, usually dependent on the operand size.
    pub const INT_DIV: u16 = 22;

    // Floating point operations.

    /// Floating point multiplication: usually as fast as integer multiplication.
    pub const FLOAT_MUL: u16 = 3;
    /// Floating point addition: might take longer than floating point multiplication.
    pub const FLOAT_ADD: u16 = 4;
    /// Floating point multiply-add: might take multiple ports if a design has separate add- and
    /// mul-pipes.
    pub const FLOAT_MUL_ADD: u16 = 5;
    /// Floating point division: takes many clock cycles, usually dependent on the operand size.
    pub const FLOAT_DIV: u16 = 22;
}