quantrs2-ml 0.1.3

//! Keras-style model building API for QuantRS2-ML
//!
//! This module provides a Keras-like interface for building quantum machine learning
//! models, with both Sequential and Functional API patterns familiar to Keras users.

mod attention;
mod callbacks;
mod conv;
mod layers;
mod quantum_layers;
mod rnn;
mod schedules;

pub use attention::*;
pub use callbacks::*;
pub use conv::*;
pub use layers::*;
pub use quantum_layers::*;
pub use rnn::*;
pub use schedules::*;

use crate::error::{MLError, Result};
use scirs2_core::ndarray::{s, ArrayD, Axis, IxDyn};
use std::collections::HashMap;

/// Keras-style layer trait
pub trait KerasLayer: Send + Sync {
    /// Build the layer (called during model compilation)
    fn build(&mut self, input_shape: &[usize]) -> Result<()>;

    /// Forward pass through the layer
    fn call(&self, inputs: &ArrayD<f64>) -> Result<ArrayD<f64>>;

    /// Compute output shape given input shape
    fn compute_output_shape(&self, input_shape: &[usize]) -> Vec<usize>;

    /// Get layer name
    fn name(&self) -> &str;

    /// Get trainable parameters
    fn get_weights(&self) -> Vec<ArrayD<f64>>;

    /// Set trainable parameters
    fn set_weights(&mut self, weights: Vec<ArrayD<f64>>) -> Result<()>;

    /// Get number of parameters
    fn count_params(&self) -> usize {
        self.get_weights().iter().map(|w| w.len()).sum()
    }

    /// Check if layer is built
    fn built(&self) -> bool;
}

/// Activation function types
#[derive(Debug, Clone)]
pub enum ActivationFunction {
    /// Linear activation (identity)
    Linear,
    /// ReLU activation
    ReLU,
    /// Sigmoid activation
    Sigmoid,
    /// Tanh activation
    Tanh,
    /// Softmax activation
    Softmax,
    /// Leaky ReLU with alpha
    LeakyReLU(f64),
    /// ELU with alpha
    ELU(f64),
}

/// Weight initializer types
#[derive(Debug, Clone)]
pub enum InitializerType {
    /// All zeros
    Zeros,
    /// All ones
    Ones,
    /// Glorot uniform (Xavier uniform)
    GlorotUniform,
    /// Glorot normal (Xavier normal)
    GlorotNormal,
    /// He uniform
    HeUniform,
}

/// Sequential model
pub struct Sequential {
    /// Layers in the model
    layers: Vec<Box<dyn KerasLayer>>,
    /// Model name
    name: String,
    /// Built flag
    built: bool,
    /// Compiled flag
    compiled: bool,
    /// Input shape
    input_shape: Option<Vec<usize>>,
    /// Loss function
    loss: Option<LossFunction>,
    /// Optimizer
    optimizer: Option<OptimizerType>,
    /// Metrics
    metrics: Vec<MetricType>,
}

impl Sequential {
    /// Create new sequential model
    pub fn new() -> Self {
        Self {
            layers: Vec::new(),
            name: format!("sequential_{}", fastrand::u32(..)),
            built: false,
            compiled: false,
            input_shape: None,
            loss: None,
            optimizer: None,
            metrics: Vec::new(),
        }
    }

    /// Set model name
    pub fn name(mut self, name: impl Into<String>) -> Self {
        self.name = name.into();
        self
    }

    /// Add layer to model
    pub fn add(&mut self, layer: Box<dyn KerasLayer>) {
        self.layers.push(layer);
        self.built = false;
    }

    /// Build the model with given input shape
    pub fn build(&mut self, input_shape: Vec<usize>) -> Result<()> {
        self.input_shape = Some(input_shape.clone());
        let mut current_shape = input_shape;

        for layer in &mut self.layers {
            layer.build(&current_shape)?;
            current_shape = layer.compute_output_shape(&current_shape);
        }

        self.built = true;
        Ok(())
    }

    /// Compile the model
    pub fn compile(
        mut self,
        loss: LossFunction,
        optimizer: OptimizerType,
        metrics: Vec<MetricType>,
    ) -> Self {
        self.loss = Some(loss);
        self.optimizer = Some(optimizer);
        self.metrics = metrics;
        self.compiled = true;
        self
    }

    /// Get model summary
    pub fn summary(&self) -> ModelSummary {
        let mut layers_info = Vec::new();
        let mut total_params = 0;
        let mut trainable_params = 0;

        let mut current_shape = self.input_shape.clone().unwrap_or_default();

        for layer in &self.layers {
            let output_shape = layer.compute_output_shape(&current_shape);
            let params = layer.count_params();

            layers_info.push(LayerInfo {
                name: layer.name().to_string(),
                layer_type: "Layer".to_string(),
                output_shape: output_shape.clone(),
                param_count: params,
            });

            total_params += params;
            trainable_params += params;
            current_shape = output_shape;
        }

        ModelSummary {
            layers: layers_info,
            total_params,
            trainable_params,
            non_trainable_params: 0,
        }
    }

    /// Forward pass (predict)
    pub fn predict(&self, inputs: &ArrayD<f64>) -> Result<ArrayD<f64>> {
        if !self.built {
            return Err(MLError::InvalidConfiguration(
                "Model must be built before prediction".to_string(),
            ));
        }

        let mut current = inputs.clone();

        for layer in &self.layers {
            current = layer.call(&current)?;
        }

        Ok(current)
    }

    /// Train the model
    #[allow(non_snake_case)]
    pub fn fit(
        &mut self,
        X: &ArrayD<f64>,
        y: &ArrayD<f64>,
        epochs: usize,
        batch_size: Option<usize>,
        validation_data: Option<(&ArrayD<f64>, &ArrayD<f64>)>,
        callbacks: Vec<Box<dyn Callback>>,
    ) -> Result<TrainingHistory> {
        if !self.compiled {
            return Err(MLError::InvalidConfiguration(
                "Model must be compiled before training".to_string(),
            ));
        }

        let batch_size = batch_size.unwrap_or(32);
        let n_samples = X.shape()[0];
        let n_batches = (n_samples + batch_size - 1) / batch_size;

        let mut history = TrainingHistory::new();

        for epoch in 0..epochs {
            let mut epoch_loss = 0.0;
            let mut epoch_metrics: HashMap<String, f64> = HashMap::new();

            for metric in &self.metrics {
                epoch_metrics.insert(metric.name(), 0.0);
            }

            for batch_idx in 0..n_batches {
                let start_idx = batch_idx * batch_size;
                let end_idx = ((batch_idx + 1) * batch_size).min(n_samples);

                let X_batch = X.slice(s![start_idx..end_idx, ..]);
                let y_batch = y.slice(s![start_idx..end_idx, ..]);

                let predictions = self.predict(&X_batch.to_owned().into_dyn())?;

                let loss = self.compute_loss(&predictions, &y_batch.to_owned().into_dyn())?;
                epoch_loss += loss;

                self.backward_pass(&predictions, &y_batch.to_owned().into_dyn())?;

                for metric in &self.metrics {
                    let metric_value =
                        metric.compute(&predictions, &y_batch.to_owned().into_dyn())?;
                    *epoch_metrics.entry(metric.name()).or_insert(0.0) += metric_value;
                }
            }

            epoch_loss /= n_batches as f64;
            for value in epoch_metrics.values_mut() {
                *value /= n_batches as f64;
            }

            let (val_loss, val_metrics) = if let Some((X_val, y_val)) = validation_data {
                let val_predictions = self.predict(X_val)?;
                let val_loss = self.compute_loss(&val_predictions, y_val)?;

                let mut val_metrics = HashMap::new();
                for metric in &self.metrics {
                    let metric_value = metric.compute(&val_predictions, y_val)?;
                    val_metrics.insert(format!("val_{}", metric.name()), metric_value);
                }

                (Some(val_loss), val_metrics)
            } else {
                (None, HashMap::new())
            };

            history.add_epoch(epoch_loss, epoch_metrics, val_loss, val_metrics);

            for callback in &callbacks {
                callback.on_epoch_end(epoch, &history)?;
            }

            println!("Epoch {}/{} - loss: {:.4}", epoch + 1, epochs, epoch_loss);
        }

        Ok(history)
    }

    /// Evaluate the model
    #[allow(non_snake_case)]
    pub fn evaluate(
        &self,
        X: &ArrayD<f64>,
        y: &ArrayD<f64>,
        _batch_size: Option<usize>,
    ) -> Result<HashMap<String, f64>> {
        let predictions = self.predict(X)?;
        let loss = self.compute_loss(&predictions, y)?;

        let mut results = HashMap::new();
        results.insert("loss".to_string(), loss);

        for metric in &self.metrics {
            let metric_value = metric.compute(&predictions, y)?;
            results.insert(metric.name(), metric_value);
        }

        Ok(results)
    }

    /// Compute loss
    fn compute_loss(&self, predictions: &ArrayD<f64>, targets: &ArrayD<f64>) -> Result<f64> {
        if let Some(ref loss_fn) = self.loss {
            loss_fn.compute(predictions, targets)
        } else {
            Err(MLError::InvalidConfiguration(
                "Loss function not specified".to_string(),
            ))
        }
    }

    /// Backward pass (placeholder)
    fn backward_pass(&mut self, _predictions: &ArrayD<f64>, _targets: &ArrayD<f64>) -> Result<()> {
        Ok(())
    }
}

impl Default for Sequential {
    fn default() -> Self {
        Self::new()
    }
}

/// Loss functions
#[derive(Debug, Clone)]
pub enum LossFunction {
    /// Mean squared error
    MeanSquaredError,
    /// Binary crossentropy
    BinaryCrossentropy,
    /// Categorical crossentropy
    CategoricalCrossentropy,
    /// Sparse categorical crossentropy
    SparseCategoricalCrossentropy,
    /// Mean absolute error
    MeanAbsoluteError,
    /// Huber loss
    Huber(f64),
}

impl LossFunction {
    /// Compute loss
    pub fn compute(&self, predictions: &ArrayD<f64>, targets: &ArrayD<f64>) -> Result<f64> {
        match self {
            LossFunction::MeanSquaredError => {
                let diff = predictions - targets;
                diff.mapv(|x| x * x).mean().ok_or_else(|| {
                    MLError::ComputationError("Failed to compute mean of empty array".to_string())
                })
            }
            LossFunction::BinaryCrossentropy => {
                let epsilon = 1e-15;
                let clipped_preds = predictions.mapv(|x| x.max(epsilon).min(1.0 - epsilon));
                let loss = targets * clipped_preds.mapv(|x| x.ln())
                    + (1.0 - targets) * clipped_preds.mapv(|x| (1.0 - x).ln());
                loss.mean().map(|m| -m).ok_or_else(|| {
                    MLError::ComputationError("Failed to compute mean of empty array".to_string())
                })
            }
            LossFunction::MeanAbsoluteError => {
                let diff = predictions - targets;
                diff.mapv(|x| x.abs()).mean().ok_or_else(|| {
                    MLError::ComputationError("Failed to compute mean of empty array".to_string())
                })
            }
            _ => Err(MLError::InvalidConfiguration(
                "Loss function not implemented".to_string(),
            )),
        }
    }
}

/// Optimizer types
#[derive(Debug, Clone)]
pub enum OptimizerType {
    /// Stochastic Gradient Descent
    SGD { learning_rate: f64, momentum: f64 },
    /// Adam optimizer
    Adam {
        learning_rate: f64,
        beta1: f64,
        beta2: f64,
        epsilon: f64,
    },
    /// RMSprop optimizer
    RMSprop {
        learning_rate: f64,
        rho: f64,
        epsilon: f64,
    },
    /// AdaGrad optimizer
    AdaGrad { learning_rate: f64, epsilon: f64 },
}

/// Metric types
#[derive(Debug, Clone)]
pub enum MetricType {
    /// Accuracy
    Accuracy,
    /// Precision
    Precision,
    /// Recall
    Recall,
    /// F1 Score
    F1Score,
    /// Mean Absolute Error
    MeanAbsoluteError,
    /// Mean Squared Error
    MeanSquaredError,
}

impl MetricType {
    /// Get metric name
    pub fn name(&self) -> String {
        match self {
            MetricType::Accuracy => "accuracy".to_string(),
            MetricType::Precision => "precision".to_string(),
            MetricType::Recall => "recall".to_string(),
            MetricType::F1Score => "f1_score".to_string(),
            MetricType::MeanAbsoluteError => "mean_absolute_error".to_string(),
            MetricType::MeanSquaredError => "mean_squared_error".to_string(),
        }
    }

    /// Compute metric
    pub fn compute(&self, predictions: &ArrayD<f64>, targets: &ArrayD<f64>) -> Result<f64> {
        match self {
            MetricType::Accuracy => {
                let pred_classes = predictions.mapv(|x| if x > 0.5 { 1.0 } else { 0.0 });
                let correct = pred_classes
                    .iter()
                    .zip(targets.iter())
                    .filter(|(&pred, &target)| (pred - target).abs() < 1e-6)
                    .count();
                Ok(correct as f64 / targets.len() as f64)
            }
            MetricType::MeanAbsoluteError => {
                let diff = predictions - targets;
                diff.mapv(|x| x.abs()).mean().ok_or_else(|| {
                    MLError::ComputationError("Failed to compute mean of empty array".to_string())
                })
            }
            MetricType::MeanSquaredError => {
                let diff = predictions - targets;
                diff.mapv(|x| x * x).mean().ok_or_else(|| {
                    MLError::ComputationError("Failed to compute mean of empty array".to_string())
                })
            }
            _ => Err(MLError::InvalidConfiguration(
                "Metric not implemented".to_string(),
            )),
        }
    }
}

/// Training history
#[derive(Debug, Clone)]
pub struct TrainingHistory {
    /// Training loss for each epoch
    pub loss: Vec<f64>,
    /// Training metrics for each epoch
    pub metrics: Vec<HashMap<String, f64>>,
    /// Validation loss for each epoch
    pub val_loss: Vec<f64>,
    /// Validation metrics for each epoch
    pub val_metrics: Vec<HashMap<String, f64>>,
}

impl TrainingHistory {
    /// Create new training history
    pub fn new() -> Self {
        Self {
            loss: Vec::new(),
            metrics: Vec::new(),
            val_loss: Vec::new(),
            val_metrics: Vec::new(),
        }
    }

    /// Add epoch results
    pub fn add_epoch(
        &mut self,
        loss: f64,
        metrics: HashMap<String, f64>,
        val_loss: Option<f64>,
        val_metrics: HashMap<String, f64>,
    ) {
        self.loss.push(loss);
        self.metrics.push(metrics);

        if let Some(val_loss) = val_loss {
            self.val_loss.push(val_loss);
        }
        self.val_metrics.push(val_metrics);
    }
}

impl Default for TrainingHistory {
    fn default() -> Self {
        Self::new()
    }
}

/// Model summary information
#[derive(Debug)]
pub struct ModelSummary {
    /// Layer information
    pub layers: Vec<LayerInfo>,
    /// Total number of parameters
    pub total_params: usize,
    /// Number of trainable parameters
    pub trainable_params: usize,
    /// Number of non-trainable parameters
    pub non_trainable_params: usize,
}

/// Layer information for summary
#[derive(Debug)]
pub struct LayerInfo {
    /// Layer name
    pub name: String,
    /// Layer type
    pub layer_type: String,
    /// Output shape
    pub output_shape: Vec<usize>,
    /// Parameter count
    pub param_count: usize,
}

/// Model input specification
pub struct Input {
    /// Input shape (excluding batch dimension)
    pub shape: Vec<usize>,
    /// Input name
    pub name: Option<String>,
    /// Data type
    pub dtype: DataType,
}

impl Input {
    /// Create new input specification
    pub fn new(shape: Vec<usize>) -> Self {
        Self {
            shape,
            name: None,
            dtype: DataType::Float64,
        }
    }

    /// Set input name
    pub fn name(mut self, name: impl Into<String>) -> Self {
        self.name = Some(name.into());
        self
    }

    /// Set data type
    pub fn dtype(mut self, dtype: DataType) -> Self {
        self.dtype = dtype;
        self
    }
}

/// Data types
#[derive(Debug, Clone)]
pub enum DataType {
    /// 32-bit float
    Float32,
    /// 64-bit float
    Float64,
    /// 32-bit integer
    Int32,
    /// 64-bit integer
    Int64,
}

/// Utility functions for building models
pub mod utils {
    use super::*;

    /// Create a simple sequential model for classification
    pub fn create_classification_model(
        _input_dim: usize,
        num_classes: usize,
        hidden_layers: Vec<usize>,
    ) -> Sequential {
        let mut model = Sequential::new();

        for (i, &units) in hidden_layers.iter().enumerate() {
            model.add(Box::new(
                Dense::new(units)
                    .activation(ActivationFunction::ReLU)
                    .name(format!("dense_{}", i)),
            ));
        }

        let output_activation = if num_classes == 2 {
            ActivationFunction::Sigmoid
        } else {
            ActivationFunction::Softmax
        };

        model.add(Box::new(
            Dense::new(num_classes)
                .activation(output_activation)
                .name("output"),
        ));

        model
    }

    /// Create a quantum neural network model
    pub fn create_quantum_model(
        num_qubits: usize,
        num_classes: usize,
        num_layers: usize,
    ) -> Sequential {
        let mut model = Sequential::new();

        model.add(Box::new(
            QuantumDense::new(num_qubits, num_classes)
                .num_layers(num_layers)
                .ansatz_type(QuantumAnsatzType::HardwareEfficient)
                .name("quantum_layer"),
        ));

        if num_classes > 1 {
            model.add(Box::new(
                Activation::new(ActivationFunction::Softmax).name("softmax"),
            ));
        }

        model
    }

    /// Create a hybrid quantum-classical model
    pub fn create_hybrid_model(
        _input_dim: usize,
        num_qubits: usize,
        num_classes: usize,
        classical_hidden: Vec<usize>,
    ) -> Sequential {
        let mut model = Sequential::new();

        for (i, &units) in classical_hidden.iter().enumerate() {
            model.add(Box::new(
                Dense::new(units)
                    .activation(ActivationFunction::ReLU)
                    .name(format!("classical_{}", i)),
            ));
        }

        model.add(Box::new(
            QuantumDense::new(num_qubits, 64)
                .num_layers(2)
                .ansatz_type(QuantumAnsatzType::HardwareEfficient)
                .name("quantum_layer"),
        ));

        model.add(Box::new(
            Dense::new(num_classes)
                .activation(if num_classes == 2 {
                    ActivationFunction::Sigmoid
                } else {
                    ActivationFunction::Softmax
                })
                .name("output"),
        ));

        model
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array;

    #[test]
    fn test_dense_layer() {
        let mut dense = Dense::new(10)
            .activation(ActivationFunction::ReLU)
            .name("test_dense");

        assert!(!dense.built());

        dense.build(&[5]).expect("Should build successfully");

        assert!(dense.built());
        assert_eq!(dense.compute_output_shape(&[32, 5]), vec![32, 10]);
    }

    #[test]
    fn test_sequential_model() {
        let mut model = Sequential::new();
        model.add(Box::new(Dense::new(10)));
        model.add(Box::new(Activation::new(ActivationFunction::ReLU)));
        model.add(Box::new(Dense::new(5)));

        model
            .build(vec![32, 20])
            .expect("Should build successfully");

        let summary = model.summary();
        assert_eq!(summary.layers.len(), 3);
    }

    #[test]
    fn test_activation_functions() {
        let relu = ActivationFunction::ReLU;
        let sigmoid = ActivationFunction::Sigmoid;
        let _tanh = ActivationFunction::Tanh;

        let mut act_relu = Activation::new(relu);
        act_relu.build(&[10]).expect("Should build");

        let mut act_sigmoid = Activation::new(sigmoid);
        act_sigmoid.build(&[10]).expect("Should build");
    }
}