// etensor-core 0.0.1

//! High-performance CPU memory allocators and tensor factories.

use crate::tensor::Tensor;
use crate::shape::Shape;
use crate::buffer::Buffer;
use crate::dtypes::DType;
use crate::device::Device;
use crate::errors::{EtensorError, EtensorResult};

/// Physical memory allocator for Host (CPU) RAM.
///
/// This is a field-less unit type: it carries no state of its own and exists
/// to group the CPU tensor factory functions under a single name. The derived
/// traits make it printable in diagnostics and trivially copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct CpuAllocator;

impl CpuAllocator {
    /// Allocates a contiguous CPU tensor filled with zeros.
    pub fn zeros(shape: Shape, dtype: DType, requires_grad: bool) -> EtensorResult<Tensor> {
        if dtype != DType::F32 {
            return Err(EtensorError::InternalError(
                "CpuAllocator currently only supports F32 initialization.".to_string(),
            ));
        }

        let num_elements = shape.num_elements();
        let buffer = Buffer::from_f32_vec(vec![0.0; num_elements]);

        Ok(Tensor::new(buffer, shape, Device::Cpu, dtype, requires_grad))
    }

    /// Allocates a contiguous CPU tensor filled with ones.
    pub fn ones(shape: Shape, dtype: DType, requires_grad: bool) -> EtensorResult<Tensor> {
        if dtype != DType::F32 {
            return Err(EtensorError::InternalError(
                "CpuAllocator currently only supports F32 initialization.".to_string(),
            ));
        }

        let num_elements = shape.num_elements();
        let buffer = Buffer::from_f32_vec(vec![1.0; num_elements]);

        Ok(Tensor::new(buffer, shape, Device::Cpu, dtype, requires_grad))
    }

    /// Allocates a contiguous CPU tensor filled with a specific scalar value.
    pub fn full(shape: Shape, value: f32, dtype: DType, requires_grad: bool) -> EtensorResult<Tensor> {
        if dtype != DType::F32 {
            return Err(EtensorError::InternalError(
                "CpuAllocator currently only supports F32 initialization.".to_string(),
            ));
        }

        let num_elements = shape.num_elements();
        let buffer = Buffer::from_f32_vec(vec![value; num_elements]);

        Ok(Tensor::new(buffer, shape, Device::Cpu, dtype, requires_grad))
    }

    /// Creates a 1D tensor with evenly spaced values within a given interval.
    pub fn arange(start: f32, end: f32, step: f32, dtype: DType, requires_grad: bool) -> EtensorResult<Tensor> {
        if dtype != DType::F32 {
            return Err(EtensorError::InternalError(
                "CpuAllocator currently only supports F32 initialization.".to_string(),
            ));
        }
        
        if step == 0.0 {
            return Err(EtensorError::InternalError("Arange step size cannot be zero.".to_string()));
        }

        // Calculate total elements required
        let size = ((end - start) / step).ceil() as usize;
        let mut data = Vec::with_capacity(size);
        
        let mut current = start;
        for _ in 0..size {
            data.push(current);
            current += step;
        }

        let shape = Shape::new(vec![size]);
        let buffer = Buffer::from_f32_vec(data);

        Ok(Tensor::new(buffer, shape, Device::Cpu, dtype, requires_grad))
    }
}

// =====================================================================
// UNIT TESTS
// =====================================================================
#[cfg(test)]
mod tests {
    use super::*;

    /// `zeros` yields one `0.0` per element of the requested shape.
    #[test]
    fn test_cpu_alloc_zeros() {
        let shape = Shape::new(vec![2, 3]);
        let t = CpuAllocator::zeros(shape, DType::F32, false).unwrap();

        let slice = t.data.as_f32_slice().unwrap();
        assert_eq!(slice.len(), 6);
        assert_eq!(slice, &[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
        assert!(!t.requires_grad);
    }

    /// `ones` yields `1.0` everywhere and forwards the `requires_grad` flag.
    #[test]
    fn test_cpu_alloc_ones() {
        let shape = Shape::new(vec![4]);
        let t = CpuAllocator::ones(shape, DType::F32, true).unwrap();

        let slice = t.data.as_f32_slice().unwrap();
        assert_eq!(slice, &[1.0, 1.0, 1.0, 1.0]);
        assert!(t.requires_grad);
    }

    /// `full` replicates the given scalar across the whole buffer.
    #[test]
    fn test_cpu_alloc_full() {
        let shape = Shape::new(vec![2, 2]);
        let t = CpuAllocator::full(shape, 42.0, DType::F32, false).unwrap();

        let slice = t.data.as_f32_slice().unwrap();
        assert_eq!(slice, &[42.0, 42.0, 42.0, 42.0]);
    }

    /// Unit-step ascending range; the end bound is excluded.
    #[test]
    fn test_cpu_alloc_arange() {
        // Range from 0.0 to 5.0 with step 1.0 -> [0.0, 1.0, 2.0, 3.0, 4.0]
        let t = CpuAllocator::arange(0.0, 5.0, 1.0, DType::F32, false).unwrap();

        let slice = t.data.as_f32_slice().unwrap();
        assert_eq!(slice, &[0.0, 1.0, 2.0, 3.0, 4.0]);
        assert_eq!(t.shape.dims, vec![5]);
    }

    /// A negative step walks downwards from `start` and still excludes `end`.
    #[test]
    fn test_cpu_alloc_arange_descending() {
        let t = CpuAllocator::arange(3.0, 0.0, -1.0, DType::F32, false).unwrap();

        let slice = t.data.as_f32_slice().unwrap();
        assert_eq!(slice, &[3.0, 2.0, 1.0]);
        assert_eq!(t.shape.dims, vec![3]);
    }

    /// A fractional step that divides the interval exactly.
    #[test]
    fn test_cpu_alloc_arange_fractional_step() {
        let t = CpuAllocator::arange(0.0, 1.0, 0.25, DType::F32, false).unwrap();

        let slice = t.data.as_f32_slice().unwrap();
        assert_eq!(slice, &[0.0, 0.25, 0.5, 0.75]);
        assert_eq!(t.shape.dims, vec![4]);
    }

    /// When the interval is not a whole number of steps the count rounds up.
    #[test]
    fn test_cpu_alloc_arange_partial_last_step() {
        let t = CpuAllocator::arange(0.0, 2.5, 1.0, DType::F32, false).unwrap();

        let slice = t.data.as_f32_slice().unwrap();
        assert_eq!(slice, &[0.0, 1.0, 2.0]);
        assert_eq!(t.shape.dims, vec![3]);
    }

    /// A zero step is rejected with an error rather than looping or dividing by zero.
    #[test]
    fn test_cpu_alloc_arange_zero_step_is_error() {
        let result = CpuAllocator::arange(0.0, 5.0, 0.0, DType::F32, false);
        assert!(matches!(result, Err(EtensorError::InternalError(_))));
    }
}