etensor-core 0.0.1

//! The Backward Pass Runner.

use crate::tensor::Tensor;
use crate::buffer::Buffer;
use crate::dtypes::DType;
use crate::errors::{EtensorError, EtensorResult};
use crate::autograd::tape;
use crate::autograd::gradients::Gradients;

/// Runs reverse-mode differentiation starting at `root` (usually the loss tensor).
///
/// Steps, in order:
/// 1. Reject a `root` whose `requires_grad` flag is `false`.
/// 2. Reject a `root` that is not `DType::F32` on a CPU device.
/// 3. Store a buffer of `1.0`s, one per element of `root`, under `root.id`
///    in a fresh `Gradients` store.
/// 4. Take the recorded actions from the tape with `tape::take()`.
/// 5. Call `backward` on every action, from the last recorded to the first.
///
/// On success the populated `Gradients` store is returned.
///
/// # Errors
///
/// Returns `EtensorError::AutogradError` when either guard rejects `root`;
/// in that case the function returns before `tape::take()` is called.
/// Errors from `Gradients::insert` and from any action's `backward` are
/// propagated with `?`; once an action fails, no further actions are run.
pub fn backward(root: &Tensor) -> EtensorResult<Gradients> {
    // Guard 1: the starting tensor has to take part in gradient tracking.
    if !root.requires_grad {
        let reason = "Cannot call backward() on a tensor where requires_grad is false.";
        return Err(EtensorError::AutogradError(reason.to_string()));
    }

    // Guard 2: Phase 3 can only build the seed as an F32 buffer for a CPU tensor
    // (hardware routing is deferred to Phase 4).
    let is_cpu_f32 = root.dtype == DType::F32 && root.device.is_cpu();
    if !is_cpu_f32 {
        let reason = "Phase 3 backward engine currently only supports CPU Float32 seeding.";
        return Err(EtensorError::AutogradError(reason.to_string()));
    }

    // The derivative of the root with respect to itself is 1.0 for every element,
    // so the seed is a buffer of ones with the root's element count.
    let element_count = root.shape.num_elements();
    let ones = Buffer::from_f32_vec(vec![1.0; element_count]);

    // Gradient store, starting out with only the root's seed.
    let mut grads = Gradients::new();
    grads.insert(root.id, ones)?;

    // Pull the recorded history and replay it newest-first, so each action sees
    // the gradient of its output before writing the gradient of its inputs.
    // `try_for_each` stops at the first failing action and hands its error to `?`.
    let recorded = tape::take();
    recorded
        .into_iter()
        .rev()
        .try_for_each(|action| action.backward(&mut grads))?;

    Ok(grads)
}

// =====================================================================
// UNIT TESTS
// =====================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tensor::{Tensor, TensorId};
    use crate::shape::Shape;
    use crate::device::Device;
    use crate::autograd::tape::{TapeAction, record};

    /// Test double standing in for the operation `y = x + 0`.
    ///
    /// Its backward step copies the gradient stored for `output_id`
    /// into the slot for `input_id`.
    struct MockPassThroughBackward {
        input_id: TensorId,
        output_id: TensorId,
    }

    impl TapeAction for MockPassThroughBackward {
        fn name(&self) -> String {
            String::from("MockPassThrough")
        }

        fn backward(&self, grads: &mut Gradients) -> EtensorResult<()> {
            // d(x + 0)/dx = 1, so the upstream gradient is forwarded unchanged.
            let upstream = grads.get(&self.output_id).unwrap().clone();
            grads.insert(self.input_id, upstream)?;
            Ok(())
        }
    }

    /// Builds a one-element CPU F32 tensor holding `value`.
    fn cpu_f32_scalar(value: f32, requires_grad: bool) -> Tensor {
        Tensor::new(
            Buffer::from_f32_vec(vec![value]),
            Shape::new(vec![1]),
            Device::Cpu,
            DType::F32,
            requires_grad,
        )
    }

    /// `backward` must refuse a root tensor that does not require gradients.
    #[test]
    fn test_requires_grad_guard() {
        let untracked = cpu_f32_scalar(42.0, false);

        let outcome = backward(&untracked);
        assert!(outcome.is_err());

        match outcome {
            Err(EtensorError::AutogradError(msg)) => {
                assert!(msg.contains("requires_grad is false"));
            }
            _ => panic!("Engine bypassed the requires_grad safety guard!"),
        }
    }

    /// `backward` must seed the root with 1.0 and replay the recorded action.
    #[test]
    fn test_engine_seeding_and_execution() {
        // Discard anything already recorded on the tape before this test runs.
        drop(tape::take());

        // Stand-ins for the input and output of the mock operation.
        let input_tensor = cpu_f32_scalar(5.0, true);
        let output_tensor = cpu_f32_scalar(5.0, true);

        // Record the mock operation linking the two tensors.
        let mock_op = MockPassThroughBackward {
            input_id: input_tensor.id,
            output_id: output_tensor.id,
        };
        record(Box::new(mock_op));

        // Differentiate starting from the output.
        let grads = backward(&output_tensor).unwrap();

        // The root's own gradient is the 1.0 seed.
        let root_grad = grads.get(&output_tensor.id).unwrap();
        assert_eq!(root_grad.as_f32_slice().unwrap()[0], 1.0);

        // The mock action must have forwarded that 1.0 to the input.
        let input_grad = grads.get(&input_tensor.id).unwrap();
        assert_eq!(input_grad.as_f32_slice().unwrap()[0], 1.0);

        // Nothing may be left on the tape after the run.
        let leftover = tape::take();
        assert_eq!(leftover.len(), 0);
    }
}