//! Automatic differentiation (autograd)
//!
//! This module provides both reverse-mode and forward-mode automatic differentiation
//! for computing gradients of tensor computations.
//!
//! # Overview
//!
//! The autograd system consists of:
//!
//! ## Reverse-Mode AD (Backpropagation)
//! - [`Var`]: A tensor that tracks gradients
//! - [`GradFn`]: Trait for computing gradients in backward pass
//! - [`GradStore`]: Storage for accumulated gradients (first-order)
//! - [`VarGradStore`]: Storage for gradient Vars (second-order)
//! - [`backward`]: Function to compute gradients via reverse-mode AD
//! - [`backward_with_graph`]: Backward with graph retention for Hessians
//!
//! ## Forward-Mode AD (JVP)
//! - [`DualTensor`]: Tensor carrying both primal value and tangent
//! - [`jvp`]: Compute Jacobian-vector product in a single forward pass
//! - [`jvp_multi`]: JVP for functions with multiple outputs
//! - [`jacobian_forward`]: Compute full Jacobian using forward-mode
//! - `dual_ops`: Operations on dual tensors (`dual_add`, `dual_mul`, etc.)
//!
//! # When to Use Forward vs Reverse Mode
//!
//! - **Reverse-mode (VJP)**: Efficient when inputs >> outputs
//!   - Training neural networks (many params, scalar loss)
//!   - Computing gradients of scalar functions
//!
//! - **Forward-mode (JVP)**: Efficient when outputs >> inputs
//!   - Directional derivatives
//!   - Newton-Krylov methods (need J @ v without forming J)
//!   - Sensitivity analysis with few inputs
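//!
//! The rule of thumb behind this split: for `f: R^n -> R^m`, a full Jacobian
//! costs roughly `m` reverse-mode passes (one VJP per output) or `n`
//! forward-mode passes (one JVP per input). A network with millions of
//! parameters and a scalar loss (`m = 1`) is the textbook reverse-mode case;
//! a function with a handful of inputs and many outputs is the textbook
//! forward-mode case.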
//!
//! # First-Order Example (Reverse-Mode)
//!
//! ```
//! # use numr::prelude::*;
//! # use numr::autograd::{Var, backward, var_mul};
//! # let device = CpuDevice::new();
//! # let client = CpuRuntime::default_client(&device);
//! // Create leaf variables
//! let x = Var::new(Tensor::from_slice(&[2.0f32], &[1], &device), true);
//! let y = Var::new(Tensor::from_slice(&[3.0f32], &[1], &device), true);
//!
//! // Forward: z = x * y
//! let z = var_mul(&x, &y, &client)?;
//!
//! // Backward
//! let grads = backward(&z, &client)?;
//!
//! // dx = y = 3.0, dy = x = 2.0
//! let grad_x = grads.get(x.id()).unwrap();
//! let grad_y = grads.get(y.id()).unwrap();
//! # Ok::<(), numr::error::Error>(())
//! ```
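//!
//! Gradients are keyed by the leaf's [`TensorId`] (hence `grads.get(x.id())`),
//! and the lookup returns an `Option`: a leaf that never received a gradient
//! (for example, one created with tracking disabled) simply has no entry.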
//!
//! # Forward-Mode Example (JVP)
//!
//! ```
//! # use numr::prelude::*;
//! # use numr::autograd::{DualTensor, jvp, dual_ops::*};
//! # let device = CpuDevice::new();
//! # let client = CpuRuntime::default_client(&device);
//! // f(x) = x² at x=3, tangent v=1 → df/dx in direction v
//! let x = Tensor::from_slice(&[3.0f32], &[1], &device);
//! let v = Tensor::from_slice(&[1.0f32], &[1], &device);
//!
//! let (y, dy) = jvp(
//!     |inputs, c| {
//!         let x = &inputs[0];
//!         dual_mul(x, x, c)
//!     },
//!     &[&x],
//!     &[&v],
//!     &client,
//! )?;
//! // y = 9.0, dy = 2*3*1 = 6.0
//! # Ok::<(), numr::error::Error>(())
//! ```
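//!
//! Each `jvp` call evaluates `f` once and returns one directional derivative.
//! To assemble a full Jacobian column-by-column (which [`jacobian_forward`]
//! automates), call `jvp` once per input basis vector. A minimal sketch
//! reusing the call shape above; the `&[2]`-shaped tensors and the loop are
//! illustrative, not part of the API:
//!
//! ```
//! # use numr::prelude::*;
//! # use numr::autograd::{jvp, dual_ops::*};
//! # let device = CpuDevice::new();
//! # let client = CpuRuntime::default_client(&device);
//! let x = Tensor::from_slice(&[1.0f32, 2.0], &[2], &device);
//! for basis in [[1.0f32, 0.0], [0.0, 1.0]] {
//!     // Tangent e_i selects column i of the Jacobian: dy = J @ e_i.
//!     let v = Tensor::from_slice(&basis, &[2], &device);
//!     let (_y, _dy_col) = jvp(
//!         |inputs, c| dual_mul(&inputs[0], &inputs[0], c),
//!         &[&x],
//!         &[&v],
//!         &client,
//!     )?;
//!     // For f(x) = x ⊙ x, column i is 2 * x[i] * e_i.
//! }
//! # Ok::<(), numr::error::Error>(())
//! ```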
//!
//! # Second-Order Example (Hessian-Vector Product)
//!
//! ```
//! # use numr::prelude::*;
//! # use numr::autograd::{Var, backward, backward_with_graph, var_mul, var_sum};
//! # let device = CpuDevice::new();
//! # let client = CpuRuntime::default_client(&device);
//! // f(x) = x²
//! let x = Var::new(Tensor::from_slice(&[3.0f32], &[1], &device), true);
//! let y = var_mul(&x, &x, &client)?;
//!
//! // First backward with graph retention
//! let grads = backward_with_graph(&y, &client)?;
//! let grad_x = grads.get_var(x.id()).unwrap(); // dy/dx = 2x = 6
//!
//! // Compute Hessian-vector product: H @ v where v = [1.0]
//! let v = Var::new(Tensor::from_slice(&[1.0f32], &[1], &device), false);
//! let grad_v = var_mul(grad_x, &v, &client)?;
//! let scalar = var_sum(&grad_v, &[], false, &client)?;
//!
//! // Second backward gives d²y/dx² * v = 2 * 1 = 2
//! let second_grads = backward(&scalar, &client)?;
//! # Ok::<(), numr::error::Error>(())
//! ```
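//!
//! Two details make this pattern work: the first pass must use
//! [`backward_with_graph`] so that `grad_x` is itself a differentiable
//! [`Var`], and `v` is a non-tracked leaf (second argument `false`) because
//! the product differentiates with respect to `x` only. Summing `grad_v` to
//! a scalar gives the second [`backward`] call a scalar root; this is the
//! usual grad-dot-vector formulation H @ v = d/dx (dy/dx · v).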
// Reverse-mode AD
//
// NOTE: the module declarations and `pub use` paths in this section were
// garbled; the exported items come from the module docs above, but the
// module names (`ops`, `reverse`, `forward`) and the `crate::tensor` path
// are assumptions and may not match the crate's actual layout.
mod ops;
mod reverse;
// Forward-mode AD
mod forward;

// Reverse-mode exports
pub use crate::tensor::TensorId;
pub use reverse::{backward, backward_with_graph};
pub use reverse::checkpoint;
pub use reverse::GradFn;
pub use reverse::GradStore;
pub use reverse::Var;
pub use reverse::VarGradStore;
pub use ops::var_dropout;
pub use ops::{var_mul, var_sum};

// Shape operation exports (re-exported via autograd::ops::*)
pub use ops::*;

// Forward-mode exports
pub use forward::DualTensor;
pub use forward::{dual_ops, jacobian_forward, jvp, jvp_multi};