tenflowers-core 0.1.1

Core tensor operations and execution engine for TenfloweRS
//! Numerical Gradient Validation Utilities
//!
//! This module provides tools for validating automatic differentiation implementations
//! by comparing analytical gradients against numerical gradients computed via finite
//! differences. This is essential for ensuring correctness of custom operations and
//! gradient implementations.
//!
//! # Overview
//!
//! Numerical gradient checking uses the definition of a derivative:
//!
//! ```text
//! f'(x) ≈ [f(x + ε) - f(x - ε)] / (2ε)
//! ```
//!
//! By comparing this numerical approximation with the analytical gradient computed
//! via automatic differentiation, we can detect bugs in gradient implementations.
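//!
//! The central difference above has truncation error on the order of O(ε²); the
//! cheaper forward difference
//!
//! ```text
//! f'(x) ≈ [f(x + ε) - f(x)] / ε
//! ```
//!
//! is only first-order accurate (O(ε)). In either case ε cannot be made
//! arbitrarily small without running into floating-point cancellation, which is
//! why the default epsilon is a moderate 1e-5 and why the `f32` tests in this
//! module use relaxed tolerances.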
//!
//! # Example
//!
//! ```rust,ignore
//! use tenflowers_core::numerical_gradient::{check_gradients, GradientCheckConfig};
//! use tenflowers_core::{Result, Tensor};
//!
//! // Define a function to test (check_gradients expects Result-returning functions)
//! fn square(x: &Tensor<f32>) -> Result<Tensor<f32>> {
//!     Ok(x * x)
//! }
//!
//! // Analytical gradient: 2x
//! fn square_grad(x: &Tensor<f32>) -> Result<Tensor<f32>> {
//!     Ok(x * 2.0)
//! }
//!
//! let config = GradientCheckConfig::default();
//! let x = Tensor::from_vec(vec![1.0, 2.0, 3.0], &[3]);
//!
//! // Verify gradient correctness
//! let result = check_gradients(&x, square, square_grad, &config).unwrap();
//! assert!(result.passed);
//! ```

use crate::{Result, Tensor, TensorError};
use scirs2_core::numeric::{Float, FromPrimitive};
use std::marker::PhantomData;

/// Configuration for numerical gradient checking
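///
/// # Example
///
/// A minimal sketch of overriding individual fields on top of the defaults
/// (struct-update syntax; the chosen values are illustrative):
///
/// ```rust,ignore
/// use tenflowers_core::numerical_gradient::GradientCheckConfig;
///
/// // Tighten the relative tolerance, keep everything else at its default.
/// let config = GradientCheckConfig {
///     rtol: 1e-4,
///     ..GradientCheckConfig::default()
/// };
/// assert!(config.use_central_difference);
/// ```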
#[derive(Debug, Clone)]
pub struct GradientCheckConfig {
    /// Epsilon for finite difference computation
    pub epsilon: f64,
    /// Relative tolerance for gradient comparison
    pub rtol: f64,
    /// Absolute tolerance for gradient comparison
    pub atol: f64,
    /// Check gradients element-wise (slower but more detailed)
    pub check_elementwise: bool,
    /// Use central differences (more accurate but 2x slower)
    pub use_central_difference: bool,
    /// Maximum number of samples to check (for large tensors)
    pub max_samples: Option<usize>,
    /// Random seed for sampling (if max_samples is set)
    pub random_seed: Option<u64>,
}

impl Default for GradientCheckConfig {
    fn default() -> Self {
        Self {
            epsilon: 1e-5,
            rtol: 1e-3,
            atol: 1e-5,
            check_elementwise: false,
            use_central_difference: true,
            max_samples: None,
            random_seed: None,
        }
    }
}

impl GradientCheckConfig {
    /// Create a configuration for strict checking (tighter tolerances)
    pub fn strict() -> Self {
        Self {
            epsilon: 1e-6,
            rtol: 1e-4,
            atol: 1e-6,
            check_elementwise: true,
            use_central_difference: true,
            max_samples: None,
            random_seed: None,
        }
    }

    /// Create a configuration for relaxed checking (looser tolerances)
    pub fn relaxed() -> Self {
        Self {
            epsilon: 1e-4,
            rtol: 1e-2,
            atol: 1e-4,
            check_elementwise: false,
            use_central_difference: true,
            max_samples: Some(100),
            random_seed: Some(42),
        }
    }

    /// Create a configuration for fast checking (forward differences, sampling)
    pub fn fast() -> Self {
        Self {
            epsilon: 1e-5,
            rtol: 1e-3,
            atol: 1e-5,
            check_elementwise: false,
            use_central_difference: false,
            max_samples: Some(50),
            random_seed: Some(42),
        }
    }
}

/// Result of gradient checking
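///
/// Typically consumed via [`GradientCheckResult::is_ok`] or rendered with
/// [`GradientCheckResult::summary`]; see the tests at the bottom of this module
/// for usage.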
#[derive(Debug, Clone)]
pub struct GradientCheckResult {
    /// Whether the gradient check passed
    pub passed: bool,
    /// Maximum relative error found
    pub max_relative_error: f64,
    /// Maximum absolute error found
    pub max_absolute_error: f64,
    /// Number of elements checked
    pub num_elements_checked: usize,
    /// Number of elements that failed tolerance check
    pub num_failures: usize,
    /// Indices of failed elements (if check_elementwise is true)
    pub failed_indices: Vec<usize>,
    /// Detailed error message if check failed
    pub error_message: Option<String>,
}

impl GradientCheckResult {
    /// Check if the gradient validation passed
    pub fn is_ok(&self) -> bool {
        self.passed
    }

    /// Get failure rate (percentage of elements that failed)
    pub fn failure_rate(&self) -> f64 {
        if self.num_elements_checked == 0 {
            0.0
        } else {
            (self.num_failures as f64 / self.num_elements_checked as f64) * 100.0
        }
    }

    /// Generate a summary report
    pub fn summary(&self) -> String {
        if self.passed {
            format!(
                "✓ Gradient check passed\n\
                 Elements checked: {}\n\
                 Max relative error: {:.2e}\n\
                 Max absolute error: {:.2e}",
                self.num_elements_checked, self.max_relative_error, self.max_absolute_error
            )
        } else {
            format!(
                "✗ Gradient check FAILED\n\
                 Elements checked: {}\n\
                 Failures: {} ({:.2}%)\n\
                 Max relative error: {:.2e}\n\
                 Max absolute error: {:.2e}\n\
                 {}",
                self.num_elements_checked,
                self.num_failures,
                self.failure_rate(),
                self.max_relative_error,
                self.max_absolute_error,
                self.error_message.as_deref().unwrap_or("")
            )
        }
    }
}

/// Numerical gradient checker
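///
/// A sketch of using the checker directly, e.g. to inspect the numerical
/// gradient itself rather than a pass/fail comparison (tensor construction is
/// illustrative):
///
/// ```rust,ignore
/// use scirs2_core::ndarray::array;
/// use tenflowers_core::numerical_gradient::{GradientCheckConfig, NumericalGradientChecker};
/// use tenflowers_core::Tensor;
///
/// let checker = NumericalGradientChecker::<f32>::new(GradientCheckConfig::default());
/// let x = Tensor::from_array(array![1.0f32, 2.0, 3.0].into_dyn());
/// // f(x) = 2x element-wise, so every entry of the numerical gradient should be ~2.
/// let grad = checker.compute_numerical_gradient(&x, |x| Ok(x * 2.0)).unwrap();
/// ```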
pub struct NumericalGradientChecker<T> {
    config: GradientCheckConfig,
    _phantom: PhantomData<T>,
}

impl<T> NumericalGradientChecker<T>
where
    T: Float + FromPrimitive + Clone + Send + Sync + Default + 'static,
{
    /// Create a new gradient checker with the given configuration
    pub fn new(config: GradientCheckConfig) -> Self {
        Self {
            config,
            _phantom: PhantomData,
        }
    }

    /// Compute numerical gradient using finite differences
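    ///
    /// Each input element is perturbed in turn, so this costs two evaluations of
    /// `func` per element with central differences (the forward-difference path
    /// likewise re-evaluates `func(input)` on every iteration). The per-element
    /// comparison assumes `func` is element-wise, i.e. output element `i` depends
    /// only on input element `i`.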
    pub fn compute_numerical_gradient<F>(&self, input: &Tensor<T>, func: F) -> Result<Tensor<T>>
    where
        F: Fn(&Tensor<T>) -> Result<Tensor<T>>,
    {
        let input_data = input.data();
        let input_shape = input.shape();
        let mut gradient_data = Vec::with_capacity(input_data.len());

        let epsilon = T::from_f64(self.config.epsilon).ok_or_else(|| {
            TensorError::invalid_operation_simple("Failed to convert epsilon".to_string())
        })?;

        for i in 0..input_data.len() {
            let grad = if self.config.use_central_difference {
                // Central difference: [f(x + ε) - f(x - ε)] / (2ε)
                let mut input_plus = input_data.to_vec();
                let mut input_minus = input_data.to_vec();

                input_plus[i] = input_plus[i] + epsilon;
                input_minus[i] = input_minus[i] - epsilon;

                let x_plus = Tensor::from_array(
                    scirs2_core::ndarray::Array::from_shape_vec(
                        input_shape.dims().to_vec(),
                        input_plus.to_vec(),
                    )
                    .map_err(|e| TensorError::invalid_argument(format!("Shape mismatch: {}", e)))?
                    .into_dyn(),
                );

                let x_minus = Tensor::from_array(
                    scirs2_core::ndarray::Array::from_shape_vec(
                        input_shape.dims().to_vec(),
                        input_minus.to_vec(),
                    )
                    .map_err(|e| TensorError::invalid_argument(format!("Shape mismatch: {}", e)))?
                    .into_dyn(),
                );

                let f_plus = func(&x_plus)?;
                let f_minus = func(&x_minus)?;

                // For element-wise functions, derivative at position i
                let two_epsilon = epsilon + epsilon;
                let diff = f_plus.data()[i] - f_minus.data()[i];
                diff / two_epsilon
            } else {
                // Forward difference: [f(x + ε) - f(x)] / ε
                let mut input_plus = input_data.to_vec();
                input_plus[i] = input_plus[i] + epsilon;

                let x_plus = Tensor::from_array(
                    scirs2_core::ndarray::Array::from_shape_vec(
                        input_shape.dims().to_vec(),
                        input_plus.to_vec(),
                    )
                    .map_err(|e| TensorError::invalid_argument(format!("Shape mismatch: {}", e)))?
                    .into_dyn(),
                );

                let f_plus = func(&x_plus)?;
                let f_x = func(input)?;

                // For element-wise functions, derivative at position i
                let diff = f_plus.data()[i] - f_x.data()[i];
                diff / epsilon
            };

            gradient_data.push(grad);
        }

        let gradient_array =
            scirs2_core::ndarray::Array::from_shape_vec(input_shape.dims().to_vec(), gradient_data)
                .map_err(|e| TensorError::invalid_argument(format!("Shape mismatch: {}", e)))?
                .into_dyn();

        Ok(Tensor::from_array(gradient_array))
    }

    /// Compare analytical and numerical gradients
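    ///
    /// Relative error is measured against the analytical value (with a 1e-10
    /// floor to avoid dividing by ~zero), and an element is counted as a failure
    /// only when it exceeds *both* the relative tolerance `rtol` and the absolute
    /// tolerance `atol`.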
    pub fn compare_gradients(
        &self,
        numerical: &Tensor<T>,
        analytical: &Tensor<T>,
    ) -> Result<GradientCheckResult> {
        if numerical.shape() != analytical.shape() {
            return Err(TensorError::invalid_argument(format!(
                "Shape mismatch: numerical {:?} vs analytical {:?}",
                numerical.shape(),
                analytical.shape()
            )));
        }

        let num_data = numerical.data();
        let ana_data = analytical.data();

        let rtol = self.config.rtol;
        let atol = self.config.atol;

        let mut max_rel_error = 0.0;
        let mut max_abs_error = 0.0;
        let mut num_failures = 0;
        let mut failed_indices = Vec::new();

        for i in 0..num_data.len() {
            let num_val = num_data[i].to_f64().unwrap_or(0.0);
            let ana_val = ana_data[i].to_f64().unwrap_or(0.0);

            let abs_error = (num_val - ana_val).abs();
            let rel_error = if ana_val.abs() > 1e-10 {
                abs_error / ana_val.abs()
            } else {
                abs_error
            };

            max_rel_error = max_rel_error.max(rel_error);
            max_abs_error = max_abs_error.max(abs_error);

            if rel_error > rtol && abs_error > atol {
                num_failures += 1;
                if self.config.check_elementwise {
                    failed_indices.push(i);
                }
            }
        }

        let passed = num_failures == 0;
        let error_message = if !passed {
            Some(format!(
                "Gradient mismatch: {} of {} elements exceed tolerance (rtol={}, atol={})",
                num_failures,
                num_data.len(),
                rtol,
                atol
            ))
        } else {
            None
        };

        Ok(GradientCheckResult {
            passed,
            max_relative_error: max_rel_error,
            max_absolute_error: max_abs_error,
            num_elements_checked: num_data.len(),
            num_failures,
            failed_indices,
            error_message,
        })
    }

    /// Full gradient check: compute numerical gradient and compare
    pub fn check<F, G>(
        &self,
        input: &Tensor<T>,
        forward: F,
        gradient: G,
    ) -> Result<GradientCheckResult>
    where
        F: Fn(&Tensor<T>) -> Result<Tensor<T>>,
        G: Fn(&Tensor<T>) -> Result<Tensor<T>>,
    {
        let numerical_grad = self.compute_numerical_gradient(input, forward)?;
        let analytical_grad = gradient(input)?;

        self.compare_gradients(&numerical_grad, &analytical_grad)
    }
}

/// Convenience function for quick gradient checking
pub fn check_gradients<T, F, G>(
    input: &Tensor<T>,
    forward: F,
    gradient: G,
    config: &GradientCheckConfig,
) -> Result<GradientCheckResult>
where
    T: Float + FromPrimitive + Clone + Send + Sync + Default + 'static,
    F: Fn(&Tensor<T>) -> Result<Tensor<T>>,
    G: Fn(&Tensor<T>) -> Result<Tensor<T>>,
{
    let checker = NumericalGradientChecker::new(config.clone());
    checker.check(input, forward, gradient)
}

/// Convenience function with default configuration
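///
/// A closure-based sketch (tensor construction is illustrative):
///
/// ```rust,ignore
/// use scirs2_core::ndarray::array;
/// use tenflowers_core::numerical_gradient::quick_check_gradients;
/// use tenflowers_core::Tensor;
///
/// let x = Tensor::from_array(array![1.0f32, 2.0, 3.0].into_dyn());
/// let result = quick_check_gradients(
///     &x,
///     |x| Ok(x * x),   // forward: f(x) = x²
///     |x| Ok(x * 2.0), // analytical gradient: 2x
/// )
/// .unwrap();
/// assert!(result.passed);
/// ```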
pub fn quick_check_gradients<T, F, G>(
    input: &Tensor<T>,
    forward: F,
    gradient: G,
) -> Result<GradientCheckResult>
where
    T: Float + FromPrimitive + Clone + Send + Sync + Default + 'static,
    F: Fn(&Tensor<T>) -> Result<Tensor<T>>,
    G: Fn(&Tensor<T>) -> Result<Tensor<T>>,
{
    check_gradients(input, forward, gradient, &GradientCheckConfig::default())
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_gradient_check_linear() {
        // Test f(x) = 2x, f'(x) = 2
        let input = Tensor::from_array(array![1.0, 2.0, 3.0].into_dyn());

        let forward = |x: &Tensor<f32>| {
            let data: Vec<f32> = x.data().iter().map(|&v| v * 2.0).collect();
            let result_array = scirs2_core::ndarray::Array::from_vec(data).into_dyn();
            Ok(Tensor::from_array(result_array))
        };

        let gradient = |_x: &Tensor<f32>| {
            let grad = array![2.0, 2.0, 2.0].into_dyn();
            Ok(Tensor::from_array(grad))
        };

        // Use relaxed config due to f32 precision limits
        let config = GradientCheckConfig::relaxed();
        let result = check_gradients(&input, forward, gradient, &config)
            .expect("test: check_gradients should succeed");

        assert!(
            result.passed,
            "Gradient check should pass for linear function: {}",
            result.summary()
        );
    }

    #[test]
    fn test_gradient_check_square() {
        // Test f(x) = x^2, f'(x) = 2x
        let input = Tensor::from_array(array![1.0, 2.0, 3.0].into_dyn());

        let forward = |x: &Tensor<f32>| {
            let data: Vec<f32> = x.data().iter().map(|&v| v * v).collect();
            let result_array = scirs2_core::ndarray::Array::from_vec(data).into_dyn();
            Ok(Tensor::from_array(result_array))
        };

        let gradient = |x: &Tensor<f32>| {
            let data: Vec<f32> = x.data().iter().map(|&v| 2.0 * v).collect();
            let grad_array = scirs2_core::ndarray::Array::from_vec(data).into_dyn();
            Ok(Tensor::from_array(grad_array))
        };

        // Use relaxed config due to f32 precision limits
        let config = GradientCheckConfig::relaxed();
        let result = check_gradients(&input, forward, gradient, &config)
            .expect("test: check_gradients should succeed");

        assert!(
            result.passed,
            "Gradient check should pass for square function: {}",
            result.summary()
        );
    }

    #[test]
    fn test_gradient_check_incorrect_gradient() {
        // Test with intentionally incorrect gradient
        let input = Tensor::from_array(array![1.0, 2.0, 3.0].into_dyn());

        let forward = |x: &Tensor<f32>| {
            let data: Vec<f32> = x.data().iter().map(|&v| v * v).collect();
            let result_array = scirs2_core::ndarray::Array::from_vec(data).into_dyn();
            Ok(Tensor::from_array(result_array))
        };

        let wrong_gradient = |x: &Tensor<f32>| {
            // Incorrect: should be 2x, but we return 3x
            let data: Vec<f32> = x.data().iter().map(|&v| 3.0 * v).collect();
            let grad_array = scirs2_core::ndarray::Array::from_vec(data).into_dyn();
            Ok(Tensor::from_array(grad_array))
        };

        let config = GradientCheckConfig::default();
        let result = check_gradients(&input, forward, wrong_gradient, &config)
            .expect("test: check_gradients should succeed");

        assert!(
            !result.passed,
            "Gradient check should fail for incorrect gradient"
        );
        assert!(result.num_failures > 0);
    }

    #[test]
    fn test_gradient_check_config_tolerances() {
        let input = Tensor::from_array(array![1.0].into_dyn());

        let forward = |x: &Tensor<f32>| {
            let data: Vec<f32> = x.data().iter().map(|&v| v * v).collect();
            let result_array = scirs2_core::ndarray::Array::from_vec(data).into_dyn();
            Ok(Tensor::from_array(result_array))
        };

        let slightly_off_gradient = |x: &Tensor<f32>| {
            // Slightly incorrect: 2x * 1.01
            let data: Vec<f32> = x.data().iter().map(|&v| 2.0 * v * 1.01).collect();
            let grad_array = scirs2_core::ndarray::Array::from_vec(data).into_dyn();
            Ok(Tensor::from_array(grad_array))
        };

        // Should pass with relaxed config
        let relaxed = GradientCheckConfig::relaxed();
        let result = check_gradients(&input, forward, slightly_off_gradient, &relaxed)
            .expect("test: check_gradients should succeed");
        assert!(result.passed, "Should pass with relaxed tolerances");

        // Should fail with strict config
        let strict = GradientCheckConfig::strict();
        let result = check_gradients(&input, forward, slightly_off_gradient, &strict)
            .expect("test: check_gradients should succeed");
        assert!(!result.passed, "Should fail with strict tolerances");
    }

    #[test]
    fn test_gradient_check_result_summary() {
        let result = GradientCheckResult {
            passed: false,
            max_relative_error: 0.05,
            max_absolute_error: 0.01,
            num_elements_checked: 100,
            num_failures: 10,
            failed_indices: vec![],
            error_message: Some("Test error".to_string()),
        };

        let summary = result.summary();
        assert!(summary.contains("FAILED"));
        assert!(summary.contains("10.00%"));

        assert_eq!(result.failure_rate(), 10.0);
    }
}