// torsh_tensor — crate root (lib.rs)
//! Tensor implementation for ToRSh with PyTorch-compatible API
//!
//! This crate provides a high-level tensor API that wraps scirs2's autograd
//! functionality with a familiar PyTorch-like interface.
//!
//! # Architecture
//!
//! The tensor implementation is organized into specialized modules:
//!
//! - [`storage`] - Storage management with automatic memory mapping optimization
//! - [`core_ops`] - Core tensor operations, creation, and gradient management
//! - [`shape_ops`] - Shape manipulation, views, and dimension operations
//! - [`data_ops`] - Data access, indexing, and manipulation operations
//! - [`advanced_ops`] - Advanced operations, reductions, and backend integration
//! - [`math_ops`] - Mathematical operations and functions
//! - [`complex_ops`] - Complex number operations and specialized autograd
//!
//! # Quick Start
//!
//! ```rust
//! use torsh_tensor::Tensor;
//! use torsh_core::device::DeviceType;
//!
//! // Create a tensor
//! let data = vec![1.0f32, 2.0, 3.0, 4.0];
//! let tensor = Tensor::from_data(data, vec![2, 2], DeviceType::Cpu)?;
//!
//! // Basic operations
//! let reshaped = tensor.view(&[4, 1])?;
//! let sum = tensor.sum()?;
//! let norm_val = tensor.norm()?.item()?;
//! let normalized = tensor.div_scalar(norm_val)?;
//!
//! // Enable gradients for autograd
//! let x = tensor.requires_grad_(true);
//! let y = x.pow(2.0)?;
//! let loss = y.sum()?;  // Create scalar for backward pass
//! loss.backward()?;
//! # Ok::<(), torsh_core::error::TorshError>(())
//! ```
//!
//! # Features
//!
//! - **Automatic memory management**: Optimized storage with memory mapping for large tensors
//! - **Zero-copy views**: Efficient tensor views with shared underlying data
//! - **PyTorch compatibility**: Familiar API for easy migration from PyTorch
//! - **Automatic differentiation**: Full gradient computation support
//! - **Device abstraction**: CPU and GPU device support
//! - **Complex numbers**: Native complex tensor operations
//! - **SciRS2 integration**: Optimized backend operations for performance

// Build without the Rust standard library when the "std" feature is disabled.
#![cfg_attr(not(feature = "std"), no_std)]

// In no_std builds, pull in `alloc` for Vec/Box/String support.
#[cfg(not(feature = "std"))]
extern crate alloc;

// Core modules providing the tensor implementation
pub mod adaptive_auto_tuner;
pub mod advanced_ops;
pub mod advanced_simd_ops;
pub mod algorithmic_optimizations;
pub mod complex_ops;
pub mod comprehensive_integration_tests;
pub mod computation_graph;
pub mod core_ops;
pub mod cross_platform_validator;
pub mod data_ops;
pub mod expression_optimizer;
pub mod expression_templates;
pub mod hardware_accelerators;
pub mod manipulation;
pub mod math_ops;
pub mod memory_optimization;
pub mod optimization_cli;
pub mod shape_ops;
pub mod storage;
pub mod ultimate_integration_optimizer;
pub mod ultra_performance_profiler;

// Utility and integration modules
#[cfg(feature = "async")]
pub mod async_ops;
pub mod auto_batching;
pub mod backend_integration;
pub mod bfloat16_ops;
pub mod broadcast;
pub mod cache_optimization;
pub mod conv;
pub mod convenience;
pub mod creation;
pub mod custom_dtype;
pub mod custom_ops;
pub mod indexing;
pub mod lazy_loading;
// pub mod lazy_ops; // Temporarily disabled due to complex trait bounds - using fluent API instead
pub mod lockfree_cache;
pub mod memory_pool;
#[cfg(feature = "memory-profiling")]
pub mod memory_profiler;
pub mod nan_inf_detection;
#[cfg(feature = "operation-logging")]
pub mod operation_logging;
// pub mod ops; // Disabled due to duplicate definitions with core modules (all, any, sum, mean, matmul, cat, etc.)
pub mod fft;
pub mod scirs2_backend;
pub mod scirs2_stats_integration;
pub mod shape_inference_debugger;
pub mod sparse;
pub mod stats;
pub mod tensor_comprehension;
pub mod tensor_tracker;
pub mod tensor_utils;
pub mod tensor_view; // Zero-copy tensor views (CRITICAL #1)
pub mod tensor_views;
pub mod type_conversions;

// TODO: Implement custom data types module
// #[cfg(feature = "custom-types")]
// pub mod custom_data_types;

#[cfg(feature = "serialize")]
pub mod serialize;

// Re-export core types and traits
use torsh_core::{
    device::DeviceType,
    dtype::{FloatElement, TensorElement},
    error::Result,
};

// Re-export the main tensor type
pub use core_ops::{Operation, Tensor};

// Re-export convenience methods
pub use convenience::{FluentTensor, TensorConvenience, TensorFluentExt};

// Re-export lazy evaluation functionality (temporarily disabled)
// pub use lazy_ops::{LazyTensor, TensorLazyExt};

// Re-export sparse tensor functionality (COO, CSR, CSC formats)
pub use sparse::{SparseCSC, SparseCSR, SparseTensor};

// Re-export custom operation functionality
pub use custom_ops::{
    global_registry, CustomOperation, CustomOperationRegistry, OperationMetadata, OperationParams,
    TensorCustomOps,
};

// Re-export storage types for advanced usage
pub use storage::{MemoryMappedStorage, TensorStorage};

// Re-export zero-copy view types (CRITICAL #1)
pub use tensor_view::{TensorView, TensorViewMut};

// Version information
/// Full crate version string, read from Cargo.toml at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Major version component.
// NOTE(review): the numeric components below are hard-coded and can drift
// from `VERSION` / Cargo.toml — verify they are kept in sync on release.
pub const VERSION_MAJOR: u32 = 0;
/// Minor version component.
pub const VERSION_MINOR: u32 = 1;
/// Patch version component.
pub const VERSION_PATCH: u32 = 0;

/// Tensor creation macro similar to PyTorch
///
/// Accepts three input forms:
/// - a bracketed list: `tensor!([1.0, 2.0, 3.0])` → 1-D tensor
/// - two or more bare values: `tensor!(1.0, 2.0)` → 1-D tensor
/// - a single value: `tensor!(5.0)` → scalar tensor
#[macro_export]
macro_rules! tensor {
    // 1D array from bracketed values
    ([$($val:expr),+ $(,)?]) => {
        $crate::creation::tensor_1d(&[$($val),+])
    };

    // Multiple values without brackets (at least 2 values to avoid scalar conflict)
    ($val1:expr, $val2:expr $(, $val:expr)* $(,)?) => {
        $crate::creation::tensor_1d(&[$val1, $val2 $(, $val)*])
    };

    // Single value (scalar)
    // NOTE: macro arms are tried in order; this catch-all `$val:expr` arm must
    // stay after the bracketed arm, since `[..]` is itself an expression.
    ($val:expr) => {
        $crate::creation::tensor_scalar($val)
    };
}

/// 2D tensor creation macro
///
/// Takes a bracketed list of rows, each row being anything with a `.to_vec()`
/// method (arrays, slices, Vecs): `tensor_2d!([[1.0, 2.0], [3.0, 4.0]])`.
#[macro_export]
macro_rules! tensor_2d {
    ([$($row:expr),+ $(,)?]) => {{
        // Own each row first so the borrowed `&[_]` slices passed to the
        // creation helper have a live backing buffer for the whole call.
        let rows: Vec<Vec<_>> = vec![$($row.to_vec()),+];
        let row_refs: Vec<&[_]> = rows.iter().map(|row| row.as_slice()).collect();
        $crate::creation::tensor_2d(&row_refs)
    }};
}

190// Display implementation for Tensor
191impl<T: TensorElement> std::fmt::Debug for Tensor<T> {
192    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
193        write!(
194            f,
195            "Tensor(shape={:?}, dtype={}, device={})",
196            self.shape().dims(),
197            self.dtype(),
198            self.device
199        )
200    }
201}
202
203// Additional utility implementations
204impl<T: TensorElement> Tensor<T> {
205    /// Get the reference count of the underlying storage Arc (for testing CoW behavior)
206    #[cfg(test)]
207    pub fn data_ref_count(&self) -> usize {
208        use std::sync::Arc;
209        match &self.storage {
210            TensorStorage::InMemory(data) => Arc::strong_count(data),
211            TensorStorage::MemoryMapped(storage) => Arc::strong_count(storage),
212            #[cfg(feature = "simd")]
213            TensorStorage::Aligned(data) => Arc::strong_count(data),
214            #[cfg(feature = "simd")]
215            TensorStorage::SimdOptimized(storage) => Arc::strong_count(storage),
216        }
217    }
218
219    /// Create from vec with shape (convenience method)
220    pub fn from_vec(data: Vec<T>, shape: &[usize]) -> Result<Self>
221    where
222        T: Copy,
223    {
224        Self::from_data(data, shape.to_vec(), DeviceType::Cpu)
225    }
226}
227
// TODO: Conditional AutogradTensor trait implementation - torsh-autograd not yet available
// #[cfg(feature = "autograd")]
// impl<T: TensorElement> torsh_autograd::AutogradTensor<T> for Tensor<T> {
//     fn shape(&self) -> Shape {
//         self.shape()
//     }
//
//     fn requires_grad(&self) -> bool {
//         self.requires_grad()
//     }
//
//     fn data(&self) -> Box<dyn std::ops::Deref<Target = [T]> + '_> {
//         // Return a boxed vector that can be dereferenced as a slice
//         Box::new(self.to_vec().unwrap_or_default())
//     }
//
//     fn clone_tensor(&self) -> Box<dyn torsh_autograd::AutogradTensor<T>> {
//         Box::new(self.clone())
//     }
//
//     fn to_vec(&self) -> Vec<T>
//     where
//         T: Copy,
//     {
//         self.to_vec().unwrap_or_default()
//     }
//
//     fn device(&self) -> &dyn torsh_core::Device {
//         match &self.device {
//             DeviceType::Cpu => {
//                 static CPU_DEVICE: torsh_core::device::CpuDevice =
//                     torsh_core::device::CpuDevice::new();
//                 &CPU_DEVICE
//             }
//             DeviceType::Cuda(_) => {
//                 static CPU_DEVICE: torsh_core::device::CpuDevice =
//                     torsh_core::device::CpuDevice::new();
//                 &CPU_DEVICE // TODO: Return proper CUDA device
//             }
//             _ => {
//                 static CPU_DEVICE: torsh_core::device::CpuDevice =
//                     torsh_core::device::CpuDevice::new();
//                 &CPU_DEVICE
//             }
//         }
//     }
//
//     fn ones_like(&self) -> Box<dyn torsh_autograd::AutogradTensor<T>>
//     where
//         T: Copy,
//     {
//         Box::new(self.ones_like().unwrap_or_else(|_| self.clone()))
//     }
//
//     fn zeros_like(&self) -> Box<dyn torsh_autograd::AutogradTensor<T>>
//     where
//         T: Copy,
//     {
//         Box::new(self.zeros_like().unwrap_or_else(|_| self.clone()))
//     }
// }

// Re-export commonly used functions and types for convenience
pub mod prelude {
    //! One-stop import for common ToRSh tensor functionality:
    //! `use torsh_tensor::prelude::*;`

    // SIMD-accelerated operations and configuration
    pub use crate::advanced_simd_ops::{
        AdvancedSimdOps, ReductionType, SimdConfig, SimdPerformanceInfo,
    };
    // Algorithm-level optimization and scheduling
    pub use crate::algorithmic_optimizations::{
        AlgorithmConfig, AlgorithmPerformanceStats, AlgorithmicOptimizer, SchedulingStrategy,
    };
    // Integration test harness and reports
    pub use crate::comprehensive_integration_tests::{
        run_comprehensive_integration_tests, ComprehensiveIntegrationTestSuite,
        ComprehensiveTestReport, IntegrationAnalysis, IntegrationTestConfig, PerformanceAnalysis,
        StabilityAnalysis, TestCategory,
    };
    pub use crate::core_ops::Operation;
    // Tensor factory functions (PyTorch-style)
    pub use crate::creation::{eye, ones, rand, randn, zeros};
    // Cross-platform / hardware validation
    pub use crate::cross_platform_validator::{
        CpuArchitecture, CrossPlatformReport, CrossPlatformValidator, GpuVendor,
        HardwareDetectionReport, HardwareDetector, OptimizationConfig, OptimizationReport,
        Platform, PlatformOptimizer, ValidationConfig, ValidationFramework, ValidationReport,
    };
    // Expression-graph optimization
    pub use crate::expression_optimizer::{
        ExpressionGraph, ExpressionNode, ExpressionOptimizer, NodeId, OperationType,
        OptimizationStats, OptimizationStrategy, OptimizerConfig, TensorExpressionOps,
    };
    // Hardware accelerator engines and metrics
    pub use crate::hardware_accelerators::{
        AccelerationWorkload, ComplexityLevel, CpuAccelerationMetrics, CpuAcceleratorEngine,
        GpuAccelerationMetrics, GpuAcceleratorEngine, HardwareAcceleratorReport,
        HardwareAcceleratorSystem, MemoryAccelerationMetrics, MemoryAcceleratorEngine,
        NetworkAccelerationMetrics, OptimizationCoordinator, SpecializedAcceleratorEngine,
        WorkloadType,
    };
    // Memory pooling and global memory optimization
    pub use crate::memory_optimization::{
        AdvancedMemoryPool, AggregateMemoryStats, DefragmentationReport, GlobalMemoryOptimizer,
        MemoryConfig, MemoryStats,
    };
    // Optimization CLI entry points
    pub use crate::optimization_cli::{
        run_cli_command, run_optimization_cli, CLICommand, CLIConfig, OptimizationCLI,
        OptimizationLevel, OptimizationType,
    };
    // System-wide cross-layer optimizer
    pub use crate::ultimate_integration_optimizer::{
        CrossLayerSynergyGains, EfficiencyImprovements, EnergyEfficiencyImprovements,
        GlobalPerformanceCache, IntelligentLearningSystem, LayerSpecificImprovements,
        OptimizationComplexity, OptimizationStatus, ScalabilityImprovements,
        SystemOptimizationCoordinator, UltimateIntegrationOptimizer, UltimateOptimizationResult,
    };
    // Core tensor types from this crate and torsh_core
    pub use crate::{Tensor, TensorConvenience, TensorStorage};
    pub use torsh_core::{
        device::DeviceType,
        dtype::{DType, FloatElement, TensorElement},
        error::{Result, TorshError},
        shape::Shape,
    };
}

#[cfg(test)]
mod integration_tests {
    use super::*;
    use torsh_core::device::DeviceType;
    use torsh_core::dtype::DType;

    /// Smoke test: creation from raw data plus shape/numel/dtype accessors.
    #[test]
    fn test_tensor_creation_and_basic_ops() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0];
        let tensor = Tensor::from_data(data, vec![2, 2], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        assert_eq!(tensor.shape().dims(), &[2, 2]);
        assert_eq!(tensor.numel(), 4);
        assert_eq!(tensor.dtype(), DType::F32);
    }

    /// `view` reinterprets the shape; `slice_tensor` extracts a row range.
    #[test]
    fn test_tensor_reshape_and_view() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
        let tensor = Tensor::from_data(data, vec![2, 3], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let reshaped = tensor.view(&[3, 2]).expect("view should succeed");
        assert_eq!(reshaped.shape().dims(), &[3, 2]);

        let slice = tensor
            .slice_tensor(0, 0, 1)
            .expect("slice_tensor should succeed");
        assert_eq!(slice.shape().dims(), &[1, 3]);
    }

    /// Element-wise add and mul on same-shape 1-D tensors.
    #[test]
    fn test_tensor_math_operations() {
        let a = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let b = Tensor::from_data(vec![4.0f32, 5.0, 6.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let sum = a.add(&b).expect("addition should succeed");
        assert_eq!(
            sum.data().expect("data retrieval should succeed"),
            vec![5.0, 7.0, 9.0]
        );

        let product = a.mul(&b).expect("multiplication should succeed");
        assert_eq!(
            product.data().expect("data retrieval should succeed"),
            vec![4.0, 10.0, 18.0]
        );
    }

    /// `sqrt` is exact on perfect squares; `norm` of a non-zero tensor is positive.
    #[test]
    fn test_tensor_advanced_operations() {
        let data = vec![1.0f32, 4.0, 9.0, 16.0];
        let tensor = Tensor::from_data(data, vec![4], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let sqrt_result = tensor.sqrt().expect("sqrt should succeed");
        assert_eq!(
            sqrt_result.data().expect("data retrieval should succeed"),
            vec![1.0, 2.0, 3.0, 4.0]
        );

        let norm = tensor.norm().expect("norm should succeed");
        assert!(norm.item().expect("item extraction should succeed") > 0.0);
    }

    /// In-place `fill_`, indexed `get_item`, and `gather` along dim 0.
    #[test]
    fn test_tensor_data_operations() {
        let mut tensor =
            Tensor::<f32>::zeros(&[2, 3], DeviceType::Cpu).expect("zeros creation should succeed");

        tensor.fill_(5.0).expect("fill should succeed");
        assert_eq!(
            tensor.get_item(&[0, 0]).expect("get_item should succeed"),
            5.0
        );

        // Gather elements at positions 0 and 2 from a 1-D tensor.
        let data_1d = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let tensor_1d = Tensor::from_data(data_1d, vec![5], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let indices = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let gathered = tensor_1d
            .gather(0, &indices)
            .expect("gather should succeed");
        assert_eq!(
            gathered.data().expect("data retrieval should succeed"),
            vec![1.0, 3.0]
        );
    }

    /// Storage selection for small tensors and copy-on-write sharing on clone.
    #[test]
    fn test_tensor_storage_optimization() {
        // Small tensor should use in-memory storage
        let small =
            Tensor::<f32>::zeros(&[10], DeviceType::Cpu).expect("zeros creation should succeed");
        assert_eq!(small.storage_type(), "in_memory");

        // Test copy-on-write behavior
        let tensor1 =
            Tensor::<f32>::ones(&[5], DeviceType::Cpu).expect("ones creation should succeed");
        let tensor2 = tensor1.clone();
        assert!(tensor1.shares_storage(&tensor2));
    }

    /// `requires_grad_` sets the flag without allocating a gradient; `detach`
    /// returns a tensor outside the autograd graph.
    #[test]
    fn test_gradient_operations() {
        let tensor = Tensor::<f32>::ones(&[2, 2], DeviceType::Cpu)
            .expect("ones creation should succeed")
            .requires_grad_(true);

        assert!(tensor.requires_grad());
        assert!(!tensor.has_grad());

        let detached = tensor.detach();
        assert!(!detached.requires_grad());
    }
}
466}