
torsh_tensor/lib.rs

//! Tensor implementation for ToRSh with PyTorch-compatible API
//!
//! This crate provides a high-level tensor API that wraps scirs2's autograd
//! functionality with a familiar PyTorch-like interface.
//!
//! # Architecture
//!
//! The tensor implementation is organized into specialized modules:
//!
//! - [`storage`] - Storage management with automatic memory mapping optimization
//! - [`core_ops`] - Core tensor operations, creation, and gradient management
//! - [`shape_ops`] - Shape manipulation, views, and dimension operations
//! - [`data_ops`] - Data access, indexing, and manipulation operations
//! - [`advanced_ops`] - Advanced operations, reductions, and backend integration
//! - [`math_ops`] - Mathematical operations and functions
//! - [`complex_ops`] - Complex number operations and specialized autograd
//!
//! # Quick Start
//!
//! ```rust
//! use torsh_tensor::Tensor;
//! use torsh_core::device::DeviceType;
//!
//! // Create a tensor
//! let data = vec![1.0f32, 2.0, 3.0, 4.0];
//! let tensor = Tensor::from_data(data, vec![2, 2], DeviceType::Cpu)?;
//!
//! // Basic operations
//! let reshaped = tensor.view(&[4, 1])?;
//! let sum = tensor.sum()?;
//! let norm_val = tensor.norm()?.item()?;
//! let normalized = tensor.div_scalar(norm_val)?;
//!
//! // Enable gradients for autograd
//! let x = tensor.requires_grad_(true);
//! let y = x.pow(2.0)?;
//! let loss = y.sum()?;  // Create scalar for backward pass
//! loss.backward()?;
//! # Ok::<(), torsh_core::error::TorshError>(())
//! ```
//!
//! # Features
//!
//! - **Automatic memory management**: Optimized storage with memory mapping for large tensors
//! - **Zero-copy views**: Efficient tensor views with shared underlying data
//! - **PyTorch compatibility**: Familiar API for easy migration from PyTorch
//! - **Automatic differentiation**: Full gradient computation support
//! - **Device abstraction**: CPU and GPU device support
//! - **Complex numbers**: Native complex tensor operations
//! - **SciRS2 integration**: Optimized backend operations for performance
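//!
//! # Zero-Copy Views and Shared Storage
//!
//! A minimal sketch of the view/storage-sharing behavior (illustrative; the
//! `ones`, `shares_storage`, and `view` calls mirror this crate's integration
//! tests, so treat the exact signatures as assumptions):
//!
//! ```rust
//! use torsh_tensor::Tensor;
//! use torsh_core::device::DeviceType;
//!
//! let a = Tensor::<f32>::ones(&[4], DeviceType::Cpu)?;
//! let b = a.clone();            // clones share the underlying storage
//! assert!(a.shares_storage(&b));
//!
//! let col = a.view(&[4, 1])?;   // zero-copy view over the same data
//! assert_eq!(col.shape().dims(), &[4, 1]);
//! # Ok::<(), torsh_core::error::TorshError>(())
//! ```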

#![cfg_attr(not(feature = "std"), no_std)]

#[cfg(not(feature = "std"))]
extern crate alloc;

// Core modules providing the tensor implementation
pub mod adaptive_auto_tuner;
pub mod advanced_ops;
pub mod advanced_simd_ops;
pub mod algorithmic_optimizations;
pub mod complex_ops;
pub mod comprehensive_integration_tests;
pub mod computation_graph;
pub mod core_ops;
pub mod cross_platform_validator;
pub mod data_ops;
pub mod expression_optimizer;
pub mod expression_templates;
pub mod hardware_accelerators;
pub mod manipulation;
pub mod math_ops;
pub mod memory_optimization;
pub mod optimization_cli;
pub mod shape_ops;
pub mod storage;
pub mod ultimate_integration_optimizer;
pub mod ultra_performance_profiler;

// Utility and integration modules
#[cfg(feature = "async")]
pub mod async_ops;
pub mod auto_batching;
pub mod backend_integration;
pub mod bfloat16_ops;
pub mod broadcast;
pub mod cache_optimization;
pub mod conv;
pub mod convenience;
pub mod creation;
pub mod custom_dtype;
pub mod custom_ops;
pub mod indexing;
pub mod lazy_loading;
// pub mod lazy_ops; // Temporarily disabled due to complex trait bounds - using fluent API instead
pub mod lockfree_cache;
pub mod memory_pool;
#[cfg(feature = "memory-profiling")]
pub mod memory_profiler;
pub mod nan_inf_detection;
#[cfg(feature = "operation-logging")]
pub mod operation_logging;
// pub mod ops; // Disabled due to duplicate definitions with core modules (all, any, sum, mean, matmul, cat, etc.)
pub mod fft;
pub mod scirs2_backend;
pub mod scirs2_stats_integration;
pub mod shape_inference_debugger;
pub mod simd_ops_f32;
pub mod sparse;
pub mod stats;
pub mod tensor_comprehension;
pub mod tensor_tracker;
pub mod tensor_utils;
pub mod tensor_view; // Zero-copy tensor views (CRITICAL #1)
pub mod tensor_views;
pub mod type_conversions;

// TODO: Implement custom data types module
// #[cfg(feature = "custom-types")]
// pub mod custom_data_types;

#[cfg(feature = "serialize")]
pub mod serialize;

// Re-export core types and traits
use torsh_core::{
    device::DeviceType,
    dtype::{FloatElement, TensorElement},
    error::Result,
};

// Re-export the main tensor type
pub use core_ops::{Operation, Tensor};

// Re-export convenience methods
pub use convenience::{FluentTensor, TensorConvenience, TensorFluentExt};

// Re-export lazy evaluation functionality (temporarily disabled)
// pub use lazy_ops::{LazyTensor, TensorLazyExt};

// Re-export sparse tensor functionality (COO, CSR, CSC formats)
pub use sparse::{SparseCSC, SparseCSR, SparseTensor};

// Re-export custom operation functionality
pub use custom_ops::{
    global_registry, CustomOperation, CustomOperationRegistry, OperationMetadata, OperationParams,
    TensorCustomOps,
};

// Re-export storage types for advanced usage
pub use storage::{MemoryMappedStorage, TensorStorage};

// Re-export zero-copy view types (CRITICAL #1)
pub use tensor_view::{TensorView, TensorViewMut};

// Version information
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
pub const VERSION_MAJOR: u32 = 0;
pub const VERSION_MINOR: u32 = 1;
pub const VERSION_PATCH: u32 = 0;

/// Tensor creation macro similar to PyTorch
#[macro_export]
macro_rules! tensor {
    // 1D array from bracketed values
    ([$($val:expr),+ $(,)?]) => {
        $crate::creation::tensor_1d(&[$($val),+])
    };

    // Multiple values without brackets (at least 2 values to avoid scalar conflict)
    ($val1:expr, $val2:expr $(, $val:expr)* $(,)?) => {
        $crate::creation::tensor_1d(&[$val1, $val2 $(, $val)*])
    };

    // Single value (scalar)
    ($val:expr) => {
        $crate::creation::tensor_scalar($val)
    };
}
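
// Hedged usage sketch for `tensor!` (added for illustration; assumes
// `creation::tensor_1d` / `creation::tensor_scalar` return `Result`-wrapped
// tensors, consistent with the rest of this crate):
//
//     let s = tensor!(3.0f32)?;              // single value -> tensor_scalar
//     let v = tensor!([1.0f32, 2.0, 3.0])?;  // bracketed list -> tensor_1d
//     let w = tensor!(1.0f32, 2.0, 3.0)?;    // two or more bare values -> tensor_1d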

/// 2D tensor creation macro
#[macro_export]
macro_rules! tensor_2d {
    ([$($row:expr),+ $(,)?]) => {{
        let rows: Vec<Vec<_>> = vec![$($row.to_vec()),+];
        let row_refs: Vec<&[_]> = rows.iter().map(|row| row.as_slice()).collect();
        $crate::creation::tensor_2d(&row_refs)
    }};
}
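
// Hedged usage sketch for `tensor_2d!` (added for illustration; each `$row`
// must expose `to_vec()`, e.g. an array or slice literal):
//
//     let m = tensor_2d!([[1.0f32, 2.0], [3.0, 4.0]]);  // 2x2 from row arrays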

// Debug implementation for Tensor
impl<T: TensorElement> std::fmt::Debug for Tensor<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Tensor(shape={:?}, dtype={}, device={})",
            self.shape().dims(),
            self.dtype(),
            self.device
        )
    }
}
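
// Illustrative note (added): formatting a 2x2 f32 CPU tensor with `{:?}` is
// expected to yield something like `Tensor(shape=[2, 2], dtype=f32, device=cpu)`;
// the exact dtype/device strings depend on their `Display` impls in torsh_core.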

// Additional utility implementations
impl<T: TensorElement> Tensor<T> {
    /// Get the reference count of the underlying storage Arc (for testing CoW behavior)
    #[cfg(test)]
    pub fn data_ref_count(&self) -> usize {
        use std::sync::Arc;
        match &self.storage {
            TensorStorage::InMemory(data) => Arc::strong_count(data),
            TensorStorage::MemoryMapped(storage) => Arc::strong_count(storage),
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => Arc::strong_count(data),
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(storage) => Arc::strong_count(storage),
        }
    }

    /// Create from vec with shape (convenience method)
    pub fn from_vec(data: Vec<T>, shape: &[usize]) -> Result<Self>
    where
        T: Copy,
    {
        Self::from_data(data, shape.to_vec(), DeviceType::Cpu)
    }
}
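
// A minimal, hedged usage sketch for the `from_vec` convenience constructor
// above (added for illustration; `shape().dims()` and `numel()` follow the
// integration tests at the bottom of this file).
#[cfg(test)]
mod from_vec_usage_example {
    use super::*;

    #[test]
    fn from_vec_builds_a_cpu_tensor() {
        // `from_vec` defaults to the CPU device and takes the shape as a slice.
        let t = Tensor::from_vec(vec![1.0f32, 2.0, 3.0, 4.0], &[2, 2])
            .expect("from_vec should succeed");
        assert_eq!(t.shape().dims(), &[2, 2]);
        assert_eq!(t.numel(), 4);
    }
}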

// TODO: Conditional AutogradTensor trait implementation - torsh-autograd not yet available
// #[cfg(feature = "autograd")]
// impl<T: TensorElement> torsh_autograd::AutogradTensor<T> for Tensor<T> {
//     fn shape(&self) -> Shape {
//         self.shape()
//     }
//
//     fn requires_grad(&self) -> bool {
//         self.requires_grad()
//     }
//
//     fn data(&self) -> Box<dyn std::ops::Deref<Target = [T]> + '_> {
//         // Return a boxed vector that can be dereferenced as a slice
//         Box::new(self.to_vec().unwrap_or_default())
//     }
//
//     fn clone_tensor(&self) -> Box<dyn torsh_autograd::AutogradTensor<T>> {
//         Box::new(self.clone())
//     }
//
//     fn to_vec(&self) -> Vec<T>
//     where
//         T: Copy,
//     {
//         self.to_vec().unwrap_or_default()
//     }
//
//     fn device(&self) -> &dyn torsh_core::Device {
//         match &self.device {
//             DeviceType::Cpu => {
//                 static CPU_DEVICE: torsh_core::device::CpuDevice =
//                     torsh_core::device::CpuDevice::new();
//                 &CPU_DEVICE
//             }
//             DeviceType::Cuda(_) => {
//                 static CPU_DEVICE: torsh_core::device::CpuDevice =
//                     torsh_core::device::CpuDevice::new();
//                 &CPU_DEVICE // TODO: Return proper CUDA device
//             }
//             _ => {
//                 static CPU_DEVICE: torsh_core::device::CpuDevice =
//                     torsh_core::device::CpuDevice::new();
//                 &CPU_DEVICE
//             }
//         }
//     }
//
//     fn ones_like(&self) -> Box<dyn torsh_autograd::AutogradTensor<T>>
//     where
//         T: Copy,
//     {
//         Box::new(self.ones_like().unwrap_or_else(|_| self.clone()))
//     }
//
//     fn zeros_like(&self) -> Box<dyn torsh_autograd::AutogradTensor<T>>
//     where
//         T: Copy,
//     {
//         Box::new(self.zeros_like().unwrap_or_else(|_| self.clone()))
//     }
// }

// Re-export commonly used functions and types for convenience
pub mod prelude {
    pub use crate::advanced_simd_ops::{
        AdvancedSimdOps, ReductionType, SimdConfig, SimdPerformanceInfo,
    };
    pub use crate::algorithmic_optimizations::{
        AlgorithmConfig, AlgorithmPerformanceStats, AlgorithmicOptimizer, SchedulingStrategy,
    };
    pub use crate::comprehensive_integration_tests::{
        run_comprehensive_integration_tests, ComprehensiveIntegrationTestSuite,
        ComprehensiveTestReport, IntegrationAnalysis, IntegrationTestConfig, PerformanceAnalysis,
        StabilityAnalysis, TestCategory,
    };
    pub use crate::core_ops::Operation;
    pub use crate::creation::{eye, ones, rand, randn, zeros};
    pub use crate::cross_platform_validator::{
        CpuArchitecture, CrossPlatformReport, CrossPlatformValidator, GpuVendor,
        HardwareDetectionReport, HardwareDetector, OptimizationConfig, OptimizationReport,
        Platform, PlatformOptimizer, ValidationConfig, ValidationFramework, ValidationReport,
    };
    pub use crate::expression_optimizer::{
        ExpressionGraph, ExpressionNode, ExpressionOptimizer, NodeId, OperationType,
        OptimizationStats, OptimizationStrategy, OptimizerConfig, TensorExpressionOps,
    };
    pub use crate::hardware_accelerators::{
        AccelerationWorkload, ComplexityLevel, CpuAccelerationMetrics, CpuAcceleratorEngine,
        GpuAccelerationMetrics, GpuAcceleratorEngine, HardwareAcceleratorReport,
        HardwareAcceleratorSystem, MemoryAccelerationMetrics, MemoryAcceleratorEngine,
        NetworkAccelerationMetrics, OptimizationCoordinator, SpecializedAcceleratorEngine,
        WorkloadType,
    };
    pub use crate::memory_optimization::{
        AdvancedMemoryPool, AggregateMemoryStats, DefragmentationReport, GlobalMemoryOptimizer,
        MemoryConfig, MemoryStats,
    };
    pub use crate::optimization_cli::{
        run_cli_command, run_optimization_cli, CLICommand, CLIConfig, OptimizationCLI,
        OptimizationLevel, OptimizationType,
    };
    pub use crate::ultimate_integration_optimizer::{
        CrossLayerSynergyGains, EfficiencyImprovements, EnergyEfficiencyImprovements,
        GlobalPerformanceCache, IntelligentLearningSystem, LayerSpecificImprovements,
        OptimizationComplexity, OptimizationStatus, ScalabilityImprovements,
        SystemOptimizationCoordinator, UltimateIntegrationOptimizer, UltimateOptimizationResult,
    };
    pub use crate::{Tensor, TensorConvenience, TensorStorage};
    pub use torsh_core::{
        device::DeviceType,
        dtype::{DType, FloatElement, TensorElement},
        error::{Result, TorshError},
        shape::Shape,
    };
}
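
// A minimal, hedged sketch of downstream prelude usage (added for
// illustration; `ones` and `div_scalar` follow the crate docs and tests above).
#[cfg(test)]
mod prelude_usage_example {
    use crate::prelude::*;

    #[test]
    fn prelude_covers_common_tensor_code() {
        // Everything needed here (Tensor, DeviceType) comes from the prelude.
        let x = Tensor::<f32>::ones(&[2, 3], DeviceType::Cpu).expect("ones should succeed");
        let halved = x.div_scalar(2.0).expect("div_scalar should succeed");
        assert_eq!(halved.shape().dims(), &[2, 3]);
    }
}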

#[cfg(test)]
mod integration_tests {
    use super::*;
    use torsh_core::device::DeviceType;
    use torsh_core::dtype::DType;

    #[test]
    fn test_tensor_creation_and_basic_ops() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0];
        let tensor = Tensor::from_data(data, vec![2, 2], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        assert_eq!(tensor.shape().dims(), &[2, 2]);
        assert_eq!(tensor.numel(), 4);
        assert_eq!(tensor.dtype(), DType::F32);
    }

    #[test]
    fn test_tensor_reshape_and_view() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
        let tensor = Tensor::from_data(data, vec![2, 3], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let reshaped = tensor.view(&[3, 2]).expect("view should succeed");
        assert_eq!(reshaped.shape().dims(), &[3, 2]);

        let slice = tensor
            .slice_tensor(0, 0, 1)
            .expect("slice_tensor should succeed");
        assert_eq!(slice.shape().dims(), &[1, 3]);
    }

    #[test]
    fn test_tensor_math_operations() {
        let a = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let b = Tensor::from_data(vec![4.0f32, 5.0, 6.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let sum = a.add(&b).expect("addition should succeed");
        assert_eq!(
            sum.data().expect("data retrieval should succeed"),
            vec![5.0, 7.0, 9.0]
        );

        let product = a.mul(&b).expect("multiplication should succeed");
        assert_eq!(
            product.data().expect("data retrieval should succeed"),
            vec![4.0, 10.0, 18.0]
        );
    }

    #[test]
    fn test_tensor_advanced_operations() {
        let data = vec![1.0f32, 4.0, 9.0, 16.0];
        let tensor = Tensor::from_data(data, vec![4], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let sqrt_result = tensor.sqrt().expect("sqrt should succeed");
        assert_eq!(
            sqrt_result.data().expect("data retrieval should succeed"),
            vec![1.0, 2.0, 3.0, 4.0]
        );

        let norm = tensor.norm().expect("norm should succeed");
        assert!(norm.item().expect("item extraction should succeed") > 0.0);
    }

    #[test]
    fn test_tensor_data_operations() {
        let mut tensor =
            Tensor::<f32>::zeros(&[2, 3], DeviceType::Cpu).expect("zeros creation should succeed");

        tensor.fill_(5.0).expect("fill should succeed");
        assert_eq!(
            tensor.get_item(&[0, 0]).expect("get_item should succeed"),
            5.0
        );

        let indices = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let _src = Tensor::from_data(vec![10.0f32, 20.0], vec![2], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let data_1d = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let tensor_1d = Tensor::from_data(data_1d, vec![5], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let gathered = tensor_1d
            .gather(0, &indices)
            .expect("gather should succeed");
        assert_eq!(
            gathered.data().expect("data retrieval should succeed"),
            vec![1.0, 3.0]
        );
    }

    #[test]
    fn test_tensor_storage_optimization() {
        // Small tensor should use in-memory storage
        let small =
            Tensor::<f32>::zeros(&[10], DeviceType::Cpu).expect("zeros creation should succeed");
        assert_eq!(small.storage_type(), "in_memory");

        // Test copy-on-write behavior
        let tensor1 =
            Tensor::<f32>::ones(&[5], DeviceType::Cpu).expect("ones creation should succeed");
        let tensor2 = tensor1.clone();
        assert!(tensor1.shares_storage(&tensor2));
    }

    #[test]
    fn test_gradient_operations() {
        let tensor = Tensor::<f32>::ones(&[2, 2], DeviceType::Cpu)
            .expect("ones creation should succeed")
            .requires_grad_(true);

        assert!(tensor.requires_grad());
        assert!(!tensor.has_grad());

        let detached = tensor.detach();
        assert!(!detached.requires_grad());
    }
}