1#![cfg_attr(not(feature = "std"), no_std)]
53
54#[cfg(not(feature = "std"))]
55extern crate alloc;
56
57pub mod adaptive_auto_tuner;
59pub mod advanced_ops;
60pub mod advanced_simd_ops;
61pub mod algorithmic_optimizations;
62pub mod complex_ops;
63pub mod comprehensive_integration_tests;
64pub mod computation_graph;
65pub mod core_ops;
66pub mod cross_platform_validator;
67pub mod data_ops;
68pub mod expression_optimizer;
69pub mod expression_templates;
70pub mod hardware_accelerators;
71pub mod manipulation;
72pub mod math_ops;
73pub mod memory_optimization;
74pub mod optimization_cli;
75pub mod shape_ops;
76pub mod storage;
77pub mod ultimate_integration_optimizer;
78pub mod ultra_performance_profiler;
79
80#[cfg(feature = "async")]
82pub mod async_ops;
83pub mod auto_batching;
84pub mod backend_integration;
85pub mod bfloat16_ops;
86pub mod broadcast;
87pub mod cache_optimization;
88pub mod conv;
89pub mod convenience;
90pub mod creation;
91pub mod custom_dtype;
92pub mod custom_ops;
93pub mod indexing;
94pub mod lazy_loading;
95pub mod lockfree_cache;
97pub mod memory_pool;
98#[cfg(feature = "memory-profiling")]
99pub mod memory_profiler;
100pub mod nan_inf_detection;
101#[cfg(feature = "operation-logging")]
102pub mod operation_logging;
103pub mod fft;
105pub mod scirs2_backend;
106pub mod scirs2_stats_integration;
107pub mod shape_inference_debugger;
108pub mod simd_ops_f32;
109pub mod sparse;
110pub mod stats;
111pub mod tensor_comprehension;
112pub mod tensor_tracker;
113pub mod tensor_utils;
114pub mod tensor_view; pub mod tensor_views;
116pub mod type_conversions;
117
118#[cfg(feature = "serialize")]
123pub mod serialize;
124
125use torsh_core::{
127 device::DeviceType,
128 dtype::{FloatElement, TensorElement},
129 error::Result,
130};
131
132pub use core_ops::{Operation, Tensor};
134
135pub use convenience::{FluentTensor, TensorConvenience, TensorFluentExt};
137
138pub use sparse::{SparseCSC, SparseCSR, SparseTensor};
143
144pub use custom_ops::{
146 global_registry, CustomOperation, CustomOperationRegistry, OperationMetadata, OperationParams,
147 TensorCustomOps,
148};
149
150pub use storage::{MemoryMappedStorage, TensorStorage};
152
153pub use tensor_view::{TensorView, TensorViewMut};
155
/// Full crate version string, taken from `Cargo.toml` at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Major version component.
/// NOTE(review): the three numeric components below are hard-coded and can
/// drift from the `CARGO_PKG_VERSION` string above — keep in sync with
/// `Cargo.toml` when bumping the version.
pub const VERSION_MAJOR: u32 = 0;
/// Minor version component.
pub const VERSION_MINOR: u32 = 1;
/// Patch version component.
pub const VERSION_PATCH: u32 = 0;
161
/// Creates a 1-D or scalar tensor from literal values.
///
/// Accepted forms (arm order matters — more specific patterns first):
/// - `tensor!([1.0, 2.0, 3.0])` — bracketed list → 1-D tensor via
///   [`crate::creation::tensor_1d`].
/// - `tensor!(1.0, 2.0, 3.0)` — two or more bare values → 1-D tensor.
/// - `tensor!(5.0)` — a single value → scalar tensor via
///   [`crate::creation::tensor_scalar`].
///
/// All forms return whatever `Result` the underlying creation function
/// produces.
#[macro_export]
macro_rules! tensor {
    // Bracketed list: one or more comma-separated values, trailing comma ok.
    ([$($val:expr),+ $(,)?]) => {
        $crate::creation::tensor_1d(&[$($val),+])
    };

    // Two or more bare values: also a 1-D tensor. Requires at least two
    // expressions so it cannot shadow the single-scalar arm below.
    ($val1:expr, $val2:expr $(, $val:expr)* $(,)?) => {
        $crate::creation::tensor_1d(&[$val1, $val2 $(, $val)*])
    };

    // Exactly one bare value: scalar (0-D) tensor.
    ($val:expr) => {
        $crate::creation::tensor_scalar($val)
    };
}
180
/// Creates a 2-D tensor from a bracketed list of rows, e.g.
/// `tensor_2d!([[1.0, 2.0], [3.0, 4.0]])`.
///
/// Each `$row` must support `.to_vec()` (arrays and slices do); the rows are
/// first collected into owned `Vec`s so that slice references with a uniform
/// lifetime can be handed to [`crate::creation::tensor_2d`].
#[macro_export]
macro_rules! tensor_2d {
    ([$($row:expr),+ $(,)?]) => {{
        // Own the row data first; `tensor_2d` takes `&[&[_]]`, so we need
        // stable backing storage for the slice references below.
        let rows: Vec<Vec<_>> = vec![$($row.to_vec()),+];
        let row_refs: Vec<&[_]> = rows.iter().map(|row| row.as_slice()).collect();
        $crate::creation::tensor_2d(&row_refs)
    }};
}
190
191impl<T: TensorElement> std::fmt::Debug for Tensor<T> {
193 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194 write!(
195 f,
196 "Tensor(shape={:?}, dtype={}, device={})",
197 self.shape().dims(),
198 self.dtype(),
199 self.device
200 )
201 }
202}
203
impl<T: TensorElement> Tensor<T> {
    /// Returns the `Arc` strong count of the underlying storage.
    ///
    /// Test-only helper used to verify storage-sharing behavior (e.g. that
    /// `clone()` shares rather than copies data).
    #[cfg(test)]
    pub fn data_ref_count(&self) -> usize {
        use std::sync::Arc;
        // Each storage variant wraps its buffer in an `Arc`; report that
        // Arc's strong count regardless of variant.
        match &self.storage {
            TensorStorage::InMemory(data) => Arc::strong_count(data),
            TensorStorage::MemoryMapped(storage) => Arc::strong_count(storage),
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => Arc::strong_count(data),
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(storage) => Arc::strong_count(storage),
        }
    }

    /// Builds a CPU tensor from an owned `Vec` and a shape slice.
    ///
    /// Convenience wrapper over `from_data` that fixes the device to
    /// [`DeviceType::Cpu`]. The element count of `data` must match the
    /// product of `shape` — presumably `from_data` validates this and
    /// returns an error otherwise (TODO confirm against `from_data`).
    pub fn from_vec(data: Vec<T>, shape: &[usize]) -> Result<Self>
    where
        T: Copy,
    {
        Self::from_data(data, shape.to_vec(), DeviceType::Cpu)
    }
}
228
/// Convenience prelude: `use torsh_tensor::prelude::*;` brings the most
/// commonly used tensor types, creation helpers, optimizer/validator APIs,
/// and core error/device/dtype types into scope in one import.
pub mod prelude {
    pub use crate::advanced_simd_ops::{
        AdvancedSimdOps, ReductionType, SimdConfig, SimdPerformanceInfo,
    };
    pub use crate::algorithmic_optimizations::{
        AlgorithmConfig, AlgorithmPerformanceStats, AlgorithmicOptimizer, SchedulingStrategy,
    };
    pub use crate::comprehensive_integration_tests::{
        run_comprehensive_integration_tests, ComprehensiveIntegrationTestSuite,
        ComprehensiveTestReport, IntegrationAnalysis, IntegrationTestConfig, PerformanceAnalysis,
        StabilityAnalysis, TestCategory,
    };
    pub use crate::core_ops::Operation;
    pub use crate::creation::{eye, ones, rand, randn, zeros};
    pub use crate::cross_platform_validator::{
        CpuArchitecture, CrossPlatformReport, CrossPlatformValidator, GpuVendor,
        HardwareDetectionReport, HardwareDetector, OptimizationConfig, OptimizationReport,
        Platform, PlatformOptimizer, ValidationConfig, ValidationFramework, ValidationReport,
    };
    pub use crate::expression_optimizer::{
        ExpressionGraph, ExpressionNode, ExpressionOptimizer, NodeId, OperationType,
        OptimizationStats, OptimizationStrategy, OptimizerConfig, TensorExpressionOps,
    };
    pub use crate::hardware_accelerators::{
        AccelerationWorkload, ComplexityLevel, CpuAccelerationMetrics, CpuAcceleratorEngine,
        GpuAccelerationMetrics, GpuAcceleratorEngine, HardwareAcceleratorReport,
        HardwareAcceleratorSystem, MemoryAccelerationMetrics, MemoryAcceleratorEngine,
        NetworkAccelerationMetrics, OptimizationCoordinator, SpecializedAcceleratorEngine,
        WorkloadType,
    };
    pub use crate::memory_optimization::{
        AdvancedMemoryPool, AggregateMemoryStats, DefragmentationReport, GlobalMemoryOptimizer,
        MemoryConfig, MemoryStats,
    };
    pub use crate::optimization_cli::{
        run_cli_command, run_optimization_cli, CLICommand, CLIConfig, OptimizationCLI,
        OptimizationLevel, OptimizationType,
    };
    pub use crate::ultimate_integration_optimizer::{
        CrossLayerSynergyGains, EfficiencyImprovements, EnergyEfficiencyImprovements,
        GlobalPerformanceCache, IntelligentLearningSystem, LayerSpecificImprovements,
        OptimizationComplexity, OptimizationStatus, ScalabilityImprovements,
        SystemOptimizationCoordinator, UltimateIntegrationOptimizer, UltimateOptimizationResult,
    };
    // Crate-root re-exports and the core foundation types most call sites need.
    pub use crate::{Tensor, TensorConvenience, TensorStorage};
    pub use torsh_core::{
        device::DeviceType,
        dtype::{DType, FloatElement, TensorElement},
        error::{Result, TorshError},
        shape::Shape,
    };
}
344
// Crate-level smoke tests exercising the public tensor API end to end:
// creation, reshaping, arithmetic, storage sharing, and autograd flags.
#[cfg(test)]
mod integration_tests {
    use super::*;
    use torsh_core::device::DeviceType;
    use torsh_core::dtype::DType;

    // Creation from raw data plus basic metadata accessors.
    #[test]
    fn test_tensor_creation_and_basic_ops() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0];
        let tensor = Tensor::from_data(data, vec![2, 2], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        assert_eq!(tensor.shape().dims(), &[2, 2]);
        assert_eq!(tensor.numel(), 4);
        assert_eq!(tensor.dtype(), DType::F32);
    }

    // `view` reshapes without changing element count; `slice_tensor`
    // extracts a sub-range along a dimension.
    #[test]
    fn test_tensor_reshape_and_view() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
        let tensor = Tensor::from_data(data, vec![2, 3], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let reshaped = tensor.view(&[3, 2]).expect("view should succeed");
        assert_eq!(reshaped.shape().dims(), &[3, 2]);

        // Slice row 0 of dim 0: shape collapses from [2, 3] to [1, 3].
        let slice = tensor
            .slice_tensor(0, 0, 1)
            .expect("slice_tensor should succeed");
        assert_eq!(slice.shape().dims(), &[1, 3]);
    }

    // Element-wise add and mul on same-shape 1-D tensors.
    #[test]
    fn test_tensor_math_operations() {
        let a = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let b = Tensor::from_data(vec![4.0f32, 5.0, 6.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let sum = a.add(&b).expect("addition should succeed");
        assert_eq!(
            sum.data().expect("data retrieval should succeed"),
            vec![5.0, 7.0, 9.0]
        );

        let product = a.mul(&b).expect("multiplication should succeed");
        assert_eq!(
            product.data().expect("data retrieval should succeed"),
            vec![4.0, 10.0, 18.0]
        );
    }

    // Unary math (sqrt on perfect squares — exact in f32) and reduction
    // to a scalar via `norm` / `item`.
    #[test]
    fn test_tensor_advanced_operations() {
        let data = vec![1.0f32, 4.0, 9.0, 16.0];
        let tensor = Tensor::from_data(data, vec![4], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let sqrt_result = tensor.sqrt().expect("sqrt should succeed");
        assert_eq!(
            sqrt_result.data().expect("data retrieval should succeed"),
            vec![1.0, 2.0, 3.0, 4.0]
        );

        let norm = tensor.norm().expect("norm should succeed");
        assert!(norm.item().expect("item extraction should succeed") > 0.0);
    }

    // In-place fill, indexed read via `get_item`, and `gather` with i64
    // index tensors.
    #[test]
    fn test_tensor_data_operations() {
        let mut tensor =
            Tensor::<f32>::zeros(&[2, 3], DeviceType::Cpu).expect("zeros creation should succeed");

        tensor.fill_(5.0).expect("fill should succeed");
        assert_eq!(
            tensor.get_item(&[0, 0]).expect("get_item should succeed"),
            5.0
        );

        let indices = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        // `_src` is unused here — presumably left over from a scatter test;
        // kept so the creation path is still exercised.
        let _src = Tensor::from_data(vec![10.0f32, 20.0], vec![2], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let data_1d = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let tensor_1d = Tensor::from_data(data_1d, vec![5], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        // gather along dim 0 with indices [0, 2] picks elements 1.0 and 3.0.
        let gathered = tensor_1d
            .gather(0, &indices)
            .expect("gather should succeed");
        assert_eq!(
            gathered.data().expect("data retrieval should succeed"),
            vec![1.0, 3.0]
        );
    }

    // Small tensors use in-memory storage, and `clone()` shares storage
    // rather than copying it.
    #[test]
    fn test_tensor_storage_optimization() {
        let small =
            Tensor::<f32>::zeros(&[10], DeviceType::Cpu).expect("zeros creation should succeed");
        assert_eq!(small.storage_type(), "in_memory");

        let tensor1 =
            Tensor::<f32>::ones(&[5], DeviceType::Cpu).expect("ones creation should succeed");
        let tensor2 = tensor1.clone();
        assert!(tensor1.shares_storage(&tensor2));
    }

    // Autograd flags: `requires_grad_` marks the tensor, no gradient exists
    // until backward runs, and `detach` clears the requires-grad flag.
    #[test]
    fn test_gradient_operations() {
        let tensor = Tensor::<f32>::ones(&[2, 2], DeviceType::Cpu)
            .expect("ones creation should succeed")
            .requires_grad_(true);

        assert!(tensor.requires_grad());
        assert!(!tensor.has_grad());

        let detached = tensor.detach();
        assert!(!detached.requires_grad());
    }
}