1#![cfg_attr(not(feature = "std"), no_std)]
53
54#[cfg(not(feature = "std"))]
55extern crate alloc;
56
57pub mod adaptive_auto_tuner;
59pub mod advanced_ops;
60pub mod advanced_simd_ops;
61pub mod algorithmic_optimizations;
62pub mod complex_ops;
63pub mod comprehensive_integration_tests;
64pub mod computation_graph;
65pub mod core_ops;
66pub mod cross_platform_validator;
67pub mod data_ops;
68pub mod expression_optimizer;
69pub mod expression_templates;
70pub mod hardware_accelerators;
71pub mod manipulation;
72pub mod math_ops;
73pub mod memory_optimization;
74pub mod optimization_cli;
75pub mod shape_ops;
76pub mod storage;
77pub mod ultimate_integration_optimizer;
78pub mod ultra_performance_profiler;
79
80#[cfg(feature = "async")]
82pub mod async_ops;
83pub mod auto_batching;
84pub mod backend_integration;
85pub mod bfloat16_ops;
86pub mod broadcast;
87pub mod cache_optimization;
88pub mod conv;
89pub mod convenience;
90pub mod creation;
91pub mod custom_dtype;
92pub mod custom_ops;
93pub mod indexing;
94pub mod lazy_loading;
95pub mod lockfree_cache;
97pub mod memory_pool;
98#[cfg(feature = "memory-profiling")]
99pub mod memory_profiler;
100pub mod nan_inf_detection;
101#[cfg(feature = "operation-logging")]
102pub mod operation_logging;
103pub mod fft;
105pub mod scirs2_backend;
106pub mod scirs2_stats_integration;
107pub mod shape_inference_debugger;
108pub mod sparse;
109pub mod stats;
110pub mod tensor_comprehension;
111pub mod tensor_tracker;
112pub mod tensor_utils;
113pub mod tensor_view; pub mod tensor_views;
115pub mod type_conversions;
116
117#[cfg(feature = "serialize")]
122pub mod serialize;
123
124use torsh_core::{
126 device::DeviceType,
127 dtype::{FloatElement, TensorElement},
128 error::Result,
129};
130
131pub use core_ops::{Operation, Tensor};
133
134pub use convenience::{FluentTensor, TensorConvenience, TensorFluentExt};
136
137pub use sparse::{SparseCSC, SparseCSR, SparseTensor};
142
143pub use custom_ops::{
145 global_registry, CustomOperation, CustomOperationRegistry, OperationMetadata, OperationParams,
146 TensorCustomOps,
147};
148
149pub use storage::{MemoryMappedStorage, TensorStorage};
151
152pub use tensor_view::{TensorView, TensorViewMut};
154
/// Full crate version string, read from `Cargo.toml` at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Major version component.
/// NOTE(review): the numeric components below are hard-coded and must be
/// kept in sync manually with `CARGO_PKG_VERSION` above — confirm on release.
pub const VERSION_MAJOR: u32 = 0;
/// Minor version component (see sync note on `VERSION_MAJOR`).
pub const VERSION_MINOR: u32 = 1;
/// Patch version component (see sync note on `VERSION_MAJOR`).
pub const VERSION_PATCH: u32 = 0;
160
/// Creates a scalar or 1-D tensor from literal values.
///
/// Accepted forms:
/// - `tensor!([1.0, 2.0, 3.0])` — bracketed list → 1-D tensor
/// - `tensor!(1.0, 2.0, 3.0)` — two or more bare values → 1-D tensor
/// - `tensor!(5.0)` — single bare value → scalar tensor
#[macro_export]
macro_rules! tensor {
    // Bracketed list: forward the values as a slice to `creation::tensor_1d`.
    ([$($val:expr),+ $(,)?]) => {
        $crate::creation::tensor_1d(&[$($val),+])
    };

    // Two or more bare values: also a 1-D tensor. This arm requires at least
    // two expressions so that a single value falls through to the scalar arm
    // below — arm order is load-bearing here.
    ($val1:expr, $val2:expr $(, $val:expr)* $(,)?) => {
        $crate::creation::tensor_1d(&[$val1, $val2 $(, $val)*])
    };

    // Single bare value: 0-dimensional (scalar) tensor.
    ($val:expr) => {
        $crate::creation::tensor_scalar($val)
    };
}
179
/// Creates a 2-D tensor from a bracketed list of rows, e.g.
/// `tensor_2d!([[1.0, 2.0], [3.0, 4.0]])`.
///
/// Each `$row` may be any array-/slice-like value exposing `.to_vec()`.
/// Rows are presumably required to have equal length — enforced inside
/// `creation::tensor_2d`, TODO confirm.
#[macro_export]
macro_rules! tensor_2d {
    ([$($row:expr),+ $(,)?]) => {{
        // Materialize each row as an owned Vec first, then hand
        // `creation::tensor_2d` a slice of row slices.
        let rows: Vec<Vec<_>> = vec![$($row.to_vec()),+];
        let row_refs: Vec<&[_]> = rows.iter().map(|row| row.as_slice()).collect();
        $crate::creation::tensor_2d(&row_refs)
    }};
}
189
190impl<T: TensorElement> std::fmt::Debug for Tensor<T> {
192 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
193 write!(
194 f,
195 "Tensor(shape={:?}, dtype={}, device={})",
196 self.shape().dims(),
197 self.dtype(),
198 self.device
199 )
200 }
201}
202
impl<T: TensorElement> Tensor<T> {
    /// Test-only helper: number of strong `Arc` references to this tensor's
    /// backing storage. Lets tests verify storage-sharing behavior (e.g.
    /// that cloning a tensor shares rather than copies the buffer).
    #[cfg(test)]
    pub fn data_ref_count(&self) -> usize {
        use std::sync::Arc;
        // Every storage variant wraps its buffer in an `Arc`; report the
        // strong count of whichever variant this tensor holds. The last two
        // variants only exist when the `simd` feature is enabled, so the
        // match stays exhaustive under every feature combination.
        match &self.storage {
            TensorStorage::InMemory(data) => Arc::strong_count(data),
            TensorStorage::MemoryMapped(storage) => Arc::strong_count(storage),
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => Arc::strong_count(data),
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(storage) => Arc::strong_count(storage),
        }
    }

    /// Builds a CPU tensor from an owned `Vec` and a shape slice.
    ///
    /// Thin convenience wrapper over `from_data` with `DeviceType::Cpu`.
    /// Presumably `data.len()` must equal the product of `shape`; any
    /// validation happens inside `from_data` — TODO confirm.
    pub fn from_vec(data: Vec<T>, shape: &[usize]) -> Result<Self>
    where
        T: Copy,
    {
        Self::from_data(data, shape.to_vec(), DeviceType::Cpu)
    }
}
227
/// Convenience prelude: `use torsh_tensor::prelude::*` brings the most
/// commonly used tensor types, traits, creation helpers, and
/// optimizer/validator entry points into scope in one import.
pub mod prelude {
    pub use crate::advanced_simd_ops::{
        AdvancedSimdOps, ReductionType, SimdConfig, SimdPerformanceInfo,
    };
    pub use crate::algorithmic_optimizations::{
        AlgorithmConfig, AlgorithmPerformanceStats, AlgorithmicOptimizer, SchedulingStrategy,
    };
    pub use crate::comprehensive_integration_tests::{
        run_comprehensive_integration_tests, ComprehensiveIntegrationTestSuite,
        ComprehensiveTestReport, IntegrationAnalysis, IntegrationTestConfig, PerformanceAnalysis,
        StabilityAnalysis, TestCategory,
    };
    pub use crate::core_ops::Operation;
    pub use crate::creation::{eye, ones, rand, randn, zeros};
    pub use crate::cross_platform_validator::{
        CpuArchitecture, CrossPlatformReport, CrossPlatformValidator, GpuVendor,
        HardwareDetectionReport, HardwareDetector, OptimizationConfig, OptimizationReport,
        Platform, PlatformOptimizer, ValidationConfig, ValidationFramework, ValidationReport,
    };
    pub use crate::expression_optimizer::{
        ExpressionGraph, ExpressionNode, ExpressionOptimizer, NodeId, OperationType,
        OptimizationStats, OptimizationStrategy, OptimizerConfig, TensorExpressionOps,
    };
    pub use crate::hardware_accelerators::{
        AccelerationWorkload, ComplexityLevel, CpuAccelerationMetrics, CpuAcceleratorEngine,
        GpuAccelerationMetrics, GpuAcceleratorEngine, HardwareAcceleratorReport,
        HardwareAcceleratorSystem, MemoryAccelerationMetrics, MemoryAcceleratorEngine,
        NetworkAccelerationMetrics, OptimizationCoordinator, SpecializedAcceleratorEngine,
        WorkloadType,
    };
    pub use crate::memory_optimization::{
        AdvancedMemoryPool, AggregateMemoryStats, DefragmentationReport, GlobalMemoryOptimizer,
        MemoryConfig, MemoryStats,
    };
    pub use crate::optimization_cli::{
        run_cli_command, run_optimization_cli, CLICommand, CLIConfig, OptimizationCLI,
        OptimizationLevel, OptimizationType,
    };
    pub use crate::ultimate_integration_optimizer::{
        CrossLayerSynergyGains, EfficiencyImprovements, EnergyEfficiencyImprovements,
        GlobalPerformanceCache, IntelligentLearningSystem, LayerSpecificImprovements,
        OptimizationComplexity, OptimizationStatus, ScalabilityImprovements,
        SystemOptimizationCoordinator, UltimateIntegrationOptimizer, UltimateOptimizationResult,
    };
    // Core tensor types and storage from this crate.
    pub use crate::{Tensor, TensorConvenience, TensorStorage};
    // Fundamental device/dtype/error/shape types re-exported from torsh-core.
    pub use torsh_core::{
        device::DeviceType,
        dtype::{DType, FloatElement, TensorElement},
        error::{Result, TorshError},
        shape::Shape,
    };
}
343
#[cfg(test)]
mod integration_tests {
    use super::*;
    use torsh_core::device::DeviceType;
    use torsh_core::dtype::DType;

    /// Smoke test: a 2x2 f32 tensor reports the expected shape, element
    /// count, and dtype.
    #[test]
    fn test_tensor_creation_and_basic_ops() {
        let t = Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0], vec![2, 2], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        assert_eq!(t.shape().dims(), &[2, 2]);
        assert_eq!(t.numel(), 4);
        assert_eq!(t.dtype(), DType::F32);
    }

    /// `view` reshapes without copying; `slice_tensor` extracts a row range.
    #[test]
    fn test_tensor_reshape_and_view() {
        let t = Tensor::from_data(
            vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0],
            vec![2, 3],
            DeviceType::Cpu,
        )
        .expect("tensor creation should succeed");

        let viewed = t.view(&[3, 2]).expect("view should succeed");
        assert_eq!(viewed.shape().dims(), &[3, 2]);

        let first_row = t.slice_tensor(0, 0, 1).expect("slice_tensor should succeed");
        assert_eq!(first_row.shape().dims(), &[1, 3]);
    }

    /// Elementwise add and mul on matching 1-D tensors.
    #[test]
    fn test_tensor_math_operations() {
        let lhs = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let rhs = Tensor::from_data(vec![4.0f32, 5.0, 6.0], vec![3], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let total = lhs.add(&rhs).expect("addition should succeed");
        assert_eq!(
            total.data().expect("data retrieval should succeed"),
            vec![5.0, 7.0, 9.0]
        );

        let prod = lhs.mul(&rhs).expect("multiplication should succeed");
        assert_eq!(
            prod.data().expect("data retrieval should succeed"),
            vec![4.0, 10.0, 18.0]
        );
    }

    /// Unary math: elementwise sqrt and a positive L2 norm.
    #[test]
    fn test_tensor_advanced_operations() {
        let squares = Tensor::from_data(vec![1.0f32, 4.0, 9.0, 16.0], vec![4], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let roots = squares.sqrt().expect("sqrt should succeed");
        assert_eq!(
            roots.data().expect("data retrieval should succeed"),
            vec![1.0, 2.0, 3.0, 4.0]
        );

        let magnitude = squares.norm().expect("norm should succeed");
        assert!(magnitude.item().expect("item extraction should succeed") > 0.0);
    }

    /// In-place fill, indexed read, and index-based gather.
    #[test]
    fn test_tensor_data_operations() {
        let mut filled =
            Tensor::<f32>::zeros(&[2, 3], DeviceType::Cpu).expect("zeros creation should succeed");
        filled.fill_(5.0).expect("fill should succeed");
        assert_eq!(
            filled.get_item(&[0, 0]).expect("get_item should succeed"),
            5.0
        );

        let idx = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)
            .expect("tensor creation should succeed");
        let _src = Tensor::from_data(vec![10.0f32, 20.0], vec![2], DeviceType::Cpu)
            .expect("tensor creation should succeed");

        let source = Tensor::from_data(
            vec![1.0f32, 2.0, 3.0, 4.0, 5.0],
            vec![5],
            DeviceType::Cpu,
        )
        .expect("tensor creation should succeed");
        let picked = source.gather(0, &idx).expect("gather should succeed");
        assert_eq!(
            picked.data().expect("data retrieval should succeed"),
            vec![1.0, 3.0]
        );
    }

    /// Small tensors live in memory, and `clone` shares the backing storage.
    #[test]
    fn test_tensor_storage_optimization() {
        let tiny =
            Tensor::<f32>::zeros(&[10], DeviceType::Cpu).expect("zeros creation should succeed");
        assert_eq!(tiny.storage_type(), "in_memory");

        let original =
            Tensor::<f32>::ones(&[5], DeviceType::Cpu).expect("ones creation should succeed");
        let alias = original.clone();
        assert!(original.shares_storage(&alias));
    }

    /// Gradient tracking flag round-trip: enable, inspect, then detach.
    #[test]
    fn test_gradient_operations() {
        let tracked = Tensor::<f32>::ones(&[2, 2], DeviceType::Cpu)
            .expect("ones creation should succeed")
            .requires_grad_(true);
        assert!(tracked.requires_grad());
        assert!(!tracked.has_grad());

        let detached = tracked.detach();
        assert!(!detached.requires_grad());
    }
}