#![cfg_attr(not(feature = "std"), no_std)]
#[cfg(not(feature = "std"))]
extern crate alloc;
// ---- Core tensor machinery, optimization, and profiling ----
pub mod adaptive_auto_tuner;
pub mod advanced_ops;
pub mod advanced_simd_ops;
pub mod algorithmic_optimizations;
pub mod complex_ops;
pub mod comprehensive_integration_tests;
pub mod computation_graph;
pub mod core_ops;
pub mod cross_platform_validator;
pub mod data_ops;
pub mod expression_optimizer;
pub mod expression_templates;
pub mod hardware_accelerators;
pub mod manipulation;
pub mod math_ops;
pub mod memory_optimization;
pub mod optimization_cli;
pub mod shape_ops;
pub mod storage;
pub mod ultimate_integration_optimizer;
pub mod ultra_performance_profiler;

// ---- Operations and utilities (some feature-gated) ----
#[cfg(feature = "async")]
pub mod async_ops;
pub mod auto_batching;
pub mod backend_integration;
pub mod bfloat16_ops;
pub mod broadcast;
pub mod cache_optimization;
pub mod conv;
pub mod convenience;
pub mod creation;
pub mod custom_dtype;
pub mod custom_ops;
pub mod indexing;
pub mod lazy_loading;
pub mod lockfree_cache;
pub mod memory_pool;
#[cfg(feature = "memory-profiling")]
pub mod memory_profiler;
pub mod nan_inf_detection;
#[cfg(feature = "operation-logging")]
pub mod operation_logging;
pub mod fft;
pub mod scirs2_backend;
pub mod scirs2_stats_integration;
pub mod shape_inference_debugger;
pub mod simd_ops_f32;
pub mod sparse;
pub mod stats;
pub mod tensor_comprehension;
pub mod tensor_tracker;
pub mod tensor_utils;
// One item per line (rustfmt); was `pub mod tensor_view; pub mod tensor_views;`.
pub mod tensor_view;
pub mod tensor_views;
pub mod type_conversions;
#[cfg(feature = "serialize")]
pub mod serialize;
use torsh_core::{
device::DeviceType,
dtype::{FloatElement, TensorElement},
error::Result,
};
// Primary public surface: the `Tensor` type and its `Operation` enum.
pub use core_ops::{Operation, Tensor};
// Fluent/builder-style convenience APIs for tensors.
pub use convenience::{FluentTensor, TensorConvenience, TensorFluentExt};
// Sparse tensor formats (CSC/CSR) and the generic sparse wrapper.
pub use sparse::{SparseCSC, SparseCSR, SparseTensor};
// User-defined operation registration and lookup.
pub use custom_ops::{
global_registry, CustomOperation, CustomOperationRegistry, OperationMetadata, OperationParams,
TensorCustomOps,
};
// Backing-storage abstractions (in-memory and memory-mapped).
pub use storage::{MemoryMappedStorage, TensorStorage};
// Borrowed immutable/mutable views over tensor data.
pub use tensor_view::{TensorView, TensorViewMut};
// Crate version string, taken from Cargo.toml at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
// NOTE(review): the numeric components below are hard-coded and must be kept in
// sync with `CARGO_PKG_VERSION` above manually — confirm they match Cargo.toml.
pub const VERSION_MAJOR: u32 = 0;
pub const VERSION_MINOR: u32 = 1;
pub const VERSION_PATCH: u32 = 0;
/// Creates a tensor from literal values.
///
/// Accepted forms (matched top to bottom):
/// - `tensor!([a, b, ...])` — bracketed list → 1-D tensor via `creation::tensor_1d`
/// - `tensor!(a, b, ...)`   — two or more bare values → 1-D tensor
/// - `tensor!(a)`           — single bare value → scalar tensor via `creation::tensor_scalar`
#[macro_export]
macro_rules! tensor {
// Bracketed list of one or more values (trailing comma allowed) → 1-D tensor.
([$($val:expr),+ $(,)?]) => {
$crate::creation::tensor_1d(&[$($val),+])
};
// Two or more bare values → 1-D tensor. Requiring at least two values here
// lets a single bare value fall through to the scalar arm below.
($val1:expr, $val2:expr $(, $val:expr)* $(,)?) => {
$crate::creation::tensor_1d(&[$val1, $val2 $(, $val)*])
};
// Single value → 0-D (scalar) tensor.
($val:expr) => {
$crate::creation::tensor_scalar($val)
};
}
/// Creates a 2-D tensor from a bracketed list of row expressions, where each
/// row is anything providing `.to_vec()` (e.g. arrays or slices).
///
/// Rows are first collected into owned `Vec`s, then handed to
/// `creation::tensor_2d` as a slice of borrowed row slices.
// NOTE(review): `Vec`/`vec!` resolve at the macro call site, so no_std callers
// presumably need the alloc prelude in scope — confirm.
#[macro_export]
macro_rules! tensor_2d {
([$($row:expr),+ $(,)?]) => {{
// Own each row first so the borrowed `row_refs` below stay valid.
let rows: Vec<Vec<_>> = vec![$($row.to_vec()),+];
let row_refs: Vec<&[_]> = rows.iter().map(|row| row.as_slice()).collect();
$crate::creation::tensor_2d(&row_refs)
}};
}
/// Compact `Debug` rendering: shape, dtype, and device only — element data is
/// deliberately omitted so debug output stays small for large tensors.
///
/// Fix: this impl previously used `std::fmt`, but the crate declares
/// `#![cfg_attr(not(feature = "std"), no_std)]` and the impl is not cfg-gated,
/// so no_std builds could not compile it. `core::fmt` is used instead; `std`
/// re-exports `core::fmt`, so std builds behave identically.
impl<T: TensorElement> core::fmt::Debug for Tensor<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(
            f,
            "Tensor(shape={:?}, dtype={}, device={})",
            self.shape().dims(),
            self.dtype(),
            self.device
        )
    }
}
impl<T: TensorElement> Tensor<T> {
/// Test-only introspection: strong reference count of the underlying storage
/// allocation. Lets tests verify that clones share storage rather than copy.
#[cfg(test)]
pub fn data_ref_count(&self) -> usize {
use std::sync::Arc;
// Every storage variant wraps its buffer in an `Arc`; the last two variants
// only exist when the `simd` feature is enabled.
match &self.storage {
TensorStorage::InMemory(data) => Arc::strong_count(data),
TensorStorage::MemoryMapped(storage) => Arc::strong_count(storage),
#[cfg(feature = "simd")]
TensorStorage::Aligned(data) => Arc::strong_count(data),
#[cfg(feature = "simd")]
TensorStorage::SimdOptimized(storage) => Arc::strong_count(storage),
}
}
/// Builds a CPU tensor from an owned `Vec` and a shape slice.
///
/// Thin convenience wrapper over `from_data`; the element count is
/// presumably validated against `shape` there — confirm in `from_data`.
pub fn from_vec(data: Vec<T>, shape: &[usize]) -> Result<Self>
where
T: Copy,
{
Self::from_data(data, shape.to_vec(), DeviceType::Cpu)
}
}
/// Convenience prelude: `use torsh_tensor::prelude::*;` pulls in the tensor
/// type, creation helpers, optimizer/validator APIs, and core torsh types in
/// one import.
pub mod prelude {
// SIMD-accelerated operation APIs and their configuration/metrics types.
pub use crate::advanced_simd_ops::{
AdvancedSimdOps, ReductionType, SimdConfig, SimdPerformanceInfo,
};
// Algorithm-level optimization configuration and scheduling.
pub use crate::algorithmic_optimizations::{
AlgorithmConfig, AlgorithmPerformanceStats, AlgorithmicOptimizer, SchedulingStrategy,
};
// Integration-test harness types and its entry point.
pub use crate::comprehensive_integration_tests::{
run_comprehensive_integration_tests, ComprehensiveIntegrationTestSuite,
ComprehensiveTestReport, IntegrationAnalysis, IntegrationTestConfig, PerformanceAnalysis,
StabilityAnalysis, TestCategory,
};
pub use crate::core_ops::Operation;
// Common tensor constructors.
pub use crate::creation::{eye, ones, rand, randn, zeros};
// Platform/hardware detection and cross-platform validation.
pub use crate::cross_platform_validator::{
CpuArchitecture, CrossPlatformReport, CrossPlatformValidator, GpuVendor,
HardwareDetectionReport, HardwareDetector, OptimizationConfig, OptimizationReport,
Platform, PlatformOptimizer, ValidationConfig, ValidationFramework, ValidationReport,
};
// Expression-graph building and optimization.
pub use crate::expression_optimizer::{
ExpressionGraph, ExpressionNode, ExpressionOptimizer, NodeId, OperationType,
OptimizationStats, OptimizationStrategy, OptimizerConfig, TensorExpressionOps,
};
// Hardware accelerator engines (CPU/GPU/memory/network) and their metrics.
pub use crate::hardware_accelerators::{
AccelerationWorkload, ComplexityLevel, CpuAccelerationMetrics, CpuAcceleratorEngine,
GpuAccelerationMetrics, GpuAcceleratorEngine, HardwareAcceleratorReport,
HardwareAcceleratorSystem, MemoryAccelerationMetrics, MemoryAcceleratorEngine,
NetworkAccelerationMetrics, OptimizationCoordinator, SpecializedAcceleratorEngine,
WorkloadType,
};
// Memory pooling and defragmentation.
pub use crate::memory_optimization::{
AdvancedMemoryPool, AggregateMemoryStats, DefragmentationReport, GlobalMemoryOptimizer,
MemoryConfig, MemoryStats,
};
// CLI front-end for the optimization tooling.
pub use crate::optimization_cli::{
run_cli_command, run_optimization_cli, CLICommand, CLIConfig, OptimizationCLI,
OptimizationLevel, OptimizationType,
};
// Whole-system ("ultimate") integration optimizer result/report types.
pub use crate::ultimate_integration_optimizer::{
CrossLayerSynergyGains, EfficiencyImprovements, EnergyEfficiencyImprovements,
GlobalPerformanceCache, IntelligentLearningSystem, LayerSpecificImprovements,
OptimizationComplexity, OptimizationStatus, ScalabilityImprovements,
SystemOptimizationCoordinator, UltimateIntegrationOptimizer, UltimateOptimizationResult,
};
// Crate-root re-exports plus the core torsh types most callers need.
pub use crate::{Tensor, TensorConvenience, TensorStorage};
pub use torsh_core::{
device::DeviceType,
dtype::{DType, FloatElement, TensorElement},
error::{Result, TorshError},
shape::Shape,
};
}
// Smoke-level integration tests exercising the public `Tensor` API end to end:
// creation, reshape/view, elementwise math, data access, storage sharing, and
// gradient flags.
#[cfg(test)]
mod integration_tests {
use super::*;
use torsh_core::device::DeviceType;
use torsh_core::dtype::DType;
// Construction from a flat buffer: shape, element count, and f32 dtype.
#[test]
fn test_tensor_creation_and_basic_ops() {
let data = vec![1.0f32, 2.0, 3.0, 4.0];
let tensor = Tensor::from_data(data, vec![2, 2], DeviceType::Cpu)
.expect("tensor creation should succeed");
assert_eq!(tensor.shape().dims(), &[2, 2]);
assert_eq!(tensor.numel(), 4);
assert_eq!(tensor.dtype(), DType::F32);
}
// `view` reshapes without copying; `slice_tensor(dim, start, end)` takes a
// sub-range along one dimension.
#[test]
fn test_tensor_reshape_and_view() {
let data = vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0];
let tensor = Tensor::from_data(data, vec![2, 3], DeviceType::Cpu)
.expect("tensor creation should succeed");
let reshaped = tensor.view(&[3, 2]).expect("view should succeed");
assert_eq!(reshaped.shape().dims(), &[3, 2]);
let slice = tensor
.slice_tensor(0, 0, 1)
.expect("slice_tensor should succeed");
assert_eq!(slice.shape().dims(), &[1, 3]);
}
// Elementwise add/mul against exact expected values.
#[test]
fn test_tensor_math_operations() {
let a = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)
.expect("tensor creation should succeed");
let b = Tensor::from_data(vec![4.0f32, 5.0, 6.0], vec![3], DeviceType::Cpu)
.expect("tensor creation should succeed");
let sum = a.add(&b).expect("addition should succeed");
assert_eq!(
sum.data().expect("data retrieval should succeed"),
vec![5.0, 7.0, 9.0]
);
let product = a.mul(&b).expect("multiplication should succeed");
assert_eq!(
product.data().expect("data retrieval should succeed"),
vec![4.0, 10.0, 18.0]
);
}
// `sqrt` on perfect squares (exact in f32) and `norm` positivity.
#[test]
fn test_tensor_advanced_operations() {
let data = vec![1.0f32, 4.0, 9.0, 16.0];
let tensor = Tensor::from_data(data, vec![4], DeviceType::Cpu)
.expect("tensor creation should succeed");
let sqrt_result = tensor.sqrt().expect("sqrt should succeed");
assert_eq!(
sqrt_result.data().expect("data retrieval should succeed"),
vec![1.0, 2.0, 3.0, 4.0]
);
let norm = tensor.norm().expect("norm should succeed");
assert!(norm.item().expect("item extraction should succeed") > 0.0);
}
// In-place fill, per-index read via `get_item`, and index-based `gather`.
#[test]
fn test_tensor_data_operations() {
let mut tensor =
Tensor::<f32>::zeros(&[2, 3], DeviceType::Cpu).expect("zeros creation should succeed");
tensor.fill_(5.0).expect("fill should succeed");
assert_eq!(
tensor.get_item(&[0, 0]).expect("get_item should succeed"),
5.0
);
let indices = Tensor::from_data(vec![0i64, 2], vec![2], DeviceType::Cpu)
.expect("tensor creation should succeed");
// NOTE(review): `_src` is constructed but never used — presumably left over
// from a scatter test; confirm whether it can be removed.
let _src = Tensor::from_data(vec![10.0f32, 20.0], vec![2], DeviceType::Cpu)
.expect("tensor creation should succeed");
let data_1d = vec![1.0f32, 2.0, 3.0, 4.0, 5.0];
let tensor_1d = Tensor::from_data(data_1d, vec![5], DeviceType::Cpu)
.expect("tensor creation should succeed");
// Gather elements at positions 0 and 2 along dim 0 → [1.0, 3.0].
let gathered = tensor_1d
.gather(0, &indices)
.expect("gather should succeed");
assert_eq!(
gathered.data().expect("data retrieval should succeed"),
vec![1.0, 3.0]
);
}
// Storage classification and Arc-based storage sharing between clones.
#[test]
fn test_tensor_storage_optimization() {
let small =
Tensor::<f32>::zeros(&[10], DeviceType::Cpu).expect("zeros creation should succeed");
assert_eq!(small.storage_type(), "in_memory");
let tensor1 =
Tensor::<f32>::ones(&[5], DeviceType::Cpu).expect("ones creation should succeed");
let tensor2 = tensor1.clone();
assert!(tensor1.shares_storage(&tensor2));
}
// `requires_grad_` sets the flag, no gradient exists before backward, and
// `detach` produces a tensor outside the autograd graph.
#[test]
fn test_gradient_operations() {
let tensor = Tensor::<f32>::ones(&[2, 2], DeviceType::Cpu)
.expect("ones creation should succeed")
.requires_grad_(true);
assert!(tensor.requires_grad());
assert!(!tensor.has_grad());
let detached = tensor.detach();
assert!(!detached.requires_grad());
}
}