pub mod aligned;
pub mod allocation;
pub mod core;
pub mod mapped;
pub mod memory_format;
pub mod memory_info;
pub mod numa;
pub mod operations;
pub mod pooling;
pub mod registry;
pub mod views;
pub use self::core::{SharedStorage, Storage};
pub use self::allocation::{
AllocationRequest, BackendAllocator, RawMemoryHandle, TypedMemoryHandle, TypedMemoryStats,
};
pub use self::memory_info::{AllocationStrategy, MemoryInfo};
pub use self::operations::{
BackendAsyncMemory, BackendMemoryCopy, CopyOperation, MemoryOperationStats,
};
pub use self::memory_format::{
ConversionCost, FormatPreference, HardwareType, MemoryFormat, OperationType,
};
pub use self::views::{StorageView, ViewBuilder, ViewStatistics};
pub use self::pooling::{
allocate_pooled,
allocate_pooled_with_value,
clear_pooled_memory,
configure_pools,
deallocate_pooled,
pooled_memory_stats,
warmup_pools,
MemoryPool,
PoolConfig,
PoolSizeStats,
PoolStats,
};
pub use self::numa::{
MemoryAccessPattern, NumaAllocator, NumaMemoryHandle, NumaMetadata, NumaPolicy, NumaTopology,
NumaTopologyStats, WorkloadType,
};
pub use self::mapped::{
AccessPatternStats, LazyLoadConfig, MappedSlice, MappedStorage, MappedStorageStats,
};
pub use self::registry::{
AllocatorCapability, AllocatorMetadata, AllocatorRegistry, AllocatorRequirements,
RegistryStatistics,
};
pub use self::aligned::{
alignment, AlignedVec, AlignmentChecker, SimdLayoutAnalysis, SimdLayoutAnalyzer,
};
pub use self::allocation::utils as allocation_utils;
pub use self::memory_format::utils as memory_format_utils;
pub use self::numa::utils as numa_utils;
pub use self::operations::utils as operations_utils;
pub use self::registry::utils as registry_utils;
pub use self::views::utils as view_utils;
pub use self::registry::{global_registry, initialize_global_registry};
pub use self::allocation::TypedMemoryHandle as TensorMemoryHandle;
pub use self::core::SharedStorage as SharedTensorStorage;
/// Commonly used storage types, re-exported for convenient glob import
/// (`use ...::storage::prelude::*;`).
///
/// Keep this list small: only the handful of names almost every storage
/// consumer needs (core traits, handles, and the main config enums).
pub mod prelude {
    pub use super::allocation::{BackendAllocator, RawMemoryHandle, TypedMemoryHandle};
    pub use super::core::{SharedStorage, Storage};
    pub use super::memory_format::MemoryFormat;
    pub use super::memory_info::{AllocationStrategy, MemoryInfo};
    pub use super::numa::{NumaPolicy, NumaTopology};
    pub use super::pooling::{allocate_pooled, deallocate_pooled};
    pub use super::registry::AllocatorRegistry;
    pub use super::views::StorageView;
}
/// Convenience helpers that span the individual storage submodules.
pub mod utils {
    use super::*;

    /// Baseline [`StorageConfig`]: contiguous layout, immediate allocation,
    /// NUMA local-preferred, pooling enabled, memory mapping disabled.
    pub fn default_storage_config() -> StorageConfig {
        StorageConfig {
            memory_format: MemoryFormat::Contiguous,
            allocation_strategy: AllocationStrategy::Immediate,
            numa_policy: NumaPolicy::LocalPreferred,
            enable_pooling: true,
            enable_memory_mapping: false,
            lazy_load_config: mapped::LazyLoadConfig::default(),
        }
    }

    /// Recommends the memory format best suited to `shape` for the given
    /// operation and hardware. Thin wrapper over
    /// [`memory_format_utils::optimal_format_for_tensor`].
    pub fn recommend_memory_format(
        shape: &[usize],
        operation: OperationType,
        hardware: HardwareType,
    ) -> MemoryFormat {
        memory_format_utils::optimal_format_for_tensor(shape, operation, hardware)
    }

    /// Maps an access pattern (plus the allocation size) onto an allocation
    /// strategy:
    /// * `Lazy` -> lazy allocation,
    /// * small (<= 64 KiB) `Frequent` allocations -> pooling,
    /// * huge (>= 1 GiB) `Large` allocations -> pre-allocation,
    /// * everything else -> immediate allocation.
    pub fn recommend_allocation_strategy(
        size_bytes: usize,
        access_pattern: AccessPattern,
    ) -> AllocationStrategy {
        const POOLING_LIMIT: usize = 64 * 1024; // 64 KiB
        const HUGE_ALLOCATION: usize = 1024 * 1024 * 1024; // 1 GiB
        match access_pattern {
            AccessPattern::Lazy => AllocationStrategy::Lazy,
            AccessPattern::Frequent if size_bytes <= POOLING_LIMIT => AllocationStrategy::Pooled,
            AccessPattern::Large if size_bytes >= HUGE_ALLOCATION => {
                AllocationStrategy::PreAllocated
            }
            _ => AllocationStrategy::Immediate,
        }
    }

    /// Returns `true` when NUMA-aware placement is worthwhile: more than one
    /// node, a combined allocation volume above 1 MiB, and a NUMA topology
    /// that the `numa` submodule actually recognizes.
    pub fn should_use_numa(allocation_sizes: &[usize], numa_topology: &NumaTopology) -> bool {
        // Single-node machines can never benefit; bail out early.
        if numa_topology.node_count <= 1 {
            return false;
        }
        let total_bytes: usize = allocation_sizes.iter().sum();
        total_bytes > 1024 * 1024 && numa_utils::has_numa_topology(numa_topology)
    }

    /// Builds a [`StorageView`] whose window length matches the expected
    /// access pattern: the full storage for sequential/frequent/large use, a
    /// 64 KiB cap for random access, and a single 4 KiB page for lazy use.
    pub fn create_optimized_view<S: Storage>(
        storage: SharedStorage<S>,
        access_pattern: AccessPattern,
    ) -> Result<StorageView<S>, crate::error::TorshError> {
        let full_len = storage.get().len();
        let window_len = match access_pattern {
            // Keep the hot window small so random touches stay cache-friendly.
            AccessPattern::Random => full_len.min(64 * 1024),
            // Start with one page; the caller grows the view on demand.
            AccessPattern::Lazy => full_len.min(4096),
            // These patterns benefit from seeing the whole storage at once.
            AccessPattern::Sequential | AccessPattern::Frequent | AccessPattern::Large => full_len,
        };
        StorageView::new(storage, 0, window_len)
    }

    /// Captures a point-in-time snapshot of system-wide storage counters:
    /// pool statistics plus the contents of the global allocator registry.
    pub fn storage_system_stats() -> StorageSystemStats {
        let pool_stats = pooled_memory_stats();
        let total_cached: u64 = pool_stats
            .values()
            .map(|s| s.total_cached_allocations as u64)
            .sum();
        let registry = global_registry();
        let registry_stats = registry
            .read()
            .expect("lock should not be poisoned")
            .statistics();
        StorageSystemStats {
            pooled_memory_types: pool_stats.len(),
            total_pooled_allocations: total_cached,
            registered_allocators: registry_stats.total_allocators,
            backend_types: registry_stats.backend_counts.len(),
        }
    }
}
/// Tunable knobs controlling how tensor storage is laid out and allocated.
///
/// Obtain a baseline via [`StorageConfig::default`] (which delegates to
/// `utils::default_storage_config`) and override individual fields as needed.
#[derive(Debug, Clone)]
pub struct StorageConfig {
    /// Element layout in memory (e.g. contiguous vs. channels-last).
    pub memory_format: MemoryFormat,
    /// When and how backing memory is acquired (immediate, pooled, lazy, ...).
    pub allocation_strategy: AllocationStrategy,
    /// NUMA placement policy for multi-node machines.
    pub numa_policy: NumaPolicy,
    /// Whether allocations may be served from the memory pools.
    pub enable_pooling: bool,
    /// Whether memory-mapped (file-backed) storage may be used.
    pub enable_memory_mapping: bool,
    /// Settings for lazily loading mapped storage.
    pub lazy_load_config: mapped::LazyLoadConfig,
}
impl Default for StorageConfig {
    /// Delegates to [`utils::default_storage_config`] so the single source of
    /// truth for the baseline configuration lives in one place.
    fn default() -> Self {
        utils::default_storage_config()
    }
}
/// Expected access pattern for an allocation; consumed by the `utils` helpers
/// to pick an allocation strategy and an initial view size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessPattern {
    /// Linear front-to-back traversal; views expose the full storage.
    Sequential,
    /// Scattered reads/writes; views are capped at 64 KiB.
    Random,
    /// Allocated/freed often; small sizes (<= 64 KiB) are pooled.
    Frequent,
    /// Very large data; sizes >= 1 GiB are pre-allocated.
    Large,
    /// Data may never be fully touched; favors lazy allocation and a
    /// single-page (4 KiB) initial view.
    Lazy,
}
/// Point-in-time snapshot of storage-subsystem counters, produced by
/// [`utils::storage_system_stats`].
#[derive(Debug, Clone)]
pub struct StorageSystemStats {
    /// Number of pool entries reported by `pooled_memory_stats()` —
    /// presumably one per pooled element type (TODO confirm key semantics).
    pub pooled_memory_types: usize,
    /// Total cached allocations summed across all pools.
    pub total_pooled_allocations: u64,
    /// Allocators currently registered in the global registry.
    pub registered_allocators: usize,
    /// Number of distinct backend types among the registered allocators.
    pub backend_types: usize,
}
impl std::fmt::Display for StorageSystemStats {
    /// Renders a compact one-line summary, e.g.
    /// `StorageSystem(pooled_types=2, pooled_allocs=8, allocators=3, backends=1)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "StorageSystem(pooled_types={pooled_types}, pooled_allocs={pooled_allocs}, allocators={allocators}, backends={backends})",
            pooled_types = self.pooled_memory_types,
            pooled_allocs = self.total_pooled_allocations,
            allocators = self.registered_allocators,
            backends = self.backend_types,
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::device::CpuDevice;

    /// Minimal CPU-backed `Storage` implementation for exercising the module
    /// without a real backend allocator.
    #[derive(Debug)]
    struct TestStorage {
        // Backing buffer; its length is what `len()` reports.
        data: Vec<f32>,
        // Device handle returned by `device()`.
        device: CpuDevice,
    }

    impl Storage for TestStorage {
        type Elem = f32;
        type Device = CpuDevice;

        // Allocates a zero-initialized buffer of `size` elements.
        fn allocate(device: &Self::Device, size: usize) -> Result<Self, crate::error::TorshError> {
            Ok(TestStorage {
                data: vec![0.0; size],
                device: device.clone(),
            })
        }

        fn len(&self) -> usize {
            self.data.len()
        }

        fn device(&self) -> &Self::Device {
            &self.device
        }

        // Deep copy of both the buffer and the device handle.
        fn clone_storage(&self) -> Result<Self, crate::error::TorshError> {
            Ok(TestStorage {
                data: self.data.clone(),
                device: self.device.clone(),
            })
        }
    }

    // Shared-storage reference counting: cloning the handle bumps the strong
    // count without copying the underlying buffer.
    #[test]
    fn test_storage_integration() {
        let device = CpuDevice::new();
        let storage = TestStorage::allocate(&device, 100).expect("allocate should succeed");
        let shared = SharedStorage::new(storage);
        assert_eq!(shared.get().len(), 100);
        assert_eq!(shared.strong_count(), 1);
        let _cloned_shared = shared.clone();
        assert_eq!(shared.strong_count(), 2);
    }

    // Views carry an (offset, len) window; sub-slicing composes offsets
    // relative to the parent view.
    #[test]
    fn test_storage_view_integration() {
        let device = CpuDevice::new();
        let storage = TestStorage::allocate(&device, 100).expect("allocate should succeed");
        let shared = SharedStorage::new(storage);
        let view =
            StorageView::new(shared.clone(), 10, 20).expect("StorageView::new should succeed");
        assert_eq!(view.offset(), 10);
        assert_eq!(view.view_len(), 20);
        let sub_view = view.slice(5, 10).expect("slice should succeed");
        // Parent offset 10 + slice offset 5 = absolute offset 15.
        assert_eq!(sub_view.offset(), 15);
        assert_eq!(sub_view.view_len(), 10);
    }

    // Basic predicates on the `MemoryFormat` enum.
    #[test]
    fn test_memory_format_integration() {
        let format = MemoryFormat::Contiguous;
        assert!(format.is_contiguous());
        assert!(!format.is_channels_last());
        let channels_last = MemoryFormat::ChannelsLast;
        assert!(channels_last.is_channels_last());
        // Channels-last layouts expect 4-D tensors.
        assert_eq!(channels_last.expected_dims(), Some(4));
    }

    // `StorageConfig::default()` must match `utils::default_storage_config()`.
    #[test]
    fn test_storage_config() {
        let config = StorageConfig::default();
        assert_eq!(config.memory_format, MemoryFormat::Contiguous);
        assert_eq!(config.allocation_strategy, AllocationStrategy::Immediate);
        assert!(config.enable_pooling);
    }

    #[test]
    fn test_utils_recommendations() {
        // Convolution on GPU with a 4-D shape should prefer channels-last.
        let shape = [1, 3, 224, 224];
        let format =
            utils::recommend_memory_format(&shape, OperationType::Convolution, HardwareType::GPU);
        assert_eq!(format, MemoryFormat::ChannelsLast);
        // Small frequent allocations go to the pool...
        let strategy = utils::recommend_allocation_strategy(1024, AccessPattern::Frequent);
        assert_eq!(strategy, AllocationStrategy::Pooled);
        // ...while huge (>= 1 GiB) allocations are pre-allocated.
        let strategy =
            utils::recommend_allocation_strategy(2 * 1024 * 1024 * 1024, AccessPattern::Large);
        assert_eq!(strategy, AllocationStrategy::PreAllocated);
    }

    #[test]
    fn test_optimized_view_creation() {
        let device = CpuDevice::new();
        let storage = TestStorage::allocate(&device, 1000).expect("allocate should succeed");
        let shared = SharedStorage::new(storage);
        // Sequential access exposes the full storage.
        let view = utils::create_optimized_view(shared.clone(), AccessPattern::Sequential)
            .expect("create_optimized_view should succeed");
        assert_eq!(view.view_len(), 1000);
        // Random access is capped at 64 KiB.
        let view = utils::create_optimized_view(shared.clone(), AccessPattern::Random)
            .expect("create_optimized_view should succeed");
        assert!(view.view_len() <= 64 * 1024);
    }

    // Smoke test: the prelude glob brings the common names into scope.
    #[test]
    fn test_prelude_imports() {
        use super::prelude::*;
        let device = CpuDevice::new();
        let storage = TestStorage::allocate(&device, 10).expect("allocate should succeed");
        let _shared = SharedStorage::new(storage);
        let _format = MemoryFormat::Contiguous;
        let _strategy = AllocationStrategy::Immediate;
    }

    // Legacy paths re-exported at module level must keep resolving.
    #[test]
    fn test_backward_compatibility() {
        let _: MemoryFormat = MemoryFormat::Contiguous;
        let _: AllocationStrategy = AllocationStrategy::Immediate;
        let _: NumaPolicy = NumaPolicy::LocalPreferred;
        let _config = utils::default_storage_config();
        let _stats = utils::storage_system_stats();
    }

    // The stats snapshot is constructible and all fields are readable.
    #[test]
    fn test_storage_system_stats() {
        let stats = utils::storage_system_stats();
        let _check = stats.registered_allocators;
        let _check = stats.backend_types;
        let _check = stats.pooled_memory_types;
    }
}