use crate::profiler::Profiler;
use crate::Device;
use parking_lot::{Mutex, RwLock};
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use torsh_core::error::Result;
#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, string::String, vec::Vec};
use super::allocation::MemoryAllocation;
use super::allocation::PressureLevel;
use super::allocation::{AccessPattern, PerformanceHint};
use super::fragmentation::{FragmentationConfig, FragmentationTracker};
use super::pressure::{MemoryPressureEvent, MemoryPressureIndicators};
use super::scirs2::{ScirS2Integration, ScirS2IntegrationConfig};
/// Memory profiler that layers allocation tracking, usage-history snapshots,
/// pressure events, access-pattern records, fragmentation tracking, and
/// SciRS2 integration on top of a base [`Profiler`].
///
/// All shared state is wrapped in `Arc<RwLock<..>>` / `Arc<Mutex<..>>`
/// (parking_lot) so the profiler can be shared across threads.
pub struct MemoryProfiler {
    /// Underlying profiler implementation this memory profiler wraps.
    base_profiler: Box<dyn Profiler + Send + Sync>,
    /// Live allocations keyed by a `usize` id.
    /// NOTE(review): assumed to be an allocation id/address — confirm at call sites.
    allocations: Arc<RwLock<HashMap<usize, MemoryAllocation>>>,
    /// Per-pool statistics keyed by pool id (underscore-prefixed: not yet used here).
    _pool_stats: Arc<RwLock<HashMap<String, MemoryPoolStats>>>,
    /// Chronological history of memory snapshots.
    usage_history: Arc<Mutex<VecDeque<MemorySnapshot>>>,
    /// Recorded memory-pressure events.
    pressure_events: Arc<Mutex<Vec<MemoryPressureEvent>>>,
    /// Observed access patterns, keyed in the same `usize` id space as `allocations`.
    access_patterns: Arc<RwLock<HashMap<usize, AccessPattern>>>,
    /// Static configuration controlling which subsystems are enabled.
    config: MemoryProfilerConfig,
    /// Process-wide aggregate counters.
    global_stats: Arc<Mutex<GlobalMemoryStats>>,
    /// Peak allocated bytes observed per device.
    peak_watermarks: Arc<RwLock<HashMap<Device, usize>>>,
    /// Fragmentation analysis state.
    fragmentation_tracker: Arc<Mutex<FragmentationTracker>>,
    /// SciRS2 integration state.
    scirs2_integration: Arc<Mutex<ScirS2Integration>>,
}
/// Configuration knobs for [`MemoryProfiler`]; see `Default` impl for defaults.
#[derive(Debug)]
pub struct MemoryProfilerConfig {
    /// Enable tracking of individual allocations.
    pub enable_allocation_tracking: bool,
    /// Enable access-pattern analysis.
    pub enable_access_pattern_analysis: bool,
    /// Enable memory-pressure monitoring.
    pub enable_pressure_monitoring: bool,
    /// Enable fragmentation tracking.
    pub enable_fragmentation_tracking: bool,
    /// Enable the SciRS2 integration subsystem.
    pub enable_scirs2_integration: bool,
    /// Upper bound on the number of simultaneously tracked allocations.
    pub max_tracked_allocations: usize,
    /// Interval between memory snapshots.
    pub snapshot_interval: Duration,
    /// Sliding window used for access-pattern analysis.
    pub access_pattern_window: Duration,
    /// Threshold for emitting performance hints.
    /// NOTE(review): exact semantics/units not visible in this file — confirm.
    pub hint_threshold: f64,
    /// Capture stack traces for allocations (off by default).
    pub enable_stack_traces: bool,
    /// Ratio (default 0.85) above which memory pressure is flagged.
    pub memory_pressure_threshold: f64,
    /// Fragmentation score (default 0.3) above which an alert is raised.
    pub fragmentation_alert_threshold: f64,
}
impl Default for MemoryProfilerConfig {
fn default() -> Self {
Self {
enable_allocation_tracking: true,
enable_access_pattern_analysis: true,
enable_pressure_monitoring: true,
enable_fragmentation_tracking: true,
enable_scirs2_integration: true,
max_tracked_allocations: 100000,
snapshot_interval: Duration::from_secs(10),
access_pattern_window: Duration::from_secs(60),
hint_threshold: 0.1,
enable_stack_traces: false, memory_pressure_threshold: 0.85, fragmentation_alert_threshold: 0.3, }
}
}
/// Statistics describing one memory pool.
#[derive(Debug)]
pub struct MemoryPoolStats {
    /// Identifier of the pool.
    pub pool_id: String,
    /// Device the pool belongs to, if device-bound.
    pub device: Option<Device>,
    /// Total capacity of the pool in bytes.
    pub total_size: usize,
    /// Bytes currently allocated from the pool.
    pub allocated_size: usize,
    /// Peak value of `allocated_size`.
    pub peak_allocated_size: usize,
    /// Count of allocations served (atomic so it can be bumped without a lock).
    pub allocation_count: AtomicUsize,
    /// Count of deallocations returned to the pool.
    pub deallocation_count: AtomicUsize,
    /// Running average allocation size in bytes.
    pub average_allocation_size: f64,
    /// Utilization-efficiency metric.
    /// NOTE(review): units/range not established in this file — confirm.
    pub utilization_efficiency: f64,
    /// When the pool was created.
    pub created_at: Instant,
    /// Time of the most recent pool activity.
    pub last_activity: Instant,
    /// Performance hints generated for this pool.
    pub performance_hints: Vec<PerformanceHint>,
}
/// Point-in-time view of memory usage across host and devices.
#[derive(Debug)]
pub struct MemorySnapshot {
    /// When the snapshot was taken.
    pub timestamp: Instant,
    /// Total memory budget in bytes; used as the denominator by
    /// `utilization_efficiency`.
    pub total_memory_bytes: usize,
    /// Host-side memory usage.
    pub host_memory: HostMemoryUsage,
    /// Per-device memory usage.
    pub device_memory: HashMap<Device, DeviceMemoryUsage>,
    /// Pressure indicators observed at snapshot time.
    pub pressure_indicators: MemoryPressureIndicators,
    /// Number of allocations live at snapshot time.
    pub active_allocations: usize,
    /// Fragmentation score at snapshot time.
    pub fragmentation_score: f64,
    /// Cache statistics keyed by cache name.
    pub cache_stats: HashMap<String, CacheStats>,
}
impl MemorySnapshot {
    /// Total bytes allocated across the host and every tracked device.
    pub fn total_memory_usage(&self) -> usize {
        self.device_memory
            .values()
            .fold(self.host_memory.total_allocated, |acc, dev| {
                acc + dev.total_allocated
            })
    }

    /// Memory usage recorded for `device`, if present in this snapshot.
    pub fn device_usage(&self, device: &Device) -> Option<&DeviceMemoryUsage> {
        self.device_memory.get(device)
    }

    /// Fraction of `total_memory_bytes` currently in use.
    /// Returns 0.0 when the budget is zero to avoid a division by zero.
    pub fn utilization_efficiency(&self) -> f64 {
        match self.total_memory_bytes {
            0 => 0.0,
            budget => self.total_memory_usage() as f64 / budget as f64,
        }
    }

    /// True when the system pressure level, mapped onto [0.0, 1.0],
    /// strictly exceeds `threshold`.
    pub fn is_under_pressure(&self, threshold: f64) -> bool {
        let level = match self.pressure_indicators.system_pressure {
            PressureLevel::None => 0.0,
            PressureLevel::Low => 0.25,
            PressureLevel::Medium => 0.5,
            PressureLevel::High => 0.75,
            PressureLevel::Critical => 1.0,
        };
        level > threshold
    }
}
/// Memory usage and performance characteristics for a single device.
#[derive(Debug)]
pub struct DeviceMemoryUsage {
    /// The device this record describes.
    pub device: Device,
    /// Bytes currently allocated on the device.
    pub total_allocated: usize,
    /// Peak value of `total_allocated`.
    pub peak_allocated: usize,
    /// Bytes still available on the device.
    pub available_memory: usize,
    /// Measured memory-bandwidth utilization.
    pub bandwidth_utilization: BandwidthUtilization,
    /// Cache statistics keyed by cache name.
    pub cache_stats: HashMap<String, CacheStats>,
    /// Textual descriptions of observed access patterns.
    pub access_patterns: Vec<String>,
    /// Performance hints generated for this device.
    pub performance_hints: Vec<PerformanceHint>,
}
impl DeviceMemoryUsage {
    /// Percentage (0–100) of device memory in use, where capacity is
    /// `total_allocated + available_memory`. Returns 0.0 for zero capacity.
    pub fn utilization_percentage(&self) -> f64 {
        let capacity = self.total_allocated + self.available_memory;
        if capacity == 0 {
            0.0
        } else {
            (self.total_allocated as f64 / capacity as f64) * 100.0
        }
    }

    /// True when utilization strictly exceeds `threshold`.
    /// NOTE(review): `threshold` is compared against a 0–100 percentage here,
    /// whereas `MemorySnapshot::is_under_pressure` uses a 0–1 scale — confirm
    /// callers pass the right scale.
    pub fn is_under_pressure(&self, threshold: f64) -> bool {
        self.utilization_percentage() > threshold
    }
}
/// Host (CPU/system) memory usage.
#[derive(Debug)]
pub struct HostMemoryUsage {
    /// Bytes currently allocated by the profiled code on the host.
    pub total_allocated: usize,
    /// Peak value of `total_allocated`.
    pub peak_allocated: usize,
    /// Bytes of system memory still available.
    pub available_system_memory: usize,
    /// Resident set size of the process in bytes.
    pub process_memory_rss: usize,
    /// Virtual memory size of the process in bytes.
    pub process_memory_virtual: usize,
    /// Bytes of memory-mapped regions.
    pub memory_mapped_size: usize,
    /// Bytes of swap in use.
    pub swap_usage: usize,
    /// Cache statistics keyed by cache name.
    pub cache_stats: HashMap<String, CacheStats>,
}
impl HostMemoryUsage {
    /// Percentage (0–100) of host memory in use, where the denominator is
    /// `total_allocated + available_system_memory`. Returns 0.0 when that
    /// sum is zero.
    pub fn utilization_percentage(&self) -> f64 {
        let system_total = self.total_allocated + self.available_system_memory;
        if system_total == 0 {
            return 0.0;
        }
        (self.total_allocated as f64 / system_total as f64) * 100.0
    }

    /// Combined virtual-memory + memory-mapped footprint in bytes.
    pub fn total_footprint(&self) -> usize {
        self.process_memory_virtual + self.memory_mapped_size
    }
}
/// Process-wide memory counters; every field is atomic so the struct can be
/// updated concurrently without external locking.
#[derive(Debug)]
pub struct GlobalMemoryStats {
    /// Total number of allocations ever recorded.
    pub total_allocations: AtomicU64,
    /// Total number of deallocations ever recorded.
    pub total_deallocations: AtomicU64,
    /// Highest observed value of `current_memory_usage`.
    pub peak_memory_usage: AtomicUsize,
    /// Most recently reported memory usage in bytes.
    pub current_memory_usage: AtomicUsize,
    /// Cumulative bytes allocated.
    pub total_bytes_allocated: AtomicU64,
    /// Cumulative bytes deallocated.
    pub total_bytes_deallocated: AtomicU64,
    /// Running average allocation size (bytes allocated / allocations).
    pub average_allocation_size: AtomicUsize,
    /// Count of out-of-memory events.
    pub oom_events: AtomicUsize,
    /// Count of recorded pressure events.
    pub pressure_events_count: AtomicUsize,
    /// Count of recorded fragmentation events.
    pub fragmentation_events_count: AtomicUsize,
    /// Count of optimization suggestions emitted.
    pub optimization_suggestions_count: AtomicUsize,
}
impl Default for GlobalMemoryStats {
fn default() -> Self {
Self {
total_allocations: AtomicU64::new(0),
total_deallocations: AtomicU64::new(0),
peak_memory_usage: AtomicUsize::new(0),
current_memory_usage: AtomicUsize::new(0),
total_bytes_allocated: AtomicU64::new(0),
total_bytes_deallocated: AtomicU64::new(0),
average_allocation_size: AtomicUsize::new(0),
oom_events: AtomicUsize::new(0),
pressure_events_count: AtomicUsize::new(0),
fragmentation_events_count: AtomicUsize::new(0),
optimization_suggestions_count: AtomicUsize::new(0),
}
}
}
impl Clone for GlobalMemoryStats {
fn clone(&self) -> Self {
use std::sync::atomic::Ordering;
Self {
total_allocations: AtomicU64::new(self.total_allocations.load(Ordering::Relaxed)),
total_deallocations: AtomicU64::new(self.total_deallocations.load(Ordering::Relaxed)),
peak_memory_usage: AtomicUsize::new(self.peak_memory_usage.load(Ordering::Relaxed)),
current_memory_usage: AtomicUsize::new(
self.current_memory_usage.load(Ordering::Relaxed),
),
total_bytes_allocated: AtomicU64::new(
self.total_bytes_allocated.load(Ordering::Relaxed),
),
total_bytes_deallocated: AtomicU64::new(
self.total_bytes_deallocated.load(Ordering::Relaxed),
),
average_allocation_size: AtomicUsize::new(
self.average_allocation_size.load(Ordering::Relaxed),
),
oom_events: AtomicUsize::new(self.oom_events.load(Ordering::Relaxed)),
pressure_events_count: AtomicUsize::new(
self.pressure_events_count.load(Ordering::Relaxed),
),
fragmentation_events_count: AtomicUsize::new(
self.fragmentation_events_count.load(Ordering::Relaxed),
),
optimization_suggestions_count: AtomicUsize::new(
self.optimization_suggestions_count.load(Ordering::Relaxed),
),
}
}
}
impl GlobalMemoryStats {
    /// Allocations not yet matched by a deallocation (saturating at zero in
    /// case deallocations momentarily exceed allocations under concurrency).
    pub fn outstanding_allocations(&self) -> u64 {
        self.total_allocations
            .load(Ordering::Relaxed)
            .saturating_sub(self.total_deallocations.load(Ordering::Relaxed))
    }

    /// Bytes allocated but not yet deallocated (saturating at zero).
    pub fn outstanding_bytes(&self) -> u64 {
        self.total_bytes_allocated
            .load(Ordering::Relaxed)
            .saturating_sub(self.total_bytes_deallocated.load(Ordering::Relaxed))
    }

    /// Records `current_usage` and raises the peak watermark if exceeded.
    ///
    /// `AtomicUsize::fetch_max` replaces the previous hand-rolled
    /// compare-exchange loop; it is semantically equivalent and simpler.
    pub fn update_peak_usage(&self, current_usage: usize) {
        self.current_memory_usage
            .store(current_usage, Ordering::Relaxed);
        self.peak_memory_usage
            .fetch_max(current_usage, Ordering::Relaxed);
    }

    /// Records one allocation of `size` bytes and refreshes the running
    /// average allocation size.
    ///
    /// The average is computed from the values returned by the `fetch_add`
    /// calls rather than re-loading the counters afterwards: the old code
    /// could pair an allocation count from one instant with a byte total
    /// from another when allocations raced.
    pub fn record_allocation(&self, size: usize) {
        let total_allocs = self.total_allocations.fetch_add(1, Ordering::Relaxed) + 1;
        let total_bytes = self
            .total_bytes_allocated
            .fetch_add(size as u64, Ordering::Relaxed)
            + size as u64;
        // total_allocs >= 1 here, so the division is always safe.
        self.average_allocation_size
            .store((total_bytes / total_allocs) as usize, Ordering::Relaxed);
    }

    /// Records one deallocation of `size` bytes.
    pub fn record_deallocation(&self, size: usize) {
        self.total_deallocations.fetch_add(1, Ordering::Relaxed);
        self.total_bytes_deallocated
            .fetch_add(size as u64, Ordering::Relaxed);
    }
}
/// Hit/miss statistics for one named cache.
#[derive(Debug)]
pub struct CacheStats {
    /// Name identifying the cache.
    pub cache_name: String,
    /// Number of cache hits.
    pub hits: AtomicU64,
    /// Number of cache misses.
    pub misses: AtomicU64,
    /// Number of evictions performed.
    pub evictions: AtomicU64,
    /// Total cache capacity in bytes.
    pub total_size: usize,
    /// Bytes currently occupied.
    pub used_size: AtomicUsize,
    /// Average access latency.
    pub average_access_time: Duration,
}
impl CacheStats {
    /// Hit rate as a fraction in [0.0, 1.0]; 0.0 when no accesses recorded.
    pub fn hit_rate(&self) -> f64 {
        let hits = self.hits.load(Ordering::Relaxed);
        let total = hits + self.misses.load(Ordering::Relaxed);
        if total == 0 {
            0.0
        } else {
            hits as f64 / total as f64
        }
    }

    /// Percentage (0–100) of capacity in use; 0.0 when capacity is zero.
    pub fn utilization(&self) -> f64 {
        match self.total_size {
            0 => 0.0,
            capacity => {
                let used = self.used_size.load(Ordering::Relaxed);
                (used as f64 / capacity as f64) * 100.0
            }
        }
    }
}
/// Measured memory-bandwidth figures for one device.
#[derive(Debug)]
pub struct BandwidthUtilization {
    /// Device the measurements refer to.
    pub device: Device,
    /// Highest bandwidth observed during the window.
    pub peak_bandwidth: u64,
    /// Average bandwidth over the window; numerator of `efficiency_percentage`.
    pub average_bandwidth: u64,
    /// Theoretical maximum bandwidth of the device.
    pub theoretical_max_bandwidth: u64,
    /// Read-direction bandwidth.
    pub read_bandwidth: u64,
    /// Write-direction bandwidth.
    pub write_bandwidth: u64,
    /// Combined bidirectional bandwidth.
    pub bidirectional_bandwidth: u64,
    /// Duration over which the measurements were taken.
    pub measurement_window: Duration,
}
impl BandwidthUtilization {
    /// Average bandwidth as a percentage (0–100) of the theoretical maximum;
    /// 0.0 when the theoretical maximum is zero.
    pub fn efficiency_percentage(&self) -> f64 {
        match self.theoretical_max_bandwidth {
            0 => 0.0,
            max => (self.average_bandwidth as f64 / max as f64) * 100.0,
        }
    }

    /// True when efficiency meets or exceeds `threshold` (a 0–100 percentage).
    pub fn is_optimal(&self, threshold: f64) -> bool {
        self.efficiency_percentage() >= threshold
    }
}
impl MemoryProfiler {
    /// Creates a profiler wrapping `base_profiler` with the given `config`.
    /// All tracking structures start empty; fragmentation and SciRS2 state
    /// are built from their default configurations.
    pub fn new(
        base_profiler: Box<dyn Profiler + Send + Sync>,
        config: MemoryProfilerConfig,
    ) -> Self {
        let fragmentation = FragmentationTracker::new(FragmentationConfig::default());
        let scirs2 = ScirS2Integration::new(ScirS2IntegrationConfig::default());
        Self {
            base_profiler,
            allocations: Arc::new(RwLock::new(HashMap::new())),
            _pool_stats: Arc::new(RwLock::new(HashMap::new())),
            usage_history: Arc::new(Mutex::new(VecDeque::new())),
            pressure_events: Arc::new(Mutex::new(Vec::new())),
            access_patterns: Arc::new(RwLock::new(HashMap::new())),
            config,
            global_stats: Arc::new(Mutex::new(GlobalMemoryStats::default())),
            peak_watermarks: Arc::new(RwLock::new(HashMap::new())),
            fragmentation_tracker: Arc::new(Mutex::new(fragmentation)),
            scirs2_integration: Arc::new(Mutex::new(scirs2)),
        }
    }

    /// Borrows the active configuration.
    pub fn config(&self) -> &MemoryProfilerConfig {
        &self.config
    }

    /// Whether allocation tracking is enabled.
    pub fn is_allocation_tracking_enabled(&self) -> bool {
        self.config.enable_allocation_tracking
    }

    /// Whether access-pattern analysis is enabled.
    pub fn is_access_pattern_analysis_enabled(&self) -> bool {
        self.config.enable_access_pattern_analysis
    }

    /// Whether pressure monitoring is enabled.
    pub fn is_pressure_monitoring_enabled(&self) -> bool {
        self.config.enable_pressure_monitoring
    }

    /// Whether fragmentation tracking is enabled.
    pub fn is_fragmentation_tracking_enabled(&self) -> bool {
        self.config.enable_fragmentation_tracking
    }

    /// Whether the SciRS2 integration is enabled.
    pub fn is_scirs2_integration_enabled(&self) -> bool {
        self.config.enable_scirs2_integration
    }

    /// Point-in-time copy of the global counters (see `GlobalMemoryStats`'s
    /// `Clone` impl: not an atomic snapshot).
    pub fn get_global_stats(&self) -> GlobalMemoryStats {
        self.global_stats.lock().clone()
    }

    /// Builds a snapshot from the current global counters.
    ///
    /// NOTE(review): host/device usage, pressure indicators, and the
    /// fragmentation score are placeholders (zeros / defaults) in this
    /// implementation — only `total_memory_bytes` and `active_allocations`
    /// reflect real counters.
    pub fn take_snapshot(&self) -> Result<MemorySnapshot> {
        let stats = self.get_global_stats();
        let host_memory = HostMemoryUsage {
            total_allocated: 0,
            peak_allocated: 0,
            available_system_memory: 0,
            process_memory_rss: 0,
            process_memory_virtual: 0,
            memory_mapped_size: 0,
            swap_usage: 0,
            cache_stats: HashMap::new(),
        };
        Ok(MemorySnapshot {
            timestamp: Instant::now(),
            total_memory_bytes: stats.current_memory_usage.load(Ordering::Relaxed),
            host_memory,
            device_memory: HashMap::new(),
            pressure_indicators: MemoryPressureIndicators::default(),
            active_allocations: stats.outstanding_allocations() as usize,
            fragmentation_score: 0.0,
            cache_stats: HashMap::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::profiler::SimpleProfiler;

    /// Constructing a profiler with the default config must not panic.
    #[test]
    fn test_memory_profiler_creation() {
        let _profiler = MemoryProfiler::new(
            Box::new(SimpleProfiler::new()),
            MemoryProfilerConfig::default(),
        );
    }

    /// Defaults: tracking subsystems on, stack traces off, 100k cap.
    #[test]
    fn test_memory_profiler_config_default() {
        let cfg = MemoryProfilerConfig::default();
        assert!(cfg.enable_allocation_tracking);
        assert!(cfg.enable_access_pattern_analysis);
        assert!(cfg.enable_pressure_monitoring);
        assert!(!cfg.enable_stack_traces);
        assert_eq!(cfg.max_tracked_allocations, 100_000);
    }

    /// Allocation/deallocation counters and the outstanding derivations.
    #[test]
    fn test_global_memory_stats() {
        let stats = GlobalMemoryStats::default();
        assert_eq!(stats.outstanding_allocations(), 0);
        assert_eq!(stats.outstanding_bytes(), 0);

        stats.record_allocation(1024);
        assert_eq!(stats.total_allocations.load(Ordering::Relaxed), 1);
        assert_eq!(stats.total_bytes_allocated.load(Ordering::Relaxed), 1024);
        assert_eq!(stats.outstanding_allocations(), 1);
        assert_eq!(stats.outstanding_bytes(), 1024);

        stats.record_deallocation(512);
        assert_eq!(stats.total_deallocations.load(Ordering::Relaxed), 1);
        assert_eq!(stats.total_bytes_deallocated.load(Ordering::Relaxed), 512);
        assert_eq!(stats.outstanding_bytes(), 512);
    }

    /// 80 hits / 20 misses => 0.8 hit rate; 512 of 1024 bytes => 50% used.
    #[test]
    fn test_cache_stats() {
        let cache = CacheStats {
            cache_name: "test_cache".to_string(),
            hits: AtomicU64::new(80),
            misses: AtomicU64::new(20),
            evictions: AtomicU64::new(5),
            total_size: 1024,
            used_size: AtomicUsize::new(512),
            average_access_time: Duration::from_nanos(100),
        };
        assert_eq!(cache.hit_rate(), 0.8);
        assert_eq!(cache.utilization(), 50.0);
    }

    /// 600 MB/s average of a 1 GB/s max => 60% efficiency.
    #[test]
    fn test_bandwidth_utilization() {
        let bw = BandwidthUtilization {
            device: Device::cpu().expect("Device should succeed"),
            peak_bandwidth: 800_000_000,
            average_bandwidth: 600_000_000,
            theoretical_max_bandwidth: 1_000_000_000,
            read_bandwidth: 300_000_000,
            write_bandwidth: 300_000_000,
            bidirectional_bandwidth: 600_000_000,
            measurement_window: Duration::from_secs(10),
        };
        assert_eq!(bw.efficiency_percentage(), 60.0);
        assert!(bw.is_optimal(50.0));
        assert!(!bw.is_optimal(70.0));
    }

    /// Host-only snapshot: usage equals host allocation; 1024/2048 = 0.5.
    #[test]
    fn test_memory_snapshot() {
        let snap = MemorySnapshot {
            timestamp: Instant::now(),
            total_memory_bytes: 2048,
            host_memory: HostMemoryUsage {
                total_allocated: 1024,
                peak_allocated: 1536,
                available_system_memory: 4096,
                process_memory_rss: 1024,
                process_memory_virtual: 2048,
                memory_mapped_size: 512,
                swap_usage: 0,
                cache_stats: HashMap::new(),
            },
            device_memory: HashMap::new(),
            pressure_indicators: MemoryPressureIndicators::default(),
            active_allocations: 10,
            fragmentation_score: 0.1,
            cache_stats: HashMap::new(),
        };
        assert_eq!(snap.total_memory_usage(), 1024);
        assert_eq!(snap.utilization_efficiency(), 0.5);
    }
}