use std::time::Duration;
#[cfg(feature = "serialize")]
use serde::{Deserialize, Serialize};
/// Root configuration object for eager execution.
///
/// Combines a few global switches and limits with one nested configuration
/// per concern. The defaults quoted on each field are the values assigned by
/// this type's `Default` implementation.
#[derive(Clone, Debug)]
pub struct EagerExecutionConfig {
    /// Operation-cache switch; defaults to `true`.
    pub enable_op_cache: bool,
    /// Memory-pool switch; defaults to `true`.
    pub enable_memory_pool: bool,
    /// Asynchronous-execution switch; defaults to `true`.
    pub enable_async_execution: bool,
    /// Cache size limit; defaults to `1000`
    /// (presumably a count of cached entries — confirm against the cache code).
    pub max_cache_size: usize,
    /// Memory pool size; defaults to `128 * 1024 * 1024`
    /// (presumably bytes, i.e. 128 MiB — confirm against the allocator).
    pub memory_pool_size: usize,
    /// Overhead target, in nanoseconds going by the `_ns` suffix;
    /// defaults to `1_000_000`.
    pub target_overhead_ns: u64,
    /// Context-optimization switch; defaults to `true`.
    pub enable_context_optimization: bool,
    /// Kernel-fusion switch; defaults to `true`.
    pub enable_kernel_fusion: bool,
    /// Nested SIMD settings.
    pub simd_config: SimdExecutionConfig,
    /// Nested parallel-execution settings.
    pub parallel_config: ParallelExecutionConfig,
    /// Nested memory-optimization settings.
    pub memory_config: MemoryOptimizationConfig,
    /// Nested performance-monitoring settings.
    pub monitoring_config: PerformanceMonitoringConfig,
    /// Nested adaptive-tuning settings.
    pub adaptive_config: AdaptiveTuningConfig,
    /// Nested GPU-acceleration settings.
    pub gpu_config: GpuAccelerationConfig,
    /// Nested ultra-low-latency settings.
    pub ultra_latency_config: UltraLatencyConfig,
}
/// SIMD-related execution settings.
///
/// The defaults quoted on each field are the values assigned by this type's
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct SimdExecutionConfig {
    /// Master SIMD switch; defaults to `true`.
    pub enable_simd: bool,
    /// SIMD width; defaults to `256` (presumably bits — confirm with consumers).
    pub simd_width: u32,
    /// Auto-vectorization switch; defaults to `true`.
    pub enable_auto_vectorization: bool,
    /// Target-feature switch; defaults to `true`.
    pub enable_target_features: bool,
    /// Size threshold for SIMD; defaults to `1024`
    /// (presumably an element count — confirm with consumers).
    pub simd_threshold: usize,
    /// Switch for SIMD element-wise operations; defaults to `true`.
    pub simd_elementwise: bool,
    /// Switch for SIMD matrix operations; defaults to `true`.
    pub simd_matrix_ops: bool,
    /// Memory alignment; defaults to `32` (presumably bytes).
    pub memory_alignment: usize,
}
/// Parallel-execution settings.
///
/// The defaults quoted on each field are the values assigned by this type's
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct ParallelExecutionConfig {
    /// Master switch for parallel execution; defaults to `true`.
    pub enable_parallel: bool,
    /// Thread count; defaults to `0`
    /// (NOTE(review): `0` looks like an "auto-detect" sentinel — confirm with the consumer).
    pub num_threads: usize,
    /// Size threshold for going parallel; defaults to `10_000`
    /// (presumably an element count).
    pub parallel_threshold: usize,
    /// Thread-pool strategy; defaults to `ThreadPoolStrategy::Adaptive`.
    pub thread_strategy: ThreadPoolStrategy,
    /// Work-stealing switch; defaults to `true`.
    pub enable_work_stealing: bool,
    /// Chunking strategy; defaults to `ChunkStrategy::Adaptive`.
    pub chunk_strategy: ChunkStrategy,
    /// NUMA-awareness switch; defaults to `true`.
    pub numa_aware: bool,
    /// CPU-affinity switch; defaults to `false`.
    pub cpu_affinity: bool,
}
/// Thread-pool strategy selectable through `ParallelExecutionConfig::thread_strategy`.
///
/// Only the variants are declared here; how each one is interpreted is up to
/// the code that consumes the configuration. `PartialEq`/`Eq` are derived so
/// that a configured strategy can be compared with `==` (for example in tests
/// or when detecting configuration changes).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ThreadPoolStrategy {
    /// Presumably a single pool shared process-wide — confirm with the executor.
    Global,
    /// Presumably one pool per device — confirm with the executor.
    PerDevice,
    /// The variant chosen by `ParallelExecutionConfig::default()`.
    Adaptive,
    /// Explicitly sized pool.
    Custom {
        /// Core thread count.
        core_threads: usize,
        /// Maximum thread count.
        max_threads: usize,
    },
}
/// Work-chunking strategy selectable through `ParallelExecutionConfig::chunk_strategy`.
///
/// Only the variants are declared here; their interpretation belongs to the
/// consumer of the configuration. `PartialEq`/`Eq` are derived so strategies
/// can be compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChunkStrategy {
    /// Fixed chunking with the given size (presumably elements per chunk).
    Fixed(usize),
    /// The variant chosen by `ParallelExecutionConfig::default()`.
    Adaptive,
    /// Work-stealing based chunking.
    WorkStealing,
    /// Cache-aware chunking.
    CacheAware,
}
/// Memory-optimization settings.
///
/// The defaults quoted on each field are the values assigned by this type's
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct MemoryOptimizationConfig {
    /// Pooling switch; defaults to `true`.
    pub enable_pooling: bool,
    /// Pool strategy; defaults to `PoolStrategy::Adaptive` with
    /// `initial_size = 64 * 1024 * 1024` and `growth_factor = 1.5`.
    pub pool_strategy: PoolStrategy,
    /// Memory-mapping switch; defaults to `true`.
    pub enable_memory_mapping: bool,
    /// Threshold for memory mapping; defaults to `100 * 1024 * 1024`
    /// (presumably bytes).
    pub mmap_threshold: usize,
    /// Adaptive-chunking switch; defaults to `true`.
    pub enable_adaptive_chunking: bool,
    /// Adaptive chunk size; defaults to `1024 * 1024` (presumably bytes).
    pub adaptive_chunk_size: usize,
    /// Zero-copy switch; defaults to `true`.
    pub enable_zero_copy: bool,
    /// Bandwidth-optimization switch; defaults to `true`.
    pub bandwidth_optimization: bool,
    /// Cache-optimization switch; defaults to `true`.
    pub cache_optimization: bool,
    /// Preallocation strategy; defaults to `PreallocationStrategy::Historical`.
    pub preallocation_strategy: PreallocationStrategy,
}
/// Memory-pool strategy selectable through `MemoryOptimizationConfig::pool_strategy`.
///
/// Only the variants are declared here; their interpretation belongs to the
/// consumer of the configuration. `PartialEq` is derived so strategies can be
/// compared with `==`; `Eq` is not available because `growth_factor` is an
/// `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum PoolStrategy {
    /// Pool of equally sized blocks.
    FixedSize {
        /// Size of each block (presumably bytes).
        block_size: usize,
        /// Number of blocks.
        num_blocks: usize,
    },
    /// Pool with several size classes.
    MultiSize {
        /// The size classes (presumably bytes).
        sizes: Vec<usize>,
    },
    /// Growing pool; the variant chosen by `MemoryOptimizationConfig::default()`.
    Adaptive {
        /// Initial pool size (presumably bytes).
        initial_size: usize,
        /// Growth factor (presumably a multiplier applied when the pool grows).
        growth_factor: f64,
    },
    /// Segregated pool; carries no parameters.
    Segregated,
}
/// Preallocation strategy selectable through
/// `MemoryOptimizationConfig::preallocation_strategy`.
///
/// Only the variants are declared here; their interpretation belongs to the
/// consumer of the configuration. `PartialEq`/`Eq` are derived so strategies
/// can be compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreallocationStrategy {
    /// No preallocation.
    None,
    /// The variant chosen by `MemoryOptimizationConfig::default()`
    /// (presumably driven by past usage — confirm with the allocator).
    Historical,
    /// Fixed preallocation of the given amount (presumably bytes).
    Fixed(usize),
    /// Adaptive preallocation.
    Adaptive,
}
/// Performance-monitoring settings.
///
/// The defaults quoted on each field are the values assigned by this type's
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct PerformanceMonitoringConfig {
    /// Master monitoring switch; defaults to `true`.
    pub enable_monitoring: bool,
    /// Collection interval; defaults to 100 ms.
    pub collection_frequency: Duration,
    /// Profiling switch; defaults to `true`.
    pub enable_profiling: bool,
    /// Benchmarking switch; defaults to `false`.
    pub enable_benchmarking: bool,
    /// Benchmark interval; defaults to 60 s.
    pub benchmark_frequency: Duration,
    /// Memory-usage tracking switch; defaults to `true`.
    pub track_memory_usage: bool,
    /// Timing switch; defaults to `true`.
    pub enable_timing: bool,
    /// Cache-performance tracking switch; defaults to `true`.
    pub track_cache_performance: bool,
    /// Hardware-counter switch; defaults to `false`.
    pub enable_hardware_counters: bool,
    /// History retention; defaults to `1000`
    /// (presumably a number of retained samples — confirm with the collector).
    pub history_retention: usize,
    /// Alerting switch; defaults to `true`.
    pub enable_alerts: bool,
    /// Alert threshold; defaults to 5 ms.
    pub alert_threshold: Duration,
}
/// Adaptive-tuning settings.
///
/// The defaults quoted on each field are the values assigned by this type's
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct AdaptiveTuningConfig {
    /// Master adaptive-tuning switch; defaults to `true`.
    pub enable_adaptive: bool,
    /// Learning rate; defaults to `0.01`.
    pub learning_rate: f64,
    /// Adaptation interval; defaults to 30 s.
    pub adaptation_frequency: Duration,
    /// Prediction switch; defaults to `true`.
    pub enable_prediction: bool,
    /// Prediction algorithm; defaults to
    /// `PredictionAlgorithm::ExponentialSmoothing { alpha: 0.3 }`.
    pub prediction_algorithm: PredictionAlgorithm,
    /// Switch for tuning SIMD settings; defaults to `true`.
    pub tune_simd: bool,
    /// Switch for tuning parallel settings; defaults to `true`.
    pub tune_parallel: bool,
    /// Switch for tuning memory settings; defaults to `true`.
    pub tune_memory: bool,
    /// Minimum confidence; defaults to `0.8`
    /// (presumably a fraction in `0.0..=1.0` — confirm with the tuner).
    pub min_confidence: f64,
    /// A/B-testing switch; defaults to `false`.
    pub enable_ab_testing: bool,
    /// Sample size; defaults to `100`.
    pub sample_size: usize,
}
/// Prediction algorithm selectable through
/// `AdaptiveTuningConfig::prediction_algorithm`.
///
/// Only the variants and their parameters are declared here; the algorithms
/// themselves are implemented by whatever consumes the configuration.
#[derive(Clone, Debug)]
pub enum PredictionAlgorithm {
    /// Moving average over `window_size` samples.
    MovingAverage {
        /// Window length.
        window_size: usize,
    },
    /// Exponential smoothing; `AdaptiveTuningConfig::default()` picks this
    /// variant with `alpha = 0.3`.
    ExponentialSmoothing {
        /// Smoothing factor.
        alpha: f64,
    },
    /// Linear regression; carries no parameters.
    LinearRegression,
    /// Machine-learning based prediction.
    MachineLearning {
        /// Which model shape to use.
        model_complexity: ModelComplexity,
    },
}
/// Model shape carried by `PredictionAlgorithm::MachineLearning`.
///
/// Only the variants are declared here; their interpretation belongs to the
/// consumer of the configuration. `PartialEq`/`Eq` are derived so values can
/// be compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ModelComplexity {
    /// Simple model; carries no parameters.
    Simple,
    /// Polynomial model.
    Polynomial {
        /// Polynomial degree.
        degree: u32,
    },
    /// Neural-network model.
    NeuralNetwork {
        /// One entry per hidden layer (presumably the layer width).
        hidden_layers: Vec<usize>,
    },
}
/// GPU-acceleration settings.
///
/// The defaults quoted on each field are the values assigned by this type's
/// `Default` implementation.
#[derive(Clone, Debug)]
pub struct GpuAccelerationConfig {
    /// Master GPU switch; defaults to `true`.
    pub enable_gpu: bool,
    /// GPU memory pool size; defaults to `512 * 1024 * 1024`
    /// (presumably bytes, i.e. 512 MiB).
    pub gpu_memory_pool: usize,
    /// Asynchronous GPU execution switch; defaults to `true`.
    pub enable_async_gpu: bool,
    /// GPU kernel-fusion switch; defaults to `true`.
    pub enable_kernel_fusion: bool,
    /// Tensor-core switch; defaults to `true`.
    pub enable_tensor_cores: bool,
    /// Mixed-precision switch; defaults to `true`.
    pub mixed_precision: bool,
    /// Transfer-optimization switch; defaults to `true`.
    pub optimize_transfers: bool,
    /// Multi-GPU switch; defaults to `true`.
    pub enable_multi_gpu: bool,
    /// Scheduling strategy; defaults to `GpuSchedulingStrategy::LoadBased`.
    pub scheduling_strategy: GpuSchedulingStrategy,
}
/// GPU scheduling strategy selectable through
/// `GpuAccelerationConfig::scheduling_strategy`.
///
/// Only the variants are declared here; their interpretation belongs to the
/// consumer of the configuration. `PartialEq`/`Eq` are derived so strategies
/// can be compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GpuSchedulingStrategy {
    /// Round-robin scheduling.
    RoundRobin,
    /// Load-based scheduling; the variant chosen by
    /// `GpuAccelerationConfig::default()`.
    LoadBased,
    /// Memory-aware scheduling.
    MemoryAware,
    /// Latency-optimized scheduling.
    LatencyOptimized,
}
/// Ultra-low-latency settings.
///
/// The defaults quoted on each field are the values assigned by this type's
/// `Default` implementation. Whether the individual options take effect while
/// `enable_ultra_latency` is `false` is not visible from this file.
#[derive(Clone, Debug)]
pub struct UltraLatencyConfig {
    /// Master switch; defaults to `false`.
    pub enable_ultra_latency: bool,
    /// CPU-isolation switch; defaults to `false`.
    pub cpu_isolation: bool,
    /// Real-time priority switch; defaults to `false`.
    pub realtime_priority: bool,
    /// Switch for disabling CPU frequency scaling; defaults to `false`.
    pub disable_cpu_scaling: bool,
    /// Memory pre-faulting switch; defaults to `true`.
    pub prefault_memory: bool,
    /// Switch for disabling swap; defaults to `false`.
    pub disable_swap: bool,
    /// Lock-free switch; defaults to `true`.
    pub enable_lockfree: bool,
    /// Cache-residency optimization switch; defaults to `true`.
    pub optimize_cache_residency: bool,
    /// Branch-prediction optimization switch; defaults to `true`.
    pub optimize_branch_prediction: bool,
}
impl Default for EagerExecutionConfig {
fn default() -> Self {
Self {
enable_op_cache: true,
enable_memory_pool: true,
enable_async_execution: true,
max_cache_size: 1000,
memory_pool_size: 128 * 1024 * 1024, target_overhead_ns: 1_000_000, enable_context_optimization: true,
enable_kernel_fusion: true,
simd_config: SimdExecutionConfig::default(),
parallel_config: ParallelExecutionConfig::default(),
memory_config: MemoryOptimizationConfig::default(),
monitoring_config: PerformanceMonitoringConfig::default(),
adaptive_config: AdaptiveTuningConfig::default(),
gpu_config: GpuAccelerationConfig::default(),
ultra_latency_config: UltraLatencyConfig::default(),
}
}
}
impl Default for SimdExecutionConfig {
    /// Returns the baseline SIMD settings: all switches on, width `256`,
    /// threshold `1024`, alignment `32`.
    fn default() -> Self {
        // Numeric knobs are named up front; the boolean switches are all on.
        let simd_width = 256;
        let simd_threshold = 1024;
        let memory_alignment = 32;

        Self {
            enable_simd: true,
            simd_width,
            enable_auto_vectorization: true,
            enable_target_features: true,
            simd_threshold,
            simd_elementwise: true,
            simd_matrix_ops: true,
            memory_alignment,
        }
    }
}
impl Default for ParallelExecutionConfig {
    /// Returns the baseline parallel settings: parallelism, work stealing and
    /// NUMA awareness on, CPU affinity off, adaptive thread-pool and chunking
    /// strategies.
    fn default() -> Self {
        let thread_strategy = ThreadPoolStrategy::Adaptive;
        let chunk_strategy = ChunkStrategy::Adaptive;

        Self {
            enable_parallel: true,
            // NOTE(review): `0` looks like an "auto-detect" sentinel — confirm with the consumer.
            num_threads: 0,
            parallel_threshold: 10_000,
            thread_strategy,
            enable_work_stealing: true,
            chunk_strategy,
            numa_aware: true,
            cpu_affinity: false,
        }
    }
}
impl Default for MemoryOptimizationConfig {
    /// Returns the baseline memory settings: every switch on, an adaptive pool
    /// and historical preallocation.
    fn default() -> Self {
        // Spelled out once so the sizes below read as multiples of 1024 * 1024.
        const MIB: usize = 1024 * 1024;

        let pool_strategy = PoolStrategy::Adaptive {
            initial_size: 64 * MIB,
            growth_factor: 1.5,
        };

        Self {
            enable_pooling: true,
            pool_strategy,
            enable_memory_mapping: true,
            mmap_threshold: 100 * MIB,
            enable_adaptive_chunking: true,
            adaptive_chunk_size: MIB,
            enable_zero_copy: true,
            bandwidth_optimization: true,
            cache_optimization: true,
            preallocation_strategy: PreallocationStrategy::Historical,
        }
    }
}
impl Default for PerformanceMonitoringConfig {
    /// Returns the baseline monitoring settings: monitoring, profiling,
    /// timing, tracking and alerts on; benchmarking and hardware counters off.
    fn default() -> Self {
        // The three time-based knobs, named before the struct is assembled.
        let collection_frequency = Duration::from_millis(100);
        let benchmark_frequency = Duration::from_secs(60);
        let alert_threshold = Duration::from_millis(5);

        Self {
            enable_monitoring: true,
            collection_frequency,
            enable_profiling: true,
            enable_benchmarking: false,
            benchmark_frequency,
            track_memory_usage: true,
            enable_timing: true,
            track_cache_performance: true,
            enable_hardware_counters: false,
            history_retention: 1000,
            enable_alerts: true,
            alert_threshold,
        }
    }
}
impl Default for AdaptiveTuningConfig {
    /// Returns the baseline tuning settings: adaptation, prediction and all
    /// three tuning targets on, A/B testing off, exponential smoothing with
    /// `alpha = 0.3`.
    fn default() -> Self {
        let adaptation_frequency = Duration::from_secs(30);
        let prediction_algorithm = PredictionAlgorithm::ExponentialSmoothing { alpha: 0.3 };

        Self {
            enable_adaptive: true,
            learning_rate: 0.01,
            adaptation_frequency,
            enable_prediction: true,
            prediction_algorithm,
            tune_simd: true,
            tune_parallel: true,
            tune_memory: true,
            min_confidence: 0.8,
            enable_ab_testing: false,
            sample_size: 100,
        }
    }
}
impl Default for GpuAccelerationConfig {
    /// Returns the baseline GPU settings: every switch on, a pool of
    /// `512 * 1024 * 1024` and load-based scheduling.
    fn default() -> Self {
        // Spelled out once so the pool size reads as a multiple of 1024 * 1024.
        const MIB: usize = 1024 * 1024;

        Self {
            enable_gpu: true,
            gpu_memory_pool: 512 * MIB,
            enable_async_gpu: true,
            enable_kernel_fusion: true,
            enable_tensor_cores: true,
            mixed_precision: true,
            optimize_transfers: true,
            enable_multi_gpu: true,
            scheduling_strategy: GpuSchedulingStrategy::LoadBased,
        }
    }
}
impl Default for UltraLatencyConfig {
    /// Returns the baseline ultra-low-latency settings with the master switch
    /// off.
    fn default() -> Self {
        Self {
            // Master switch and the system-level options start disabled.
            enable_ultra_latency: false,
            cpu_isolation: false,
            realtime_priority: false,
            disable_cpu_scaling: false,
            disable_swap: false,
            // These options start enabled even though the master switch is off.
            prefault_memory: true,
            enable_lockfree: true,
            optimize_cache_residency: true,
            optimize_branch_prediction: true,
        }
    }
}
/// Identifying description of an operation invocation.
///
/// Derives `Hash`, `PartialEq` and `Eq`, so a signature can be used as a key
/// in hash-based collections.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct OpSignature {
    /// Operation name.
    pub operation: String,
    /// One shape (list of dimension sizes) per input.
    pub input_shapes: Vec<Vec<usize>>,
    /// Data type of the operation.
    pub dtype: crate::DType,
    /// Device of the operation.
    pub device: crate::Device,
    /// String pairs (presumably parameter name and value); order is
    /// significant for the derived equality and hash.
    pub params: Vec<(String, String)>,
}
#[derive(Debug, Clone)]
pub struct CachedOperation {
pub signature: OpSignature,
pub result_shape: Vec<usize>,
pub execution_time: std::time::Duration,
pub memory_usage: usize,
pub created_at: std::time::Instant,
pub last_used: std::time::Instant,
pub use_count: usize,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
pub struct ExecutionMetrics {
pub operation: String,
pub device: crate::Device,
pub setup_time: std::time::Duration,
pub execution_time: std::time::Duration,
pub teardown_time: std::time::Duration,
pub total_overhead: std::time::Duration,
pub memory_allocation_time: std::time::Duration,
pub cache_hit: bool,
pub meets_target: bool,
}