use crate::{Result, Tensor};
use scirs2_core::metrics::{Histogram, Timer};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
/// Collects usage statistics for activation functions.
///
/// Every recording method takes `&self`: the scalar counters are atomics and
/// the per-function map sits behind a mutex.
pub struct ActivationRegistry {
/// Number of recorded calls per function name; updated by `record_function`.
function_counters: Arc<Mutex<std::collections::HashMap<String, AtomicU64>>>,
/// Incremented once per `record_simd` call.
simd_usage: AtomicU64,
/// Incremented once per `record_parallel` call.
parallel_usage: AtomicU64,
/// Incremented once per `record_gpu` call.
gpu_usage: AtomicU64,
/// Incremented once per `record_approximation` call.
approximation_usage: AtomicU64,
// Created in `new` under the metric name "activation.execution_time"; nothing
// in the visible code reads it, hence the `dead_code` allowance.
#[allow(dead_code)]
execution_timer: Timer,
/// Created in `new` under the metric name "activation.throughput"; receives
/// the throughput observations made by `record_function`.
throughput_histogram: Histogram,
}
impl ActivationRegistry {
    /// Creates a registry with no recorded functions and every usage counter at zero.
    ///
    /// The timer and histogram are registered under the metric names
    /// `"activation.execution_time"` and `"activation.throughput"`.
    pub fn new() -> Self {
        Self {
            function_counters: Arc::new(Mutex::new(std::collections::HashMap::new())),
            simd_usage: AtomicU64::new(0),
            parallel_usage: AtomicU64::new(0),
            gpu_usage: AtomicU64::new(0),
            approximation_usage: AtomicU64::new(0),
            execution_timer: Timer::new("activation.execution_time".to_string()),
            throughput_histogram: Histogram::new("activation.throughput".to_string()),
        }
    }

    /// Records one invocation of the activation function `name`.
    ///
    /// Increments the call counter for `name` and, when `duration_ns` is
    /// non-zero, observes the throughput in millions of elements per second
    /// (`elements / duration_ns * 1e3`) on the throughput histogram.
    ///
    /// A `duration_ns` of zero still counts the call but skips the throughput
    /// observation: the division would otherwise yield `inf` (or `NaN` when
    /// `elements` is also zero) and pollute the histogram.
    ///
    /// # Panics
    ///
    /// Panics if the counter mutex has been poisoned.
    pub fn record_function(&self, name: &str, elements: usize, duration_ns: u64) {
        {
            // Inner scope so the lock is released before touching the histogram.
            let mut counters = self
                .function_counters
                .lock()
                .expect("lock should not be poisoned");
            // Look up by `&str` first so the common "already registered" path
            // does not allocate a fresh `String` key on every call.
            if let Some(counter) = counters.get(name) {
                counter.fetch_add(1, Ordering::Relaxed);
            } else {
                counters.insert(name.to_string(), AtomicU64::new(1));
            }
        }

        if duration_ns == 0 {
            return;
        }
        let throughput_meps = (elements as f64 * 1e9) / (duration_ns as f64 * 1e6);
        self.throughput_histogram.observe(throughput_meps);
    }

    /// Counts one SIMD-accelerated execution.
    pub fn record_simd(&self) {
        self.simd_usage.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one parallel execution.
    pub fn record_parallel(&self) {
        self.parallel_usage.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one GPU execution.
    pub fn record_gpu(&self) {
        self.gpu_usage.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one use of an approximated activation.
    pub fn record_approximation(&self) {
        self.approximation_usage.fetch_add(1, Ordering::Relaxed);
    }

    /// Returns a snapshot of the current counters.
    ///
    /// Each value is read with relaxed ordering, so a snapshot taken while
    /// other threads are recording is not a single consistent point in time.
    ///
    /// # Panics
    ///
    /// Panics if the counter mutex has been poisoned.
    pub fn get_analytics(&self) -> ActivationAnalytics {
        let counters = self
            .function_counters
            .lock()
            .expect("lock should not be poisoned");
        let function_counts: std::collections::HashMap<String, u64> = counters
            .iter()
            .map(|(k, v)| (k.clone(), v.load(Ordering::Relaxed)))
            .collect();
        ActivationAnalytics {
            function_counts,
            simd_accelerations: self.simd_usage.load(Ordering::Relaxed),
            parallel_executions: self.parallel_usage.load(Ordering::Relaxed),
            gpu_executions: self.gpu_usage.load(Ordering::Relaxed),
            approximation_usages: self.approximation_usage.load(Ordering::Relaxed),
            // NOTE(review): fixed placeholder value; it is not derived from
            // `throughput_histogram`. Replace once the histogram exposes a
            // mean/summary accessor.
            avg_throughput_meps: 150.0,
        }
    }
}
// `Default` is provided so the registry can be built wherever a `Default`
// bound is required; it produces exactly what `ActivationRegistry::new` does.
impl Default for ActivationRegistry {
/// Delegates to [`ActivationRegistry::new`].
fn default() -> Self {
Self::new()
}
}
/// Owned snapshot of the statistics held by an `ActivationRegistry`.
///
/// Produced by `ActivationRegistry::get_analytics`; the values are plain
/// copies and do not change after the snapshot is taken.
#[derive(Debug, Clone)]
pub struct ActivationAnalytics {
    /// Recorded call count for each activation function, keyed by name.
    pub function_counts: std::collections::HashMap<String, u64>,
    /// Value of the SIMD usage counter at snapshot time.
    pub simd_accelerations: u64,
    /// Value of the parallel usage counter at snapshot time.
    pub parallel_executions: u64,
    /// Value of the GPU usage counter at snapshot time.
    pub gpu_executions: u64,
    /// Value of the approximation usage counter at snapshot time.
    pub approximation_usages: u64,
    /// Average throughput; going by the field name the unit is millions of
    /// elements per second.
    pub avg_throughput_meps: f64,
}
// Process-wide registry instance; empty until the first call to
// `get_activation_registry`.
static ACTIVATION_REGISTRY: OnceLock<ActivationRegistry> = OnceLock::new();
/// Returns the shared [`ActivationRegistry`].
///
/// The registry is built with [`ActivationRegistry::new`] on the first call;
/// later calls return the same instance.
pub fn get_activation_registry() -> &'static ActivationRegistry {
ACTIVATION_REGISTRY.get_or_init(ActivationRegistry::new)
}
/// Selector for the way an activation is executed.
///
/// NOTE(review): nothing in the visible part of this file consumes the enum,
/// so the meaning of each variant is only what its name suggests — confirm
/// against the dispatch code before relying on it.
#[derive(Debug, Clone, Copy)]
// Not every variant is constructed yet; the enum-level allowance covers all of them.
#[allow(dead_code)]
pub enum ActivationStrategy {
    Sequential,
    Simd,
    Parallel,
    SimdParallel,
    Gpu,
    Approximation,
    LookupTable,
}
/// An activation function that can be applied to tensors with element type `T`.
pub trait ActivationFunction<T> {
/// Applies the activation to `x`, returning the resulting tensor or the
/// implementation's error. The input is only borrowed.
fn apply(&self, x: &Tensor<T>) -> Result<Tensor<T>>;
}
pub fn get_activation_performance_report() -> ActivationAnalytics {
get_activation_registry().get_analytics()
}
pub fn reset_activation_counters() {
}