//! reasonkit-core 0.1.8
//!
//! The Reasoning Engine — Auditable Reasoning for Production AI | Rust-Native | Turn Prompts into Protocols
//! Performance Optimization Stack
//!
//! This module provides high-performance primitives for achieving
//! sub-5ms core loop latency targets.
//!
//! # Components
//! - `monoio` - io_uring async runtime (2-3x faster than tokio)
//! - `mimalloc` - Microsoft allocator (5.3x faster than glibc)
//! - `rkyv` - Zero-copy serialization (1.2ns access)
//! - `bitcode` - Fastest traditional serde
//!
//! Enable with: `cargo build --features performance`

use serde::{Deserialize, Serialize};

// Re-exports for direct access
#[cfg(feature = "performance")]
pub use mimalloc::MiMalloc;

#[cfg(feature = "performance")]
pub use rkyv;

#[cfg(feature = "performance")]
pub use bitcode;

#[cfg(feature = "performance")]
pub use monoio;

/// Global allocator configuration using mimalloc
///
/// NOTE(review): `#[global_allocator]` must be applied where the static is
/// *defined*, so downstream binaries declare their own static (as in the
/// example below); this re-exported static cannot itself be selected as the
/// global allocator by callers — confirm whether it is still needed.
///
/// To enable, add this to your main.rs:
/// ```rust,ignore
/// #[global_allocator]
/// static GLOBAL: reasonkit::integrations::performance::MiMalloc =
///     reasonkit::integrations::performance::MiMalloc;
/// ```
#[cfg(feature = "performance")]
pub static MIMALLOC_ALLOCATOR: MiMalloc = MiMalloc;

/// Performance tuning configuration.
///
/// Controls which optional fast-path mechanisms are enabled at runtime.
/// Derives `PartialEq`/`Eq` so configurations can be compared directly
/// (all fields are plain `bool`/`u32` values).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PerformanceConfig {
    /// Use io_uring for async I/O (Linux 5.6+ only)
    pub use_io_uring: bool,
    /// Enable zero-copy serialization
    pub zero_copy: bool,
    /// Thread pinning for cache locality
    pub pin_threads: bool,
    /// Number of io_uring submission-queue entries
    pub io_uring_entries: u32,
}

impl Default for PerformanceConfig {
    /// Conservative defaults: io_uring only where the platform supports it,
    /// zero-copy enabled, thread pinning off, 256 ring entries.
    fn default() -> Self {
        // io_uring is a Linux-only kernel interface; disable it elsewhere.
        let io_uring_supported = cfg!(target_os = "linux");
        Self {
            use_io_uring: io_uring_supported,
            zero_copy: true,
            pin_threads: false,
            io_uring_entries: 256,
        }
    }
}

/// Serialize data using the fastest available method (bitcode serde)
///
/// Uses bitcode's serde-compatible serialization which works with any
/// type implementing serde::Serialize. This is ~5x faster than JSON.
/// (NOTE(review): speedup figure comes from upstream benchmarks, not
/// measured here — verify for this workload.)
///
/// # Errors
/// Returns `bitcode::Error` if the value cannot be encoded.
#[cfg(feature = "performance")]
pub fn fast_serialize<T: Serialize>(value: &T) -> Result<Vec<u8>, bitcode::Error> {
    bitcode::serialize(value)
}

/// Deserialize data using the fastest available method (bitcode serde)
///
/// Uses bitcode's serde-compatible deserialization which works with any
/// type implementing serde::Deserialize. Inverse of [`fast_serialize`].
///
/// # Errors
/// Returns `bitcode::Error` if `bytes` is not a valid encoding of `T`.
#[cfg(feature = "performance")]
pub fn fast_deserialize<T: for<'de> Deserialize<'de>>(bytes: &[u8]) -> Result<T, bitcode::Error> {
    bitcode::deserialize(bytes)
}

/// Reusable byte buffer for high-performance data access.
///
/// Wraps a growable `Vec<u8>` that can be filled, read back as a slice,
/// and cleared for reuse without releasing its allocation.
#[derive(Debug)]
pub struct ZeroCopyBuffer {
    data: Vec<u8>,
}

impl ZeroCopyBuffer {
    /// Create a buffer with `capacity` bytes preallocated.
    pub fn new(capacity: usize) -> Self {
        let data = Vec::with_capacity(capacity);
        Self { data }
    }

    /// Append `data` to the end of the buffer.
    pub fn write(&mut self, data: &[u8]) {
        self.data.extend_from_slice(data);
    }

    /// View the buffered bytes as a slice.
    pub fn as_slice(&self) -> &[u8] {
        self.data.as_slice()
    }

    /// Empty the buffer, keeping its allocation for reuse.
    pub fn clear(&mut self) {
        self.data.clear();
    }

    /// Number of bytes currently buffered.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// `true` when no bytes are buffered.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

/// Rolling-window latency measurement utilities.
///
/// Keeps the most recent `max_samples` durations and reports summary
/// statistics (mean, p99) over that window.
#[derive(Debug)]
pub struct LatencyTracker {
    // VecDeque so evicting the oldest sample is O(1); the previous Vec +
    // `remove(0)` shifted every element on each eviction (O(n) per record).
    samples: std::collections::VecDeque<std::time::Duration>,
    max_samples: usize,
}

impl LatencyTracker {
    /// Create a tracker that retains at most `max_samples` samples.
    pub fn new(max_samples: usize) -> Self {
        Self {
            samples: std::collections::VecDeque::with_capacity(max_samples),
            max_samples,
        }
    }

    /// Record a latency sample, evicting the oldest once the window is full.
    pub fn record(&mut self, duration: std::time::Duration) {
        if self.samples.len() >= self.max_samples {
            // O(1) eviction; also avoids the panic `Vec::remove(0)` would
            // hit when `max_samples == 0` and the buffer is empty.
            self.samples.pop_front();
        }
        self.samples.push_back(duration);
    }

    /// Mean latency over the window, or `None` if no samples were recorded.
    pub fn average(&self) -> Option<std::time::Duration> {
        if self.samples.is_empty() {
            return None;
        }
        let total: std::time::Duration = self.samples.iter().sum();
        Some(total / self.samples.len() as u32)
    }

    /// Approximate p99 latency over the window, or `None` if empty.
    ///
    /// Uses index `floor(len * 0.99)` into the sorted samples, clamped to
    /// the last element (for small windows this is effectively the max).
    pub fn p99(&self) -> Option<std::time::Duration> {
        if self.samples.is_empty() {
            return None;
        }
        let mut sorted: Vec<std::time::Duration> = self.samples.iter().copied().collect();
        sorted.sort_unstable();
        let idx = (sorted.len() as f64 * 0.99) as usize;
        Some(sorted[idx.min(sorted.len() - 1)])
    }

    /// Check if meeting the <5ms p99 latency target (vacuously true when
    /// no samples have been recorded).
    pub fn meets_target(&self) -> bool {
        self.p99()
            .map(|p99| p99 < std::time::Duration::from_millis(5))
            .unwrap_or(true)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Default config should enable zero-copy unconditionally.
    #[test]
    fn test_config_default() {
        let config = PerformanceConfig::default();
        assert!(config.zero_copy);
    }

    // Writes into the buffer must be observable via len()/as_slice().
    #[test]
    fn test_zero_copy_buffer() {
        let mut buf = ZeroCopyBuffer::new(1024);
        buf.write(b"hello");
        assert_eq!(buf.len(), 5);
        assert_eq!(buf.as_slice(), b"hello");
    }

    // 50 samples spanning 0..4.9ms: average exists and p99 stays under the
    // 5ms target.
    #[test]
    fn test_latency_tracker() {
        let mut tracker = LatencyTracker::new(100);
        for i in 0..50 {
            tracker.record(std::time::Duration::from_micros(i * 100));
        }
        assert!(tracker.average().is_some());
        assert!(tracker.meets_target());
    }

    // Round-trip through bitcode's serde-compatible encode/decode
    // (only built when the "performance" feature is enabled).
    #[cfg(feature = "performance")]
    #[test]
    fn test_fast_serde() {
        let data = vec![1u32, 2, 3, 4, 5];
        let encoded = fast_serialize(&data).unwrap();
        let decoded: Vec<u32> = fast_deserialize(&encoded).unwrap();
        assert_eq!(data, decoded);
    }
}