realizar 0.8.5

Pure Rust ML inference engine built from scratch - model serving for GGUF and safetensors

impl<T> PriorityRequest<T> {
    /// Create a new priority request
    #[must_use]
    pub fn new(priority: Priority, data: T) -> Self {
        Self {
            priority,
            sequence: 0, // Will be set by queue
            data,
        }
    }

    /// Get the priority level
    #[must_use]
    pub fn priority(&self) -> Priority {
        self.priority
    }

    /// Get reference to request data
    #[must_use]
    pub fn data(&self) -> &T {
        &self.data
    }

    /// Consume and return the data
    #[must_use]
    pub fn into_data(self) -> T {
        self.data
    }
}

/// Priority request queue for request scheduling (M27 - IMP-064)
///
/// Implements priority-based scheduling with FIFO ordering for same-priority requests.
#[derive(Debug)]
pub struct PriorityRequestQueue<T> {
    items: Vec<PriorityRequest<T>>,
    next_sequence: u64,
}

impl<T> PriorityRequestQueue<T> {
    /// Create a new priority request queue
    #[must_use]
    pub fn new() -> Self {
        Self {
            items: Vec::new(),
            next_sequence: 0,
        }
    }

    /// Get number of items in queue
    #[must_use]
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Check if queue is empty
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.items.is_empty()
    }

    /// Enqueue a request with priority
    pub fn enqueue(&mut self, mut request: PriorityRequest<T>) {
        request.sequence = self.next_sequence;
        self.next_sequence += 1;
        self.items.push(request);
    }

    /// Dequeue the highest priority request
    ///
    /// Returns the request with highest priority. For equal priorities,
    /// returns the earliest enqueued (FIFO).
    pub fn dequeue_highest(&mut self) -> Option<PriorityRequest<T>> {
        if self.items.is_empty() {
            return None;
        }

        // Find index of highest priority (and earliest sequence for ties)
        let mut best_idx = 0;
        for (i, item) in self.items.iter().enumerate().skip(1) {
            let best = &self.items[best_idx];
            if item.priority > best.priority
                || (item.priority == best.priority && item.sequence < best.sequence)
            {
                best_idx = i;
            }
        }

        Some(self.items.swap_remove(best_idx))
    }
}

impl<T> Default for PriorityRequestQueue<T> {
    fn default() -> Self {
        Self::new()
    }
}

/// Token bucket rate limiter for throughput control (M27 - IMP-065)
///
/// Implements token bucket algorithm with configurable rate and burst capacity.
#[derive(Debug)]
pub struct TokenRateLimiter {
    tokens: u32,
    capacity: u32,
    rate: f64, // tokens per second
    last_refill: std::time::Instant,
}

impl TokenRateLimiter {
    /// Create a new rate limiter
    ///
    /// # Arguments
    /// * `rate` - Tokens per second to refill
    /// * `burst_capacity` - Maximum tokens that can accumulate
    #[must_use]
    pub fn new(rate: f64, burst_capacity: u32) -> Self {
        Self {
            tokens: burst_capacity, // Start full
            capacity: burst_capacity,
            rate,
            last_refill: std::time::Instant::now(),
        }
    }

    /// Get current available tokens
    #[must_use]
    pub fn tokens_available(&self) -> u32 {
        self.tokens
    }

    /// Try to acquire tokens
    ///
    /// Returns `true` if tokens were acquired, `false` if insufficient tokens.
    pub fn try_acquire(&mut self, count: u32) -> bool {
        if self.tokens >= count {
            self.tokens -= count;
            true
        } else {
            false
        }
    }

    /// Refill tokens based on elapsed time
    ///
    /// Call periodically to add tokens at the configured rate.
    pub fn refill(&mut self) {
        let now = std::time::Instant::now();
        let elapsed = now.duration_since(self.last_refill).as_secs_f64();
        let new_tokens = (elapsed * self.rate) as u32;

        if new_tokens > 0 {
            self.tokens = (self.tokens + new_tokens).min(self.capacity);
            self.last_refill = now;
        }
    }
}

/// Allocation ID for resource tracking
pub type AllocationId = u64;

/// Resource allocation record
#[derive(Debug, Clone)]
struct ResourceAllocation {
    memory: u64,
    compute: u32,
}

/// Resource usage tracker for memory and compute (M27 - IMP-066)
///
/// Tracks resource allocations and provides utilization metrics.
#[derive(Debug)]
pub struct ResourceTracker {
    memory_capacity: u64,
    compute_capacity: u32,
    memory_used: u64,
    compute_used: u32,
    allocations: std::collections::HashMap<AllocationId, ResourceAllocation>,
    next_id: AllocationId,
}

impl ResourceTracker {
    /// Create a new resource tracker
    ///
    /// # Arguments
    /// * `memory_capacity` - Total memory capacity in bytes
    /// * `compute_capacity` - Total compute capacity (0-100 percentage)
    #[must_use]
    pub fn new(memory_capacity: u64, compute_capacity: u32) -> Self {
        Self {
            memory_capacity,
            compute_capacity,
            memory_used: 0,
            compute_used: 0,
            allocations: std::collections::HashMap::new(),
            next_id: 0,
        }
    }

    /// Get current memory usage in bytes
    #[must_use]
    pub fn memory_usage(&self) -> u64 {
        self.memory_used
    }

    /// Get current compute usage (0-100)
    #[must_use]
    pub fn compute_usage(&self) -> u32 {
        self.compute_used
    }

    /// Check if allocation is possible
    #[must_use]
    pub fn can_allocate(&self, memory: u64, compute: u32) -> bool {
        self.memory_used + memory <= self.memory_capacity
            && self.compute_used + compute <= self.compute_capacity
    }

    /// Allocate resources
    ///
    /// Returns allocation ID if successful, None if insufficient resources.
    pub fn allocate(&mut self, memory: u64, compute: u32) -> Option<AllocationId> {
        if !self.can_allocate(memory, compute) {
            return None;
        }

        let id = self.next_id;
        self.next_id += 1;

        self.memory_used += memory;
        self.compute_used += compute;
        self.allocations
            .insert(id, ResourceAllocation { memory, compute });

        Some(id)
    }

    /// Release allocated resources
    pub fn release(&mut self, id: AllocationId) {
        if let Some(alloc) = self.allocations.remove(&id) {
            self.memory_used = self.memory_used.saturating_sub(alloc.memory);
            self.compute_used = self.compute_used.saturating_sub(alloc.compute);
        }
    }

    /// Get usage as percentages
    ///
    /// Returns (memory_percentage, compute_percentage)
    #[must_use]
    pub fn usage_percentage(&self) -> (f64, f64) {
        let mem_pct = if self.memory_capacity > 0 {
            (self.memory_used as f64 / self.memory_capacity as f64) * 100.0
        } else {
            0.0
        };
        let compute_pct = if self.compute_capacity > 0 {
            (self.compute_used as f64 / self.compute_capacity as f64) * 100.0
        } else {
            0.0
        };
        (mem_pct, compute_pct)
    }
}

impl Default for ResourceTracker {
    fn default() -> Self {
        // Default: 8GB memory, 100% compute
        Self::new(8 * 1024 * 1024 * 1024, 100)
    }
}