// lcpfs 2026.1.102

// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0
//
// Dynamic Partitioning
// PI-controlled resource allocation.

// Dynamically adjusts partition boundaries to minimize epsilon.
// Thresholds start from fixed initial guesses and are then adapted from the
// outcomes of observed repartitions.
// ============================================================================

use alloc::collections::BTreeMap;
use alloc::vec::Vec;
use lazy_static::lazy_static;
use libm::{fabs, sqrt};
use spin::Mutex;

// ═══════════════════════════════════════════════════════════════════════════
// LEARNED THRESHOLDS (Welford's algorithm - no hardcoded values)
// ═══════════════════════════════════════════════════════════════════════════

/// A decision threshold that is tuned online from observed outcomes.
///
/// Outcome statistics (mean, sample variance, standard error) are maintained
/// incrementally with Welford's algorithm, so no observation history is kept.
#[derive(Clone, Copy)]
pub struct LearnedThreshold {
    /// Current threshold value.
    pub value: f64,
    /// Standard error of the mean outcome, `sqrt(variance / n)`.
    /// Stays at `f64::MAX` until at least two observations were recorded.
    pub uncertainty: f64,
    /// Number of observations recorded so far.
    pub observations: u64,
    /// Step size applied to `value`; shrinks as `observations` grows.
    pub learning_rate: f64,
    /// Running mean of the observed outcomes.
    pub mean_outcome: f64,
    /// Running sample variance of the observed outcomes.
    /// Stays at `f64::MAX` until at least two observations were recorded.
    pub variance: f64,
}

impl LearnedThreshold {
    /// Creates a threshold with no observations.
    ///
    /// `initial_guess` becomes the starting `value`. `uncertainty` and
    /// `variance` start at `f64::MAX`, so [`should_act`](Self::should_act)
    /// cannot return `true` for realistic benefits until data has been seen.
    pub const fn uninformed(initial_guess: f64) -> Self {
        Self {
            value: initial_guess,
            uncertainty: f64::MAX,
            observations: 0,
            learning_rate: 1.0,
            mean_outcome: 0.0,
            variance: f64::MAX,
        }
    }

    /// Records one observation and nudges the threshold.
    ///
    /// * `action_value` - the value at which the action was taken.
    /// * `outcome_delta_epsilon` - resulting change in epsilon; negative means
    ///   the action helped.
    ///
    /// A helpful outcome moves `value` towards `action_value`; any other
    /// outcome moves it away at half the step size. Non-finite inputs are
    /// ignored entirely: a single NaN or infinity would otherwise propagate
    /// into every running statistic and could never be averaged back out.
    pub fn observe(&mut self, action_value: f64, outcome_delta_epsilon: f64) {
        if !action_value.is_finite() || !outcome_delta_epsilon.is_finite() {
            return;
        }

        self.observations += 1;
        let n = self.observations as f64;

        // Welford update: `delta` is measured against the old mean,
        // `delta2` against the updated one.
        let delta = outcome_delta_epsilon - self.mean_outcome;
        self.mean_outcome += delta / n;
        let delta2 = outcome_delta_epsilon - self.mean_outcome;

        if self.observations > 1 {
            // Rebuild the sum of squared deviations from the stored sample
            // variance. For n == 2 the factor is 0, which discards the
            // `f64::MAX` placeholder set by `uninformed`.
            let m2 = self.variance * (n - 2.0) + delta * delta2;
            self.variance = m2 / (n - 1.0);
            self.uncertainty = sqrt(self.variance / n);
        }

        let adjustment = if outcome_delta_epsilon < 0.0 {
            (action_value - self.value) * self.learning_rate
        } else {
            (self.value - action_value) * self.learning_rate * 0.5
        };

        // Two finite but extreme operands can still overflow; keep the old
        // threshold rather than storing an infinity that would never recover.
        let next_value = self.value + adjustment;
        if next_value.is_finite() {
            self.value = next_value;
        }
        self.learning_rate = 1.0 / (1.0 + sqrt(n) * 0.1);
    }

    /// Returns a confidence score in `[0.0, 1.0)`.
    ///
    /// The score is the product of a factor that grows with the number of
    /// observations and a factor that shrinks with `uncertainty`. It is `0.0`
    /// when nothing has been observed.
    pub fn confidence(&self) -> f64 {
        if self.observations == 0 {
            return 0.0;
        }
        let obs_factor = 1.0 - 1.0 / (1.0 + self.observations as f64 * 0.01);
        let unc_factor = 1.0 / (1.0 + fabs(self.uncertainty));
        obs_factor * unc_factor
    }

    /// Decides whether to act.
    ///
    /// Returns `true` when `current_value` has reached the threshold and
    /// `estimated_benefit` exceeds the current `uncertainty`.
    pub fn should_act(&self, current_value: f64, estimated_benefit: f64) -> bool {
        // The small epsilon avoids dividing by an exactly-zero uncertainty.
        let benefit_over_uncertainty = estimated_benefit / (self.uncertainty + 1e-10);
        current_value >= self.value && benefit_over_uncertainty > 1.0
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// PARTITION DEFINITION
// ═══════════════════════════════════════════════════════════════════════════

/// A dynamically sized partition within the pool.
#[derive(Clone, Debug)]
pub struct Partition {
    /// Unique partition identifier
    pub id: u64,
    /// Human-readable partition name
    pub name: alloc::string::String,
    /// Current size in blocks
    pub size_blocks: u64,
    /// Minimum size (cannot shrink below this)
    pub min_size_blocks: u64,
    /// Maximum size (cannot grow above this)
    pub max_size_blocks: u64,
    /// Access frequency (accesses per second)
    pub access_frequency: f64,
    /// Average I/O latency (microseconds)
    pub avg_latency_us: f64,
    /// Space utilization (0.0 to 1.0)
    pub utilization: f64,
    /// Last resize timestamp
    pub last_resize_ms: u64,
}

impl Partition {
    /// Calculates partition pressure (higher = needs more space).
    ///
    /// Pressure is `(utilization * access_frequency) / headroom`, where
    /// `headroom` is the fraction of `max_size_blocks` not yet taken by
    /// `size_blocks`. Returns `f64::MAX` when headroom is below 1%, which
    /// includes a partition at or above its maximum and one whose maximum
    /// is zero.
    pub fn pressure(&self) -> f64 {
        // A zero maximum would make the ratio below 0/0 (NaN).
        if self.max_size_blocks == 0 {
            return f64::MAX;
        }

        // Saturate so a partition that is already larger than its maximum
        // reads as "no headroom" instead of underflowing the subtraction.
        let headroom_blocks = self.max_size_blocks.saturating_sub(self.size_blocks);
        let available_capacity = headroom_blocks as f64 / self.max_size_blocks as f64;
        if available_capacity < 0.01 {
            return f64::MAX;
        }
        (self.utilization * self.access_frequency) / available_capacity
    }

    /// Calculates this partition's epsilon contribution (higher = worse).
    ///
    /// Defined as `avg_latency_us * utilization * access_frequency`: a busy,
    /// full, slow partition contributes the most.
    pub fn epsilon_contribution(&self) -> f64 {
        self.avg_latency_us * self.utilization * self.access_frequency
    }
}

/// Record of a single repartition, kept so later decisions can learn from it.
#[derive(Clone, Copy)]
pub struct RepartitionOutcome {
    /// Which partition was resized
    pub partition_id: u64,
    /// Size in blocks before the resize
    pub old_size_blocks: u64,
    /// Size in blocks after the resize
    pub new_size_blocks: u64,
    /// Pressure of the partition at the moment the resize was decided
    pub pressure_at_decision: f64,
    /// System epsilon captured before the resize
    pub epsilon_before: f64,
    /// System epsilon captured after the resize
    pub epsilon_after: f64,
    /// Duration of the resize
    pub time_taken_ms: u64,
}

impl RepartitionOutcome {
    /// Epsilon change caused by the resize: `epsilon_after - epsilon_before`.
    /// Negative means epsilon dropped (better), positive means it rose (worse).
    pub fn delta_epsilon(&self) -> f64 {
        let Self {
            epsilon_before,
            epsilon_after,
            ..
        } = *self;
        epsilon_after - epsilon_before
    }

    /// Whether the resize lowered epsilon, i.e. `delta_epsilon()` is negative.
    pub fn was_beneficial(&self) -> bool {
        let change = self.delta_epsilon();
        change < 0.0
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// PARTITION ENGINE
// ═══════════════════════════════════════════════════════════════════════════

lazy_static! {
    /// Global partition engine managing all dynamic pool partitions.
    ///
    /// Built with `PartitionEngine::new()` and guarded by a `spin::Mutex`.
    /// The free functions at the bottom of this file lock it for each call.
    pub static ref PARTITION_ENGINE: Mutex<PartitionEngine> = Mutex::new(PartitionEngine::new());
}

/// Engine for managing dynamic partitions with PI-driven adaptive resizing.
///
/// Holds the partition table, a bounded history of past resizes and the
/// learned thresholds that drive the resize decisions.
pub struct PartitionEngine {
    /// Map of all active partitions by ID
    pub partitions: BTreeMap<u64, Partition>,
    /// Total number of blocks in the pool.
    /// Initialised to 0 and only read (by `stats`) in the code visible here.
    pub total_pool_blocks: u64,
    /// Whether a repartition is currently in progress
    pub is_repartitioning: bool,

    // History of past repartitions used for benefit estimation; `repartition`
    // trims it to the most recent 100 entries.
    outcomes: alloc::collections::VecDeque<RepartitionOutcome>,

    // ═══════════════════════════════════════════════════════════════════════
    // LEARNED THRESHOLDS (seeded with initial guesses in `new`)
    // ═══════════════════════════════════════════════════════════════════════
    /// Learned: Pressure threshold to trigger resize
    threshold_pressure: LearnedThreshold,

    /// Minimum time between repartitions (ms).
    /// NOTE(review): no visible code calls `observe` on this threshold, so it
    /// appears to stay at its initial guess - confirm whether that is intended.
    threshold_cooldown: LearnedThreshold,

    /// Learned: Growth increment (fraction of current size, e.g. 0.1 = 10%)
    growth_increment: LearnedThreshold,

    /// Learned: Shrink increment (fraction of current size, e.g. 0.05 = 5%)
    shrink_increment: LearnedThreshold,

    /// Most recent system epsilon supplied through `update_epsilon`
    current_epsilon: f64,
}

impl Default for PartitionEngine {
    fn default() -> Self {
        Self::new()
    }
}

impl PartitionEngine {
    /// Builds an empty engine: no partitions, an empty outcome history with
    /// room for 100 entries, and every threshold seeded with an initial guess.
    pub fn new() -> Self {
        // Starting points only; none of these has any observations behind it.
        let threshold_pressure = LearnedThreshold::uninformed(100.0); // pressure trigger
        let threshold_cooldown = LearnedThreshold::uninformed(300_000.0); // 5 minutes in ms
        let growth_increment = LearnedThreshold::uninformed(0.1); // grow by 10%
        let shrink_increment = LearnedThreshold::uninformed(0.05); // shrink by 5%

        let outcomes = alloc::collections::VecDeque::with_capacity(100);

        Self {
            partitions: BTreeMap::new(),
            total_pool_blocks: 0,
            is_repartitioning: false,
            outcomes,
            threshold_pressure,
            threshold_cooldown,
            growth_increment,
            shrink_increment,
            current_epsilon: 0.0,
        }
    }

    /// Stores `epsilon` as the current system epsilon.
    ///
    /// `repartition` reads this value when it records an outcome, so callers
    /// are expected to keep it up to date.
    pub fn update_epsilon(&mut self, epsilon: f64) {
        self.current_epsilon = epsilon;
    }

    /// Creates a new partition and returns its ID.
    ///
    /// Statistics (`access_frequency`, `avg_latency_us`, `utilization`) start
    /// at zero and are filled in later through `update_stats`.
    ///
    /// # Errors
    ///
    /// Returns an error when the bounds are unusable:
    /// * `max_size_blocks` is zero,
    /// * `min_size_blocks` exceeds `max_size_blocks`,
    /// * `size_blocks` exceeds `max_size_blocks`,
    ///
    /// or when no further ID can be generated.
    pub fn create_partition(
        &mut self,
        name: &str,
        size_blocks: u64,
        min_size_blocks: u64,
        max_size_blocks: u64,
    ) -> Result<u64, &'static str> {
        // `Partition::pressure` divides by the maximum and subtracts the
        // current size from it, so these combinations must never be stored.
        if max_size_blocks == 0 {
            return Err("Partition max size must be non-zero");
        }
        if min_size_blocks > max_size_blocks {
            return Err("Partition min size exceeds max size");
        }
        if size_blocks > max_size_blocks {
            return Err("Partition size exceeds max size");
        }

        // Derive the ID from the largest key rather than the map length:
        // `partitions` is public, and after any removal `len()` could name a
        // key that is still in use and silently overwrite that partition.
        let id = match self.partitions.keys().next_back() {
            Some(&last) => last.checked_add(1).ok_or("Partition ID space exhausted")?,
            None => 0,
        };

        let partition = Partition {
            id,
            name: alloc::string::String::from(name),
            size_blocks,
            min_size_blocks,
            max_size_blocks,
            access_frequency: 0.0,
            avg_latency_us: 0.0,
            utilization: 0.0,
            last_resize_ms: 0,
        };

        self.partitions.insert(id, partition);
        crate::lcpfs_println!("[ PARTITION] Created partition '{}' ({})", name, id);

        Ok(id)
    }

    /// Overwrites the runtime statistics of one partition.
    ///
    /// Does nothing when `partition_id` is not in the partition table.
    pub fn update_stats(
        &mut self,
        partition_id: u64,
        access_frequency: f64,
        avg_latency_us: f64,
        utilization: f64,
    ) {
        let target = match self.partitions.get_mut(&partition_id) {
            Some(found) => found,
            None => return,
        };

        target.access_frequency = access_frequency;
        target.avg_latency_us = avg_latency_us;
        target.utilization = utilization;
    }

    /// Decides whether the given partition should be resized now.
    ///
    /// Returns `false` when a repartition is already running, the partition
    /// is unknown, or the cooldown since its last resize has not elapsed.
    /// Otherwise the pressure threshold must both vote to act and have a
    /// confidence above 0.1.
    pub fn should_repartition(&self, partition_id: u64, current_time_ms: u64) -> bool {
        if self.is_repartitioning {
            return false;
        }

        if let Some(candidate) = self.partitions.get(&partition_id) {
            // Respect the cooldown window first; it is the cheapest check.
            let elapsed_ms = current_time_ms.saturating_sub(candidate.last_resize_ms) as f64;
            if elapsed_ms < self.threshold_cooldown.value {
                return false;
            }

            let load = candidate.pressure();
            let expected_gain = self.estimate_repartition_benefit(partition_id, load);

            let gate = &self.threshold_pressure;
            gate.should_act(load, expected_gain) && gate.confidence() > 0.1
        } else {
            false
        }
    }

    /// Estimate epsilon reduction from repartitioning
    fn estimate_repartition_benefit(&self, partition_id: u64, current_pressure: f64) -> f64 {
        // Look at past repartitions with similar pressure
        let similar_outcomes: Vec<_> = self
            .outcomes
            .iter()
            .filter(|o| o.partition_id == partition_id)
            .filter(|o| fabs(o.pressure_at_decision - current_pressure) < current_pressure * 0.3)
            .collect();

        if similar_outcomes.is_empty() {
            // No prior data - estimate based on pressure
            return current_pressure * 0.1;
        }

        // Average epsilon reduction from similar repartitions
        let beneficial: Vec<_> = similar_outcomes
            .iter()
            .filter(|o| o.was_beneficial())
            .collect();

        if beneficial.is_empty() {
            return 0.0;
        }

        let avg_benefit: f64 =
            beneficial.iter().map(|o| -o.delta_epsilon()).sum::<f64>() / beneficial.len() as f64;

        avg_benefit.max(0.0)
    }

    /// Resizes one partition and records the outcome for learning.
    ///
    /// The partition grows by `growth_increment` when its pressure exceeds the
    /// learned pressure threshold and shrinks by `shrink_increment` otherwise,
    /// clamped to `max_size_blocks` / `min_size_blocks` respectively. When the
    /// clamped size equals the current size nothing happens.
    ///
    /// # Errors
    ///
    /// * `"Repartition already in progress"` when `is_repartitioning` is set.
    /// * `"Partition not found"` when `partition_id` is unknown.
    pub fn repartition(
        &mut self,
        partition_id: u64,
        current_time_ms: u64,
    ) -> Result<(), &'static str> {
        if self.is_repartitioning {
            return Err("Repartition already in progress");
        }

        let partition = self
            .partitions
            .get(&partition_id)
            .ok_or("Partition not found")?;

        let pressure = partition.pressure();
        let old_size = partition.size_blocks;
        let epsilon_before = self.current_epsilon;

        // Determine new size based on pressure
        let new_size = if pressure > self.threshold_pressure.value {
            // Grow partition. The float-to-integer cast saturates (a huge
            // learned increment yields `u64::MAX`), so the addition has to
            // saturate as well instead of overflowing.
            let growth = (old_size as f64 * self.growth_increment.value) as u64;
            old_size
                .saturating_add(growth)
                .min(partition.max_size_blocks)
        } else {
            // Shrink partition
            let shrink = (old_size as f64 * self.shrink_increment.value) as u64;
            old_size
                .saturating_sub(shrink)
                .max(partition.min_size_blocks)
        };

        if new_size == old_size {
            return Ok(()); // No change needed
        }

        crate::lcpfs_println!(
            "[ PARTITION] Resizing partition {} from {} to {} blocks (pressure={:.2})",
            partition_id,
            old_size,
            new_size,
            pressure
        );

        self.is_repartitioning = true;
        let start_time = crate::get_time();

        // Actually resize the partition (allocate/deallocate blocks)
        let blocks_changed = if new_size > old_size {
            // Growing: allocate additional blocks
            self.allocate_blocks(partition_id, new_size - old_size)
        } else {
            // Shrinking: deallocate blocks
            self.deallocate_blocks(partition_id, old_size - new_size)
        };

        // presumably `get_time` is in nanoseconds, given the division - TODO confirm
        let time_taken_ms = (crate::get_time() - start_time) / 1_000_000;

        // NOTE(review): the size is set to `new_size` even when
        // `blocks_changed` is smaller than requested (both helpers cap their
        // work at 100 blocks and allocation can report 0).
        if let Some(p) = self.partitions.get_mut(&partition_id) {
            p.size_blocks = new_size;
            p.last_resize_ms = current_time_ms;
        }

        self.is_repartitioning = false;

        crate::lcpfs_println!(
            "[ PARTITION] Resize complete: {} blocks changed in {} ms",
            blocks_changed,
            time_taken_ms
        );

        // NOTE(review): nothing in this method updates `current_epsilon`, so
        // `epsilon_after` equals `epsilon_before` here and the recorded delta
        // is 0.0. Learning from real effects would need the outcome to be
        // completed after a later `update_epsilon` call.
        let outcome = RepartitionOutcome {
            partition_id,
            old_size_blocks: old_size,
            new_size_blocks: new_size,
            pressure_at_decision: pressure,
            epsilon_before,
            epsilon_after: self.current_epsilon,
            time_taken_ms,
        };

        self.learn_from_outcome(&outcome);
        self.outcomes.push_back(outcome);

        // Keep only the most recent 100 outcomes.
        while self.outcomes.len() > 100 {
            self.outcomes.pop_front();
        }

        Ok(())
    }

    /// Learn from repartition outcome
    fn learn_from_outcome(&mut self, outcome: &RepartitionOutcome) {
        let delta = outcome.delta_epsilon();

        // Learn pressure threshold
        self.threshold_pressure
            .observe(outcome.pressure_at_decision, delta);

        // If repartition was bad, increase threshold (be more conservative)
        if !outcome.was_beneficial() {
            self.threshold_pressure
                .observe(outcome.pressure_at_decision * 1.5, 0.0);
        }

        // Learn growth/shrink increments
        let size_change_ratio = (outcome.new_size_blocks as f64 - outcome.old_size_blocks as f64)
            / outcome.old_size_blocks as f64;

        if size_change_ratio > 0.0 {
            // Was a growth
            self.growth_increment
                .observe(size_change_ratio.abs(), delta);
        } else {
            // Was a shrink
            self.shrink_increment
                .observe(size_change_ratio.abs(), delta);
        }
    }

    /// Simplified stand-in for growing a partition; returns the number of
    /// blocks that were written successfully.
    ///
    /// Writes a zeroed 512-byte buffer to at most 100 blocks on device 0,
    /// starting at block `partition_id * 10000`. Returns 0 without logging
    /// when the `BLOCK_DEVICES` lock cannot be taken immediately.
    ///
    /// A full implementation would instead query the metaslab allocator for
    /// free blocks, mark them as owned by this partition and update the space
    /// maps and allocation bitmaps.
    fn allocate_blocks(&mut self, partition_id: u64, count: u64) -> u64 {
        use crate::BLOCK_DEVICES;

        let mut device_table = match BLOCK_DEVICES.try_lock() {
            Some(guard) => guard,
            None => return 0,
        };

        let written = match device_table.get_mut(0) {
            Some(device) => {
                // Each partition gets its own 10k block range.
                let first_block = partition_id * 10000;
                let zeroed = [0u8; 512];
                let mut ok_writes = 0u64;

                // Capped at 100 blocks to keep the operation short.
                for offset in 0..count.min(100) {
                    let target = (first_block + offset) as usize;
                    if device.write_block(target, &zeroed).is_ok() {
                        ok_writes += 1;
                    }
                }
                ok_writes
            }
            None => 0,
        };

        crate::lcpfs_println!(
            "[ PARTITION] Allocated {} blocks for partition {}",
            written,
            partition_id
        );
        written
    }

    /// Simplified stand-in for shrinking a partition; returns the number of
    /// blocks counted as freed, which is `count` capped at 100.
    ///
    /// No device is touched here. A full implementation would check that the
    /// blocks are unreferenced, mark them free in the metaslab allocator,
    /// update the space maps and issue TRIM/discard where supported.
    fn deallocate_blocks(&mut self, partition_id: u64, count: u64) -> u64 {
        let released = if count > 100 { 100 } else { count };

        crate::lcpfs_println!(
            "[ PARTITION] Freed {} blocks from partition {}",
            released,
            partition_id
        );

        released
    }

    /// Get current statistics
    pub fn stats(&self) -> PartitionStats {
        let total_partitions = self.partitions.len();
        let total_epsilon: f64 = self
            .partitions
            .values()
            .map(|p| p.epsilon_contribution())
            .sum();

        PartitionStats {
            total_partitions,
            total_pool_blocks: self.total_pool_blocks,
            total_epsilon,
            is_repartitioning: self.is_repartitioning,
            pressure_threshold: self.threshold_pressure.value,
            pressure_confidence: self.threshold_pressure.confidence(),
        }
    }
}

/// Statistics snapshot of the partition engine state, as produced by
/// `PartitionEngine::stats`.
#[derive(Debug, Clone, Copy)]
pub struct PartitionStats {
    /// Number of partitions in the engine's partition table
    pub total_partitions: usize,
    /// Copy of the engine's `total_pool_blocks`
    pub total_pool_blocks: u64,
    /// Sum of epsilon contributions from all partitions
    pub total_epsilon: f64,
    /// Whether repartitioning was in progress when the snapshot was taken
    pub is_repartitioning: bool,
    /// Current learned pressure threshold value
    pub pressure_threshold: f64,
    /// Confidence level in the pressure threshold
    pub pressure_confidence: f64,
}

// ═══════════════════════════════════════════════════════════════════════════
// PUBLIC API
// ═══════════════════════════════════════════════════════════════════════════

/// Stores the latest system epsilon in the global [`PARTITION_ENGINE`].
pub fn update_epsilon(epsilon: f64) {
    let mut engine = PARTITION_ENGINE.lock();
    engine.update_epsilon(epsilon);
}

/// Creates a partition in the global [`PARTITION_ENGINE`] and returns its ID.
pub fn create_partition(
    name: &str,
    size_blocks: u64,
    min_size_blocks: u64,
    max_size_blocks: u64,
) -> Result<u64, &'static str> {
    let mut engine = PARTITION_ENGINE.lock();
    engine.create_partition(name, size_blocks, min_size_blocks, max_size_blocks)
}

/// Overwrites the statistics of one partition in the global [`PARTITION_ENGINE`].
pub fn update_stats(
    partition_id: u64,
    access_frequency: f64,
    avg_latency_us: f64,
    utilization: f64,
) {
    let mut engine = PARTITION_ENGINE.lock();
    engine.update_stats(partition_id, access_frequency, avg_latency_us, utilization);
}

/// Asks the global [`PARTITION_ENGINE`] whether the partition should be resized now.
pub fn should_repartition(partition_id: u64, current_time_ms: u64) -> bool {
    let engine = PARTITION_ENGINE.lock();
    engine.should_repartition(partition_id, current_time_ms)
}

/// Resizes the partition through the global [`PARTITION_ENGINE`].
pub fn repartition(partition_id: u64, current_time_ms: u64) -> Result<(), &'static str> {
    let mut engine = PARTITION_ENGINE.lock();
    engine.repartition(partition_id, current_time_ms)
}

/// Returns a statistics snapshot of the global [`PARTITION_ENGINE`].
pub fn stats() -> PartitionStats {
    let engine = PARTITION_ENGINE.lock();
    engine.stats()
}