lcpfs 2026.1.102

LCP File System - A ZFS-inspired copy-on-write filesystem for Rust
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0

//! # Adaptive Replacement Cache (ARC)
//!
//! This module implements the Adaptive Replacement Cache algorithm, a
//! self-tuning cache that balances recency and frequency of access.
//!
//! ## Overview
//!
//! ARC was developed at IBM Research and is used by ZFS for its block cache.
//! Unlike simple LRU caches, ARC maintains "ghost lists" that track recently
//! evicted items, allowing it to adapt to changing workload patterns.
//!
//! ## How It Works
//!
//! ARC maintains four lists:
//!
//! - **T1**: Recently accessed items (seen once) - recency
//! - **T2**: Frequently accessed items (seen multiple times) - frequency
//! - **B1**: Ghost list for T1 evictions (metadata only)
//! - **B2**: Ghost list for T2 evictions (metadata only)
//!
//! ```text
//! ┌─────────────────────────────────────────────────────────────┐
//! │                      ARC Cache                              │
//! ├──────────────────────┬──────────────────────────────────────┤
//! │        T1            │               T2                     │
//! │   (MRU - Recency)    │         (MFU - Frequency)            │
//! ├──────────────────────┼──────────────────────────────────────┤
//! │        B1            │               B2                     │
//! │   (Ghost - Recency)  │         (Ghost - Frequency)          │
//! └──────────────────────┴──────────────────────────────────────┘
//! ```
//!
//! ## Adaptation
//!
//! The parameter `p` controls the target size of T1 vs T2:
//!
//! - Cache miss in B1 → workload favors recency → increase `p`
//! - Cache miss in B2 → workload favors frequency → decrease `p`
//!
//! ## Usage
//!
//! ```rust,ignore
//! use lcpfs::lcpfs_arc::ARC;
//!
//! let mut arc = ARC.lock();
//!
//! // Try to read from cache
//! if let Some(data) = arc.read(&dva) {
//!     // Cache hit
//! } else {
//!     // Cache miss - read from disk
//!     let data = read_from_disk(&dva);
//!     arc.cache(dva, data);
//! }
//! ```
//!
//! ## Performance
//!
//! ARC provides:
//! - O(1) lookup via hash map
//! - Self-tuning between scan-resistant and frequency-based caching
//! - Efficient memory usage through ghost lists (metadata only)

use crate::fscore::structs::Dva;
use alloc::collections::{BTreeMap, VecDeque};
use alloc::vec::Vec;
use core::cmp;
use lazy_static::lazy_static;
use spin::Mutex;

// ═══════════════════════════════════════════════════════════════════════════════
// CONFIGURATION
// ═══════════════════════════════════════════════════════════════════════════════

/// Maximum cache size in bytes (100 GiB).
///
/// This is a soft limit - the cache will evict entries to stay under this size.
///
/// NOTE(review): `100 * 1024 * 1024 * 1024` does not fit in a 32-bit
/// `usize`, so this constant fails to compile on 32-bit targets — confirm
/// the supported targets are 64-bit only.
const ARC_MAX_BYTES: usize = 100 * 1024 * 1024 * 1024;

/// Number of shards for parallel cache access.
///
/// DVAs are hashed to a shard (see `get_shard_index`) so independent
/// lookups can proceed under different locks.
const NUM_SHARDS: usize = 16;

/// Maximum bytes per shard (total budget split evenly across shards).
const SHARD_MAX_BYTES: usize = ARC_MAX_BYTES / NUM_SHARDS;

// ═══════════════════════════════════════════════════════════════════════════════
// ARC HEADER
// ═══════════════════════════════════════════════════════════════════════════════

/// Metadata header for a cached block.
///
/// Each cached block has an associated header that tracks access patterns
/// and cache state.
/// Metadata header for a cached block.
///
/// Each cached block has an associated header that tracks access patterns
/// and cache state. The header owns the block payload itself (`data`);
/// the T1/T2/B1/B2 lists in [`Arc`] store only the DVA keys.
pub struct ArcHeader {
    /// The cached block data.
    pub data: Vec<u8>,
    /// Number of times this block has been accessed.
    pub access_count: u64,
    /// Size of the cached data in bytes (equals `data.len()` at insert time).
    pub size: usize,
    /// Whether this block is deduplicated.
    /// NOTE(review): always set to `false` in this module — presumably
    /// toggled by dedup code elsewhere; confirm before relying on it.
    pub is_dedup: bool,
    /// Which list this entry belongs to (1 = T1/recency, 2 = T2/frequency).
    /// No other values are ever assigned in this module.
    pub list_id: u8,
}

// ═══════════════════════════════════════════════════════════════════════════════
// ARC CACHE
// ═══════════════════════════════════════════════════════════════════════════════

/// Adaptive Replacement Cache implementation.
///
/// This cache automatically adapts to workload patterns by tracking both
/// recently used (recency) and frequently used (frequency) items.
/// Adaptive Replacement Cache implementation.
///
/// This cache automatically adapts to workload patterns by tracking both
/// recently used (recency) and frequently used (frequency) items.
///
/// NOTE(review): the type name shadows the ubiquitous `alloc::sync::Arc`;
/// consider renaming (e.g. `ArcCache`) in a future API revision to avoid
/// confusing imports at call sites.
pub struct Arc {
    /// Index mapping DVAs to cached headers.
    /// Invariant: every key here also appears in exactly one of `t1`/`t2`.
    pub index: BTreeMap<Dva, ArcHeader>,

    /// T1: Most Recently Used list (single-access items).
    /// Front = LRU (evict from here), Back = MRU (add here)
    pub t1: VecDeque<Dva>,

    /// T2: Most Frequently Used list (multi-access items).
    /// Front = LRU (evict from here), Back = MRU (add here)
    pub t2: VecDeque<Dva>,

    /// B1: Ghost list for T1 (tracks recently evicted from T1).
    /// Metadata only — no block data is retained for ghosts.
    pub b1: VecDeque<Dva>,

    /// B2: Ghost list for T2 (tracks recently evicted from T2).
    /// Metadata only — no block data is retained for ghosts.
    pub b2: VecDeque<Dva>,

    /// Current cache size in bytes (sum of `ArcHeader::size` over `index`).
    pub current_size: usize,

    /// Total cache hits since initialization.
    pub hits: u64,

    /// Total cache misses since initialization.
    pub misses: u64,

    /// Target size for T1 (adaptive parameter), counted in entries.
    ///
    /// This value is automatically adjusted based on workload:
    /// - Increases when B1 hits occur (workload favors recency)
    /// - Decreases when B2 hits occur (workload favors frequency)
    pub p: usize,

    /// Maximum size for this shard in bytes (soft limit enforced by eviction).
    pub max_bytes: usize,
}

lazy_static! {
    /// Global ARC instance.
    ///
    /// All block cache operations go through this shared cache.
    pub static ref ARC: Mutex<Arc> = Mutex::new(Arc::new());

    /// Sharded ARC instances for parallel access.
    ///
    /// Each shard is capped at `SHARD_MAX_BYTES` so the shards together
    /// respect `ARC_MAX_BYTES`. Built with `core::array::from_fn` so the
    /// array length always tracks `NUM_SHARDS`, instead of a hand-copied
    /// list of 16 identical initializers that would silently desync if
    /// the constant changed.
    pub static ref SHARDED_ARC: [Mutex<Arc>; NUM_SHARDS] =
        core::array::from_fn(|_| Mutex::new(Arc::new_with_max(SHARD_MAX_BYTES)));
}

// Sharded API functions

/// Calculate shard index for a DVA.
///
/// Mixes the vdev id with the offset (scaled by the 32-bit golden-ratio
/// constant 0x9e3779b9) and reduces the result modulo `NUM_SHARDS`.
#[inline]
fn get_shard_index(dva: &Dva) -> usize {
    let vdev_bits = dva.vdev as usize;
    let offset_bits = (dva.offset as usize).wrapping_mul(0x9e3779b9);
    (vdev_bits ^ offset_bits) % NUM_SHARDS
}

/// Read from the sharded cache.
///
/// Routes the lookup to the shard owning `dva`; returns the cached data
/// on a hit, `None` on a miss.
pub fn arc_read(dva: &Dva) -> Option<Vec<u8>> {
    let mut shard = SHARDED_ARC[get_shard_index(dva)].lock();
    shard.read(dva)
}

/// Write to the sharded cache.
///
/// Routes the insertion to the shard owning `dva`.
pub fn arc_cache(dva: Dva, data: Vec<u8>) {
    let idx = get_shard_index(&dva);
    let mut shard = SHARDED_ARC[idx].lock();
    shard.cache(dva, data);
}

/// Get aggregate statistics across all shards.
///
/// Returns `(hits, misses, hit_rate, current_size_bytes)` where the
/// counters are summed over every shard and the hit rate is recomputed
/// from the aggregate totals.
pub fn arc_stats() -> (u64, u64, f64, usize) {
    let (hits, misses, size) = SHARDED_ARC
        .iter()
        .fold((0u64, 0u64, 0usize), |(h, m, s), shard| {
            let (sh, sm, _, ss) = shard.lock().stats();
            (h + sh, m + sm, s + ss)
        });

    let lookups = hits + misses;
    let hit_rate = if lookups == 0 {
        0.0
    } else {
        hits as f64 / lookups as f64
    };
    (hits, misses, hit_rate, size)
}

impl Arc {
    /// Create a new, empty ARC cache with the default max size
    /// (`ARC_MAX_BYTES`).
    pub fn new() -> Self {
        Self::new_with_max(ARC_MAX_BYTES)
    }

    /// Create a new, empty ARC cache with the specified max size in bytes.
    pub fn new_with_max(max_bytes: usize) -> Self {
        Self {
            index: BTreeMap::new(),
            t1: VecDeque::new(),
            t2: VecDeque::new(),
            b1: VecDeque::new(),
            b2: VecDeque::new(),
            current_size: 0,
            hits: 0,
            misses: 0,
            p: 0,
            max_bytes,
        }
    }

    /// Read a block from the cache.
    ///
    /// # Arguments
    ///
    /// * `dva` - Device Virtual Address of the block to read
    ///
    /// # Returns
    ///
    /// - `Some(data)` - The cached block data (cache hit)
    /// - `None` - Block not in cache (cache miss)
    ///
    /// # Side Effects
    ///
    /// - On hit: Increments access count, promotes T1 entries to T2,
    ///   refreshes T2 entries to the MRU end
    /// - On miss: Checks ghost lists and adapts the `p` parameter
    pub fn read(&mut self, dva: &Dva) -> Option<Vec<u8>> {
        if let Some(header) = self.index.get_mut(dva) {
            header.access_count += 1;
            self.hits += 1;

            // `header` borrows only `self.index`, so the list fields can
            // still be mutated here (disjoint field borrows).
            if header.list_id == 1 {
                // Second access: promote from T1 (recency) to T2 (frequency).
                if let Some(pos) = self.t1.iter().position(|x| x == dva) {
                    self.t1.remove(pos);
                    self.t2.push_back(*dva); // Add to MRU end
                    header.list_id = 2;
                }
            } else if header.list_id == 2 {
                // Already frequent: refresh to the MRU end of T2.
                if let Some(pos) = self.t2.iter().position(|x| x == dva) {
                    self.t2.remove(pos);
                    self.t2.push_back(*dva);
                }
            }

            Some(header.data.clone())
        } else {
            // Cache miss - a hit in a ghost list steers the adaptive
            // parameter toward the access pattern that would have kept
            // this block resident.
            if self.b1.contains(dva) || self.b2.contains(dva) {
                self.adapt_on_miss(dva);
            }
            self.misses += 1;
            None
        }
    }

    /// Add a block to the cache.
    ///
    /// New blocks are added to T1 (recency list). If the block is accessed
    /// again, it will be promoted to T2 (frequency list).
    ///
    /// # Arguments
    ///
    /// * `dva` - Device Virtual Address of the block
    /// * `data` - Block data to cache
    ///
    /// # Side Effects
    ///
    /// May evict other blocks to make room if the cache is full.
    /// Re-caching an already-resident DVA replaces its data in place.
    pub fn cache(&mut self, dva: Dva, data: Vec<u8>) {
        let size = data.len();

        // Evict entries until we have room. `evict_one` reports whether
        // it made progress; bail out once nothing is evictable (e.g. a
        // single block larger than `max_bytes`, or T1 and T2 already
        // empty) — the original unconditional loop spun forever here.
        while self.current_size + size > self.max_bytes {
            if !self.evict_one() {
                break;
            }
        }

        // A freshly cached block must not linger in a ghost list;
        // otherwise the same DVA could sit in B1 and T1 simultaneously
        // and later be pushed into a ghost list twice.
        if let Some(pos) = self.b1.iter().position(|x| x == &dva) {
            self.b1.remove(pos);
        }
        if let Some(pos) = self.b2.iter().position(|x| x == &dva) {
            self.b2.remove(pos);
        }

        let header = ArcHeader {
            data,
            access_count: 1,
            size,
            is_dedup: false,
            list_id: 1, // New entries start in T1
        };

        match self.index.insert(dva, header) {
            None => {
                self.current_size += size;
                self.t1.push_back(dva); // Add to MRU end
            }
            Some(old) => {
                // Replacing an existing entry: fix the byte accounting
                // (the original leaked `old.size` and never counted the
                // new size) and keep the entry's actual list membership
                // and access history instead of resetting them, since
                // the DVA is still queued in its old T1/T2 list.
                self.current_size = self.current_size.saturating_sub(old.size) + size;
                if let Some(h) = self.index.get_mut(&dva) {
                    h.list_id = old.list_id;
                    h.access_count = old.access_count + 1;
                }
            }
        }
    }

    /// Evict one entry from the cache.
    ///
    /// Eviction policy:
    /// 1. If T1 is larger than target (p) or B1 >= B2: evict from T1 → B1
    /// 2. Otherwise: evict from T2 → B2
    ///
    /// Evicts from the FRONT (LRU end) of the lists.
    ///
    /// Returns `true` if an entry was evicted, `false` if both resident
    /// lists were empty and no progress could be made (lets `cache` avoid
    /// an infinite loop).
    fn evict_one(&mut self) -> bool {
        let t1_size = self.t1.len();
        let b1_size = self.b1.len();
        let b2_size = self.b2.len();

        let to_evict = if t1_size > 0 && (t1_size > self.p || b1_size >= b2_size) {
            // Evict from T1 (recency) → B1.
            // CRITICAL: pop_front() evicts LRU (least recently used).
            self.t1.pop_front().map(|dva| {
                self.b1.push_back(dva); // Add to ghost list
                dva
            })
        } else {
            // Evict from T2 (frequency) → B2.
            // CRITICAL: pop_front() evicts LRU (least recently used).
            self.t2.pop_front().map(|dva| {
                self.b2.push_back(dva); // Add to ghost list
                dva
            })
        };

        let made_progress = to_evict.is_some();
        if let Some(dva) = to_evict {
            if let Some(header) = self.index.remove(&dva) {
                self.current_size -= header.size;
            }
        }

        // Trim ghost lists to prevent unbounded growth.
        // Maximum ghost entries = cache size / minimum block size (4 KiB).
        // Evict from FRONT (oldest ghosts); stop if both are drained.
        while self.b1.len() + self.b2.len() > self.max_bytes / 4096 {
            if self.b1.pop_front().is_none() && self.b2.pop_front().is_none() {
                break;
            }
        }

        made_progress
    }

    /// Adapt the cache parameter based on ghost list hits.
    ///
    /// When a cache miss occurs for a DVA that's in a ghost list,
    /// we adjust `p` to favor the corresponding access pattern:
    ///
    /// - Miss in B1: Increase `p` (favor recency)
    /// - Miss in B2: Decrease `p` (favor frequency)
    ///
    /// Per the ARC paper (Megiddo & Modha, FAST '03) the step is
    /// `max(1, ratio)`: the original bare integer division rounded the
    /// step down to 0 whenever the "other" ghost list was the smaller
    /// one, freezing the adaptation entirely.
    fn adapt_on_miss(&mut self, dva: &Dva) {
        let b1_size = self.b1.len();
        let b2_size = self.b2.len();
        // `p` counts entries; cap it at the number of minimum-size (4 KiB)
        // blocks that fit in the cache.
        let p_max = self.max_bytes / 4096;

        if self.b1.contains(dva) {
            // Ghost hit in B1: workload favors recency.
            // Increase p to give more space to T1.
            let delta = cmp::max(b2_size / cmp::max(b1_size, 1), 1);
            self.p = cmp::min(self.p.saturating_add(delta), p_max);
        } else if self.b2.contains(dva) {
            // Ghost hit in B2: workload favors frequency.
            // Decrease p to give more space to T2 (already floored at 0).
            let delta = cmp::max(b1_size / cmp::max(b2_size, 1), 1);
            self.p = self.p.saturating_sub(delta);
        }
    }

    /// Get cache statistics.
    ///
    /// # Returns
    ///
    /// Tuple of (hits, misses, hit_rate, current_size_bytes)
    pub fn stats(&self) -> (u64, u64, f64, usize) {
        let total = self.hits + self.misses;
        let hit_rate = if total > 0 {
            self.hits as f64 / total as f64
        } else {
            0.0
        };
        (self.hits, self.misses, hit_rate, self.current_size)
    }
}

impl Default for Arc {
    /// Equivalent to [`Arc::new`]: an empty cache capped at `ARC_MAX_BYTES`.
    fn default() -> Self {
        Self::new()
    }
}