atlas-archive-core 1.1.0

use crate::PlcConfig;
use crate::alloc::collections::BTreeMap;
use crate::alloc::vec::Vec;
use crate::ans::ProbModel;
use crate::loom::{LoomPredictor, LoomPruner, LoomWeaver};
use crate::mixer::ContextNode;
use core::num::NonZeroUsize;
use lru::LruCache;

#[cfg(feature = "std")]
use crate::alloc::boxed::Box;
#[cfg(feature = "std")]
use std::fs::{self, File};
#[cfg(feature = "std")]
use std::io::Write;
#[cfg(feature = "std")]
use std::path::PathBuf;
// `svalinn::vault::SvalinnAead` is referenced by its fully qualified path in the struct field below, both to avoid import conflicts and for clarity.

/// A page identifier (simple hash based).
///
/// Values are produced by `PagedLoom::get_page_id`, which keeps only the low
/// four bits of the context hash, so ids created in this file fall in `0..16`.
type PageId = u64;

/// In-memory contents of one page: its context nodes plus the bookkeeping
/// used for write-back and adaptive sizing.
#[derive(Clone, Debug)]
pub struct PageData {
    /// Context nodes keyed by the raw context bytes.
    pub nodes: BTreeMap<Vec<u8>, ContextNode>,
    /// Set when the page has been modified in memory; with the `std` feature,
    /// dirty pages are written to the swap directory when evicted.
    pub is_dirty: bool,
    /// Incremental growth limit for this page.
    /// Pruning is triggered once `nodes.len()` exceeds this value.
    pub current_limit: usize,
    /// Accumulated loss (bits) for this page - used for data-driven sizing.
    pub loss_sum: f64,
    /// Number of symbols processed on this page.
    pub symbol_count: u64,
}

/// A buffered model update: symbol `next` was observed after `context`.
/// Operations are queued by `weave` and applied in batches by `flush_buffer`.
struct WeaveOp {
    context: Vec<u8>, // Only the last max_order bytes of history
    next: u8,
    page_id: PageId, // Cached to avoid double hashing
}

/// Context-mixing model whose nodes are split into pages held in an LRU
/// cache; with the `std` feature, evicted dirty pages are written to a swap
/// directory and read back on a cache miss.
pub struct PagedLoom {
    /// Produces the final `ProbModel` from the history and the optional mixer
    /// probabilities computed by `predict`.
    predictor: crate::predictor::TransformerPredictor,
    /// Configuration; `page_max_nodes` is overwritten in `new`.
    config: PlcConfig,
    /// Resident pages, keyed by page id.
    cache: LruCache<PageId, PageData>,
    /// Global nodes for low-order context (1-2) to ensure seamless transitions.
    /// In this file the map is only read (by `estimated_ram_mb`); nothing here inserts into it.
    global_nodes: BTreeMap<Vec<u8>, ContextNode>,
    /// Pending updates queued by `weave` until `flush_buffer` applies them.
    op_buffer: Vec<WeaveOp>,
    /// Number of queued operations at which `weave` triggers a flush.
    buffer_limit: usize,
    flush_count: usize, // Counter for prune throttling
    /// Highest `estimated_ram_mb` value observed on a cache miss.
    peak_ram_mb: usize,
    /// Directory that holds the `<id>.page` swap files.
    #[cfg(feature = "std")]
    swap_dir: Option<PathBuf>,
    /// AEAD cipher applied to page files on save and load, when available.
    #[cfg(feature = "std")]
    _cipher: Option<Box<svalinn::vault::SvalinnAead>>,
    /// Background runtime used by `prefetch_likely_pages` to issue reads.
    #[cfg(feature = "async")]
    runtime: Option<std::sync::Arc<tokio::runtime::Runtime>>,
}

impl PagedLoom {
    /// Creates a paged loom whose page cache is sized from `max_ram_bytes`.
    ///
    /// * `config` - model configuration. `page_max_nodes` is overwritten with
    ///   the per-page share of `max_nodes` (never below 2048).
    /// * `max_ram_bytes` - RAM budget; divided by the page size estimate to
    ///   obtain the cache capacity in pages (never below 17).
    ///
    /// With the `std` feature a swap directory is prepared: either
    /// `config.persistent_dict_path`, or a fresh per-instance directory under
    /// `<temp>/atlas_loom_swap`. Directory creation errors are ignored here;
    /// later page writes then fail silently as well.
    pub fn new(config: PlcConfig, max_ram_bytes: usize) -> Self {
        // The LRU cache counts pages, not bytes, so convert the byte budget
        // using the configured page size hint (at least 1 MiB).
        let page_size_est = config.page_size_hint.max(1024 * 1024);
        // `get_page_id` maps contexts onto 16 shards; 16 pages plus one spare
        // is the smallest capacity that avoids thrashing between shards.
        let min_cap = 17;
        let page_cap_val = (max_ram_bytes / page_size_est).max(min_cap);
        let page_cap = NonZeroUsize::new(page_cap_val)
            .expect("page capacity is clamped to at least min_cap");

        // Spread the global node budget evenly across the pages so that each
        // page is pruned against a fair share.
        let nodes_per_page = (config.max_nodes / page_cap.get()).max(2048);
        let mut config = config;
        config.page_max_nodes = nodes_per_page;

        #[cfg(feature = "std")]
        let swap_dir = if let Some(path) = &config.persistent_dict_path {
            let path = PathBuf::from(path);
            let _ = fs::create_dir_all(&path);
            Some(path)
        } else {
            use core::sync::atomic::{AtomicUsize, Ordering};

            // Process-wide counter: every instance gets its own directory.
            // The previous scheme hashed the thread id with the address of a
            // local in this function, which repeats for calls made from the
            // same call site, letting two live instances share (and delete)
            // each other's swap files.
            static NEXT_INSTANCE: AtomicUsize = AtomicUsize::new(0);
            let instance = NEXT_INSTANCE.fetch_add(1, Ordering::Relaxed);

            // The timestamp keeps leftovers of a crashed process that had the
            // same pid from being picked up as this instance's pages.
            let nanos = std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap_or_default()
                .as_nanos();

            let mut base = std::env::temp_dir().join("atlas_loom_swap");
            base.push(alloc::format!(
                "{:x}-{:x}-{:x}",
                std::process::id(),
                nanos,
                instance
            ));

            let _ = fs::create_dir_all(&base);
            base.canonicalize().ok().or(Some(base))
        };

        #[cfg(feature = "std")]
        let cipher = {
            // NOTE(review): the key is a fixed, compiled-in constant, so the
            // page encryption offers no confidentiality against anyone who
            // has this source.
            let k32 = [0xCC; 32];
            svalinn::vault::SvalinnAead::new_chacha(&k32)
                .ok()
                .map(Box::new)
        };

        #[cfg(feature = "async")]
        let runtime = crate::get_background_runtime();

        let predictor = crate::predictor::TransformerPredictor::new(config.clone());

        Self {
            predictor,
            config,
            cache: LruCache::new(page_cap),
            global_nodes: BTreeMap::new(),
            op_buffer: Vec::with_capacity(4096),
            buffer_limit: 4096,
            flush_count: 0,
            peak_ram_mb: 0,
            #[cfg(feature = "std")]
            swap_dir,
            #[cfg(feature = "std")]
            _cipher: cipher,
            #[cfg(feature = "async")]
            runtime,
        }
    }

    /// Returns a rough estimate, in MiB, of the memory held by all resident
    /// nodes (global nodes plus every cached page).
    ///
    /// The estimate charges a flat 1028 bytes per node (counts + metadata)
    /// and does not look at key lengths.
    pub fn estimated_ram_mb(&self) -> usize {
        const BYTES_PER_NODE: usize = 1028;
        const BYTES_PER_MIB: usize = 1024 * 1024;

        let paged_nodes: usize = self.cache.iter().map(|(_, page)| page.nodes.len()).sum();
        let resident_nodes = self.global_nodes.len() + paged_nodes;
        (resident_nodes * BYTES_PER_NODE) / BYTES_PER_MIB
    }

    /// Maps a context onto one of 16 page ids.
    ///
    /// Only the last two bytes of `context` (or all of it when shorter) are
    /// hashed, so every context sharing that suffix lands on the same page
    /// regardless of its order.
    fn get_page_id(&self, context: &[u8]) -> PageId {
        use core::hash::{Hash, Hasher};

        // Low four bits select the shard.
        const SHARD_MASK: u64 = 0x0F;

        // `saturating_sub` yields 0 for contexts shorter than two bytes, in
        // which case the whole context is hashed.
        let suffix_start = context.len().saturating_sub(2);
        let suffix = &context[suffix_start..];

        let mut state = impl_hasher::FnvHasher::default();
        suffix.hash(&mut state);
        (state.finish() & SHARD_MASK) as PageId
    }

    /// Returns a mutable reference to page `page_id`, loading or creating it
    /// when it is not resident.
    ///
    /// On a miss the page is read from the swap directory (`std` only) or
    /// created empty with a starting limit of 1024 nodes. Before insertion,
    /// least-recently-used pages are evicted while the cache is full or the
    /// RAM estimate exceeds the soft cap; dirty evicted pages are written to
    /// disk when `std` is enabled (without `std` they are simply dropped).
    fn get_page_mut(&mut self, page_id: PageId) -> &mut PageData {
        /// Soft RAM ceiling in MiB that triggers additional evictions.
        const RAM_SOFT_CAP_MB: usize = 950;

        if self.cache.contains(&page_id) {
            if self.config.verbose {
                #[cfg(feature = "std")]
                std::println!(
                    "[Loom::Paged][T:{:?}] Cache HIT for page {:x} (Size: {} nodes)",
                    std::thread::current().id(),
                    page_id,
                    self.cache.get(&page_id).map(|p| p.nodes.len()).unwrap_or(0)
                );
            }
            // The contains/get_mut pair is needed: returning from a single
            // `if let Some(..) = get_mut(..)` keeps `self.cache` borrowed for
            // the miss path below.
            return self
                .cache
                .get_mut(&page_id)
                .expect("page presence was checked just above");
        }

        if self.config.verbose {
            #[cfg(feature = "std")]
            std::println!(
                "[Loom::Paged][T:{:?}] Cache MISS for page {:x}",
                std::thread::current().id(),
                page_id
            );
        }

        #[cfg(feature = "std")]
        let loaded = self.load_page_from_disk(page_id);
        #[cfg(not(feature = "std"))]
        let loaded = None;

        let page = loaded.unwrap_or_else(|| PageData {
            nodes: BTreeMap::new(),
            is_dirty: false,
            current_limit: 1024, // Incremental start
            loss_sum: 0.0,
            symbol_count: 0,
        });

        let mut ram_mb = self.estimated_ram_mb();
        self.peak_ram_mb = self.peak_ram_mb.max(ram_mb);

        // Evict until there is a free slot AND the estimate is back under the
        // soft cap. The estimate is refreshed after every eviction; computing
        // it only once meant that a single over-budget reading emptied the
        // entire cache.
        while self.cache.len() >= self.cache.cap().get() || ram_mb > RAM_SOFT_CAP_MB {
            let Some((old_id, old_page)) = self.cache.pop_lru() else {
                // Nothing left to evict (e.g. the budget is exceeded by
                // non-paged nodes alone).
                break;
            };

            if self.config.verbose {
                #[cfg(feature = "std")]
                std::println!(
                    "[Loom::Paged][T:{:?}] Evicting page {:x} (dirty={}, nodes={}) - Cache Size: {}/{}",
                    std::thread::current().id(),
                    old_id,
                    old_page.is_dirty,
                    old_page.nodes.len(),
                    self.cache.len(),
                    self.cache.cap()
                );
            }

            if old_page.is_dirty {
                #[cfg(feature = "std")]
                self.save_page_to_disk(old_id, &old_page);
            }

            ram_mb = self.estimated_ram_mb();
        }

        self.cache.push(page_id, page);
        self.cache
            .get_mut(&page_id)
            .expect("page was inserted just above")
    }

    /// Reads page `id` back from the swap directory, if a file for it exists
    /// and can be decoded.
    ///
    /// Loading is always synchronous, with or without the `async` feature:
    /// the caller needs the page immediately, so there is nothing to gain
    /// from bridging into the background runtime here.
    #[cfg(feature = "std")]
    fn load_page_from_disk(&self, id: PageId) -> Option<PageData> {
        self.load_page_sync(id)
    }

    /// Synchronously reads and decodes `<swap_dir>/<id>.page`.
    ///
    /// Returns `None` when there is no swap directory, the file is missing or
    /// unreadable, decryption fails, or the payload does not deserialize.
    /// When a cipher is configured the file layout is a 12-byte nonce
    /// followed by the ciphertext; otherwise the file is plain serialized data.
    #[cfg(feature = "std")]
    fn load_page_sync(&self, id: PageId) -> Option<PageData> {
        const NONCE_LEN: usize = 12;

        let dir = self.swap_dir.as_ref()?;
        let path = dir.join(crate::alloc::format!("{:x}.page", id));
        if !path.exists() {
            return None;
        }
        let raw = std::fs::read(path).ok()?;

        let Some(cipher) = &self._cipher else {
            // Plaintext fallback for the case where no cipher could be built.
            return PageData::deserialize(&raw);
        };

        if raw.len() < NONCE_LEN {
            return None;
        }
        let (nonce, ciphertext) = raw.split_at(NONCE_LEN);
        let aad = b"AtlasPagedLoom";
        let plaintext = cipher.decrypt(nonce, ciphertext, aad).ok()?;
        PageData::deserialize(&plaintext)
    }

    /// Writes page `id` to `<swap_dir>/<id>.page`, synchronously.
    ///
    /// Writes are synchronous on purpose: fire-and-forget writes allowed a
    /// later cache miss to read stale or missing data, which reset the model
    /// and corrupted the ANS stream.
    ///
    /// When a cipher is configured the file holds a 12-byte nonce (page id
    /// plus the low 32 bits of the current time in nanoseconds) followed by
    /// the ciphertext. If encryption fails, nothing is written in the clear:
    /// the loader always attempts decryption when a cipher is present, so a
    /// plaintext file would be unreadable anyway. The existing file is removed
    /// instead, so a later load yields `None` exactly as it did before.
    ///
    /// All I/O errors are ignored (best effort).
    #[cfg(feature = "std")]
    fn save_page_to_disk(&self, id: PageId, data: &PageData) {
        let Some(dir) = &self.swap_dir else {
            return;
        };
        let path = dir.join(crate::alloc::format!("{:x}.page", id));
        let serialized = data.serialize();

        let payload = match &self._cipher {
            Some(cipher) => {
                let time = std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_nanos();
                let mut nonce = [0u8; 12];
                nonce[0..8].copy_from_slice(&id.to_le_bytes());
                // Deliberate truncation: only the low 32 bits of the clock fit.
                nonce[8..12].copy_from_slice(&(time as u32).to_le_bytes());

                let aad = b"AtlasPagedLoom";
                match cipher.encrypt(&nonce, &serialized, aad) {
                    Ok(ct) => {
                        let mut framed = Vec::with_capacity(nonce.len() + ct.len());
                        framed.extend_from_slice(&nonce);
                        framed.extend_from_slice(&ct);
                        framed
                    }
                    Err(_) => {
                        // Never leave unencrypted page data on disk, and do
                        // not leave an outdated page behind either.
                        let _ = std::fs::remove_file(&path);
                        return;
                    }
                }
            }
            // No cipher available: plain serialized data, matching the loader.
            None => serialized,
        };

        let _ = File::create(path).and_then(|mut f| f.write_all(&payload));
    }

    fn flush_buffer(&mut self) {
        if self.op_buffer.is_empty() {
            return;
        }

        #[cfg(feature = "std")]
        let flush_start = std::time::Instant::now();

        let orders = self.config.mixer_orders.clone();

        // Process batch - use cached page_id for sorting (avoid rehashing)
        let mut ops = crate::alloc::vec::Vec::from_iter(self.op_buffer.drain(..));
        ops.sort_by_key(|op| op.page_id); // Use cached page_id

        // Use HashSet for O(1) lookup instead of O(N) Vec::contains
        let mut touched_pages = crate::alloc::collections::BTreeSet::new();

        for op in ops {
            touched_pages.insert(op.page_id);
            let page = self.get_page_mut(op.page_id);

            page.is_dirty = true;

            // Calculate estimated loss BEFORE updating (measures prediction quality)
            let ctx_len = op.context.len();
            let mut best_loss = 8.0f64; // Default: 1 byte/symbol
            for order in &orders {
                if ctx_len >= *order {
                    let ctx = &op.context[ctx_len - *order..];
                    if let Some(node) = page.nodes.get(ctx) {
                        if node.total > 0 {
                            let prob = node.count[op.next as usize] as f64 / node.total as f64;
                            if prob > 0.0 {
                                let loss = -prob.log2();
                                if loss < best_loss {
                                    best_loss = loss;
                                }
                            }
                        }
                    }
                }
            }
            page.loss_sum += best_loss;
            page.symbol_count += 1;

            // Now update the context nodes
            for order in &orders {
                if ctx_len >= *order {
                    let ctx = &op.context[ctx_len - *order..];
                    let node = page.nodes.entry(ctx.to_vec()).or_default();
                    node.count[op.next as usize] = node.count[op.next as usize].saturating_add(1);
                    node.total = node.total.saturating_add(1);
                }
            }
        }

        // Increment flush counter
        self.flush_count += 1;

        let throttle = self.config.aggression.throttle_rate();
        if self.flush_count.is_multiple_of(throttle) {
            for page_id in touched_pages {
                self.prune_page(page_id);
            }
        } else {
            // Immediate check for limit breaches even if not throttled
            for page_id in touched_pages {
                if let Some(page) = self.cache.get(&page_id) {
                    if page.nodes.len() > page.current_limit {
                        self.prune_page(page_id);
                    }
                }
            }
        }

        #[cfg(feature = "std")]
        if self.config.verbose {
            let elapsed = flush_start.elapsed();
            std::println!(
                "[Loom::Paged][T:{:?}] Flushed buffer in {:?} (flush #{}, RAM ~{}MB)",
                std::thread::current().id(),
                elapsed,
                self.flush_count,
                self.estimated_ram_mb()
            );
        }
    }

    /// Prunes page `page_id` if it is resident and over its node limit, then
    /// adapts that limit to how well the page has been predicting.
    ///
    /// Nodes with the smallest `total` go first (ties broken by key, so the
    /// outcome is deterministic) until the page is down to
    /// `limit - limit / prune_divisor()` nodes. The limit is then doubled
    /// (capped at `page_max_nodes`) when the average loss is below 4 bits, or
    /// halved (floored at 512) when it is above 6 bits.
    fn prune_page(&mut self, page_id: PageId) {
        let Some(page) = self.cache.get_mut(&page_id) else {
            return;
        };

        let limit = page.current_limit.max(1);
        let before = page.nodes.len();
        if before <= limit {
            return;
        }

        // Drop below the limit by a margin so the page does not need pruning
        // again on the very next insertion.
        let prune_divisor = self.config.prune_aggression.prune_divisor();
        let headroom = limit / prune_divisor;
        let target_len = limit.saturating_sub(headroom);
        let target_remove = before.saturating_sub(target_len);

        if target_remove > 0 {
            // Rank by (total, key) ascending; keys are unique, so the order is
            // total and an unstable sort is still deterministic.
            let mut ranked: Vec<(u32, &Vec<u8>)> = page
                .nodes
                .iter()
                .map(|(key, node)| (node.total, key))
                .collect();
            ranked.sort_unstable();

            let victims: Vec<Vec<u8>> = ranked
                .into_iter()
                .take(target_remove)
                .map(|(_, key)| key.clone())
                .collect();
            for key in &victims {
                page.nodes.remove(key);
            }
        }

        // Until enough symbols have been seen, use a neutral value that
        // neither grows nor shrinks the limit.
        let avg_loss = match page.symbol_count {
            seen if seen > 128 => page.loss_sum / seen as f64,
            _ => 5.0,
        };

        page.is_dirty = true;

        let ceiling = self.config.page_max_nodes;
        if avg_loss < 4.0 && page.current_limit < ceiling {
            // Good predictions: let the page grow.
            page.current_limit = (page.current_limit * 2).min(ceiling);
        } else if avg_loss > 6.0 && page.current_limit > 512 {
            // Poor predictions: shrink the page.
            page.current_limit = (page.current_limit / 2).max(512);
        }

        #[cfg(feature = "std")]
        if self.config.verbose {
            std::println!(
                "[Loom::Paged][T:{:?}] Page {:x}: {} -> {} nodes, avg_loss={:.3}, limit={}",
                std::thread::current().id(),
                page_id,
                before,
                page.nodes.len(),
                avg_loss,
                page.current_limit
            );
        }
    }
}

impl PageData {
    /// Bytes in the fixed header: dirty flag (1) + node count (4) +
    /// current limit (4) + loss sum (8) + symbol count (8).
    const HEADER_LEN: usize = 1 + 4 + 4 + 8 + 8;
    /// Bytes of the 256-entry `u32` count table of a node.
    const COUNT_TABLE_LEN: usize = 256 * 4;
    /// Bytes every node occupies besides its key: key length (2) + total (4)
    /// + count table.
    const NODE_FIXED_LEN: usize = 2 + 4 + Self::COUNT_TABLE_LEN;

    /// Encodes the page into a little-endian byte buffer.
    ///
    /// Layout:
    /// `[dirty: u8][num_nodes: u32][current_limit: u32][loss_sum: f64][symbol_count: u64]`
    /// followed, per node in key order, by
    /// `[key_len: u16][key bytes][total: u32][count: 256 x u32]`.
    ///
    /// The node count, limit and key length are narrowed with `as`, so values
    /// beyond `u32::MAX` / `u16::MAX` would be truncated.
    fn serialize(&self) -> Vec<u8> {
        let body_len: usize = self
            .nodes
            .keys()
            .map(|key| Self::NODE_FIXED_LEN + key.len())
            .sum();
        let mut out = Vec::with_capacity(Self::HEADER_LEN + body_len);

        out.push(if self.is_dirty { 1 } else { 0 });
        out.extend_from_slice(&(self.nodes.len() as u32).to_le_bytes());
        out.extend_from_slice(&(self.current_limit as u32).to_le_bytes());
        out.extend_from_slice(&self.loss_sum.to_le_bytes());
        out.extend_from_slice(&self.symbol_count.to_le_bytes());

        for (key, node) in &self.nodes {
            out.extend_from_slice(&(key.len() as u16).to_le_bytes());
            out.extend_from_slice(key);
            out.extend_from_slice(&node.total.to_le_bytes());
            for slot in node.count.iter() {
                out.extend_from_slice(&slot.to_le_bytes());
            }
        }
        out
    }

    /// Splits the first `N` bytes off `cursor` and returns them as an array,
    /// or `None` (leaving `cursor` untouched) when fewer than `N` remain.
    fn read_bytes<const N: usize>(cursor: &mut &[u8]) -> Option<[u8; N]> {
        if cursor.len() < N {
            return None;
        }
        let (head, tail) = cursor.split_at(N);
        *cursor = tail;
        head.try_into().ok()
    }

    /// Decodes a buffer produced by [`PageData::serialize`].
    ///
    /// Returns `None` if the buffer ends before the header or any announced
    /// node is complete. Bytes after the last node are ignored.
    fn deserialize(data: &[u8]) -> Option<Self> {
        let mut cursor = data;

        let is_dirty = Self::read_bytes::<1>(&mut cursor)?[0] != 0;
        let num_nodes = u32::from_le_bytes(Self::read_bytes(&mut cursor)?) as usize;
        let current_limit = u32::from_le_bytes(Self::read_bytes(&mut cursor)?) as usize;
        let loss_sum = f64::from_le_bytes(Self::read_bytes(&mut cursor)?);
        let symbol_count = u64::from_le_bytes(Self::read_bytes(&mut cursor)?);

        let mut nodes = BTreeMap::new();
        for _ in 0..num_nodes {
            let key_len = u16::from_le_bytes(Self::read_bytes(&mut cursor)?) as usize;
            if cursor.len() < key_len {
                return None;
            }
            let (key_bytes, rest) = cursor.split_at(key_len);
            let key = key_bytes.to_vec();
            cursor = rest;

            let total = u32::from_le_bytes(Self::read_bytes(&mut cursor)?);

            // The whole table is bounds-checked once, then decoded word by word.
            let table = Self::read_bytes::<{ 256 * 4 }>(&mut cursor)?;
            let mut count = [0u32; 256];
            for (slot, word) in count.iter_mut().zip(table.chunks_exact(4)) {
                *slot = u32::from_le_bytes([word[0], word[1], word[2], word[3]]);
            }

            nodes.insert(key, ContextNode { count, total });
        }

        Some(Self {
            nodes,
            is_dirty,
            current_limit,
            loss_sum,
            symbol_count,
        })
    }
}

mod impl_hasher {
    /// Initial state: the standard 64-bit FNV offset basis.
    const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
    /// Per-byte multiplier.
    ///
    /// NOTE: this is NOT the standard 64-bit FNV prime (`0x100000001b3`).
    /// Page ids, and therefore the names of persisted page files, are derived
    /// from this hash, so the value must not be changed without migrating
    /// existing data.
    const MULTIPLIER: u64 = 0x1099511628211995;

    /// Small FNV-1a-shaped hasher (xor the byte in, then multiply) used to
    /// assign contexts to pages.
    pub struct FnvHasher(u64);

    impl Default for FnvHasher {
        fn default() -> Self {
            FnvHasher(OFFSET_BASIS)
        }
    }

    impl core::hash::Hasher for FnvHasher {
        fn finish(&self) -> u64 {
            self.0
        }

        fn write(&mut self, bytes: &[u8]) {
            self.0 = bytes.iter().fold(self.0, |state, &byte| {
                (state ^ byte as u64).wrapping_mul(MULTIPLIER)
            });
        }
    }
}

impl PagedLoom {
    /// Best-effort read-ahead for the pages addressed by the order-1 and
    /// order-2 suffixes of `_history`.
    ///
    /// Only active with both the `std` and `async` features, and only when a
    /// background runtime and a swap directory exist. Each read runs as a
    /// detached task whose result is discarded (the original comment
    /// describes the intent as triggering an OS read); nothing is inserted
    /// into the page cache.
    ///
    /// Pages that are already resident, and an id already requested by this
    /// call, are skipped: this function runs on every cache-hit prediction,
    /// and re-reading a page that is already in memory is pure overhead.
    fn prefetch_likely_pages(&self, _history: &[u8]) {
        #[cfg(all(feature = "std", feature = "async"))]
        if let (Some(rt), Some(dir)) = (&self.runtime, &self.swap_dir) {
            let h_len = _history.len();
            if h_len < 2 {
                return;
            }

            let mut requested: Option<PageId> = None;
            // `h_len >= 2` was checked above, so both suffixes exist.
            for order in [1usize, 2] {
                let id = self.get_page_id(&_history[h_len - order..]);
                if requested == Some(id) || self.cache.contains(&id) {
                    continue;
                }
                requested = Some(id);

                let path = dir.join(crate::alloc::format!("{:x}.page", id));
                rt.spawn(async move {
                    let _ = tokio::fs::read(path).await;
                });
            }
        }
    }
}

impl LoomPredictor for PagedLoom {
    /// Predicts the next symbol from `history`.
    ///
    /// The page for the current context is looked up with `peek`, so the
    /// cache's LRU order is not disturbed and nothing is loaded from disk.
    /// If the page is resident, the counts of every matching order are mixed
    /// with weight `(order + 1)^2` and passed to the inner predictor; on a
    /// page miss the inner predictor is called without mixer probabilities.
    fn predict(&self, history: &[u8]) -> ProbModel {
        if self.config.verbose {
            #[cfg(feature = "std")]
            std::println!("[Loom::Paged] Predict history (len={})", history.len());
        }

        let h_len = history.len();

        // Derive the page id from the same trailing window that `weave` uses.
        let max_order = self.config.mixer_orders.iter().copied().max().unwrap_or(8);
        let window = &history[h_len.saturating_sub(max_order)..];
        let page_id = self.get_page_id(window);

        let Some(page) = self.cache.peek(&page_id) else {
            // Page miss: return pure neural/identity prior
            return self.predictor.predict(history, None);
        };

        let mut mixed_probs = [0u64; 256];
        let mut total_weight = 0u64;
        let mut found_any = false;

        for &order in &self.config.mixer_orders {
            if h_len < order {
                continue;
            }
            let node = match page.nodes.get(&history[h_len - order..]) {
                Some(node) if node.total > 0 => node,
                _ => continue,
            };
            found_any = true;

            // Longer contexts get quadratically more say.
            let weight = (order as u64 + 1) * (order as u64 + 1);
            let node_total = node.total as u64;
            let multiplier = 65536 * weight;

            for (slot, &count) in mixed_probs.iter_mut().zip(node.count.iter()) {
                if count > 0 {
                    let contribution = (count as u64 * multiplier) / node_total;
                    *slot = slot.saturating_add(contribution);
                }
            }
            total_weight = total_weight.saturating_add(weight);
        }

        let mixer_probs = if found_any && total_weight > 0 {
            // Normalise by the sum of the weights that actually contributed.
            mixed_probs
                .iter_mut()
                .for_each(|slot| *slot /= total_weight);
            Some(mixed_probs)
        } else {
            None
        };

        // Prefetch nearby pages
        self.prefetch_likely_pages(history);

        // Final blending is delegated to the inner predictor.
        self.predictor.predict(history, mixer_probs)
    }

    /// Runs [`predict`](Self::predict) on each history, in order.
    fn predict_batch(&self, histories: &[&[u8]]) -> Vec<ProbModel> {
        let mut models = Vec::with_capacity(histories.len());
        for history in histories {
            models.push(self.predict(history));
        }
        models
    }
}

impl LoomWeaver for PagedLoom {
    /// Queues the observation "`next` followed `history`".
    ///
    /// Only the trailing `max(mixer_orders)` bytes of `history` are kept
    /// (8 when no orders are configured), so buffered operations stay small.
    /// The buffer is flushed once it reaches `buffer_limit` entries.
    fn weave(&mut self, history: &[u8], next: u8) {
        let max_order = self.config.mixer_orders.iter().copied().max().unwrap_or(8);
        let window = &history[history.len().saturating_sub(max_order)..];

        // Hash once here; `flush_buffer` reuses the stored id.
        let page_id = self.get_page_id(window);
        let context = window.to_vec();

        self.op_buffer.push(WeaveOp {
            context,
            next,
            page_id,
        });

        if self.op_buffer.len() >= self.buffer_limit {
            self.flush_buffer();
        }
    }

    /// Queues every `(history, next)` pair in order via [`weave`](Self::weave).
    fn weave_batch(&mut self, ops: Vec<(Vec<u8>, u8)>) {
        if self.config.verbose {
            #[cfg(feature = "std")]
            std::println!("[Loom::Paged] Weaving batch of {} ops", ops.len());
        }
        ops.into_iter()
            .for_each(|(history, next)| self.weave(&history, next));
    }
}

impl LoomPruner for PagedLoom {
    /// Flushes the pending operation buffer.
    ///
    /// Pruning itself happens inside `flush_buffer`, per touched page; when
    /// the buffer is empty this call does nothing beyond optional logging.
    fn prune(&mut self) {
        #[cfg(feature = "std")]
        if self.config.verbose {
            std::println!("[Loom::Paged] Pruning...");
        }
        self.flush_buffer();
    }

    /// Same as [`prune`](Self::prune).
    fn prune_batch(&mut self) {
        self.prune();
    }
}
#[cfg(feature = "std")]
impl Drop for PagedLoom {
    /// Removes the per-instance temporary swap directory.
    ///
    /// Nothing is deleted when a persistent dictionary path is configured, or
    /// when the swap path is the shared `atlas_loom_swap` base directory
    /// itself. Removal errors are ignored.
    ///
    /// NOTE(review): resident dirty pages and buffered operations are not
    /// written out here; confirm that persistence of a configured dictionary
    /// is handled elsewhere.
    fn drop(&mut self) {
        let Some(path) = self.swap_dir.as_deref() else {
            return;
        };

        let is_persistent = self.config.persistent_dict_path.is_some();
        // Extra safety net: never wipe the base directory shared by instances.
        let is_shared_base = path.ends_with("atlas_loom_swap");

        if !is_persistent && !is_shared_base {
            let _ = std::fs::remove_dir_all(path);
        }
    }
}