// lcpfs 2026.1.102
//
// LCP File System - A ZFS-inspired copy-on-write filesystem for Rust
// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0
//
// ZVOL - Block Volume Emulation
// Present LCPFS objects as raw block devices.

use crate::BlockDevice;
use crate::fscore::structs::{DmuObjectType, DnodePhys};
use crate::io::pipeline::Pipeline;
use crate::storage::dmu::ObjectSet;

use alloc::vec;
use alloc::vec::Vec;
use lazy_static::lazy_static;
use spin::Mutex;

/// ZVOL metadata record (stored in LCPFS as an object).
///
/// `#[repr(C)]` fixes the field order, so the declaration order below is the
/// layout of the record; fields must not be reordered.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct ZvolPhys {
    /// DMU object ID
    pub object_id: u64,
    /// Total volume size in bytes
    pub volsize: u64,
    /// Block size in bytes (usually 8K, 16K, 64K, 128K)
    pub volblocksize: u64,
    /// Checksum algorithm identifier (`new` uses 1, labelled Fletcher4)
    pub checksum: u8,
    /// Compression algorithm identifier (`new` uses 1, labelled LZ4)
    pub compression: u8,
    /// Number of copies (1-3)
    pub copies: u8,
    /// Read-only flag (`new` uses 0, labelled read-write)
    pub readonly: u8,
    /// Transaction group when created
    pub txg_birth: u64,
    /// Future expansion
    pub reserved: [u64; 8],
}

impl ZvolPhys {
    /// Build the metadata record for a freshly created volume.
    ///
    /// The caller supplies the object ID and geometry; every other field gets
    /// a fixed default: Fletcher4 checksum (1), LZ4 compression (1), a single
    /// copy, read-write, birth txg 1 and zeroed reserved words.
    pub fn new(object_id: u64, volsize: u64, volblocksize: u64) -> Self {
        const CHECKSUM_FLETCHER4: u8 = 1;
        const COMPRESSION_LZ4: u8 = 1;
        const SINGLE_COPY: u8 = 1;
        const READ_WRITE: u8 = 0;
        const INITIAL_TXG: u64 = 1;

        ZvolPhys {
            object_id,
            volsize,
            volblocksize,
            checksum: CHECKSUM_FLETCHER4,
            compression: COMPRESSION_LZ4,
            copies: SINGLE_COPY,
            readonly: READ_WRITE,
            txg_birth: INITIAL_TXG,
            reserved: [0u64; 8],
        }
    }
}

/// In-memory ZVOL instance
///
/// Pairs the volume's metadata record with its backing dnode and a byte
/// buffer holding the data written so far.
pub struct Zvol {
    /// Physical metadata
    pub phys: ZvolPhys,
    /// DMU object backing this volume
    pub dnode: DnodePhys,
    /// In-memory byte buffer for volume contents. It starts empty and is
    /// grown by `write_range`; `read_range` serves reads from it and returns
    /// zeros for bytes it does not yet cover.
    cache: Mutex<Vec<u8>>,
}

impl Zvol {
    /// Create a new ZVOL
    ///
    /// Builds a zeroed dnode typed as `DmuObjectType::Zvol`, sets its last
    /// block ID from the volume geometry, and pairs it with fresh
    /// [`ZvolPhys`] metadata. The data cache starts empty and grows on demand.
    ///
    /// * `object_id` - ID recorded in the metadata (the dnode itself does not
    ///   carry it; object identity is tracked externally by the object set).
    /// * `volsize` - total volume size in bytes.
    /// * `volblocksize` - block size in bytes.
    ///
    /// # Errors
    /// * `"ZVOL block size must be non-zero"` if `volblocksize` is 0.
    /// * `"ZVOL size must be at least one block"` if `volsize` is smaller
    ///   than `volblocksize`.
    pub fn create(object_id: u64, volsize: u64, volblocksize: u64) -> Result<Self, &'static str> {
        // Validate the geometry first: the division below would panic on a
        // zero block size, and `block_count - 1` would underflow for a volume
        // smaller than a single block.
        if volblocksize == 0 {
            return Err("ZVOL block size must be non-zero");
        }
        let block_count = volsize / volblocksize;
        if block_count == 0 {
            return Err("ZVOL size must be at least one block");
        }

        crate::lcpfs_println!(
            "[ ZVOL ] Creating volume: size={} MB, block={}K",
            volsize / 1024 / 1024,
            volblocksize / 1024
        );

        // Create backing DMU object
        // Note: object_id is tracked externally by the object set, not in DnodePhys
        let mut dnode = DnodePhys::zero();
        dnode.object_type = DmuObjectType::Zvol as u8;
        dnode.max_blkid = block_count - 1; // Block IDs are zero-based
        dnode.used_bytes = 0; // Initially empty

        let phys = ZvolPhys::new(object_id, volsize, volblocksize);

        Ok(Self {
            phys,
            dnode,
            cache: Mutex::new(Vec::new()), // Allocate on-demand, not upfront
        })
    }

    /// Load existing ZVOL from DMU object
    ///
    /// Reads `size_of::<ZvolPhys>()` bytes at offset 0 of `dnode` through
    /// `ObjectSet::read_dnode_data` and reinterprets them as a [`ZvolPhys`].
    /// The data cache starts empty.
    ///
    /// # Errors
    /// * `"Failed to read ZVOL metadata"` if the dnode read fails.
    /// * `"ZVOL metadata truncated"` if fewer bytes were returned than a
    ///   `ZvolPhys` occupies.
    /// * `"ZVOL metadata has zero block size"` if the stored block size is 0,
    ///   which would make every later block computation divide by zero.
    pub fn load(dnode: DnodePhys) -> Result<Self, &'static str> {
        let phys_len = core::mem::size_of::<ZvolPhys>();

        // Read ZVOL metadata from first block of dnode
        let metadata = ObjectSet::read_dnode_data(&dnode, 0, phys_len)
            .map_err(|_| "Failed to read ZVOL metadata")?;

        // Do not trust the reader to return the requested length: a short
        // buffer would turn the raw read below into an out-of-bounds access.
        if metadata.len() < phys_len {
            return Err("ZVOL metadata truncated");
        }

        // SAFETY:
        // - `metadata` holds at least `size_of::<ZvolPhys>()` readable bytes
        //   (checked just above).
        // - `read_unaligned` places no alignment requirement on the pointer.
        // - `ZvolPhys` is `#[repr(C)]` and consists only of `u64`, `u8` and
        //   `[u64; 8]` fields, for which every bit pattern is a valid value.
        let phys = unsafe { core::ptr::read_unaligned(metadata.as_ptr() as *const ZvolPhys) };

        // The bytes come from storage, so validate what later code divides by.
        if phys.volblocksize == 0 {
            return Err("ZVOL metadata has zero block size");
        }

        crate::lcpfs_println!(
            "[ ZVOL ] Loaded volume: size={} MB, block={}K",
            phys.volsize / 1024 / 1024,
            phys.volblocksize / 1024
        );

        Ok(Self {
            phys,
            dnode,
            cache: Mutex::new(Vec::new()), // Allocate on-demand
        })
    }

    /// Total volume size in bytes, as recorded in the metadata.
    pub fn get_size(&self) -> u64 {
        let phys = &self.phys;
        phys.volsize
    }

    /// Block size in bytes, as recorded in the metadata.
    pub fn get_blocksize(&self) -> u64 {
        let phys = &self.phys;
        phys.volblocksize
    }

    /// Set volume size (resize)
    ///
    /// Grows the volume to `new_size` bytes and updates the dnode's last
    /// block ID to match. Shrinking is rejected. The data cache is left
    /// untouched because it grows on demand.
    ///
    /// # Errors
    /// * `"Cannot shrink ZVOL"` if `new_size` is below the current size.
    /// * `"ZVOL block size must be non-zero"` if the recorded block size is 0.
    /// * `"ZVOL size must be at least one block"` if `new_size` is smaller
    ///   than one block.
    ///
    /// On error the volume is left unmodified.
    pub fn set_size(&mut self, new_size: u64) -> Result<(), &'static str> {
        if new_size < self.phys.volsize {
            return Err("Cannot shrink ZVOL");
        }

        // Work out the new last block ID before touching any state, so a bad
        // geometry yields an error instead of a divide-by-zero / underflow
        // panic with `volsize` already overwritten.
        let new_max_blkid = new_size
            .checked_div(self.phys.volblocksize)
            .ok_or("ZVOL block size must be non-zero")?
            .checked_sub(1)
            .ok_or("ZVOL size must be at least one block")?;

        crate::lcpfs_println!(
            "[ ZVOL ] Resizing volume: {} MB -> {} MB",
            self.phys.volsize / 1024 / 1024,
            new_size / 1024 / 1024
        );

        self.phys.volsize = new_size;
        self.dnode.max_blkid = new_max_blkid;

        // No need to resize cache - it grows on-demand

        Ok(())
    }

    /// Read block range from ZVOL
    ///
    /// Returns `length` bytes starting at byte `offset`. Bytes covered by the
    /// in-memory cache are copied from it; bytes beyond the end of the cache
    /// have never been written and read back as zeros.
    ///
    /// # Errors
    /// `"ZVOL read out of bounds"` if `offset + length` overflows or extends
    /// past the volume size.
    fn read_range(&self, offset: u64, length: usize) -> Result<Vec<u8>, &'static str> {
        // checked_add: a huge offset must not wrap around and pass the bound.
        match offset.checked_add(length as u64) {
            Some(end) if end <= self.phys.volsize => {}
            _ => return Err("ZVOL read out of bounds"),
        }

        // Start from all zeros so any part of the range the cache does not
        // cover is already correct.
        let mut out = vec![0u8; length];

        let cache = self.cache.lock();
        if offset >= cache.len() as u64 {
            // Entire range lies past the cached data.
            return Ok(out);
        }

        // Lossless cast: offset < cache.len(), which is a usize.
        let start = offset as usize;
        // Copy whatever prefix of the range is cached; a range that straddles
        // the end of the cache keeps its cached prefix and a zero tail.
        let available = core::cmp::min(length, cache.len() - start);
        out[..available].copy_from_slice(&cache[start..start + available]);

        Ok(out)
    }

    /// Write block range to ZVOL
    ///
    /// Copies `data` into the in-memory cache at byte `offset`, growing the
    /// cache (zero-filled) up to the end of the written range if needed.
    /// Nothing is written through to the DMU here.
    ///
    /// # Errors
    /// * `"ZVOL write out of bounds"` if `offset + data.len()` overflows or
    ///   extends past the volume size.
    /// * `"ZVOL write exceeds addressable memory"` if the end of the range
    ///   does not fit in `usize` on this target.
    fn write_range(&mut self, offset: u64, data: &[u8]) -> Result<(), &'static str> {
        // checked_add: a huge offset must not wrap around and pass the bound.
        let end = match offset.checked_add(data.len() as u64) {
            Some(end) if end <= self.phys.volsize => end,
            _ => return Err("ZVOL write out of bounds"),
        };

        // The cache is indexed by usize; reject ranges that a narrowing cast
        // would silently truncate (possible on 32-bit targets).
        let end_index = end as usize;
        if end_index as u64 != end {
            return Err("ZVOL write exceeds addressable memory");
        }
        // Cannot underflow: end_index == offset + data.len().
        let start_index = end_index - data.len();

        let mut cache = self.cache.lock();

        // Grow cache on-demand (sparse allocation)
        if cache.len() < end_index {
            cache.resize(end_index, 0);
        }

        cache[start_index..end_index].copy_from_slice(data);

        // In full implementation: write through pipeline to DMU

        Ok(())
    }

    /// Snapshot the ZVOL
    ///
    /// Produces an independent `Zvol` carrying a copy of this volume's
    /// metadata, a copy of its dnode, and a full clone of the cached data as
    /// it is at the time of the call. `snap_name` is only used in the log
    /// line. Always returns `Ok`.
    pub fn snapshot(&self, snap_name: &str) -> Result<Self, &'static str> {
        crate::lcpfs_println!("[ ZVOL ] Creating snapshot: {}", snap_name);

        // DnodePhys has no object_id (identity is tracked externally), so the
        // copy is tagged by bumping its last block ID instead.
        // NOTE(review): this leaves the snapshot's max_blkid inconsistent
        // with its volsize / volblocksize - confirm this is intended.
        let dnode = {
            let mut copy = self.dnode;
            copy.max_blkid += 1000;
            copy
        };

        // Eager byte-for-byte copy of the cache; the guard is released as
        // soon as the clone completes.
        let cached_bytes = self.cache.lock().clone();

        Ok(Self {
            phys: self.phys,
            dnode,
            cache: Mutex::new(cached_bytes),
        })
    }
}

/// Implement BlockDevice trait for ZVOL
impl BlockDevice for Zvol {
    /// Read block `block_num` into `buffer`.
    ///
    /// Fetches one full block through `read_range` and copies as many bytes
    /// as fit: if `buffer` is shorter than a block only its length is filled,
    /// and if it is longer the excess is left untouched.
    ///
    /// # Errors
    /// `"ZVOL read_block: out of bounds"` if the block's byte offset overflows
    /// `u64` or lies at or beyond the volume size; any error from
    /// `read_range` is passed through.
    fn read_block(&mut self, block_num: usize, buffer: &mut [u8]) -> Result<(), &'static str> {
        let blocksize = self.phys.volblocksize;

        // checked_mul: with wrapping arithmetic a huge block number could
        // alias a valid offset and return some other block's data.
        let offset = match (block_num as u64).checked_mul(blocksize) {
            Some(offset) if offset < self.phys.volsize => offset,
            _ => return Err("ZVOL read_block: out of bounds"),
        };

        let data = self.read_range(offset, blocksize as usize)?;
        let copy_len = core::cmp::min(buffer.len(), data.len());
        buffer[..copy_len].copy_from_slice(&data[..copy_len]);

        Ok(())
    }

    /// Write `buffer` starting at the first byte of block `block_num`.
    ///
    /// The whole of `buffer` is handed to `write_range`, so its length is not
    /// required to equal the block size; the range only has to fit inside
    /// the volume.
    ///
    /// # Errors
    /// `"ZVOL write_block: out of bounds"` if the block's byte offset
    /// overflows `u64` or lies at or beyond the volume size; any error from
    /// `write_range` is passed through.
    fn write_block(&mut self, block_num: usize, buffer: &[u8]) -> Result<(), &'static str> {
        // checked_mul: with wrapping arithmetic a huge block number could
        // alias a valid offset and overwrite some other block.
        let offset = match (block_num as u64).checked_mul(self.phys.volblocksize) {
            Some(offset) if offset < self.phys.volsize => offset,
            _ => return Err("ZVOL write_block: out of bounds"),
        };

        self.write_range(offset, buffer)
    }

    /// Total volume size in bytes; always `Ok`.
    fn size(&self) -> Result<u64, &'static str> {
        let total_bytes = self.phys.volsize;
        Ok(total_bytes)
    }

    /// Block size in bytes, narrowed from the stored `u64` to `usize`.
    fn block_size(&self) -> usize {
        let bytes_per_block = self.phys.volblocksize;
        bytes_per_block as usize
    }

    /// Number of whole blocks in the volume (`volsize / volblocksize`,
    /// rounded down).
    ///
    /// Returns 0 when the recorded block size is 0 instead of panicking on
    /// the division; `phys` is a public field, so that state is reachable.
    fn block_count(&self) -> usize {
        self.phys
            .volsize
            .checked_div(self.phys.volblocksize)
            .unwrap_or(0) as usize
    }
}

lazy_static! {
    /// Global ZVOL registry
    ///
    /// Every registered volume lives in this mutex-guarded vector.
    /// `ZvolManager` identifies a volume by its position in the vector, so
    /// removing an entry shifts the index of every entry after it.
    pub static ref ZVOL_REGISTRY: Mutex<Vec<Zvol>> = Mutex::new(Vec::new());
}

/// ZVOL management API
///
/// Stateless namespace type: its associated functions all operate on the
/// global `ZVOL_REGISTRY`.
pub struct ZvolManager;

impl ZvolManager {
    /// Create a new ZVOL and register it
    ///
    /// * `name` - label used in the log line only; it is not stored.
    /// * `size_mb` - volume size in MiB.
    /// * `blocksize_kb` - block size in KiB.
    ///
    /// Returns the index of the new volume in `ZVOL_REGISTRY`.
    ///
    /// Object IDs start at 1000. Each new volume receives one more than the
    /// highest ID currently registered, so an ID is never shared by two live
    /// volumes even after earlier volumes have been destroyed.
    ///
    /// # Errors
    /// * `"ZVOL size overflows u64"` / `"ZVOL block size overflows u64"` if
    ///   the unit conversion to bytes overflows.
    /// * `"ZVOL block size must be non-zero"` if `blocksize_kb` is 0.
    /// * `"ZVOL size must be at least one block"` if the volume is smaller
    ///   than one block.
    /// * `"ZVOL object ID space exhausted"` if no larger ID is available.
    /// * Any error returned by `Zvol::create`.
    pub fn create_zvol(name: &str, size_mb: u64, blocksize_kb: u64) -> Result<usize, &'static str> {
        const BYTES_PER_KB: u64 = 1024;
        const BYTES_PER_MB: u64 = 1024 * 1024;
        const FIRST_OBJECT_ID: u64 = 1000;

        let volsize = size_mb
            .checked_mul(BYTES_PER_MB)
            .ok_or("ZVOL size overflows u64")?;
        let volblocksize = blocksize_kb
            .checked_mul(BYTES_PER_KB)
            .ok_or("ZVOL block size overflows u64")?;

        // Reject geometry that would make the block-count arithmetic in
        // `Zvol::create` divide by zero or underflow.
        if volblocksize == 0 {
            return Err("ZVOL block size must be non-zero");
        }
        if volsize < volblocksize {
            return Err("ZVOL size must be at least one block");
        }

        // Hold the registry lock from ID allocation through insertion so two
        // concurrent callers cannot be handed the same object ID.
        let mut registry = ZVOL_REGISTRY.lock();

        // Derive the ID from the IDs in use rather than from `len()`, which
        // shrinks on destroy and would hand out an ID that is still live.
        let object_id = match registry.iter().map(|z| z.phys.object_id).max() {
            Some(highest) if highest >= FIRST_OBJECT_ID => highest
                .checked_add(1)
                .ok_or("ZVOL object ID space exhausted")?,
            _ => FIRST_OBJECT_ID,
        };

        let zvol = Zvol::create(object_id, volsize, volblocksize)?;

        let index = registry.len();
        registry.push(zvol);

        crate::lcpfs_println!(
            "[ ZVOL ] Created {} ({} MB) at index {}",
            name,
            size_mb,
            index
        );
        Ok(index)
    }

    /// Remove the ZVOL at `index` from the registry and drop it.
    ///
    /// Removal shifts every later entry down by one, so indices previously
    /// returned for those volumes no longer refer to the same volume.
    ///
    /// # Errors
    /// `"Invalid ZVOL index"` if `index` is not a current registry position.
    pub fn destroy_zvol(index: usize) -> Result<(), &'static str> {
        let mut registry = ZVOL_REGISTRY.lock();

        if index < registry.len() {
            registry.remove(index);
            crate::lcpfs_println!("[ ZVOL ] Destroyed ZVOL at index {}", index);
            Ok(())
        } else {
            Err("Invalid ZVOL index")
        }
    }

    /// Check that a ZVOL is registered at `index`.
    ///
    /// Despite the name, no reference is returned: the result is `Ok(())`
    /// when the index is valid at the time of the call.
    ///
    /// # Errors
    /// `"Invalid ZVOL index"` if `index` is not a current registry position.
    pub fn get_zvol(index: usize) -> Result<(), &'static str> {
        let registered = ZVOL_REGISTRY.lock().len();
        if index < registered {
            Ok(())
        } else {
            Err("Invalid ZVOL index")
        }
    }

    /// List all ZVOLs
    ///
    /// Returns one `(index, volsize, volblocksize)` tuple per registered
    /// volume, in registry order; both sizes are in bytes.
    pub fn list_zvols() -> Vec<(usize, u64, u64)> {
        let registry = ZVOL_REGISTRY.lock();

        let mut summary = Vec::with_capacity(registry.len());
        for (index, zvol) in registry.iter().enumerate() {
            summary.push((index, zvol.phys.volsize, zvol.phys.volblocksize));
        }
        summary
    }

    /// Get statistics
    ///
    /// Returns `(count, total_size)`: the number of registered volumes and
    /// the sum of their sizes in bytes.
    pub fn stats() -> (usize, u64) {
        let registry = ZVOL_REGISTRY.lock();

        let mut total_size: u64 = 0;
        for zvol in registry.iter() {
            total_size += zvol.phys.volsize;
        }

        (registry.len(), total_size)
    }
}

/// Example: create a ZVOL named "swap" of `size_mb` MiB.
///
/// Returns the registry index reported by `ZvolManager::create_zvol`, or its
/// error.
pub fn create_swap_zvol(size_mb: u64) -> Result<usize, &'static str> {
    // 128K blocks are the size chosen here for swap / paging use.
    const SWAP_BLOCKSIZE_KB: u64 = 128;

    crate::lcpfs_println!("[ ZVOL ] Creating swap volume: {} MB", size_mb);

    ZvolManager::create_zvol("swap", size_mb, SWAP_BLOCKSIZE_KB)
}

/// Example: create a ZVOL called `name` of `size_mb` MiB for database use.
///
/// Returns the registry index reported by `ZvolManager::create_zvol`, or its
/// error.
pub fn create_database_zvol(name: &str, size_mb: u64) -> Result<usize, &'static str> {
    // 8K blocks, matching the PostgreSQL page size.
    const DATABASE_BLOCKSIZE_KB: u64 = 8;

    crate::lcpfs_println!(
        "[ ZVOL ] Creating database volume: {} ({} MB)",
        name,
        size_mb
    );

    ZvolManager::create_zvol(name, size_mb, DATABASE_BLOCKSIZE_KB)
}