rialo-s-program-entrypoint 0.11.0-alpha.0

// Copyright (c) Subzero Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

//! Custom heap allocator for Rialo programs that supports dynamic heap sizes.
//!
//! # Overview
//!
//! This module provides a bump allocator optimized for Rialo's execution model.
//! Unlike the default allocator which assumes a fixed 32KB heap, this implementation
//! automatically utilizes whatever heap size is allocated by the runtime, including
//! custom sizes requested via Compute Budget instructions.
//!
//! # How It Works
//!
//! The allocator stores a small header (4-8 bytes) at the start of the heap containing:
//! - Current allocation offset
//! - Optional global state (via generic parameter `G`)
//!
//! Memory is allocated by growing upward from the heap base. On RISC-V the loader supplies the
//! usable heap size at runtime (`set_heap_limit`); an allocation that would exceed it is refused
//! and surfaces as a clean allocation error (`handle_alloc_error`), so the allocator never needs
//! the heap size at compile time. Writing past the heap is then only possible by bypassing this
//! allocator, in which case accessing memory beyond the mapped region eventually traps. Outside
//! RISC-V (and outside the test build, which checks against the simulated heap size) the
//! allocator itself performs no size check.
//!
//! # Key Assumptions
//!
//! This allocator relies on guarantees provided by the Rialo runtime:
//!
//! 1. **Heap location**: On RISC-V the loader supplies the heap base at runtime (recorded
//!    via `set_heap_base`, called from the entrypoint before any allocation); on other
//!    targets it is the fixed address `0x300000000`.
//! 2. **Zero-initialization**: The Rialo runtime zero-initializes the heap region
//!    before program execution begins
//! 3. **Bounded allocation**: On RISC-V the loader also supplies the usable heap size (recorded
//!    via `set_heap_limit`); allocations beyond it are refused. Accessing memory beyond the mapped
//!    region, only reachable by bypassing this allocator, still traps.
//!
//! These assumptions are part of Rialo's documented runtime behavior and are validated
//! in tests using a simulated heap environment.
//!
//! # Deallocation Behavior
//!
//! As a bump allocator, this implementation:
//! - Can reclaim space from the most recent allocation if deallocated
//! - Intentionally leaks memory for all other deallocations (by design)
//! - Is optimized for Rialo's short-lived transaction model where all memory
//!   is reclaimed when the transaction completes
//!
//! # Usage
//!
//! The allocator is typically set up via the `custom_heap_default!` macro in the
//! entrypoint crate. Programs don't interact with it directly - it's used automatically
//! by Rust's allocation APIs (`Vec`, `Box`, etc.).

use core::{
    alloc::{GlobalAlloc, Layout},
    cell::Cell,
};

/// Smallest heap size a program is guaranteed to have: 32 KiB.
///
/// NOTE: The real heap may be bigger when a larger size was requested via Compute Budget.
/// The allocator automatically makes use of all the heap space that is available.
pub const MIN_HEAP_LENGTH: usize = 32 << 10;

/// Bump allocator that grows upward from the heap base.
///
/// Generic parameter `G` allows for optional global state storage at the heap start.
/// Use `G = ()` (the default) for no global state.
///
/// Outside of test builds the struct holds no data of its own (only a `PhantomData<G>`);
/// all allocator state lives in the header at the start of the heap. Test builds
/// additionally carry the pointer and layout of the host allocation that simulates the heap.
///
/// # Safety
///
/// Only one instance should exist per program, and it must be set as the global allocator.
/// Creating multiple instances or using alongside another allocator is undefined behavior.
pub struct BumpAllocator<G = ()> {
    /// Test builds only: start of the host allocation that stands in for the heap.
    #[cfg(test)]
    ptr: core::ptr::NonNull<u8>,
    /// Test builds only: layout of that host allocation. Its size bounds test allocations
    /// and the layout is needed again to free the memory on drop.
    #[cfg(test)]
    layout: Layout,

    /// Ties the allocator to the global-state type `G` without storing a value of it.
    _phantom: core::marker::PhantomData<G>,
}

/// Allocator metadata that lives at the very beginning of the heap.
///
/// The Rialo runtime (or, in tests, the test setup) zero-initializes this header.
/// The allocation offset is kept in a `Cell<u32>` so it can be updated through a
/// shared reference.
#[repr(C)]
struct Header<G> {
    /// Number of bytes in use, counted from the end of the header (not from heap start)
    used: Cell<u32>,
    /// Optional global state (occupies no space when G = ())
    global: G,
}

impl<G> Header<G> {
    /// Header size in bytes, alignment padding included.
    ///
    /// The size is validated against `MIN_HEAP_LENGTH` in `header()`, not here.
    const SIZE: u32 = core::mem::size_of::<Self>() as u32;

    /// Returns the first free byte as an offset measured from the heap start
    #[inline(always)]
    fn get_end_offset(&self) -> u32 {
        let used_after_header = self.used.get();
        Self::SIZE.wrapping_add(used_after_header)
    }

    /// Records the first free byte, given as an offset measured from the heap start
    #[inline(always)]
    fn set_end_offset(&self, offset: u32) {
        let used_after_header = offset.wrapping_sub(Self::SIZE);
        self.used.set(used_after_header);
    }
}

/// `Sync` wrapper over `UnsafeCell<u32>` for the loader-supplied heap base and heap limit (RISC-V
/// target). A `static` must be `Sync`, which `UnsafeCell` is not; the wrapper carries an
/// `unsafe impl Sync`, sound because the guest VM is single-threaded.
///
/// Only compiled for non-test RISC-V builds. The wrapped value is read and written through the
/// raw pointer returned by `.0.get()`.
#[cfg(all(not(test), target_arch = "riscv64"))]
struct HeapCell(core::cell::UnsafeCell<u32>);

// SAFETY: the guest VM executes on a single thread, so a `HeapCell` static is never accessed
// concurrently and sharing a `&HeapCell` cannot introduce a data race.
#[cfg(all(not(test), target_arch = "riscv64"))]
unsafe impl Sync for HeapCell {}

/// Loader-supplied heap base. On RISC-V the heap is not at a fixed address: the loader places it
/// right above the program's read-only and read-write data and passes the base to the entrypoint,
/// recorded here via `set_heap_base`. Growing upward from this per-program base means a program
/// only ever backs the heap it actually requested, regardless of how large its data sections are.
///
/// Starts out as 0, which `BumpAllocator::base_address` treats (in debug builds) as
/// "`set_heap_base` was never called".
#[cfg(all(not(test), target_arch = "riscv64"))]
static HEAP_BASE: HeapCell = HeapCell(core::cell::UnsafeCell::new(0));

/// Loader-supplied usable heap size in bytes (the program's requested `heap_size`). Bounds the
/// default allocator (see `BumpAllocator::heap_limit`); recorded via `set_heap_limit`.
///
/// Starts out as 0, which `BumpAllocator::heap_limit` treats (in debug builds) as
/// "`set_heap_limit` was never called".
#[cfg(all(not(test), target_arch = "riscv64"))]
static HEAP_LIMIT: HeapCell = HeapCell(core::cell::UnsafeCell::new(0));

/// Stores the heap base handed over by the loader so the allocator can find the heap.
///
/// The `entrypoint!`/`entrypoint_no_alloc!` macros call this at the very start of execution,
/// before any allocation can happen. A program that installs `BumpAllocator` from a
/// hand-rolled entrypoint (i.e. without those macros) MUST call this, and `set_heap_limit`,
/// itself before its first allocation; otherwise the heap base remains 0 and allocations trap
/// on unmapped memory.
#[cfg(all(not(test), target_arch = "riscv64"))]
#[inline(always)]
pub fn set_heap_base(base: u32) {
    let slot: *mut u32 = HEAP_BASE.0.get();
    // SAFETY: the VM is single-threaded and this runs once from the entrypoint before the
    // allocator is used, so nothing else aliases or concurrently accesses the slot.
    unsafe {
        *slot = base;
    }
}

/// Stores the usable heap size (in bytes) handed over by the loader.
///
/// The `entrypoint!`/`entrypoint_no_alloc!` macros call this together with `set_heap_base`. It
/// bounds the default allocator to the program's requested `heap_size`, so that an
/// over-allocation fails with a clean allocation error instead of growing into the serialized
/// account region.
#[cfg(all(not(test), target_arch = "riscv64"))]
#[inline(always)]
pub fn set_heap_limit(limit: u32) {
    let slot: *mut u32 = HEAP_LIMIT.0.get();
    // SAFETY: the VM is single-threaded and this runs once from the entrypoint before the
    // allocator is used, so nothing else aliases or concurrently accesses the slot.
    unsafe {
        *slot = limit;
    }
}

// Implementation for the real Rialo target (every build that is not a test build)
#[cfg(not(test))]
impl<G> BumpAllocator<G> {
    /// Fixed address at which the program heap begins on non-RISC-V targets.
    #[cfg(not(target_arch = "riscv64"))]
    const HEAP_START_ADDRESS: u64 = 0x300000000;

    /// Builds the allocator handle. The handle itself carries no state.
    ///
    /// # Safety
    ///
    /// - At most one `BumpAllocator` instance should exist per program
    /// - That instance must be installed as the global allocator
    /// - Several instances, or use next to another allocator, is undefined behavior
    /// - The heap region must have been zero-initialized by the Rialo runtime
    ///   (guaranteed by spec)
    pub const unsafe fn new() -> Self {
        // SAFETY: upholding "created once and installed as the global allocator" is the
        // caller's job; zero-initialization of the heap before program execution is
        // guaranteed by the Rialo runtime.
        let _phantom = core::marker::PhantomData;
        Self { _phantom }
    }

    /// Address at which the heap region starts.
    ///
    /// This is the non-RISC-V variant: the address is the fixed `HEAP_START_ADDRESS`.
    #[cfg(not(target_arch = "riscv64"))]
    #[inline(always)]
    fn base_address(&self) -> u64 {
        Self::HEAP_START_ADDRESS
    }

    /// Address at which the heap region starts.
    ///
    /// This is the RISC-V variant: the address is whatever the loader recorded in `HEAP_BASE`.
    #[cfg(target_arch = "riscv64")]
    #[inline(always)]
    fn base_address(&self) -> u64 {
        // SAFETY: single-threaded VM; `HEAP_BASE` is written once at entry, before any
        // allocation, and afterwards only read here, so there is no aliasing or concurrent
        // access.
        let recorded: u32 = unsafe { *HEAP_BASE.0.get() };
        // Zero means `set_heap_base` never ran, i.e. the `BumpAllocator` was installed
        // without `entrypoint!`/`entrypoint_no_alloc!`. Without this check allocations would
        // target unmapped low memory and trap opaquely. The check is debug-only, so release
        // builds pay nothing for it on the allocation hot path on-chain.
        debug_assert!(
            recorded != 0,
            "heap base is 0: call `set_heap_base` before the first allocation"
        );
        u64::from(recorded)
    }

    /// Usable heap size in bytes as recorded from the loader (RISC-V target).
    ///
    /// `try_alloc_*` reject any allocation whose end offset from the heap base would be larger
    /// than this value, so an over-allocating program fails with a clean allocation error at
    /// its requested `heap_size` boundary rather than growing into the serialized account
    /// region above the heap. The bound is advisory: it only constrains programs that use this
    /// default allocator. A program that replaces or bypasses the allocator is not constrained
    /// by it; a raw write past the heap can land anywhere in the serialized account region
    /// above it (any instruction account, not just the program's own writable ones), but the
    /// runtime re-validates every written-back change against on-chain authority
    /// (`can_data_be_changed`/`can_data_be_resized`, owner and kelvins checks), so an
    /// illegitimate change fails the transaction rather than persisting.
    #[cfg(target_arch = "riscv64")]
    #[inline(always)]
    fn heap_limit(&self) -> u32 {
        // SAFETY: single-threaded VM; `HEAP_LIMIT` is written once at entry, before any
        // allocation, and afterwards only read here, so there is no aliasing or concurrent
        // access.
        let recorded: u32 = unsafe { *HEAP_LIMIT.0.get() };
        // Zero means `set_heap_limit` never ran. Every allocation would then fail right away
        // (`end_offset > 0`), aborting with Custom(11) and no clear cause. The check is
        // debug-only, so release builds pay nothing for it on the allocation hot path.
        debug_assert!(
            recorded != 0,
            "heap limit is 0: call `set_heap_limit` before the first allocation"
        );
        recorded
    }

    /// Pointer to the first byte of the heap.
    #[inline(always)]
    fn heap_start(&self) -> *mut u8 {
        let address = self.base_address();
        address as *mut u8
    }

    /// Converts a heap pointer into its byte offset from the heap base.
    #[inline(always)]
    fn to_offset(&self, ptr: *mut u8) -> u32 {
        let base = self.base_address();
        let addr = ptr as u64;
        debug_assert!(
            base <= addr && addr < base + u32::MAX as u64,
            "Pointer outside valid heap range"
        );
        let distance = addr - base;
        distance as u32
    }

    /// Converts a byte offset from the heap base back into a pointer.
    #[allow(clippy::wrong_self_convention)]
    #[inline(always)]
    fn from_offset(&self, offset: u32) -> *mut u8 {
        let address = self.base_address() + u64::from(offset);
        address as *mut u8
    }
}

// Test-only implementation: the heap is a real, zeroed host allocation
#[cfg(test)]
impl<G: bytemuck::Zeroable> BumpAllocator<G> {
    /// Builds an allocator over a freshly allocated, zeroed test heap of `size` bytes
    /// (capped at `u32::MAX`).
    ///
    /// Panics when the heap cannot hold the header or the host allocation fails.
    fn new_test(size: usize) -> Self {
        let heap_len = size.min(u32::MAX as usize);
        let header_len = core::mem::size_of::<Header<G>>();
        assert!(heap_len >= header_len, "Heap too small for header");

        // At least 16-byte alignment, or more when the header itself demands it.
        let heap_align = core::mem::align_of::<Header<G>>().max(16);
        let layout = Layout::from_size_align(heap_len, heap_align).unwrap();

        // SAFETY: the layout was validated by `Layout::from_size_align` above
        let raw = unsafe { std::alloc::alloc_zeroed(layout) };
        let ptr = core::ptr::NonNull::new(raw).expect("Failed to allocate test heap");

        Self {
            ptr,
            layout,
            _phantom: core::marker::PhantomData,
        }
    }

    /// Pointer to the first byte of the simulated heap.
    #[inline(always)]
    fn heap_start(&self) -> *mut u8 {
        self.ptr.as_ptr()
    }

    /// Converts a heap pointer into its byte offset from the heap start.
    #[inline(always)]
    fn to_offset(&self, ptr: *mut u8) -> u32 {
        let start = self.heap_start() as usize;
        let distance = ptr as usize - start;
        distance as u32
    }

    /// Converts a byte offset from the heap start back into a pointer.
    #[allow(clippy::wrong_self_convention)]
    #[inline(always)]
    fn from_offset(&self, offset: u32) -> *mut u8 {
        let start = self.heap_start();
        start.wrapping_add(offset as usize)
    }
}

#[cfg(test)]
impl<G> Drop for BumpAllocator<G> {
    /// Releases the host allocation that simulates the heap in tests.
    fn drop(&mut self) {
        let raw = self.ptr.as_ptr();
        let layout = self.layout;
        // SAFETY: `raw` and `layout` are the pointer and layout stored for this allocation
        unsafe { std::alloc::dealloc(raw, layout) };
    }
}

impl<G: bytemuck::Zeroable> BumpAllocator<G> {
    /// Returns reference to the header at the start of the heap
    #[inline(always)]
    fn header(&self) -> &Header<G> {
        // Compile-time check: header must fit in minimum guaranteed heap
        const {
            assert!(
                core::mem::size_of::<Header<G>>() <= MIN_HEAP_LENGTH,
                "Header too large for minimum heap size"
            );
        }

        // SAFETY:
        // 1. On Rialo: the heap base is page-aligned (runtime-supplied on RISC-V, the fixed
        //    0x300000000 elsewhere), so the Header is aligned
        // 2. In tests: Test allocator ensures proper alignment via Layout
        // 3. Header fits in heap (compile-time check above)
        // 4. Heap memory is zero-initialized (by Rialo runtime or test allocator)
        // 5. Header<G> is Zeroable, so zero-initialization is valid
        unsafe { &*self.heap_start().cast::<Header<G>>() }
    }

    /// Fast path allocation - assumes success is common case.
    ///
    /// Bumps the end-of-heap offset past a new block of `layout.size()` bytes and returns a
    /// pointer to that block, or `None` (leaving the header untouched) when:
    /// - the size does not fit in `u32`,
    /// - any of the address/offset computations would overflow, or
    /// - the block would end past the heap bound (test heap size in tests, loader-supplied
    ///   `heap_limit` on RISC-V; other targets have no bound check here).
    ///
    /// The block start is aligned as an *absolute address*, not as an offset from the heap
    /// base. Aligning only the offset yields a correctly aligned pointer solely when the heap
    /// base itself is aligned to at least `layout.align()`; the test heap is only guaranteed
    /// `max(align_of::<Header<G>>(), 16)` alignment and the RISC-V base is supplied at runtime,
    /// so larger alignments could otherwise produce misaligned pointers. Whenever the base is
    /// sufficiently aligned both approaches give the same result.
    // NOTE(review): `?` is deliberately not used in this function in the original code
    // (the clippy lint is silenced) - presumably for codegen reasons; confirm before changing.
    #[allow(clippy::question_mark)]
    #[inline(always)]
    fn try_alloc_fast(&self, layout: Layout) -> Option<*mut u8> {
        let header = self.header();
        let current_offset = header.get_end_offset();

        let size = match u32::try_from(layout.size()) {
            Ok(s) => s,
            Err(_) => return None,
        };

        debug_assert!(layout.align().is_power_of_two());
        let align_mask = layout.align() - 1;

        // Round the absolute address of the first free byte up to the requested alignment.
        // `u32 -> usize` is lossless on the 32/64-bit targets this code runs on.
        let base_addr = self.heap_start() as usize;
        let unaligned_addr = match base_addr.checked_add(current_offset as usize) {
            Some(addr) => addr,
            None => return None,
        };
        let aligned_addr = match unaligned_addr.checked_add(align_mask) {
            Some(addr) => addr & !align_mask,
            None => return None,
        };

        // `aligned_addr >= unaligned_addr >= base_addr`, so the subtraction cannot underflow.
        let aligned_offset = match u32::try_from(aligned_addr - base_addr) {
            Ok(offset) => offset,
            Err(_) => return None,
        };

        let end_offset = match aligned_offset.checked_add(size) {
            Some(end) => end,
            None => return None,
        };

        #[cfg(test)]
        if end_offset as usize > self.layout.size() {
            return None;
        }

        // Advisory bound: refuse allocations past the loader-supplied heap size so an
        // over-allocation fails cleanly instead of growing into the account region.
        #[cfg(all(not(test), target_arch = "riscv64"))]
        if end_offset > self.heap_limit() {
            return None;
        }

        header.set_end_offset(end_offset);
        Some(self.from_offset(aligned_offset))
    }

    /// Try to allocate at a specific pointer (used for in-place realloc).
    ///
    /// Moves the end-of-heap offset to `ptr + layout.size()` and returns `ptr`, or returns
    /// `None` (leaving the header untouched) when the size does not fit in `u32`, the end
    /// offset overflows, or the block would end past the heap bound. `layout.align()` is not
    /// consulted: the caller passes a pointer that was already handed out by this allocator.
    #[allow(clippy::question_mark)]
    #[inline]
    fn try_alloc_at(&self, ptr: *mut u8, layout: Layout) -> Option<*mut u8> {
        let offset = self.to_offset(ptr);

        let size = match u32::try_from(layout.size()) {
            Ok(s) => s,
            Err(_) => return None,
        };

        let end_offset = match offset.checked_add(size) {
            Some(end) => end,
            None => return None,
        };

        #[cfg(test)]
        if end_offset as usize > self.layout.size() {
            return None;
        }

        // Advisory bound: refuse in-place growth past the loader-supplied heap size.
        #[cfg(all(not(test), target_arch = "riscv64"))]
        if end_offset > self.heap_limit() {
            return None;
        }

        self.header().set_end_offset(end_offset);
        Some(ptr)
    }

    /// Returns reference to global state reserved at heap start
    #[inline]
    pub fn global(&self) -> &G {
        &self.header().global
    }

    /// Returns amount of heap used (excluding header)
    #[cfg(test)]
    pub fn used(&self) -> usize {
        self.header().used.get() as usize
    }
}

// SAFETY: BumpAllocator correctly implements GlobalAlloc: failure is reported by returning a
// null pointer, new blocks are always carved out at or after the current end-of-heap offset,
// and that offset is only ever lowered when the block being released is the last one.
unsafe impl<G: bytemuck::Zeroable> GlobalAlloc for BumpAllocator<G> {
    /// Allocates a block for `layout`, returning null when the request cannot be satisfied.
    #[inline]
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // Fast path: assume allocation succeeds
        self.try_alloc_fast(layout).unwrap_or(core::ptr::null_mut())
    }

    /// Releases `ptr`. Space is only reclaimed when `ptr` is the most recent allocation;
    /// every other block is intentionally leaked.
    #[inline]
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // Only deallocate if this is the most recent allocation
        let header = self.header();
        let ptr_end = ptr.wrapping_add(layout.size());
        let end_offset = self.to_offset(ptr_end);

        if end_offset == header.get_end_offset() {
            // This was the last allocation, reclaim it
            header.set_end_offset(self.to_offset(ptr));
        }
        // Otherwise, bump allocator intentionally leaks (by design)
    }

    /// Resizes the block at `ptr` to `new_size` bytes.
    ///
    /// - Last allocation: resized in place (grow or shrink); null if it no longer fits.
    /// - Any other allocation, shrinking: the same pointer is returned and the tail is leaked.
    /// - Any other allocation, growing: a new block is allocated and the old contents are
    ///   copied over; the old block is leaked. Null if the new block cannot be allocated.
    ///
    /// On a null return the original block is left untouched.
    #[inline]
    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        let header = self.header();
        let ptr_end = ptr.wrapping_add(layout.size());
        let end_offset = self.to_offset(ptr_end);

        // Check if this is the last allocation
        if end_offset == header.get_end_offset() {
            // Last allocation - try to resize in place
            // SAFETY: `layout.align()` comes from a valid `Layout`, and the `realloc` contract
            // makes the caller guarantee that `new_size` is valid for that alignment.
            let new_layout =
                unsafe { Layout::from_size_align_unchecked(new_size, layout.align()) };
            return self
                .try_alloc_at(ptr, new_layout)
                .unwrap_or(core::ptr::null_mut());
        }

        // Not the last allocation
        if new_size <= layout.size() {
            // Shrinking - return same pointer (leak extra space, this is bump allocator)
            return ptr;
        }

        // Growing non-last allocation - need new allocation and copy
        // SAFETY: `layout.align()` comes from a valid `Layout`, and the `realloc` contract
        // makes the caller guarantee that `new_size` is valid for that alignment.
        let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, layout.align()) };
        match self.try_alloc_fast(new_layout) {
            Some(new_ptr) => {
                // SAFETY:
                // - src is valid for reads of layout.size() bytes (caller guarantee)
                // - dst is valid for writes of new_size bytes (just allocated), and
                //   new_size > layout.size() on this path
                // - Regions don't overlap (new allocation is after old in bump allocator)
                unsafe { core::ptr::copy_nonoverlapping(ptr, new_ptr, layout.size()) };
                new_ptr
            }
            None => core::ptr::null_mut(),
        }
    }
}

// Unit tests are declared out-of-line and are only compiled for test builds.
#[cfg(test)]
mod unit_tests;