candle-cuda-vmm 0.1.1

CUDA Virtual Memory Management bindings for elastic KV cache allocation in Candle
Documentation
//! Simple usage example for candle-cuda-vmm.
//!
//! This example demonstrates basic usage of VirtualMemoryPool for elastic memory allocation.

use candle_cuda_vmm::{VirtualMemoryPool, Result};
use candle_core::Device;

/// Walks through a basic `VirtualMemoryPool` session: create a pool, allocate two
/// regions, print statistics along the way, then deallocate both regions.
///
/// When CUDA device 0 cannot be opened, a message is written to stderr and the
/// example still returns `Ok(())`.
fn main() -> Result<()> {
    println!("=== Candle CUDA VMM - Simple Usage Example ===\n");

    // Without a usable CUDA device there is nothing to demonstrate; report it and
    // leave without signalling a failure.
    let gpu = match Device::new_cuda(0) {
        Err(err) => {
            eprintln!("Error: CUDA device not available: {}", err);
            eprintln!("This example requires a CUDA-capable GPU.");
            return Ok(());
        }
        Ok(opened) => opened,
    };

    println!("Using CUDA device: {:?}", gpu);

    // Pool parameters: 128GB of virtual address space, handed out in 2MB pages.
    let virtual_bytes = 128 * 1024 * 1024 * 1024u64;
    let page_bytes = 2 * 1024 * 1024;

    println!("\nCreating virtual memory pool:");
    let virtual_gb = virtual_bytes / (1024 * 1024 * 1024);
    println!("  Virtual capacity: {} GB", virtual_gb);
    let page_mb = page_bytes / (1024 * 1024);
    println!("  Page size: {} MB", page_mb);

    let mut pool = VirtualMemoryPool::new(virtual_bytes as usize, page_bytes, gpu)?;

    println!("\nPool created successfully!");
    let base = pool.base_address();
    println!("  Base address: 0x{:x}", base);
    let initial_usage = pool.physical_memory_usage();
    println!("  Physical memory usage: {} bytes", initial_usage);

    // First region: 1GB starting at offset 0.
    println!("\n--- Allocating 1GB of memory ---");
    let first_len = 1024 * 1024 * 1024;
    let first_addr = pool.allocate(0, first_len)?;

    println!("Allocated {} bytes at address 0x{:x}", first_len, first_addr);
    let usage_mb = pool.physical_memory_usage() / (1024 * 1024);
    println!("Physical memory usage: {} MB", usage_mb);
    let first_mapped = pool.is_mapped(0, first_len);
    println!("Is mapped: {}", first_mapped);

    // Snapshot of the pool statistics after the first allocation.
    let snapshot = pool.stats();
    println!("\nMemory statistics:");
    let capacity_gb = snapshot.virtual_capacity / (1024 * 1024 * 1024);
    println!("  Virtual capacity: {} GB", capacity_gb);
    let physical_mb = snapshot.physical_usage / (1024 * 1024);
    println!("  Physical usage: {} MB", physical_mb);
    println!("  Mapped pages: {}", snapshot.mapped_pages);
    let fragmentation_pct = snapshot.fragmentation_ratio * 100.0;
    println!("  Fragmentation ratio: {:.2}%", fragmentation_pct);

    // Second region: 512MB placed directly after the first one.
    println!("\n--- Allocating another 512MB ---");
    let second_len = 512 * 1024 * 1024;
    let second_offset = first_len;
    let second_addr = pool.allocate(second_offset, second_len)?;

    println!("Allocated {} bytes at address 0x{:x}", second_len, second_addr);
    let total_mb = pool.physical_memory_usage() / (1024 * 1024);
    println!("Total physical memory usage: {} MB", total_mb);

    // Release the first region while the second one stays allocated.
    println!("\n--- Deallocating first 1GB ---");
    pool.deallocate(0, first_len)?;

    println!("Deallocated {} bytes", first_len);
    let remaining_mb = pool.physical_memory_usage() / (1024 * 1024);
    println!("Remaining physical memory usage: {} MB", remaining_mb);

    // Statistics again, now with only the second region left.
    let closing = pool.stats();
    println!("\nFinal memory statistics:");
    let closing_mb = closing.physical_usage / (1024 * 1024);
    println!("  Physical usage: {} MB", closing_mb);
    println!("  Mapped pages: {}", closing.mapped_pages);

    // Release the second region as well before finishing.
    println!("\n--- Cleanup ---");
    println!("Deallocating remaining memory...");
    pool.deallocate(second_offset, second_len)?;

    let final_usage = pool.physical_memory_usage();
    println!("Final physical memory usage: {} bytes", final_usage);
    println!("\nExample completed successfully!");

    Ok(())
}