dynamo-llm 1.1.0

Dynamo LLM Library
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Re-export NUMA utilities from dynamo-memory.
pub use dynamo_memory::numa::*;

/// Check if NUMA optimization is explicitly opted-in for the block manager.
///
/// Set `DYN_KVBM_ENABLE_NUMA=1` to enable NUMA-aware allocation in the
/// KV cache block manager. This is opt-in because the block manager
/// manages its own pinned memory allocations separately from `PinnedStorage`.
///
/// The global kill switch `DYN_MEMORY_DISABLE_NUMA` always takes precedence:
/// if it is set truthy, this function returns `false` regardless of
/// `DYN_KVBM_ENABLE_NUMA`.
///
/// TODO(KVBM-336): remove this function in the future
#[deprecated(
    since = "1.0.0",
    note = "Use dynamo_memory::numa::is_numa_enabled instead"
)]
pub fn is_numa_enabled() -> bool {
    // Global kill switch always wins
    if dynamo_memory::numa::is_numa_disabled() {
        return false;
    }
    dynamo_config::env_is_truthy("DYN_KVBM_ENABLE_NUMA")
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_numa_node_equality() {
        let node0a = NumaNode(0);
        let node0b = NumaNode(0);
        let node1 = NumaNode(1);

        assert_eq!(node0a, node0b);
        assert_ne!(node0a, node1);
    }

    #[test]
    fn test_numa_node_unknown() {
        let unknown = NumaNode::UNKNOWN;
        assert!(unknown.is_unknown());
        assert_eq!(unknown.0, u32::MAX);

        let valid = NumaNode(0);
        assert!(!valid.is_unknown());
    }

    #[test]
    fn test_numa_node_display() {
        assert_eq!(format!("{}", NumaNode(0)), "NumaNode(0)");
        assert_eq!(format!("{}", NumaNode(7)), "NumaNode(7)");
        assert_eq!(format!("{}", NumaNode::UNKNOWN), "UNKNOWN");
    }

    #[test]
    fn test_numa_node_serialization() {
        let node = NumaNode(1);
        let json = serde_json::to_string(&node).unwrap();
        let deserialized: NumaNode = serde_json::from_str(&json).unwrap();
        assert_eq!(node, deserialized);
    }

    #[test]
    fn test_numa_node_hash() {
        use std::collections::HashMap;

        let mut map = HashMap::new();
        map.insert(NumaNode(0), "node0");
        map.insert(NumaNode(1), "node1");

        assert_eq!(map.get(&NumaNode(0)), Some(&"node0"));
        assert_eq!(map.get(&NumaNode(1)), Some(&"node1"));
        assert_eq!(map.get(&NumaNode(2)), None);
    }

    #[test]
    fn test_numa_node_copy_clone() {
        let node1 = NumaNode(5);
        let node2 = node1;
        let node3 = node1;

        assert_eq!(node1, node2);
        assert_eq!(node1, node3);
        assert_eq!(node2, node3);
    }

    #[test]
    fn test_get_current_cpu_numa_node() {
        let node = get_current_cpu_numa_node();
        if !node.is_unknown() {
            assert!(node.0 < 8, "NUMA node {} seems unreasonably high", node.0);
        }
    }

    #[test]
    fn test_worker_pool_singleton() {
        let pool1 = worker_pool::NumaWorkerPool::global();
        let pool2 = worker_pool::NumaWorkerPool::global();
        assert!(std::ptr::eq(pool1, pool2));
    }
}

#[cfg(all(test, feature = "testing-cuda"))]
mod cuda_tests {
    use super::*;

    #[test]
    fn test_get_device_numa_node_valid_gpu() {
        match get_device_numa_node(0) {
            Some(node) => println!("GPU 0 detected on NUMA node: {}", node.0),
            None => println!("GPU 0 has no determinable NUMA node"),
        }
    }

    #[test]
    fn test_worker_pool_allocate() {
        let pool = worker_pool::NumaWorkerPool::global();

        match pool.allocate_pinned_for_gpu(8192, 0).unwrap() {
            Some(ptr) => unsafe {
                assert!(!ptr.is_null());
                cudarc::driver::result::free_host(ptr as *mut std::ffi::c_void).unwrap();
            },
            None => {
                println!("NUMA node unknown for GPU 0, allocation skipped");
            }
        }
    }

    #[test]
    fn test_worker_pool_reuse() {
        let pool = worker_pool::NumaWorkerPool::global();

        let r1 = pool.allocate_pinned_for_gpu(1024, 0).unwrap();
        let r2 = pool.allocate_pinned_for_gpu(1024, 0).unwrap();

        match (r1, r2) {
            (Some(ptr1), Some(ptr2)) => unsafe {
                assert!(!ptr1.is_null());
                assert!(!ptr2.is_null());
                assert_ne!(ptr1, ptr2);
                cudarc::driver::result::free_host(ptr1 as *mut std::ffi::c_void).unwrap();
                cudarc::driver::result::free_host(ptr2 as *mut std::ffi::c_void).unwrap();
            },
            (None, None) => {
                println!("NUMA node unknown, both allocations skipped");
            }
            _ => panic!("inconsistent NUMA detection between two calls for same GPU"),
        }
    }

    #[test]
    fn test_zero_size_allocation() {
        let pool = worker_pool::NumaWorkerPool::global();
        let result = pool.allocate_pinned_for_gpu(0, 0);
        match result {
            Ok(None) => {
                println!("NUMA node unknown, zero-size check not reached");
            }
            Err(e) => {
                assert!(e.contains("zero"));
            }
            Ok(Some(_)) => panic!("zero-size allocation should not succeed"),
        }
    }

    #[test]
    fn test_pinned_allocation_api() {
        let pool = worker_pool::NumaWorkerPool::global();

        if let Ok(Some(ptr)) = pool.allocate_pinned_for_gpu(1024, 0) {
            assert!(!ptr.is_null());
            unsafe {
                cudarc::driver::result::free_host(ptr as *mut std::ffi::c_void).unwrap();
            }
        }
    }
}