candle_cuda_vmm/virtual_memory.rs

//! Virtual memory pool for elastic memory allocation.

use crate::cuda_ffi::{self, AccessFlags};
use crate::error::{Result, VmmError};
use crate::mapping::{map_memory, set_memory_access, unmap_memory, VirtualAddressRange};
use crate::physical_memory::PhysicalMemoryHandle;
use candle_core::{Device, DeviceLocation};
use std::collections::HashMap;

/// Helper function to extract the device ordinal from a Candle `Device`.
fn get_device_ordinal(device: &Device) -> Result<i32> {
    match device.location() {
        DeviceLocation::Cuda { gpu_id } => Ok(gpu_id as i32),
        _ => Err(VmmError::other("Device must be a CUDA device")),
    }
}

/// Page state in the virtual memory pool.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PageState {
    /// Page is not allocated (no physical memory).
    Free,
    /// Page is allocated and mapped to physical memory.
    Allocated,
}

/// Elastic memory pool with virtual memory backing.
///
/// This pool reserves a large virtual address space but only allocates physical
/// memory on-demand when `allocate()` is called. This enables:
/// - Large virtual capacity (e.g., 128GB) with minimal initial physical usage
/// - Dynamic allocation/deallocation based on workload
/// - Reduced memory waste for bursty workloads
///
/// # Example
/// ```no_run
/// use candle_cuda_vmm::VirtualMemoryPool;
/// use candle_core::Device;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let device = Device::new_cuda(0)?;
/// let mut pool = VirtualMemoryPool::new(
///     128 * 1024 * 1024 * 1024, // 128GB virtual capacity
///     2 * 1024 * 1024,          // 2MB page size
///     device,
/// )?;
///
/// // Allocate 1GB of physical memory on-demand
/// let addr = pool.allocate(0, 1024 * 1024 * 1024)?;
/// println!("Physical usage: {} bytes", pool.physical_memory_usage());
///
/// // Deallocate when done
/// pool.deallocate(0, 1024 * 1024 * 1024)?;
/// # Ok(())
/// # }
/// ```
pub struct VirtualMemoryPool {
    /// Virtual address range reservation.
    virtual_range: VirtualAddressRange,
    /// Physical memory handles for each page (indexed by page number).
    physical_pages: HashMap<usize, PhysicalMemoryHandle>,
    /// Page state tracking.
    page_states: Vec<PageState>,
    /// Page size in bytes.
    page_size: usize,
    /// Total virtual capacity in bytes.
    total_capacity: usize,
    /// Currently mapped size in bytes.
    mapped_size: usize,
    /// Device ordinal.
    device_ordinal: i32,
}

impl VirtualMemoryPool {
    /// Create a new virtual memory pool.
    ///
    /// # Arguments
    /// * `capacity` - Maximum virtual address space (e.g., 128GB).
    /// * `page_size` - Page granularity (e.g., 2MB for large pages).
    /// * `device` - CUDA device.
    ///
    /// # Returns
    /// Pool with reserved virtual address space, no physical memory allocated.
    ///
    /// # Errors
    /// Returns error if:
    /// - Device is not a CUDA device
    /// - Page size is invalid (not power of 2 or < 64KB)
    /// - Virtual address reservation fails
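    ///
    /// # Example
    /// A minimal sketch assuming a CUDA-capable device 0; the capacity and page
    /// size below are illustrative, and the capacity is rounded up to a whole
    /// number of pages.
    /// ```no_run
    /// # use candle_cuda_vmm::VirtualMemoryPool;
    /// # use candle_core::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let device = Device::new_cuda(0)?;
    /// // Reserve 1GB of virtual space with 2MB pages; no physical memory is mapped yet.
    /// let pool = VirtualMemoryPool::new(1024 * 1024 * 1024, 2 * 1024 * 1024, device)?;
    /// assert_eq!(pool.physical_memory_usage(), 0);
    /// assert_eq!(pool.capacity(), 1024 * 1024 * 1024);
    /// # Ok(())
    /// # }
    /// ```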
    pub fn new(capacity: usize, page_size: usize, device: Device) -> Result<Self> {
        // Validate device
        let device_ordinal = get_device_ordinal(&device)?;

        // Validate page size
        if !page_size.is_power_of_two() || page_size < 64 * 1024 {
            return Err(VmmError::InvalidPageSize(page_size));
        }

        // Round capacity up to a multiple of the page size
        let capacity = (capacity + page_size - 1) / page_size * page_size;

        // Reserve virtual address space
        let virtual_range = VirtualAddressRange::new(capacity, page_size)?;

        // Calculate number of pages
        let num_pages = capacity / page_size;

        Ok(Self {
            virtual_range,
            physical_pages: HashMap::new(),
            page_states: vec![PageState::Free; num_pages],
            page_size,
            total_capacity: capacity,
            mapped_size: 0,
            device_ordinal,
        })
    }

    /// Allocate and map physical pages on-demand.
    ///
    /// # Arguments
    /// * `offset` - Offset in virtual address space (bytes).
    /// * `size` - Number of bytes to allocate.
    ///
    /// # Returns
    /// Base virtual address of allocated region.
    ///
    /// # Errors
    /// Returns error if:
    /// - Offset/size out of bounds
    /// - Region already allocated
    /// - Physical memory allocation fails
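    ///
    /// # Example
    /// A minimal sketch assuming a CUDA-capable device 0; sizes are illustrative.
    /// Requests are rounded up to whole pages, so physical usage reflects the
    /// page-aligned size.
    /// ```no_run
    /// # use candle_cuda_vmm::VirtualMemoryPool;
    /// # use candle_core::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let device = Device::new_cuda(0)?;
    /// let page = 2 * 1024 * 1024;
    /// let mut pool = VirtualMemoryPool::new(64 * page, page, device)?;
    ///
    /// // Request 3MB starting at offset 0; this maps two 2MB pages.
    /// let addr = pool.allocate(0, 3 * 1024 * 1024)?;
    /// assert_eq!(addr, pool.base_address());
    /// assert_eq!(pool.physical_memory_usage(), 2 * page);
    /// # Ok(())
    /// # }
    /// ```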
    pub fn allocate(&mut self, offset: usize, size: usize) -> Result<usize> {
        // Validate parameters
        if offset + size > self.total_capacity {
            return Err(VmmError::InvalidOffset {
                offset,
                size,
                capacity: self.total_capacity,
            });
        }

        // Align offset and size to page boundaries
        let start_page = offset / self.page_size;
        let end_page = (offset + size + self.page_size - 1) / self.page_size;

        // Check if any pages are already allocated
        for page_idx in start_page..end_page {
            if self.page_states[page_idx] == PageState::Allocated {
                return Err(VmmError::AlreadyMapped {
                    offset: page_idx * self.page_size,
                    size: self.page_size,
                });
            }
        }

        // Create the device handle once; it is reused for every page below.
        let device = Device::new_cuda(self.device_ordinal as usize)?;

        // Allocate and map each page
        for page_idx in start_page..end_page {
            // Allocate physical memory for this page
            let physical_handle = PhysicalMemoryHandle::new(self.page_size, &device)?;

            // Map physical memory to virtual address
            let page_offset = page_idx * self.page_size;
            map_memory(
                &self.virtual_range,
                page_offset,
                &physical_handle,
                0,
                self.page_size,
            )?;

            // Set memory access permissions
            set_memory_access(
                &self.virtual_range,
                page_offset,
                self.page_size,
                self.device_ordinal,
                AccessFlags::ReadWrite,
            )?;

            // Store physical handle and update state
            self.physical_pages.insert(page_idx, physical_handle);
            self.page_states[page_idx] = PageState::Allocated;
            self.mapped_size += self.page_size;
        }

        Ok(self.virtual_range.base_address() + offset)
    }

    /// Unmap and free physical pages.
    ///
    /// # Arguments
    /// * `offset` - Offset in virtual address space (bytes).
    /// * `size` - Number of bytes to free.
    ///
    /// # Errors
    /// Returns error if:
    /// - Offset/size out of bounds
    /// - Region not allocated
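    ///
    /// # Example
    /// A minimal sketch assuming a CUDA-capable device 0; the same page-aligned
    /// range that was allocated is freed again.
    /// ```no_run
    /// # use candle_cuda_vmm::VirtualMemoryPool;
    /// # use candle_core::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let device = Device::new_cuda(0)?;
    /// let page = 2 * 1024 * 1024;
    /// let mut pool = VirtualMemoryPool::new(64 * page, page, device)?;
    ///
    /// pool.allocate(0, 4 * page)?;
    /// pool.deallocate(0, 4 * page)?;
    /// assert_eq!(pool.physical_memory_usage(), 0);
    /// # Ok(())
    /// # }
    /// ```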
    pub fn deallocate(&mut self, offset: usize, size: usize) -> Result<()> {
        // Validate parameters
        if offset + size > self.total_capacity {
            return Err(VmmError::InvalidOffset {
                offset,
                size,
                capacity: self.total_capacity,
            });
        }

        // Align offset and size to page boundaries
        let start_page = offset / self.page_size;
        let end_page = (offset + size + self.page_size - 1) / self.page_size;

        // Check that all pages in the range are allocated
        for page_idx in start_page..end_page {
            if self.page_states[page_idx] == PageState::Free {
                return Err(VmmError::NotMapped {
                    offset: page_idx * self.page_size,
                    size: self.page_size,
                });
            }
        }

        // Unmap and free each page
        for page_idx in start_page..end_page {
            let page_offset = page_idx * self.page_size;

            // Unmap virtual memory
            unmap_memory(&self.virtual_range, page_offset, self.page_size)?;

            // Remove physical handle (automatically freed via Drop)
            self.physical_pages.remove(&page_idx);
            self.page_states[page_idx] = PageState::Free;
            self.mapped_size -= self.page_size;
        }

        Ok(())
    }

    /// Get current physical memory usage in bytes.
    pub fn physical_memory_usage(&self) -> usize {
        self.mapped_size
    }

    /// Get virtual address space capacity in bytes.
    pub fn capacity(&self) -> usize {
        self.total_capacity
    }

    /// Get base virtual address.
    pub fn base_address(&self) -> usize {
        self.virtual_range.base_address()
    }

    /// Get page size in bytes.
    pub fn page_size(&self) -> usize {
        self.page_size
    }

    /// Check if a range is currently mapped.
    ///
    /// # Arguments
    /// * `offset` - Offset in virtual address space (bytes).
    /// * `size` - Size to check (bytes).
    ///
    /// # Returns
    /// True if entire range is mapped, false otherwise.
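    ///
    /// # Example
    /// A minimal sketch assuming a CUDA-capable device 0: only ranges whose
    /// pages have all been allocated report as mapped.
    /// ```no_run
    /// # use candle_cuda_vmm::VirtualMemoryPool;
    /// # use candle_core::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let device = Device::new_cuda(0)?;
    /// let page = 2 * 1024 * 1024;
    /// let mut pool = VirtualMemoryPool::new(64 * page, page, device)?;
    ///
    /// pool.allocate(0, 2 * page)?;
    /// assert!(pool.is_mapped(0, 2 * page));
    /// assert!(!pool.is_mapped(2 * page, page)); // pages beyond the allocation
    /// # Ok(())
    /// # }
    /// ```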
    pub fn is_mapped(&self, offset: usize, size: usize) -> bool {
        if offset + size > self.total_capacity {
            return false;
        }

        let start_page = offset / self.page_size;
        let end_page = (offset + size + self.page_size - 1) / self.page_size;

        for page_idx in start_page..end_page {
            if self.page_states[page_idx] != PageState::Allocated {
                return false;
            }
        }

        true
    }

    /// Compact pool by coalescing free pages (no-op for now, future optimization).
    pub fn compact(&mut self) -> Result<()> {
        // Future: Implement compaction to reduce fragmentation
        Ok(())
    }

    /// Get memory statistics.
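    ///
    /// # Example
    /// A minimal sketch assuming a CUDA-capable device 0; with half of the pages
    /// mapped, `fragmentation_ratio` reports the unmapped fraction (0.5).
    /// ```no_run
    /// # use candle_cuda_vmm::VirtualMemoryPool;
    /// # use candle_core::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let device = Device::new_cuda(0)?;
    /// let page = 2 * 1024 * 1024;
    /// let mut pool = VirtualMemoryPool::new(8 * page, page, device)?;
    ///
    /// pool.allocate(0, 4 * page)?;
    /// let stats = pool.stats();
    /// assert_eq!(stats.mapped_pages, 4);
    /// assert_eq!(stats.physical_usage, 4 * page);
    /// assert!((stats.fragmentation_ratio - 0.5).abs() < f32::EPSILON);
    /// # Ok(())
    /// # }
    /// ```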
    pub fn stats(&self) -> MemoryStats {
        let allocated_pages = self
            .page_states
            .iter()
            .filter(|&&state| state == PageState::Allocated)
            .count();

        let total_pages = self.page_states.len();
        let fragmentation_ratio = if total_pages > 0 {
            1.0 - (allocated_pages as f32 / total_pages as f32)
        } else {
            0.0
        };

        MemoryStats {
            virtual_capacity: self.total_capacity,
            physical_usage: self.mapped_size,
            mapped_pages: allocated_pages,
            fragmentation_ratio,
        }
    }
}

/// Memory statistics for a pool.
#[derive(Debug, Clone)]
pub struct MemoryStats {
    /// Virtual address space capacity in bytes.
    pub virtual_capacity: usize,
    /// Physical memory usage in bytes.
    pub physical_usage: usize,
    /// Number of mapped pages.
    pub mapped_pages: usize,
    /// Fraction of pages that are currently unmapped (0.0 = fully mapped,
    /// 1.0 = fully free); used as a coarse proxy for fragmentation.
    pub fragmentation_ratio: f32,
}

/// Shared memory pool for multiple models.
///
/// Manages multiple virtual memory pools with a global physical memory limit.
/// Enables memory sharing across models with per-model statistics.
///
/// # Example
/// ```no_run
/// use candle_cuda_vmm::SharedMemoryPool;
/// use candle_core::Device;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let device = Device::new_cuda(0)?;
/// let mut shared_pool = SharedMemoryPool::new(
///     32 * 1024 * 1024 * 1024, // 32GB global physical limit
///     device,
/// )?;
///
/// // Register models
/// shared_pool.register_model("llama-7b", 64 * 1024 * 1024 * 1024)?; // 64GB virtual
/// shared_pool.register_model("gpt2", 32 * 1024 * 1024 * 1024)?;     // 32GB virtual
///
/// // Allocate for specific model
/// let addr = shared_pool.allocate_for_model("llama-7b", 1024 * 1024 * 1024)?;
/// # Ok(())
/// # }
/// ```
pub struct SharedMemoryPool {
    /// Per-model virtual memory pools.
    pools: HashMap<String, VirtualMemoryPool>,
    /// Global physical memory limit in bytes.
    global_physical_limit: usize,
    /// Current global physical usage in bytes.
    current_physical_usage: usize,
    /// Device ordinal.
    device_ordinal: i32,
    /// Default page size for new pools.
    default_page_size: usize,
}

impl SharedMemoryPool {
    /// Create a shared pool with a global physical memory limit.
    ///
    /// # Arguments
    /// * `physical_limit` - Global physical memory limit (bytes).
    /// * `device` - CUDA device.
    ///
    /// # Returns
    /// Shared memory pool.
    pub fn new(physical_limit: usize, device: Device) -> Result<Self> {
        let device_ordinal = get_device_ordinal(&device)?;

        // Get recommended page size
        let default_page_size = cuda_ffi::get_recommended_granularity(device_ordinal)?;

        Ok(Self {
            pools: HashMap::new(),
            global_physical_limit: physical_limit,
            current_physical_usage: 0,
            device_ordinal,
            default_page_size,
        })
    }

    /// Register a model with virtual address space reservation.
    ///
    /// # Arguments
    /// * `model_id` - Unique model identifier.
    /// * `virtual_capacity` - Virtual address space for this model (bytes).
    ///
    /// # Errors
    /// Returns error if the model is already registered.
    pub fn register_model(&mut self, model_id: &str, virtual_capacity: usize) -> Result<()> {
        if self.pools.contains_key(model_id) {
            return Err(VmmError::ModelAlreadyExists(model_id.to_string()));
        }

        let device = Device::new_cuda(self.device_ordinal as usize)?;
        let pool = VirtualMemoryPool::new(virtual_capacity, self.default_page_size, device)?;

        self.pools.insert(model_id.to_string(), pool);
        Ok(())
    }

    /// Allocate from a specific model's pool.
    ///
    /// # Arguments
    /// * `model_id` - Model identifier.
    /// * `size` - Size to allocate (bytes).
    ///
    /// # Returns
    /// Virtual address of allocated region.
    ///
    /// # Errors
    /// Returns error if:
    /// - Model not found
    /// - Global physical limit exceeded
    /// - Allocation fails
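    ///
    /// # Example
    /// A minimal sketch assuming a CUDA-capable device 0; the request is charged
    /// against the global limit rounded up to the pool's page size.
    /// ```no_run
    /// # use candle_cuda_vmm::SharedMemoryPool;
    /// # use candle_core::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let device = Device::new_cuda(0)?;
    /// let mut shared_pool = SharedMemoryPool::new(4 * 1024 * 1024 * 1024, device)?;
    /// shared_pool.register_model("llama-7b", 16 * 1024 * 1024 * 1024)?;
    ///
    /// // A 3MB request is accounted as two pages (assuming 2MB granularity).
    /// let addr = shared_pool.allocate_for_model("llama-7b", 3 * 1024 * 1024)?;
    /// println!("global usage: {} bytes", shared_pool.global_stats().physical_usage);
    /// # Ok(())
    /// # }
    /// ```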
    pub fn allocate_for_model(&mut self, model_id: &str, size: usize) -> Result<usize> {
        let pool = self
            .pools
            .get_mut(model_id)
            .ok_or_else(|| VmmError::ModelNotFound(model_id.to_string()))?;

        // Check the global physical limit against the page-rounded request size
        let rounded_size =
            (size + self.default_page_size - 1) / self.default_page_size * self.default_page_size;
        if self.current_physical_usage + rounded_size > self.global_physical_limit {
            return Err(VmmError::OutOfPhysicalMemory {
                requested: rounded_size,
                available: self.global_physical_limit - self.current_physical_usage,
            });
        }

        // Allocate from the start of the model's pool. Note: each call maps from
        // offset 0, so a model currently supports one outstanding allocation;
        // deallocate it before allocating again.
        let addr = pool.allocate(0, size)?;
        self.current_physical_usage += rounded_size;

        Ok(addr)
    }

    /// Free from a specific model's pool.
    ///
    /// # Arguments
    /// * `model_id` - Model identifier.
    /// * `offset` - Offset in model's virtual address space (bytes).
    /// * `size` - Size to free (bytes).
    pub fn deallocate_for_model(
        &mut self,
        model_id: &str,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let pool = self
            .pools
            .get_mut(model_id)
            .ok_or_else(|| VmmError::ModelNotFound(model_id.to_string()))?;

        let rounded_size =
            (size + self.default_page_size - 1) / self.default_page_size * self.default_page_size;

        pool.deallocate(offset, size)?;
        self.current_physical_usage = self.current_physical_usage.saturating_sub(rounded_size);

        Ok(())
    }

    /// Get per-model memory statistics.
    pub fn get_model_stats(&self, model_id: &str) -> Option<MemoryStats> {
        self.pools.get(model_id).map(|pool| pool.stats())
    }

    /// Get global memory statistics.
    pub fn global_stats(&self) -> GlobalMemoryStats {
        GlobalMemoryStats {
            physical_limit: self.global_physical_limit,
            physical_usage: self.current_physical_usage,
            num_models: self.pools.len(),
        }
    }

    /// Unregister a model and free its resources.
    pub fn unregister_model(&mut self, model_id: &str) -> Result<()> {
        if let Some(pool) = self.pools.remove(model_id) {
            let usage = pool.physical_memory_usage();
            self.current_physical_usage = self.current_physical_usage.saturating_sub(usage);
            Ok(())
        } else {
            Err(VmmError::ModelNotFound(model_id.to_string()))
        }
    }
}

/// Global memory statistics for shared pool.
#[derive(Debug, Clone)]
pub struct GlobalMemoryStats {
    /// Global physical memory limit in bytes.
    pub physical_limit: usize,
    /// Current global physical usage in bytes.
    pub physical_usage: usize,
    /// Number of registered models.
    pub num_models: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_memory_stats() {
        let stats = MemoryStats {
            virtual_capacity: 1024 * 1024,
            physical_usage: 512 * 1024,
            mapped_pages: 256,
            fragmentation_ratio: 0.5,
        };

        assert_eq!(stats.virtual_capacity, 1024 * 1024);
        assert_eq!(stats.physical_usage, 512 * 1024);
    }

    #[test]
    fn test_global_memory_stats() {
        let stats = GlobalMemoryStats {
            physical_limit: 32 * 1024 * 1024 * 1024,
            physical_usage: 16 * 1024 * 1024 * 1024,
            num_models: 3,
        };

        assert_eq!(stats.num_models, 3);
    }
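
    // A minimal end-to-end sketch of the VirtualMemoryPool API. It needs a
    // CUDA-capable device with driver VMM support, so it is ignored by default;
    // run it explicitly with `cargo test -- --ignored` on a suitable machine.
    #[test]
    #[ignore = "requires a CUDA-capable device"]
    fn test_pool_allocate_roundtrip() -> Result<()> {
        let device = Device::new_cuda(0)?;
        let page = 2 * 1024 * 1024;
        let mut pool = VirtualMemoryPool::new(16 * page, page, device)?;

        // Nothing is mapped until allocate() is called.
        assert_eq!(pool.physical_memory_usage(), 0);

        // Map two pages, check the accounting, then free them again.
        let addr = pool.allocate(0, 2 * page)?;
        assert_eq!(addr, pool.base_address());
        assert!(pool.is_mapped(0, 2 * page));
        assert_eq!(pool.physical_memory_usage(), 2 * page);

        pool.deallocate(0, 2 * page)?;
        assert_eq!(pool.physical_memory_usage(), 0);
        Ok(())
    }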
}