hive_gpu/monitoring/
vram_monitor.rs

//! VRAM Monitoring and Validation
//!
//! This module provides VRAM usage monitoring, validation, and optimization
//! for GPU operations.
5
6use crate::error::{HiveGpuError, Result};
7use crate::traits::{GpuMonitor, VramBufferInfo, VramStats};
8use std::collections::HashMap;
9use std::time::{SystemTime, UNIX_EPOCH};
10
11/// VRAM Monitor for tracking GPU memory usage
12pub struct VramMonitor {
13    /// Total VRAM capacity in bytes
14    total_vram: usize,
15    /// Currently allocated VRAM in bytes
16    allocated_vram: usize,
17    /// Buffer information map
18    buffers: HashMap<usize, VramBufferInfo>,
19    /// Next buffer ID
20    next_buffer_id: usize,
21}
22
23impl VramMonitor {
24    /// Create a new VRAM monitor
25    pub fn new(total_vram: usize) -> Self {
26        Self {
27            total_vram,
28            allocated_vram: 0,
29            buffers: HashMap::new(),
30            next_buffer_id: 0,
31        }
32    }
33
34    /// Allocate VRAM for a buffer
35    pub fn allocate_buffer(
36        &mut self,
37        size: usize,
38        buffer_type: crate::traits::BufferType,
39    ) -> Result<usize> {
40        if self.allocated_vram + size > self.total_vram {
41            return Err(HiveGpuError::VramLimitExceeded {
42                requested: self.allocated_vram + size,
43                limit: self.total_vram,
44            });
45        }
46
47        let buffer_id = self.next_buffer_id;
48        self.next_buffer_id += 1;
49
50        let buffer_info = VramBufferInfo {
51            buffer_id,
52            size,
53            buffer_type,
54            allocated_at: SystemTime::now()
55                .duration_since(UNIX_EPOCH)
56                .unwrap()
57                .as_secs(),
58        };
59
60        self.buffers.insert(buffer_id, buffer_info);
61        self.allocated_vram += size;
62
63        Ok(buffer_id)
64    }
65
66    /// Deallocate VRAM for a buffer
67    pub fn deallocate_buffer(&mut self, buffer_id: usize) -> Result<()> {
68        if let Some(buffer_info) = self.buffers.remove(&buffer_id) {
69            self.allocated_vram = self.allocated_vram.saturating_sub(buffer_info.size);
70            Ok(())
71        } else {
72            Err(HiveGpuError::Other(format!(
73                "Buffer {} not found",
74                buffer_id
75            )))
76        }
77    }
78
79    /// Get current VRAM statistics
80    pub fn get_vram_stats(&self) -> VramStats {
81        VramStats {
82            total_vram: self.total_vram,
83            allocated_vram: self.allocated_vram,
84            available_vram: self.total_vram - self.allocated_vram,
85            utilization: self.allocated_vram as f32 / self.total_vram as f32,
86            buffer_count: self.buffers.len(),
87        }
88    }
89
90    /// Validate that all operations are in VRAM
91    pub fn validate_all_vram(&self) -> Result<()> {
92        let stats = self.get_vram_stats();
93
94        if stats.utilization > 0.95 {
95            return Err(HiveGpuError::VramLimitExceeded {
96                requested: stats.allocated_vram,
97                limit: (self.total_vram as f32 * 0.95) as usize,
98            });
99        }
100
101        Ok(())
102    }
103
104    /// Generate detailed VRAM report
105    pub fn generate_vram_report(&self) -> String {
106        let stats = self.get_vram_stats();
107        let mut report = String::new();
108
109        report.push_str(&format!("VRAM Report:\n"));
110        report.push_str(&format!(
111            "  Total VRAM: {:.2} MB\n",
112            stats.total_vram as f64 / 1024.0 / 1024.0
113        ));
114        report.push_str(&format!(
115            "  Allocated: {:.2} MB\n",
116            stats.allocated_vram as f64 / 1024.0 / 1024.0
117        ));
118        report.push_str(&format!(
119            "  Available: {:.2} MB\n",
120            stats.available_vram as f64 / 1024.0 / 1024.0
121        ));
122        report.push_str(&format!(
123            "  Utilization: {:.1}%\n",
124            stats.utilization * 100.0
125        ));
126        report.push_str(&format!("  Buffer Count: {}\n", stats.buffer_count));
127
128        // Buffer breakdown by type
129        let mut type_counts: HashMap<crate::traits::BufferType, (usize, usize)> = HashMap::new();
130        for buffer in self.buffers.values() {
131            let entry = type_counts.entry(buffer.buffer_type).or_insert((0, 0));
132            entry.0 += 1;
133            entry.1 += buffer.size;
134        }
135
136        report.push_str("\nBuffer Breakdown:\n");
137        for (buffer_type, (count, size)) in type_counts {
138            report.push_str(&format!(
139                "  {:?}: {} buffers, {:.2} MB\n",
140                buffer_type,
141                count,
142                size as f64 / 1024.0 / 1024.0
143            ));
144        }
145
146        report
147    }
148}
149
150impl GpuMonitor for VramMonitor {
151    fn get_vram_stats(&self) -> VramStats {
152        self.get_vram_stats()
153    }
154
155    fn validate_all_vram(&self) -> Result<()> {
156        self.validate_all_vram()
157    }
158
159    fn generate_vram_report(&self) -> String {
160        self.generate_vram_report()
161    }
162}
163
164/// VRAM Validator for ensuring VRAM-only operations
165pub struct VramValidator {
166    monitor: VramMonitor,
167    max_utilization: f32,
168}
169
170impl VramValidator {
171    /// Create a new VRAM validator
172    pub fn new(total_vram: usize, max_utilization: f32) -> Self {
173        Self {
174            monitor: VramMonitor::new(total_vram),
175            max_utilization,
176        }
177    }
178
179    /// Validate VRAM allocation request
180    pub fn validate_allocation(&self, size: usize) -> Result<()> {
181        let stats = self.monitor.get_vram_stats();
182        let new_utilization = (stats.allocated_vram + size) as f32 / stats.total_vram as f32;
183
184        if new_utilization > self.max_utilization {
185            return Err(HiveGpuError::VramLimitExceeded {
186                requested: stats.allocated_vram + size,
187                limit: (stats.total_vram as f32 * self.max_utilization) as usize,
188            });
189        }
190
191        Ok(())
192    }
193}
194
/// Outcome of a single VRAM benchmark run.
#[derive(Debug, Clone)]
pub struct VramBenchmarkResult {
    /// Name of the operation that was benchmarked.
    pub operation: String,
    /// Elapsed time of the run, in milliseconds.
    pub duration_ms: f64,
    /// Amount of memory allocated during the run, in bytes.
    pub memory_allocated: usize,
    /// Memory utilization percentage.
    // NOTE(review): `VramStats::utilization` in this file is a 0-1 fraction;
    // confirm whether this field is 0-100 or follows the same convention.
    pub utilization: f32,
    /// Throughput, in operations per second.
    pub throughput: f64,
}