// ringkernel_procint/kernels/partial_order.rs
use crate::cuda::{
    generate_partial_order_kernel, CpuFallbackExecutor, ExecutionResult, GpuStats, GpuStatus,
    KernelExecutor,
};
use crate::models::{GpuObjectEvent, GpuPartialOrderTrace};
10
11pub struct PartialOrderKernel {
13 executor: KernelExecutor,
15 use_gpu: bool,
17 kernel_compiled: bool,
19}
20
21impl Default for PartialOrderKernel {
22 fn default() -> Self {
23 Self::new()
24 }
25}
26
27impl PartialOrderKernel {
28 pub fn new() -> Self {
30 let mut kernel = Self {
31 executor: KernelExecutor::new(),
32 use_gpu: true,
33 kernel_compiled: false,
34 };
35
36 kernel.try_compile_kernel();
38 kernel
39 }
40
41 fn try_compile_kernel(&mut self) {
43 if self.executor.is_cuda_available() && !self.kernel_compiled {
44 let source = generate_partial_order_kernel();
45 match self.executor.compile(&source) {
46 Ok(_) => {
47 log::info!("Partial order CUDA kernel compiled successfully");
48 self.kernel_compiled = true;
49 }
50 Err(e) => {
51 log::warn!("Partial order CUDA kernel compilation failed: {}", e);
52 self.kernel_compiled = false;
53 }
54 }
55 }
56 }
57
58 pub fn with_cpu_only(mut self) -> Self {
60 self.use_gpu = false;
61 self
62 }
63
64 pub fn gpu_status(&self) -> GpuStatus {
66 self.executor.gpu_status()
67 }
68
69 pub fn gpu_stats(&self) -> &GpuStats {
71 &self.executor.stats
72 }
73
74 pub fn is_using_gpu(&self) -> bool {
76 self.use_gpu && self.kernel_compiled && self.executor.is_cuda_available()
77 }
78
79 pub fn derive(&mut self, events: &[GpuObjectEvent]) -> PartialOrderResult {
81 let start = std::time::Instant::now();
82
83 #[cfg(feature = "cuda")]
85 let (gpu_traces, exec_result) = if self.is_using_gpu() {
86 match self.executor.execute_partial_order_gpu(events) {
87 Ok((traces, result)) => {
88 log::debug!(
89 "Partial order GPU execution: {} events -> {} traces in {}µs",
90 events.len(),
91 traces.len(),
92 result.execution_time_us
93 );
94 (Some(traces), result)
95 }
96 Err(e) => {
97 log::warn!(
98 "Partial order GPU execution failed, falling back to CPU: {}",
99 e
100 );
101 (None, ExecutionResult::default())
102 }
103 }
104 } else {
105 (None, ExecutionResult::default())
106 };
107
108 #[cfg(not(feature = "cuda"))]
109 let gpu_traces: Option<Vec<GpuPartialOrderTrace>> = None;
110 #[cfg(not(feature = "cuda"))]
111 let exec_result = ExecutionResult::default();
112
113 let (traces, exec_result) = if let Some(gpu_traces) = gpu_traces {
115 (gpu_traces, exec_result)
116 } else {
117 let mut traces = Vec::new();
119 let result = CpuFallbackExecutor::execute_partial_order(events, &mut traces);
120 (traces, result)
121 };
122
123 let total_time = start.elapsed().as_micros() as u64;
124
125 PartialOrderResult {
126 traces,
127 execution_result: exec_result,
128 total_time_us: total_time,
129 }
130 }
131}
132
133#[derive(Debug)]
135pub struct PartialOrderResult {
136 pub traces: Vec<GpuPartialOrderTrace>,
138 pub execution_result: ExecutionResult,
140 pub total_time_us: u64,
142}
143
144impl PartialOrderResult {
145 pub fn trace_count(&self) -> usize {
147 self.traces.len()
148 }
149
150 pub fn avg_width(&self) -> f32 {
152 if self.traces.is_empty() {
153 return 0.0;
154 }
155 let total: u32 = self.traces.iter().map(|t| t.max_width).sum();
156 total as f32 / self.traces.len() as f32
157 }
158
159 pub fn traces_with_cycles(&self) -> Vec<&GpuPartialOrderTrace> {
161 self.traces.iter().filter(|t| t.has_cycles()).collect()
162 }
163
164 pub fn total_order_traces(&self) -> Vec<&GpuPartialOrderTrace> {
166 self.traces.iter().filter(|t| t.is_total_order()).collect()
167 }
168}
169
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::HybridTimestamp;

    /// Three events on object 100 with activities 1, 2 and 3, timestamps
    /// built from 0, 100 and 200, each with `duration_ms` of 100.
    fn sequential_events() -> Vec<GpuObjectEvent> {
        let first = GpuObjectEvent {
            event_id: 1,
            object_id: 100,
            activity_id: 1,
            timestamp: HybridTimestamp::new(0, 0),
            duration_ms: 100,
            ..Default::default()
        };
        let second = GpuObjectEvent {
            event_id: 2,
            object_id: 100,
            activity_id: 2,
            timestamp: HybridTimestamp::new(100, 0),
            duration_ms: 100,
            ..Default::default()
        };
        let third = GpuObjectEvent {
            event_id: 3,
            object_id: 100,
            activity_id: 3,
            timestamp: HybridTimestamp::new(200, 0),
            duration_ms: 100,
            ..Default::default()
        };
        vec![first, second, third]
    }

    #[test]
    fn test_partial_order_derivation() {
        let input = sequential_events();
        let mut cpu_kernel = PartialOrderKernel::new().with_cpu_only();
        let outcome = cpu_kernel.derive(&input);

        // All events share one object, so a single trace is expected.
        assert_eq!(outcome.trace_count(), 1);

        let trace = &outcome.traces[0];
        assert_eq!(trace.activity_count, 3);

        // Adjacent activities are ordered.
        assert!(trace.precedes(0, 1));
        assert!(trace.precedes(1, 2));

        // The first activity also precedes the last one.
        assert!(trace.precedes(0, 2));

        assert!(trace.is_total_order());
    }

    #[test]
    fn test_concurrent_activities() {
        // NOTE(review): the second event's timestamp (50) presumably falls
        // inside the first event's 100 ms duration — confirm the units of
        // `HybridTimestamp::new`.
        let earlier = GpuObjectEvent {
            event_id: 1,
            object_id: 100,
            activity_id: 1,
            timestamp: HybridTimestamp::new(0, 0),
            duration_ms: 100,
            ..Default::default()
        };
        let later = GpuObjectEvent {
            event_id: 2,
            object_id: 100,
            activity_id: 2,
            timestamp: HybridTimestamp::new(50, 0),
            duration_ms: 100,
            ..Default::default()
        };
        let input = vec![earlier, later];

        let mut cpu_kernel = PartialOrderKernel::new().with_cpu_only();
        let outcome = cpu_kernel.derive(&input);

        let trace = &outcome.traces[0];
        assert!(trace.is_concurrent(0, 1));
    }
}