// ringkernel_procint/kernels/partial_order.rs

//! Partial order derivation kernel.
//!
//! Builds precedence matrices from interval events using GPU acceleration.

5use crate::cuda::{
6    generate_partial_order_kernel, CpuFallbackExecutor, ExecutionResult, GpuStats, GpuStatus,
7    KernelExecutor,
8};
9use crate::models::{GpuObjectEvent, GpuPartialOrderTrace};
10
11/// Partial order derivation kernel.
12pub struct PartialOrderKernel {
13    /// Kernel executor.
14    executor: KernelExecutor,
15    /// Use GPU if available.
16    use_gpu: bool,
17    /// Whether kernel is compiled.
18    kernel_compiled: bool,
19}
20
21impl Default for PartialOrderKernel {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl PartialOrderKernel {
28    /// Create a new partial order kernel.
29    pub fn new() -> Self {
30        let mut kernel = Self {
31            executor: KernelExecutor::new(),
32            use_gpu: true,
33            kernel_compiled: false,
34        };
35
36        // Try to compile the CUDA kernel at creation time
37        kernel.try_compile_kernel();
38        kernel
39    }
40
41    /// Try to compile the CUDA kernel.
42    fn try_compile_kernel(&mut self) {
43        if self.executor.is_cuda_available() && !self.kernel_compiled {
44            let source = generate_partial_order_kernel();
45            match self.executor.compile(&source) {
46                Ok(_) => {
47                    log::info!("Partial order CUDA kernel compiled successfully");
48                    self.kernel_compiled = true;
49                }
50                Err(e) => {
51                    log::warn!("Partial order CUDA kernel compilation failed: {}", e);
52                    self.kernel_compiled = false;
53                }
54            }
55        }
56    }
57
58    /// Disable GPU (use CPU fallback).
59    pub fn with_cpu_only(mut self) -> Self {
60        self.use_gpu = false;
61        self
62    }
63
64    /// Get GPU status.
65    pub fn gpu_status(&self) -> GpuStatus {
66        self.executor.gpu_status()
67    }
68
69    /// Get GPU stats.
70    pub fn gpu_stats(&self) -> &GpuStats {
71        &self.executor.stats
72    }
73
74    /// Check if GPU is being used.
75    pub fn is_using_gpu(&self) -> bool {
76        self.use_gpu && self.kernel_compiled && self.executor.is_cuda_available()
77    }
78
79    /// Derive partial order traces from events.
80    pub fn derive(&mut self, events: &[GpuObjectEvent]) -> PartialOrderResult {
81        let start = std::time::Instant::now();
82
83        // Try GPU path first if available and compiled
84        #[cfg(feature = "cuda")]
85        let (gpu_traces, exec_result) = if self.is_using_gpu() {
86            match self.executor.execute_partial_order_gpu(events) {
87                Ok((traces, result)) => {
88                    log::debug!(
89                        "Partial order GPU execution: {} events -> {} traces in {}µs",
90                        events.len(),
91                        traces.len(),
92                        result.execution_time_us
93                    );
94                    (Some(traces), result)
95                }
96                Err(e) => {
97                    log::warn!(
98                        "Partial order GPU execution failed, falling back to CPU: {}",
99                        e
100                    );
101                    (None, ExecutionResult::default())
102                }
103            }
104        } else {
105            (None, ExecutionResult::default())
106        };
107
108        #[cfg(not(feature = "cuda"))]
109        let gpu_traces: Option<Vec<GpuPartialOrderTrace>> = None;
110        #[cfg(not(feature = "cuda"))]
111        let exec_result = ExecutionResult::default();
112
113        // Use GPU results or fall back to CPU
114        let (traces, exec_result) = if let Some(gpu_traces) = gpu_traces {
115            (gpu_traces, exec_result)
116        } else {
117            // CPU fallback path
118            let mut traces = Vec::new();
119            let result = CpuFallbackExecutor::execute_partial_order(events, &mut traces);
120            (traces, result)
121        };
122
123        let total_time = start.elapsed().as_micros() as u64;
124
125        PartialOrderResult {
126            traces,
127            execution_result: exec_result,
128            total_time_us: total_time,
129        }
130    }
131}
132
133/// Result of partial order derivation.
134#[derive(Debug)]
135pub struct PartialOrderResult {
136    /// Derived partial order traces.
137    pub traces: Vec<GpuPartialOrderTrace>,
138    /// Kernel execution result.
139    pub execution_result: ExecutionResult,
140    /// Total processing time in microseconds.
141    pub total_time_us: u64,
142}
143
144impl PartialOrderResult {
145    /// Get total traces.
146    pub fn trace_count(&self) -> usize {
147        self.traces.len()
148    }
149
150    /// Get average width (concurrency level).
151    pub fn avg_width(&self) -> f32 {
152        if self.traces.is_empty() {
153            return 0.0;
154        }
155        let total: u32 = self.traces.iter().map(|t| t.max_width).sum();
156        total as f32 / self.traces.len() as f32
157    }
158
159    /// Get traces with cycles.
160    pub fn traces_with_cycles(&self) -> Vec<&GpuPartialOrderTrace> {
161        self.traces.iter().filter(|t| t.has_cycles()).collect()
162    }
163
164    /// Get traces that are total orders.
165    pub fn total_order_traces(&self) -> Vec<&GpuPartialOrderTrace> {
166        self.traces.iter().filter(|t| t.is_total_order()).collect()
167    }
168}
169
170#[cfg(test)]
171mod tests {
172    use super::*;
173    use crate::models::HybridTimestamp;
174
175    fn create_test_events() -> Vec<GpuObjectEvent> {
176        // Case with sequential activities: A(0-100) -> B(100-200) -> C(200-300)
177        vec![
178            GpuObjectEvent {
179                event_id: 1,
180                object_id: 100,
181                activity_id: 1,
182                timestamp: HybridTimestamp::new(0, 0),
183                duration_ms: 100,
184                ..Default::default()
185            },
186            GpuObjectEvent {
187                event_id: 2,
188                object_id: 100,
189                activity_id: 2,
190                timestamp: HybridTimestamp::new(100, 0),
191                duration_ms: 100,
192                ..Default::default()
193            },
194            GpuObjectEvent {
195                event_id: 3,
196                object_id: 100,
197                activity_id: 3,
198                timestamp: HybridTimestamp::new(200, 0),
199                duration_ms: 100,
200                ..Default::default()
201            },
202        ]
203    }
204
205    #[test]
206    fn test_partial_order_derivation() {
207        let mut kernel = PartialOrderKernel::new().with_cpu_only();
208        let events = create_test_events();
209        let result = kernel.derive(&events);
210
211        assert_eq!(result.trace_count(), 1);
212
213        let trace = &result.traces[0];
214        assert_eq!(trace.activity_count, 3);
215
216        // A precedes B, B precedes C
217        assert!(trace.precedes(0, 1));
218        assert!(trace.precedes(1, 2));
219
220        // After transitive closure, A precedes C
221        assert!(trace.precedes(0, 2));
222
223        // Should be a total order
224        assert!(trace.is_total_order());
225    }
226
227    #[test]
228    fn test_concurrent_activities() {
229        // Case with concurrent activities: A(0-100), B(50-150) (overlap = concurrent)
230        let events = vec![
231            GpuObjectEvent {
232                event_id: 1,
233                object_id: 100,
234                activity_id: 1,
235                timestamp: HybridTimestamp::new(0, 0),
236                duration_ms: 100,
237                ..Default::default()
238            },
239            GpuObjectEvent {
240                event_id: 2,
241                object_id: 100,
242                activity_id: 2,
243                timestamp: HybridTimestamp::new(50, 0),
244                duration_ms: 100,
245                ..Default::default()
246            },
247        ];
248
249        let mut kernel = PartialOrderKernel::new().with_cpu_only();
250        let result = kernel.derive(&events);
251
252        let trace = &result.traces[0];
253        // Neither precedes the other (concurrent)
254        assert!(trace.is_concurrent(0, 1));
255    }
256}