#![allow(missing_docs)]
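//! CUDA kernel generation for process-intelligence GPU workloads.
//!
//! With the `cuda` feature enabled, kernels are transpiled from a Rust kernel
//! DSL via `ringkernel_cuda_codegen`; without it, the hand-written static CUDA
//! sources below serve as fallbacks.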

#[cfg(feature = "cuda")]
use ringkernel_cuda_codegen::{
    transpile_global_kernel, transpile_ring_kernel, transpile_stencil_kernel, RingKernelConfig,
    StencilConfig,
};

#[cfg(feature = "cuda")]
use syn::parse_quote;

/// A generated CUDA kernel: its logical name, source text, entry point, and kind.
#[derive(Debug, Clone)]
pub struct KernelSource {
    /// Logical kernel name.
    pub name: String,
    /// CUDA C++ source code.
    pub source: String,
    /// Name of the `extern "C" __global__` symbol to launch.
    pub entry_point: String,
    /// How the kernel was generated and how it is meant to be launched.
    pub kernel_type: KernelType,
}

/// The kind of kernel a [`KernelSource`] holds.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum KernelType {
    /// Plain batch kernel: one thread per element.
    Global,
    /// 2-D stencil kernel.
    Stencil,
    /// Persistent ring (streaming) kernel.
    Ring,
}

impl KernelSource {
    /// Creates a new kernel source; the entry point defaults to `name`.
    pub fn new(
        name: impl Into<String>,
        source: impl Into<String>,
        kernel_type: KernelType,
    ) -> Self {
        let name = name.into();
        Self {
            entry_point: name.clone(),
            name,
            source: source.into(),
            kernel_type,
        }
    }

    /// Overrides the entry point when it differs from the kernel name.
    pub fn with_entry_point(mut self, entry: impl Into<String>) -> Self {
        self.entry_point = entry.into();
        self
    }
}

#[cfg(feature = "cuda")]
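/// Generates every CUDA kernel used by the process-intelligence pipeline by
/// transpiling the Rust kernel DSL definitions below into CUDA C++.
///
/// A minimal caller-side sketch; `compile_ptx` is a hypothetical stand-in for
/// whatever NVRTC/driver wrapper the runtime actually uses:
///
/// ```ignore
/// let header = cuda_type_definitions();
/// for kernel in generate_all_kernels()? {
///     let source = format!("{header}\n{}", kernel.source);
///     let _module = compile_ptx(&source, &kernel.entry_point)?;
/// }
/// ```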
pub fn generate_all_kernels() -> Result<Vec<KernelSource>, String> {
    Ok(vec![
        generate_dfg_batch_kernel()?,
        generate_pattern_batch_kernel()?,
        generate_partial_order_stencil_kernel()?,
        generate_dfg_ring_kernel()?,
    ])
}

#[cfg(feature = "cuda")]
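/// Generates the batch DFG-construction kernel: one thread per observed
/// transition, atomically accumulating edge frequency and total duration into
/// a row-major `max_activities * max_activities` edge matrix
/// (`source * max_activities + target`).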
pub fn generate_dfg_batch_kernel() -> Result<KernelSource, String> {
    let kernel_fn: syn::ItemFn = parse_quote! {
        fn dfg_construction(
            source_activities: &[u32],
            target_activities: &[u32],
            durations: &[u32],
            edge_frequencies: &mut [u32],
            edge_durations: &mut [u64],
            max_activities: i32,
            n: i32
        ) {
            let idx = block_idx_x() * block_dim_x() + thread_idx_x();
            if idx >= n { return; }

            let source = source_activities[idx as usize];
            let target = target_activities[idx as usize];
            let duration = durations[idx as usize];

            // Row-major index into the max_activities x max_activities edge matrix.
            let edge_idx = (source as i32 * max_activities + target as i32) as usize;

            // Accumulate edge frequency and total duration atomically.
            atomic_add(&mut edge_frequencies[edge_idx], 1u32);
            atomic_add(&mut edge_durations[edge_idx], duration as u64);
        }
    };

    let cuda_source = transpile_global_kernel(&kernel_fn)
        .map_err(|e| format!("Failed to transpile DFG batch kernel: {}", e))?;

    Ok(
        KernelSource::new("dfg_construction", cuda_source, KernelType::Global)
            .with_entry_point("dfg_construction"),
    )
}

#[cfg(feature = "cuda")]
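/// Generates the batch pattern-detection kernel: one thread per activity
/// classifies it as a bottleneck (pattern type 7) when incoming flow exceeds
/// `bottleneck_threshold` while outgoing flow is less than half of incoming,
/// or as long-running (pattern type 6) when its average duration exceeds
/// `duration_threshold`.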
pub fn generate_pattern_batch_kernel() -> Result<KernelSource, String> {
    let kernel_fn: syn::ItemFn = parse_quote! {
        fn pattern_detection(
            event_counts: &[u32],
            avg_durations: &[f32],
            incoming_counts: &[u16],
            outgoing_counts: &[u16],
            pattern_types: &mut [u8],
            pattern_confidences: &mut [f32],
            bottleneck_threshold: f32,
            duration_threshold: f32,
            n: i32
        ) {
            let idx = block_idx_x() * block_dim_x() + thread_idx_x();
            if idx >= n { return; }

            let event_count = event_counts[idx as usize];
            let avg_duration = avg_durations[idx as usize];
            let incoming = incoming_counts[idx as usize] as f32;
            let outgoing = outgoing_counts[idx as usize] as f32;

            // Default: no pattern detected.
            pattern_types[idx as usize] = 0u8;
            pattern_confidences[idx as usize] = 0.0f32;

            if event_count == 0 { return; }

            // Bottleneck detection.
            if incoming > bottleneck_threshold && outgoing < incoming * 0.5f32 {
                pattern_types[idx as usize] = 7u8;
                pattern_confidences[idx as usize] = incoming / bottleneck_threshold;
                return;
            }

            // Long-running detection.
            if avg_duration > duration_threshold {
                pattern_types[idx as usize] = 6u8;
                pattern_confidences[idx as usize] = avg_duration / duration_threshold;
            }
        }
    };

    let cuda_source = transpile_global_kernel(&kernel_fn)
        .map_err(|e| format!("Failed to transpile pattern batch kernel: {}", e))?;

    Ok(
        KernelSource::new("pattern_detection", cuda_source, KernelType::Global)
            .with_entry_point("pattern_detection"),
    )
}

#[cfg(feature = "cuda")]
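/// Generates the partial-order stencil kernel: each grid cell compares the end
/// time of the event in its row (`end_times[pos.y()]`) against the start time
/// fetched via its eastern neighbour (`pos.east(start_times)`) and writes 1
/// into `precedence` when the first event finishes no later than the second
/// starts, 0 otherwise.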
pub fn generate_partial_order_stencil_kernel() -> Result<KernelSource, String> {
    let stencil_fn: syn::ItemFn = parse_quote! {
        fn partial_order_derive(
            start_times: &[u64],
            end_times: &[u64],
            precedence: &mut [u32],
            pos: GridPos
        ) {
            let i_end = end_times[pos.y() as usize];
            let j_start = pos.east(start_times);

            // Event i precedes event j when i ends no later than j starts.
            if i_end <= j_start {
                precedence[pos.idx()] = 1u32;
            } else {
                precedence[pos.idx()] = 0u32;
            }
        }
    };

    let config = StencilConfig::new("partial_order")
        .with_tile_size(16, 16)
        .with_halo(0);

    let cuda_source = transpile_stencil_kernel(&stencil_fn, &config)
        .map_err(|e| format!("Failed to transpile partial order stencil: {}", e))?;

    Ok(
        KernelSource::new("partial_order_derive", cuda_source, KernelType::Stencil)
            .with_entry_point("partial_order_derive"),
    )
}

#[cfg(feature = "cuda")]
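/// Generates the persistent ring kernel for streaming DFG updates: each
/// handled `GpuObjectEvent` is stamped with the current clock tick (HLC is
/// enabled in the config below) and turned into an `EdgeUpdate` for the
/// `prev_activity -> activity_id` transition.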
pub fn generate_dfg_ring_kernel() -> Result<KernelSource, String> {
    let handler_fn: syn::ItemFn = parse_quote! {
        fn process_event(ctx: &RingContext, event: &GpuObjectEvent) -> EdgeUpdate {
            let tid = ctx.global_thread_id();

            // Clock tick used to timestamp the emitted update.
            let ts = ctx.tick();

            ctx.sync_threads();

            // One edge update per event: prev_activity -> activity_id.
            EdgeUpdate {
                source_activity: event.prev_activity,
                target_activity: event.activity_id,
                duration_ms: event.duration_ms,
                timestamp: ts.physical,
                thread_id: tid as u32,
            }
        }
    };

    let config = RingKernelConfig::new("dfg_processor")
        .with_block_size(256)
        .with_queue_capacity(4096)
        .with_hlc(true)
        .with_k2k(false);

    let cuda_source = transpile_ring_kernel(&handler_fn, &config)
        .map_err(|e| format!("Failed to transpile DFG ring kernel: {}", e))?;

    Ok(
        KernelSource::new("ring_kernel_dfg", cuda_source, KernelType::Ring)
            .with_entry_point("ring_kernel_dfg"),
    )
}

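/// Returns the shared CUDA struct and constant definitions that mirror the
/// Rust-side GPU layouts (`GpuObjectEvent`, `GpuDFGNode`, `GpuDFGEdge`,
/// `GpuPatternMatch`, `EdgeUpdate`).
///
/// A minimal sketch of how a caller would combine this header with a generated
/// kernel before compiling it (the compile step itself lives outside this
/// module):
///
/// ```ignore
/// let kernel = generate_dfg_kernel();
/// let full_source = format!("{}\n{}", cuda_type_definitions(), kernel.source);
/// ```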
pub fn cuda_type_definitions() -> &'static str {
    r#"
// Process Intelligence GPU Types
// Auto-generated - matches Rust struct layouts

struct __align__(128) GpuObjectEvent {
    unsigned long long event_id;
    unsigned long long object_id;
    unsigned int activity_id;
    unsigned char event_type;
    unsigned char _padding1[3];
    unsigned long long physical_ms;
    unsigned int logical;
    unsigned int node_id;
    unsigned int resource_id;
    unsigned int duration_ms;
    unsigned int flags;
    unsigned int attributes[4];
    unsigned int object_type_id;
    unsigned int prev_activity;
    unsigned long long related_object_id;
    unsigned char _reserved[32];
};

struct __align__(64) GpuDFGNode {
    unsigned int activity_id;
    unsigned int event_count;
    unsigned long long total_duration_ms;
    unsigned int min_duration_ms;
    unsigned int max_duration_ms;
    float avg_duration_ms;
    float std_duration_ms;
    unsigned long long first_seen_ms;
    unsigned long long last_seen_ms;
    unsigned char is_start;
    unsigned char is_end;
    unsigned char flags;
    unsigned char _padding;
    unsigned short incoming_count;
    unsigned short outgoing_count;
};

struct __align__(64) GpuDFGEdge {
    unsigned int source_activity;
    unsigned int target_activity;
    unsigned int frequency;
    unsigned int min_duration_ms;
    float avg_duration_ms;
    unsigned int max_duration_ms;
    float probability;
    unsigned char flags;
    unsigned char _padding[3];
    float total_cost;
    unsigned long long first_seen_ms;
    unsigned long long last_seen_ms;
    unsigned char _reserved[16];
};

struct __align__(64) GpuPatternMatch {
    unsigned char pattern_type;
    unsigned char severity;
    unsigned char activity_count;
    unsigned char flags;
    unsigned int activity_ids[8];
    float confidence;
    unsigned int frequency;
    float avg_duration_ms;
    float impact;
    unsigned char _reserved[4];
};

struct EdgeUpdate {
    unsigned int source_activity;
    unsigned int target_activity;
    unsigned int duration_ms;
    unsigned long long timestamp;
    unsigned int thread_id;
};

// Pattern type constants
#define PATTERN_SEQUENCE 0
#define PATTERN_CHOICE 1
#define PATTERN_LOOP 2
#define PATTERN_PARALLEL 3
#define PATTERN_SKIP 4
#define PATTERN_REWORK 5
#define PATTERN_LONG_RUNNING 6
#define PATTERN_BOTTLENECK 7
#define PATTERN_ANOMALY 8

// Severity constants
#define SEVERITY_INFO 0
#define SEVERITY_WARNING 1
#define SEVERITY_CRITICAL 2
"#
}

#[cfg(not(feature = "cuda"))]
pub fn generate_all_kernels() -> Result<Vec<KernelSource>, String> {
    Err("CUDA feature not enabled. Build with --features cuda".to_string())
}

#[cfg(not(feature = "cuda"))]
pub fn generate_dfg_batch_kernel() -> Result<KernelSource, String> {
    Err("CUDA feature not enabled".to_string())
}

#[cfg(not(feature = "cuda"))]
pub fn generate_pattern_batch_kernel() -> Result<KernelSource, String> {
    Err("CUDA feature not enabled".to_string())
}

#[cfg(not(feature = "cuda"))]
pub fn generate_partial_order_stencil_kernel() -> Result<KernelSource, String> {
    Err("CUDA feature not enabled".to_string())
}

#[cfg(not(feature = "cuda"))]
pub fn generate_dfg_ring_kernel() -> Result<KernelSource, String> {
    Err("CUDA feature not enabled".to_string())
}

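/// Returns the hand-written static DFG construction kernel
/// (`STATIC_DFG_KERNEL`), available with or without the `cuda` feature.
///
/// The static batch kernels index work as `blockIdx.x * blockDim.x +
/// threadIdx.x` over `n` elements, so a caller-side launch would size the grid
/// roughly as in this sketch (the 256-thread block size is only an example):
///
/// ```ignore
/// let block = 256u32;
/// let grid = (n as u32 + block - 1) / block;
/// ```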
pub fn generate_dfg_kernel() -> KernelSource {
    KernelSource::new("dfg_construction", STATIC_DFG_KERNEL, KernelType::Global)
        .with_entry_point("dfg_construction_kernel")
}

pub fn generate_pattern_kernel() -> KernelSource {
    KernelSource::new(
        "pattern_detection",
        STATIC_PATTERN_KERNEL,
        KernelType::Global,
    )
    .with_entry_point("pattern_detection_kernel")
}

pub fn generate_partial_order_kernel() -> KernelSource {
    KernelSource::new(
        "partial_order",
        STATIC_PARTIAL_ORDER_KERNEL,
        KernelType::Stencil,
    )
    .with_entry_point("partial_order_kernel")
}

pub fn generate_conformance_kernel() -> KernelSource {
    KernelSource::new("conformance", STATIC_CONFORMANCE_KERNEL, KernelType::Global)
        .with_entry_point("conformance_kernel")
}

const STATIC_DFG_KERNEL: &str = r#"
extern "C" __global__ void dfg_construction_kernel(
    const unsigned int* source_activities,
    const unsigned int* target_activities,
    const unsigned int* durations,
    unsigned int* edge_frequencies,
    unsigned long long* edge_durations,
    int max_activities,
    int n
) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    unsigned int source = source_activities[idx];
    unsigned int target = target_activities[idx];
    unsigned int duration = durations[idx];

    int edge_idx = source * max_activities + target;
    atomicAdd(&edge_frequencies[edge_idx], 1);
    atomicAdd(&edge_durations[edge_idx], (unsigned long long)duration);
}
"#;

const STATIC_PATTERN_KERNEL: &str = r#"
extern "C" __global__ void pattern_detection_kernel(
    const unsigned int* event_counts,
    const float* avg_durations,
    const unsigned short* incoming_counts,
    const unsigned short* outgoing_counts,
    unsigned char* pattern_types,
    float* pattern_confidences,
    float bottleneck_threshold,
    float duration_threshold,
    int n
) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;

    unsigned int event_count = event_counts[idx];
    float avg_duration = avg_durations[idx];
    float incoming = (float)incoming_counts[idx];
    float outgoing = (float)outgoing_counts[idx];

    pattern_types[idx] = 0;
    pattern_confidences[idx] = 0.0f;

    if (event_count == 0) return;

    // Bottleneck detection
    if (incoming > bottleneck_threshold && outgoing < incoming * 0.5f) {
        pattern_types[idx] = 7; // Bottleneck
        pattern_confidences[idx] = incoming / bottleneck_threshold;
        return;
    }

    // Long-running detection
    if (avg_duration > duration_threshold) {
        pattern_types[idx] = 6; // LongRunning
        pattern_confidences[idx] = avg_duration / duration_threshold;
    }
}
"#;

const STATIC_PARTIAL_ORDER_KERNEL: &str = r#"
extern "C" __global__ void partial_order_kernel(
    const unsigned long long* start_times,
    const unsigned long long* end_times,
    unsigned int* precedence,
    int width,
    int height
) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= width || y >= height) return;

    int idx = y * width + x;
    int x_next = min(x + 1, width - 1);

    unsigned long long i_end = end_times[y];
    unsigned long long j_start = start_times[x_next];

    precedence[idx] = (i_end <= j_start) ? 1 : 0;
}
"#;

const STATIC_CONFORMANCE_KERNEL: &str = r#"
extern "C" __global__ void conformance_kernel(
    const unsigned int* trace_activities,
    const int* trace_starts,
    const int* trace_lengths,
    const unsigned int* model_sources,
    const unsigned int* model_targets,
    int num_transitions,
    float* fitness_scores,
    int num_traces
) {
    int trace_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (trace_idx >= num_traces) return;

    int start = trace_starts[trace_idx];
    int len = trace_lengths[trace_idx];

    if (len <= 1) {
        fitness_scores[trace_idx] = 1.0f;
        return;
    }

    int valid_moves = 0;
    for (int i = 0; i < len - 1; i++) {
        unsigned int source = trace_activities[start + i];
        unsigned int target = trace_activities[start + i + 1];

        for (int t = 0; t < num_transitions; t++) {
            if (model_sources[t] == source && model_targets[t] == target) {
                valid_moves++;
                break;
            }
        }
    }

    fitness_scores[trace_idx] = (float)valid_moves / (float)(len - 1);
}
"#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dfg_kernel_generation() {
        let source = generate_dfg_kernel();
        assert!(source.source.contains("dfg_construction_kernel"));
        assert!(source.source.contains("atomicAdd"));
    }

    #[test]
    fn test_pattern_kernel_generation() {
        let source = generate_pattern_kernel();
        assert!(source.source.contains("pattern_detection_kernel"));
    }

    #[test]
    fn test_partial_order_kernel_generation() {
        let source = generate_partial_order_kernel();
        assert!(source.source.contains("partial_order_kernel"));
        assert!(source.source.contains("precedence"));
    }

    #[test]
    fn test_conformance_kernel_generation() {
        let source = generate_conformance_kernel();
        assert!(source.source.contains("conformance_kernel"));
        assert!(source.source.contains("fitness"));
    }

    #[test]
    fn test_cuda_type_definitions() {
        let defs = cuda_type_definitions();
        assert!(defs.contains("GpuObjectEvent"));
        assert!(defs.contains("GpuDFGNode"));
        assert!(defs.contains("GpuPatternMatch"));
        assert!(defs.contains("__align__(64)"));
    }
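
    // Sanity check on the KernelSource builder: the entry point defaults to the
    // kernel name and `with_entry_point` overrides it without touching `name`.
    #[test]
    fn test_kernel_source_entry_point_default_and_override() {
        let src = KernelSource::new("demo", "// source", KernelType::Global);
        assert_eq!(src.entry_point, "demo");

        let src = src.with_entry_point("demo_kernel");
        assert_eq!(src.name, "demo");
        assert_eq!(src.entry_point, "demo_kernel");
    }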

    #[cfg(feature = "cuda")]
    #[test]
    fn test_transpiled_dfg_kernel() {
        let result = generate_dfg_batch_kernel();
        match result {
            Ok(kernel) => {
                println!(
                    "Generated DFG batch kernel ({} bytes):",
                    kernel.source.len()
                );
                println!("{}", kernel.source);
                assert!(kernel.source.contains("__global__"));
            }
            Err(e) => {
                println!("DFG kernel generation pending: {}", e);
            }
        }
    }

    #[cfg(feature = "cuda")]
    #[test]
    fn test_transpiled_pattern_kernel() {
        let result = generate_pattern_batch_kernel();
        match result {
            Ok(kernel) => {
                println!(
                    "Generated pattern batch kernel ({} bytes):",
                    kernel.source.len()
                );
                println!("{}", kernel.source);
                assert!(kernel.source.contains("__global__"));
            }
            Err(e) => {
                println!("Pattern kernel generation pending: {}", e);
            }
        }
    }
}