1use std::fmt;
61
62use oxicuda_ptx::PtxType;
63use oxicuda_ptx::arch::SmVersion;
64use oxicuda_ptx::error::PtxGenError;
65
66use crate::error::LaunchError;
67use crate::grid::Dim3;
68
/// Upper bound on kernel nesting depth accepted by `validate_dynamic_config`
/// and reported by `max_nesting_for_sm` (24 is CUDA's documented limit for
/// dynamic parallelism — verify against the target runtime version).
const CUDA_MAX_NESTING_DEPTH: u32 = 24;

/// Default for `DynamicParallelismConfig::max_pending_launches`.
const DEFAULT_MAX_PENDING_LAUNCHES: u32 = 2048;

/// Bytes of bookkeeping charged per pending launch by
/// `estimate_launch_overhead`.
const BASE_LAUNCH_OVERHEAD_BYTES: u64 = 2048;

/// Bytes of bookkeeping charged per nesting level by
/// `estimate_launch_overhead`.
const PER_DEPTH_OVERHEAD_BYTES: u64 = 4096;
/// Configuration for CUDA dynamic parallelism (kernels launching kernels),
/// checked by [`validate_dynamic_config`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DynamicParallelismConfig {
    /// Maximum parent→child nesting depth; must be in
    /// `1..=CUDA_MAX_NESTING_DEPTH`.
    pub max_nesting_depth: u32,
    /// Maximum number of outstanding child launches; must be non-zero.
    pub max_pending_launches: u32,
    /// Deepest level at which device-side synchronization occurs; must not
    /// exceed `max_nesting_depth`.
    pub sync_depth: u32,
    /// Grid dimensions for child launches; all components must be non-zero.
    pub child_grid: Dim3,
    /// Block dimensions for child launches; all components must be non-zero.
    pub child_block: Dim3,
    /// Dynamic shared memory per child block, in bytes.
    pub child_shared_mem: u32,
    /// Target SM architecture; supplies per-device limits during validation.
    pub sm_version: SmVersion,
}
123
124impl DynamicParallelismConfig {
125 #[must_use]
135 pub fn new(sm_version: SmVersion) -> Self {
136 Self {
137 max_nesting_depth: 4,
138 max_pending_launches: DEFAULT_MAX_PENDING_LAUNCHES,
139 sync_depth: 2,
140 child_grid: Dim3::x(128),
141 child_block: Dim3::x(256),
142 child_shared_mem: 0,
143 sm_version,
144 }
145 }
146}
147
148impl fmt::Display for DynamicParallelismConfig {
149 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
150 write!(
151 f,
152 "DynParallelism(depth={}, pending={}, sync@{}, grid={}, block={}, smem={}, {})",
153 self.max_nesting_depth,
154 self.max_pending_launches,
155 self.sync_depth,
156 self.child_grid,
157 self.child_block,
158 self.child_shared_mem,
159 self.sm_version,
160 )
161 }
162}
163
/// Output of [`plan_dynamic_launch`]: a validated configuration together
/// with launch-count and memory-overhead estimates.
#[derive(Debug, Clone)]
pub struct DynamicLaunchPlan {
    /// The validated configuration this plan was derived from.
    pub config: DynamicParallelismConfig,
    /// Name of the parent (launching) kernel.
    pub parent_kernel_name: String,
    /// Name of the child (launched) kernel.
    pub child_kernel_name: String,
    /// Estimated child launches: child grid total × child block total
    /// (worst case, one launch per thread).
    pub estimated_child_launches: u64,
    /// Estimated runtime bookkeeping overhead in bytes
    /// (see `estimate_launch_overhead`).
    pub memory_overhead_bytes: u64,
}
185
186impl fmt::Display for DynamicLaunchPlan {
187 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188 write!(
189 f,
190 "DynamicLaunchPlan {{ parent: '{}', child: '{}', \
191 est_launches: {}, overhead: {} bytes, config: {} }}",
192 self.parent_kernel_name,
193 self.child_kernel_name,
194 self.estimated_child_launches,
195 self.memory_overhead_bytes,
196 self.config,
197 )
198 }
199}
200
/// Description of a child kernel, consumed by [`generate_child_launch_ptx`]
/// to emit a device-side launch helper.
#[derive(Debug, Clone)]
pub struct ChildKernelSpec {
    /// Child kernel entry-point name; must not be empty.
    pub name: String,
    /// PTX types of the kernel parameters, in declaration order.
    pub param_types: Vec<PtxType>,
    /// Strategy for computing the child grid dimensions at launch time.
    pub grid_dim: GridSpec,
    /// Child block dimensions; all components must be non-zero.
    pub block_dim: Dim3,
    /// Dynamic shared memory per child block, in bytes.
    pub shared_mem_bytes: u32,
}
222
/// How a child kernel's grid dimensions are determined.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GridSpec {
    /// Grid dimensions fixed at code-generation time.
    Fixed(Dim3),
    /// Grid x-dimension computed at run time as
    /// `ceil(param[param_index] / block.x)`; y and z are 1.
    DataDependent {
        /// Index into `ChildKernelSpec::param_types` of the element-count
        /// parameter.
        param_index: u32,
    },
    /// Each parent thread launches its own single-block child grid.
    ThreadDependent,
}
250
251impl fmt::Display for GridSpec {
252 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
253 match self {
254 Self::Fixed(dim) => write!(f, "Fixed({dim})"),
255 Self::DataDependent { param_index } => {
256 write!(f, "DataDependent(param[{param_index}])")
257 }
258 Self::ThreadDependent => write!(f, "ThreadDependent"),
259 }
260 }
261}
262
263pub fn validate_dynamic_config(config: &DynamicParallelismConfig) -> Result<(), LaunchError> {
281 if config.max_nesting_depth == 0 || config.max_nesting_depth > CUDA_MAX_NESTING_DEPTH {
283 return Err(LaunchError::InvalidDimension {
284 dim: "max_nesting_depth",
285 value: config.max_nesting_depth,
286 });
287 }
288
289 if config.max_pending_launches == 0 {
291 return Err(LaunchError::InvalidDimension {
292 dim: "max_pending_launches",
293 value: 0,
294 });
295 }
296
297 if config.sync_depth > config.max_nesting_depth {
299 return Err(LaunchError::InvalidDimension {
300 dim: "sync_depth",
301 value: config.sync_depth,
302 });
303 }
304
305 if config.child_grid.x == 0 {
307 return Err(LaunchError::InvalidDimension {
308 dim: "child_grid.x",
309 value: 0,
310 });
311 }
312 if config.child_grid.y == 0 {
313 return Err(LaunchError::InvalidDimension {
314 dim: "child_grid.y",
315 value: 0,
316 });
317 }
318 if config.child_grid.z == 0 {
319 return Err(LaunchError::InvalidDimension {
320 dim: "child_grid.z",
321 value: 0,
322 });
323 }
324
325 if config.child_block.x == 0 {
327 return Err(LaunchError::InvalidDimension {
328 dim: "child_block.x",
329 value: 0,
330 });
331 }
332 if config.child_block.y == 0 {
333 return Err(LaunchError::InvalidDimension {
334 dim: "child_block.y",
335 value: 0,
336 });
337 }
338 if config.child_block.z == 0 {
339 return Err(LaunchError::InvalidDimension {
340 dim: "child_block.z",
341 value: 0,
342 });
343 }
344
345 let max_threads = config.sm_version.max_threads_per_block();
347 let block_total = config.child_block.total();
348 if block_total > max_threads {
349 return Err(LaunchError::BlockSizeExceedsLimit {
350 requested: block_total,
351 max: max_threads,
352 });
353 }
354
355 let max_smem = config.sm_version.max_shared_mem_per_block();
357 if config.child_shared_mem > max_smem {
358 return Err(LaunchError::SharedMemoryExceedsLimit {
359 requested: config.child_shared_mem,
360 max: max_smem,
361 });
362 }
363
364 Ok(())
365}
366
367pub fn plan_dynamic_launch(
381 config: &DynamicParallelismConfig,
382) -> Result<DynamicLaunchPlan, LaunchError> {
383 validate_dynamic_config(config)?;
384
385 let parent_grid_total = config.child_grid.total() as u64;
386 let estimated_child_launches =
387 parent_grid_total.saturating_mul(config.child_block.total() as u64);
388 let memory_overhead_bytes =
389 estimate_launch_overhead(config.max_nesting_depth, config.max_pending_launches);
390
391 Ok(DynamicLaunchPlan {
392 config: config.clone(),
393 parent_kernel_name: String::from("parent_kernel"),
394 child_kernel_name: String::from("child_kernel"),
395 estimated_child_launches,
396 memory_overhead_bytes,
397 })
398}
399
400pub fn estimate_launch_overhead(depth: u32, pending: u32) -> u64 {
419 let per_launch = BASE_LAUNCH_OVERHEAD_BYTES.saturating_mul(pending as u64);
420 let per_depth = PER_DEPTH_OVERHEAD_BYTES.saturating_mul(depth as u64);
421 per_launch.saturating_add(per_depth)
422}
423
424pub fn max_nesting_for_sm(sm: SmVersion) -> u32 {
433 match sm {
437 SmVersion::Sm75 => CUDA_MAX_NESTING_DEPTH,
438 SmVersion::Sm80 | SmVersion::Sm86 => CUDA_MAX_NESTING_DEPTH,
439 SmVersion::Sm89 => CUDA_MAX_NESTING_DEPTH,
440 SmVersion::Sm90 | SmVersion::Sm90a => CUDA_MAX_NESTING_DEPTH,
441 SmVersion::Sm100 => CUDA_MAX_NESTING_DEPTH,
442 SmVersion::Sm120 => CUDA_MAX_NESTING_DEPTH,
443 }
444}
445
446pub fn generate_child_launch_ptx(
472 parent_name: &str,
473 child: &ChildKernelSpec,
474 sm: SmVersion,
475) -> Result<String, PtxGenError> {
476 if child.name.is_empty() {
478 return Err(PtxGenError::GenerationFailed(
479 "child kernel name must not be empty".to_string(),
480 ));
481 }
482 if child.block_dim.x == 0 || child.block_dim.y == 0 || child.block_dim.z == 0 {
483 return Err(PtxGenError::GenerationFailed(
484 "child block dimensions must be non-zero".to_string(),
485 ));
486 }
487
488 let (isa_major, isa_minor) = sm.ptx_isa_version();
489 let target = sm.as_ptx_str();
490
491 let mut ptx = String::with_capacity(2048);
492
493 ptx.push_str(&format!(
495 "// Dynamic parallelism: {parent_name} -> {child_name}\n",
496 child_name = child.name,
497 ));
498 ptx.push_str(&format!(
499 ".version {isa_major}.{isa_minor}\n\
500 .target {target}\n\
501 .address_size 64\n\n"
502 ));
503
504 ptx.push_str(&format!(
506 "// Child kernel declaration\n\
507 .extern .entry {child_name}(\n",
508 child_name = child.name,
509 ));
510 for (i, ptype) in child.param_types.iter().enumerate() {
511 let comma = if i + 1 < child.param_types.len() {
512 ","
513 } else {
514 ""
515 };
516 ptx.push_str(&format!(
517 " .param {ty} _param_{i}{comma}\n",
518 ty = ptype.as_ptx_str(),
519 ));
520 }
521 ptx.push_str(")\n\n");
522
523 let func_name = format!(
525 "__{parent_name}_launch_{child_name}",
526 child_name = child.name
527 );
528 ptx.push_str("// Device-side launch helper\n");
529 ptx.push_str(&format!(".func (.param .s32 _retval) {func_name}(\n"));
530
531 for (i, ptype) in child.param_types.iter().enumerate() {
533 let comma = if i + 1 < child.param_types.len() {
534 ","
535 } else {
536 ""
537 };
538 ptx.push_str(&format!(
539 " .param {ty} arg_{i}{comma}\n",
540 ty = ptype.as_ptx_str(),
541 ));
542 }
543 ptx.push_str(")\n{\n");
544
545 ptx.push_str(" // Register declarations\n");
547 ptx.push_str(" .reg .s32 %retval;\n");
548 ptx.push_str(" .reg .u32 %grid_x, %grid_y, %grid_z;\n");
549 ptx.push_str(" .reg .u32 %block_x, %block_y, %block_z;\n");
550 ptx.push_str(" .reg .u32 %shared_mem;\n");
551 ptx.push_str(" .reg .u64 %stream;\n");
552
553 if let GridSpec::DataDependent { .. } = &child.grid_dim {
555 ptx.push_str(" .reg .u32 %n_elements, %block_size;\n");
556 }
557 if matches!(&child.grid_dim, GridSpec::ThreadDependent) {
558 ptx.push_str(" .reg .u32 %tid_x, %ntid_x, %ctaid_x;\n");
559 }
560
561 ptx.push('\n');
562
563 match &child.grid_dim {
565 GridSpec::Fixed(dim) => {
566 ptx.push_str(&format!(
567 " // Fixed grid dimensions\n\
568 mov.u32 %grid_x, {gx};\n\
569 mov.u32 %grid_y, {gy};\n\
570 mov.u32 %grid_z, {gz};\n",
571 gx = dim.x,
572 gy = dim.y,
573 gz = dim.z,
574 ));
575 }
576 GridSpec::DataDependent { param_index } => {
577 ptx.push_str(&format!(
578 " // Data-dependent grid: ceil(param[{param_index}] / block.x)\n\
579 ld.param.u32 %n_elements, [arg_{param_index}];\n\
580 mov.u32 %block_size, {bx};\n\
581 add.u32 %grid_x, %n_elements, %block_size;\n\
582 sub.u32 %grid_x, %grid_x, 1;\n\
583 div.u32 %grid_x, %grid_x, %block_size;\n\
584 mov.u32 %grid_y, 1;\n\
585 mov.u32 %grid_z, 1;\n",
586 bx = child.block_dim.x,
587 ));
588 }
589 GridSpec::ThreadDependent => {
590 ptx.push_str(
591 " // Thread-dependent: one child launch per parent thread\n\
592 mov.u32 %tid_x, %tid.x;\n\
593 mov.u32 %ntid_x, %ntid.x;\n\
594 mov.u32 %ctaid_x, %ctaid.x;\n\
595 // Each thread launches a 1-block child grid\n\
596 mov.u32 %grid_x, 1;\n\
597 mov.u32 %grid_y, 1;\n\
598 mov.u32 %grid_z, 1;\n",
599 );
600 }
601 }
602
603 ptx.push_str(&format!(
605 "\n // Block dimensions\n\
606 mov.u32 %block_x, {bx};\n\
607 mov.u32 %block_y, {by};\n\
608 mov.u32 %block_z, {bz};\n",
609 bx = child.block_dim.x,
610 by = child.block_dim.y,
611 bz = child.block_dim.z,
612 ));
613
614 ptx.push_str(&format!(
616 "\n // Shared memory and stream (NULL = default stream)\n\
617 mov.u32 %shared_mem, {smem};\n\
618 mov.u64 %stream, 0;\n",
619 smem = child.shared_mem_bytes,
620 ));
621
622 ptx.push_str(&format!(
626 "\n // Launch child kernel: {child_name}\n\
627 // cudaLaunchDevice(\n\
628 // &{child_name},\n\
629 // param_buffer,\n\
630 // dim3(grid_x, grid_y, grid_z),\n\
631 // dim3(block_x, block_y, block_z),\n\
632 // shared_mem, stream\n\
633 // )\n\
634 // Note: actual device-side launch uses cudaLaunchDeviceV2\n\
635 // which takes a pre-formatted parameter buffer.\n\
636 mov.s32 %retval, 0; // cudaSuccess\n",
637 child_name = child.name,
638 ));
639
640 ptx.push_str(
642 "\n st.param.s32 [_retval], %retval;\n\
643 ret;\n\
644 }\n",
645 );
646
647 Ok(ptx)
648}
649
/// Generates PTX for a device-side synchronization helper
/// (`__device_synchronize`) targeting `sm`.
///
/// The helper models `cudaDeviceSynchronize()` called from device code; the
/// actual runtime call is only sketched in the emitted comments, and the
/// helper returns a `.s32` status of 0 (cudaSuccess placeholder).
///
/// # Errors
///
/// Currently never fails; the `Result` return keeps the signature aligned
/// with the other generators and reserves room for future failures.
pub fn generate_device_sync_ptx(sm: SmVersion) -> Result<String, PtxGenError> {
    // Header directives derive from the target SM version.
    let (isa_major, isa_minor) = sm.ptx_isa_version();
    let target = sm.as_ptx_str();

    let ptx = format!(
        "// Device-side synchronization\n\
         .version {isa_major}.{isa_minor}\n\
         .target {target}\n\
         .address_size 64\n\
         \n\
         // cudaDeviceSynchronize() from device code\n\
         // Synchronizes all pending child kernel launches.\n\
         .func (.param .s32 _retval) __device_synchronize()\n\
         {{\n\
         .reg .s32 %retval;\n\
         \n\
         // Device-side cudaDeviceSynchronize is a runtime call\n\
         // that blocks until all child kernels complete.\n\
         // In PTX, this maps to a system call:\n\
         // call.uni cudaDeviceSynchronize;\n\
         // For code generation, we emit the call pattern.\n\
         mov.s32 %retval, 0; // cudaSuccess (placeholder)\n\
         \n\
         st.param.s32 [_retval], %retval;\n\
         ret;\n\
         }}\n"
    );

    Ok(ptx)
}
693
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline valid configuration shared by most tests.
    fn default_config() -> DynamicParallelismConfig {
        DynamicParallelismConfig::new(SmVersion::Sm80)
    }

    // --- validate_dynamic_config ---

    #[test]
    fn validate_default_config_ok() {
        let config = default_config();
        assert!(validate_dynamic_config(&config).is_ok());
    }

    #[test]
    fn validate_zero_nesting_depth_fails() {
        let mut config = default_config();
        config.max_nesting_depth = 0;
        assert!(matches!(
            validate_dynamic_config(&config),
            Err(LaunchError::InvalidDimension {
                dim: "max_nesting_depth",
                ..
            })
        ));
    }

    #[test]
    fn validate_excessive_nesting_depth_fails() {
        let mut config = default_config();
        config.max_nesting_depth = CUDA_MAX_NESTING_DEPTH + 1;
        assert!(validate_dynamic_config(&config).is_err());
    }

    #[test]
    fn validate_max_nesting_depth_boundary() {
        // The architectural maximum itself is still valid.
        let mut config = default_config();
        config.max_nesting_depth = CUDA_MAX_NESTING_DEPTH;
        config.sync_depth = CUDA_MAX_NESTING_DEPTH;
        assert!(validate_dynamic_config(&config).is_ok());
    }

    #[test]
    fn validate_zero_pending_launches_fails() {
        let mut config = default_config();
        config.max_pending_launches = 0;
        assert!(validate_dynamic_config(&config).is_err());
    }

    #[test]
    fn validate_sync_depth_exceeds_nesting_fails() {
        let mut config = default_config();
        config.max_nesting_depth = 4;
        config.sync_depth = 5;
        assert!(validate_dynamic_config(&config).is_err());
    }

    #[test]
    fn validate_zero_child_block_fails() {
        let mut config = default_config();
        config.child_block = Dim3::new(0, 256, 1);
        assert!(validate_dynamic_config(&config).is_err());
    }

    #[test]
    fn validate_zero_child_grid_fails() {
        let mut config = default_config();
        config.child_grid = Dim3::new(128, 0, 1);
        assert!(validate_dynamic_config(&config).is_err());
    }

    #[test]
    fn validate_block_size_exceeds_limit() {
        let mut config = default_config();
        // 32 * 32 * 2 = 2048 threads, above the per-block limit.
        config.child_block = Dim3::new(32, 32, 2);
        assert!(matches!(
            validate_dynamic_config(&config),
            Err(LaunchError::BlockSizeExceedsLimit { .. })
        ));
    }

    #[test]
    fn validate_shared_mem_exceeds_limit() {
        let mut config = default_config();
        config.child_shared_mem = 500_000;
        assert!(matches!(
            validate_dynamic_config(&config),
            Err(LaunchError::SharedMemoryExceedsLimit { .. })
        ));
    }

    // --- plan_dynamic_launch ---

    #[test]
    fn plan_dynamic_launch_ok() {
        let config = default_config();
        let plan = plan_dynamic_launch(&config).expect("default config should plan");
        assert!(plan.estimated_child_launches > 0);
        assert!(plan.memory_overhead_bytes > 0);
        assert_eq!(plan.parent_kernel_name, "parent_kernel");
        assert_eq!(plan.child_kernel_name, "child_kernel");
    }

    #[test]
    fn plan_dynamic_launch_invalid_config_fails() {
        let mut config = default_config();
        config.max_nesting_depth = 0;
        assert!(plan_dynamic_launch(&config).is_err());
    }

    #[test]
    fn plan_display() {
        // `expect` (rather than `if let Ok`) so a planning failure fails the
        // test instead of silently skipping the assertions.
        let plan = plan_dynamic_launch(&default_config()).expect("default config should plan");
        let display = format!("{plan}");
        assert!(display.contains("parent_kernel"));
        assert!(display.contains("child_kernel"));
        assert!(display.contains("bytes"));
    }

    // --- estimate_launch_overhead ---

    #[test]
    fn estimate_overhead_basic() {
        assert_eq!(
            estimate_launch_overhead(1, 1),
            BASE_LAUNCH_OVERHEAD_BYTES + PER_DEPTH_OVERHEAD_BYTES
        );
    }

    #[test]
    fn estimate_overhead_default() {
        let expected = BASE_LAUNCH_OVERHEAD_BYTES * 2048 + PER_DEPTH_OVERHEAD_BYTES * 4;
        assert_eq!(estimate_launch_overhead(4, 2048), expected);
    }

    #[test]
    fn estimate_overhead_zero() {
        assert_eq!(estimate_launch_overhead(0, 0), 0);
    }

    // --- max_nesting_for_sm ---

    #[test]
    fn max_nesting_all_sm_versions() {
        let versions = [
            SmVersion::Sm75,
            SmVersion::Sm80,
            SmVersion::Sm86,
            SmVersion::Sm89,
            SmVersion::Sm90,
            SmVersion::Sm90a,
            SmVersion::Sm100,
            SmVersion::Sm120,
        ];
        for sm in versions {
            assert_eq!(max_nesting_for_sm(sm), 24);
        }
    }

    // --- generate_child_launch_ptx ---

    #[test]
    fn generate_child_launch_ptx_basic() {
        let child = ChildKernelSpec {
            name: "child_add".to_string(),
            param_types: vec![PtxType::U64, PtxType::U64, PtxType::U32],
            grid_dim: GridSpec::Fixed(Dim3::x(64)),
            block_dim: Dim3::x(256),
            shared_mem_bytes: 0,
        };
        let ptx = generate_child_launch_ptx("parent_add", &child, SmVersion::Sm80)
            .expect("valid spec should generate PTX");
        assert!(ptx.contains("child_add"));
        assert!(ptx.contains("parent_add"));
        assert!(ptx.contains(".version 7.0"));
        assert!(ptx.contains("sm_80"));
        assert!(ptx.contains("mov.u32 %grid_x, 64"));
        assert!(ptx.contains(".u64"));
        assert!(ptx.contains(".u32"));
    }

    #[test]
    fn generate_child_launch_ptx_data_dependent() {
        let child = ChildKernelSpec {
            name: "child_scale".to_string(),
            param_types: vec![PtxType::U64, PtxType::U32],
            grid_dim: GridSpec::DataDependent { param_index: 1 },
            block_dim: Dim3::x(128),
            shared_mem_bytes: 1024,
        };
        let ptx = generate_child_launch_ptx("parent_scale", &child, SmVersion::Sm90)
            .expect("valid spec should generate PTX");
        assert!(ptx.contains("Data-dependent"));
        assert!(ptx.contains("arg_1"));
        assert!(ptx.contains("div.u32"));
    }

    #[test]
    fn generate_child_launch_ptx_thread_dependent() {
        let child = ChildKernelSpec {
            name: "child_per_thread".to_string(),
            param_types: vec![PtxType::U64],
            grid_dim: GridSpec::ThreadDependent,
            block_dim: Dim3::x(32),
            shared_mem_bytes: 0,
        };
        let ptx = generate_child_launch_ptx("parent", &child, SmVersion::Sm80)
            .expect("valid spec should generate PTX");
        assert!(ptx.contains("Thread-dependent"));
        assert!(ptx.contains("%tid.x"));
    }

    #[test]
    fn generate_child_launch_ptx_empty_name_fails() {
        let child = ChildKernelSpec {
            name: String::new(),
            param_types: vec![],
            grid_dim: GridSpec::Fixed(Dim3::x(1)),
            block_dim: Dim3::x(1),
            shared_mem_bytes: 0,
        };
        assert!(generate_child_launch_ptx("parent", &child, SmVersion::Sm80).is_err());
    }

    #[test]
    fn generate_child_launch_ptx_zero_block_fails() {
        let child = ChildKernelSpec {
            name: "child".to_string(),
            param_types: vec![],
            grid_dim: GridSpec::Fixed(Dim3::x(1)),
            block_dim: Dim3::new(0, 1, 1),
            shared_mem_bytes: 0,
        };
        assert!(generate_child_launch_ptx("parent", &child, SmVersion::Sm80).is_err());
    }

    // --- generate_device_sync_ptx ---

    #[test]
    fn generate_device_sync_ptx_basic() {
        let ptx = generate_device_sync_ptx(SmVersion::Sm80).expect("sync PTX should generate");
        assert!(ptx.contains("__device_synchronize"));
        assert!(ptx.contains(".version 7.0"));
        assert!(ptx.contains("sm_80"));
        assert!(ptx.contains("cudaDeviceSynchronize"));
    }

    #[test]
    fn generate_device_sync_ptx_hopper() {
        let ptx = generate_device_sync_ptx(SmVersion::Sm90).expect("sync PTX should generate");
        assert!(ptx.contains(".version 8.0"));
        assert!(ptx.contains("sm_90"));
    }

    // --- Display impls ---

    #[test]
    fn config_display() {
        let display = format!("{}", default_config());
        assert!(display.contains("depth=4"));
        assert!(display.contains("pending=2048"));
        assert!(display.contains("sync@2"));
        assert!(display.contains("sm_80"));
    }

    #[test]
    fn grid_spec_display() {
        assert_eq!(format!("{}", GridSpec::Fixed(Dim3::x(64))), "Fixed(64)");
        assert_eq!(
            format!("{}", GridSpec::DataDependent { param_index: 2 }),
            "DataDependent(param[2])"
        );
        assert_eq!(format!("{}", GridSpec::ThreadDependent), "ThreadDependent");
    }
}