Skip to main content

oxicuda_driver/
link.rs

1//! Link-time optimisation for JIT-linking multiple PTX modules.
2//!
3//! This module wraps the CUDA linker API (`cuLinkCreate`, `cuLinkAddData`,
4//! `cuLinkAddFile`, `cuLinkComplete`, `cuLinkDestroy`) for combining
5//! multiple PTX, cubin, or fatbin inputs into a single linked binary.
6//!
7//! # Platform behaviour
8//!
9//! On macOS (where NVIDIA dropped CUDA support), all linker operations use
10//! a synthetic in-memory implementation.  PTX inputs are accumulated and
11//! concatenated into a synthetic cubin blob so that the full API surface
12//! can be exercised in tests without a GPU.
13//!
14//! # Example
15//!
16//! ```rust,no_run
17//! # use oxicuda_driver::link::{Linker, LinkerOptions};
18//! # fn main() -> Result<(), oxicuda_driver::error::CudaError> {
19//! let opts = LinkerOptions::default();
20//! let mut linker = Linker::new(opts)?;
21//!
22//! linker.add_ptx(r#"
23//!     .version 7.0
24//!     .target sm_70
25//!     .address_size 64
26//!     .visible .entry kernel_a() { ret; }
27//! "#, "module_a.ptx")?;
28//!
29//! linker.add_ptx(r#"
30//!     .version 7.0
31//!     .target sm_70
32//!     .address_size 64
33//!     .visible .entry kernel_b() { ret; }
34//! "#, "module_b.ptx")?;
35//!
36//! let linked = linker.complete()?;
37//! println!("cubin size: {} bytes", linked.cubin_size());
38//! # Ok(())
39//! # }
40//! ```
41
42use std::ffi::{CString, c_void};
43
44#[cfg(not(target_os = "macos"))]
45use crate::error::check;
46use crate::error::{CudaError, CudaResult};
47#[cfg(any(not(target_os = "macos"), test))]
48use crate::ffi::CUjit_option;
49use crate::ffi::CUjitInputType;
50#[cfg(not(target_os = "macos"))]
51use crate::ffi::CUlinkState;
52#[cfg(not(target_os = "macos"))]
53use crate::module::jit_failure;
54
55// ---------------------------------------------------------------------------
56// OptimizationLevel
57// ---------------------------------------------------------------------------
58
/// How aggressively the JIT linker optimises the generated GPU code.
///
/// Each variant carries the integer passed as the value of
/// `CU_JIT_OPTIMIZATION_LEVEL` (0 through 4).  Higher levels trade longer
/// link times for faster device code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum OptimizationLevel {
    /// Optimisation disabled.
    O0 = 0,
    /// Minimal optimisation.
    O1 = 1,
    /// Moderate optimisation.
    O2 = 2,
    /// High optimisation.
    O3 = 3,
    /// Maximum optimisation; this is the default level.
    #[default]
    O4 = 4,
}

impl OptimizationLevel {
    /// Returns the integer handed to the CUDA JIT for this level.
    #[inline]
    pub fn as_u32(self) -> u32 {
        // Spelled out per variant so the mapping is visible at a glance; the
        // numbers match the explicit discriminants declared on the enum.
        match self {
            Self::O0 => 0,
            Self::O1 => 1,
            Self::O2 => 2,
            Self::O3 => 3,
            Self::O4 => 4,
        }
    }
}
85
86// ---------------------------------------------------------------------------
87// FallbackStrategy
88// ---------------------------------------------------------------------------
89
/// What the JIT should do when no binary exactly matches the target GPU.
///
/// Each variant carries the integer passed as the value of
/// `CU_JIT_FALLBACK_STRATEGY`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum FallbackStrategy {
    /// Recompile from PTX when no matching binary exists; this is the default.
    #[default]
    PreferPtx = 0,
    /// Use a compatible binary rather than recompiling from PTX.
    PreferBinary = 1,
}

impl FallbackStrategy {
    /// Returns the integer handed to the CUDA JIT for this strategy.
    #[inline]
    pub fn as_u32(self) -> u32 {
        // The numbers match the explicit discriminants declared on the enum.
        match self {
            Self::PreferPtx => 0,
            Self::PreferBinary => 1,
        }
    }
}
109
110// ---------------------------------------------------------------------------
111// LinkInputType
112// ---------------------------------------------------------------------------
113
114/// The type of input data being added to the linker.
115///
116/// Each variant corresponds to a `CUjitInputType` constant.
117#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
118pub enum LinkInputType {
119    /// PTX source code.
120    Ptx,
121    /// Compiled device code (cubin).
122    Cubin,
123    /// Fat binary bundle.
124    Fatbin,
125    /// Relocatable device object.
126    Object,
127    /// Device code library.
128    Library,
129}
130
131impl LinkInputType {
132    /// Convert to the raw FFI enum value.
133    #[inline]
134    pub fn to_raw(self) -> CUjitInputType {
135        match self {
136            Self::Ptx => CUjitInputType::Ptx,
137            Self::Cubin => CUjitInputType::Cubin,
138            Self::Fatbin => CUjitInputType::Fatbin,
139            Self::Object => CUjitInputType::Object,
140            Self::Library => CUjitInputType::Library,
141        }
142    }
143}
144
145// ---------------------------------------------------------------------------
146// LinkerOptions
147// ---------------------------------------------------------------------------
148
149/// Options controlling the JIT linker's behaviour.
150///
151/// These are translated to `CUjit_option` key/value pairs when calling
152/// `cuLinkCreate`.
153#[derive(Debug, Clone)]
154pub struct LinkerOptions {
155    /// Maximum number of registers per thread (`None` = driver default).
156    ///
157    /// Limiting registers increases occupancy but may cause spilling.
158    pub max_registers: Option<u32>,
159
160    /// Optimisation level for the linker (default: [`OptimizationLevel::O4`]).
161    pub optimization_level: OptimizationLevel,
162
163    /// Target compute capability as a bare number (e.g. 70 for sm_70).
164    /// `None` means the linker derives the target from the current context.
165    pub target_sm: Option<u32>,
166
167    /// Whether to generate debug information in the linked binary.
168    pub generate_debug_info: bool,
169
170    /// Whether to generate line-number information.
171    pub generate_line_info: bool,
172
173    /// Whether to request verbose log output from the linker.
174    pub log_verbose: bool,
175
176    /// Fallback strategy when an exact binary match is unavailable.
177    pub fallback_strategy: FallbackStrategy,
178}
179
180impl Default for LinkerOptions {
181    fn default() -> Self {
182        Self {
183            max_registers: None,
184            optimization_level: OptimizationLevel::O4,
185            target_sm: None,
186            generate_debug_info: false,
187            generate_line_info: false,
188            log_verbose: false,
189            fallback_strategy: FallbackStrategy::PreferPtx,
190        }
191    }
192}
193
194/// Size of the JIT log buffers in bytes.
195#[cfg(any(not(target_os = "macos"), test))]
196const LINK_LOG_BUFFER_SIZE: usize = 8192;
197
198impl LinkerOptions {
199    /// Build parallel option-key and option-value arrays for `cuLinkCreate`.
200    ///
201    /// Returns `(keys, values, info_buf, error_buf)`.  The caller must
202    /// keep `info_buf` and `error_buf` alive until after the CUDA call
203    /// completes, because the pointers stored in `values` reference them.
204    #[cfg(any(not(target_os = "macos"), test))]
205    fn build_jit_options(&self) -> (Vec<CUjit_option>, Vec<*mut c_void>, Vec<u8>, Vec<u8>) {
206        let mut keys: Vec<CUjit_option> = Vec::with_capacity(12);
207        let mut vals: Vec<*mut c_void> = Vec::with_capacity(12);
208
209        let mut info_buf: Vec<u8> = vec![0u8; LINK_LOG_BUFFER_SIZE];
210        let mut error_buf: Vec<u8> = vec![0u8; LINK_LOG_BUFFER_SIZE];
211
212        // Info log buffer.
213        keys.push(CUjit_option::InfoLogBuffer);
214        vals.push(info_buf.as_mut_ptr().cast::<c_void>());
215
216        keys.push(CUjit_option::InfoLogBufferSizeBytes);
217        vals.push(LINK_LOG_BUFFER_SIZE as *mut c_void);
218
219        // Error log buffer.
220        keys.push(CUjit_option::ErrorLogBuffer);
221        vals.push(error_buf.as_mut_ptr().cast::<c_void>());
222
223        keys.push(CUjit_option::ErrorLogBufferSizeBytes);
224        vals.push(LINK_LOG_BUFFER_SIZE as *mut c_void);
225
226        // Optimisation level.
227        keys.push(CUjit_option::OptimizationLevel);
228        vals.push(self.optimization_level.as_u32() as *mut c_void);
229
230        // Max registers.
231        if let Some(max_regs) = self.max_registers {
232            keys.push(CUjit_option::MaxRegisters);
233            vals.push(max_regs as *mut c_void);
234        }
235
236        // Target SM.
237        if let Some(sm) = self.target_sm {
238            keys.push(CUjit_option::Target);
239            vals.push(sm as *mut c_void);
240        } else {
241            keys.push(CUjit_option::TargetFromCuContext);
242            vals.push(core::ptr::without_provenance_mut::<c_void>(1));
243        }
244
245        // Debug info.
246        if self.generate_debug_info {
247            keys.push(CUjit_option::GenerateDebugInfo);
248            vals.push(core::ptr::without_provenance_mut::<c_void>(1));
249        }
250
251        // Line info.
252        if self.generate_line_info {
253            keys.push(CUjit_option::GenerateLineInfo);
254            vals.push(core::ptr::without_provenance_mut::<c_void>(1));
255        }
256
257        // Verbose log.
258        if self.log_verbose {
259            keys.push(CUjit_option::LogVerbose);
260            vals.push(core::ptr::without_provenance_mut::<c_void>(1));
261        }
262
263        // Fallback strategy.
264        keys.push(CUjit_option::FallbackStrategy);
265        vals.push(self.fallback_strategy.as_u32() as *mut c_void);
266
267        (keys, vals, info_buf, error_buf)
268    }
269}
270
271// ---------------------------------------------------------------------------
272// LinkedModule
273// ---------------------------------------------------------------------------
274
/// Result of a successful link.
///
/// Owns the linked cubin image together with the informational and error
/// logs the JIT linker produced while building it.
#[derive(Debug, Clone)]
pub struct LinkedModule {
    /// Bytes of the linked cubin image.
    cubin_data: Vec<u8>,
    /// Informational output from the linker.
    info_log: String,
    /// Error and warning output from the linker.
    error_log: String,
}

impl LinkedModule {
    /// Borrows the linked cubin image.
    #[inline]
    pub fn cubin(&self) -> &[u8] {
        self.cubin_data.as_slice()
    }

    /// Length of the linked cubin image in bytes.
    #[inline]
    pub fn cubin_size(&self) -> usize {
        self.cubin().len()
    }

    /// Borrows the linker's informational log.
    #[inline]
    pub fn info_log(&self) -> &str {
        self.info_log.as_str()
    }

    /// Borrows the linker's error log.
    #[inline]
    pub fn error_log(&self) -> &str {
        self.error_log.as_str()
    }

    /// Gives up the module and hands back the cubin bytes without copying.
    #[inline]
    pub fn into_cubin(self) -> Vec<u8> {
        let Self { cubin_data, .. } = self;
        cubin_data
    }
}
320
321// ---------------------------------------------------------------------------
322// Linker
323// ---------------------------------------------------------------------------
324
325/// RAII wrapper around the CUDA link state (`CUlinkState`).
326///
327/// The linker accumulates PTX, cubin, and fatbin inputs via the `add_*`
328/// methods and then produces a single linked binary via [`complete`].
329///
330/// On macOS, a synthetic implementation stores the inputs in memory and
331/// produces a synthetic cubin on completion.
332///
333/// # Drop behaviour
334///
335/// Dropping the linker calls `cuLinkDestroy` on platforms with a real
336/// CUDA driver.  If `complete()` was already called, Drop is still safe
337/// because the cubin data has been copied into the [`LinkedModule`].
338///
339/// [`complete`]: Linker::complete
340pub struct Linker {
341    /// Raw `CUlinkState` handle (null on macOS / synthetic mode).
342    state: *mut c_void,
343    /// Linker configuration.
344    options: LinkerOptions,
345    /// Number of inputs added so far.
346    input_count: usize,
347    /// Names of inputs added (for diagnostics).
348    input_names: Vec<String>,
349
350    // -- Driver-owned buffer keep-alive (non-macOS only) ----------------------
351    //
352    // These buffers are passed to `cuLinkCreate` as the back-store for
353    // `CU_JIT_INFO_LOG_BUFFER` / `CU_JIT_ERROR_LOG_BUFFER`.  The driver
354    // retains the raw pointers internally for the lifetime of the link
355    // state, so we **must** keep these `Vec`s alive (and not reallocate
356    // them) until after `cuLinkComplete` runs.  Do not call `push`,
357    // `reserve`, `shrink_to_fit`, or any other API that may reallocate
358    // these vectors after `Linker::new` returns.
359    #[cfg(not(target_os = "macos"))]
360    info_buf: Vec<u8>,
361    #[cfg(not(target_os = "macos"))]
362    error_buf: Vec<u8>,
363
364    // -- macOS synthetic state ------------------------------------------------
365    /// Accumulated PTX sources (macOS only — empty on real GPU platforms).
366    #[cfg(target_os = "macos")]
367    ptx_sources: Vec<String>,
368    /// Accumulated binary data (macOS only — cubin/fatbin/object/library).
369    #[cfg(target_os = "macos")]
370    binary_sources: Vec<Vec<u8>>,
371}
372
373// SAFETY: The raw `CUlinkState` pointer is only accessed through driver
374// API calls which are thread-safe when used with proper synchronisation.
375unsafe impl Send for Linker {}
376
377impl Linker {
378    /// Creates a new linker with the given options.
379    ///
380    /// On platforms with a real CUDA driver, this calls `cuLinkCreate`.
381    /// On macOS, a synthetic linker is created for testing purposes.
382    ///
383    /// # Errors
384    ///
385    /// Returns a [`CudaError`] if `cuLinkCreate` fails (e.g. no active
386    /// CUDA context).
387    pub fn new(options: LinkerOptions) -> CudaResult<Self> {
388        let (state, info_buf, error_buf) = Self::platform_create(&options)?;
389
390        // Suppress unused-variable warnings on macOS (synthetic mode).
391        #[cfg(target_os = "macos")]
392        {
393            let _ = (info_buf, error_buf);
394        }
395
396        Ok(Self {
397            state,
398            options,
399            input_count: 0,
400            input_names: Vec::new(),
401            #[cfg(not(target_os = "macos"))]
402            info_buf,
403            #[cfg(not(target_os = "macos"))]
404            error_buf,
405            #[cfg(target_os = "macos")]
406            ptx_sources: Vec::new(),
407            #[cfg(target_os = "macos")]
408            binary_sources: Vec::new(),
409        })
410    }
411
412    /// Adds PTX source code to the linker.
413    ///
414    /// The PTX is compiled and linked when [`complete`](Self::complete) is
415    /// called.
416    ///
417    /// # Arguments
418    ///
419    /// * `ptx` — PTX source code (must not contain interior null bytes).
420    /// * `name` — A descriptive name for this input (used in error messages).
421    ///
422    /// # Errors
423    ///
424    /// * [`CudaError::InvalidValue`] if `ptx` contains interior null bytes.
425    /// * Other [`CudaError`] variants if `cuLinkAddData` fails.
426    pub fn add_ptx(&mut self, ptx: &str, name: &str) -> CudaResult<()> {
427        let c_ptx = CString::new(ptx).map_err(|_| CudaError::InvalidValue)?;
428        let c_name = CString::new(name).map_err(|_| CudaError::InvalidValue)?;
429        let bytes = c_ptx.as_bytes_with_nul();
430
431        self.platform_add_data(
432            CUjitInputType::Ptx,
433            bytes.as_ptr().cast::<c_void>(),
434            bytes.len(),
435            c_name.as_ptr(),
436        )?;
437
438        #[cfg(target_os = "macos")]
439        {
440            self.ptx_sources.push(ptx.to_string());
441        }
442
443        self.input_count += 1;
444        self.input_names.push(name.to_string());
445        Ok(())
446    }
447
448    /// Adds compiled cubin data to the linker.
449    ///
450    /// # Arguments
451    ///
452    /// * `data` — Raw cubin binary data.
453    /// * `name` — A descriptive name for this input.
454    ///
455    /// # Errors
456    ///
457    /// * [`CudaError::InvalidValue`] if `name` contains interior null bytes
458    ///   or `data` is empty.
459    /// * Other [`CudaError`] variants if `cuLinkAddData` fails.
460    pub fn add_cubin(&mut self, data: &[u8], name: &str) -> CudaResult<()> {
461        if data.is_empty() {
462            return Err(CudaError::InvalidValue);
463        }
464        let c_name = CString::new(name).map_err(|_| CudaError::InvalidValue)?;
465
466        self.platform_add_data(
467            CUjitInputType::Cubin,
468            data.as_ptr().cast::<c_void>(),
469            data.len(),
470            c_name.as_ptr(),
471        )?;
472
473        #[cfg(target_os = "macos")]
474        {
475            self.binary_sources.push(data.to_vec());
476        }
477
478        self.input_count += 1;
479        self.input_names.push(name.to_string());
480        Ok(())
481    }
482
483    /// Adds a fat binary to the linker.
484    ///
485    /// # Arguments
486    ///
487    /// * `data` — Raw fatbin binary data.
488    /// * `name` — A descriptive name for this input.
489    ///
490    /// # Errors
491    ///
492    /// * [`CudaError::InvalidValue`] if `name` contains interior null bytes
493    ///   or `data` is empty.
494    /// * Other [`CudaError`] variants if `cuLinkAddData` fails.
495    pub fn add_fatbin(&mut self, data: &[u8], name: &str) -> CudaResult<()> {
496        if data.is_empty() {
497            return Err(CudaError::InvalidValue);
498        }
499        let c_name = CString::new(name).map_err(|_| CudaError::InvalidValue)?;
500
501        self.platform_add_data(
502            CUjitInputType::Fatbin,
503            data.as_ptr().cast::<c_void>(),
504            data.len(),
505            c_name.as_ptr(),
506        )?;
507
508        #[cfg(target_os = "macos")]
509        {
510            self.binary_sources.push(data.to_vec());
511        }
512
513        self.input_count += 1;
514        self.input_names.push(name.to_string());
515        Ok(())
516    }
517
518    /// Adds a relocatable device object to the linker.
519    ///
520    /// # Arguments
521    ///
522    /// * `data` — Raw object binary data.
523    /// * `name` — A descriptive name for this input.
524    ///
525    /// # Errors
526    ///
527    /// * [`CudaError::InvalidValue`] if `name` contains interior null bytes
528    ///   or `data` is empty.
529    pub fn add_object(&mut self, data: &[u8], name: &str) -> CudaResult<()> {
530        if data.is_empty() {
531            return Err(CudaError::InvalidValue);
532        }
533        let c_name = CString::new(name).map_err(|_| CudaError::InvalidValue)?;
534
535        self.platform_add_data(
536            CUjitInputType::Object,
537            data.as_ptr().cast::<c_void>(),
538            data.len(),
539            c_name.as_ptr(),
540        )?;
541
542        #[cfg(target_os = "macos")]
543        {
544            self.binary_sources.push(data.to_vec());
545        }
546
547        self.input_count += 1;
548        self.input_names.push(name.to_string());
549        Ok(())
550    }
551
552    /// Adds a device code library to the linker.
553    ///
554    /// # Arguments
555    ///
556    /// * `data` — Raw library binary data.
557    /// * `name` — A descriptive name for this input.
558    ///
559    /// # Errors
560    ///
561    /// * [`CudaError::InvalidValue`] if `name` contains interior null bytes
562    ///   or `data` is empty.
563    pub fn add_library(&mut self, data: &[u8], name: &str) -> CudaResult<()> {
564        if data.is_empty() {
565            return Err(CudaError::InvalidValue);
566        }
567        let c_name = CString::new(name).map_err(|_| CudaError::InvalidValue)?;
568
569        self.platform_add_data(
570            CUjitInputType::Library,
571            data.as_ptr().cast::<c_void>(),
572            data.len(),
573            c_name.as_ptr(),
574        )?;
575
576        #[cfg(target_os = "macos")]
577        {
578            self.binary_sources.push(data.to_vec());
579        }
580
581        self.input_count += 1;
582        self.input_names.push(name.to_string());
583        Ok(())
584    }
585
586    /// Returns the number of inputs added to the linker.
587    #[inline]
588    pub fn input_count(&self) -> usize {
589        self.input_count
590    }
591
592    /// Returns the names of all inputs added so far.
593    #[inline]
594    pub fn input_names(&self) -> &[String] {
595        &self.input_names
596    }
597
598    /// Returns a reference to the linker options.
599    #[inline]
600    pub fn options(&self) -> &LinkerOptions {
601        &self.options
602    }
603
604    /// Completes the link, producing a [`LinkedModule`].
605    ///
606    /// This consumes the linker.  The resulting cubin data is copied into
607    /// the `LinkedModule` before the underlying `CUlinkState` is destroyed
608    /// (by `Drop`).
609    ///
610    /// # Errors
611    ///
612    /// * [`CudaError::InvalidValue`] if no inputs have been added.
613    /// * Other [`CudaError`] variants if `cuLinkComplete` fails.
614    pub fn complete(self) -> CudaResult<LinkedModule> {
615        if self.input_count == 0 {
616            return Err(CudaError::InvalidValue);
617        }
618        self.platform_complete()
619    }
620
621    // -----------------------------------------------------------------------
622    // Platform-specific helpers
623    // -----------------------------------------------------------------------
624
625    /// Create the link state.  On macOS, returns a null pointer (synthetic).
626    ///
627    /// Returns `(state, info_buf, error_buf)`.  The caller (the constructor)
628    /// must keep the buffers alive for the lifetime of the linker because
629    /// the driver retains raw pointers into them.
630    fn platform_create(options: &LinkerOptions) -> CudaResult<(*mut c_void, Vec<u8>, Vec<u8>)> {
631        #[cfg(target_os = "macos")]
632        {
633            let _ = options;
634            Ok((std::ptr::null_mut(), Vec::new(), Vec::new()))
635        }
636
637        #[cfg(not(target_os = "macos"))]
638        {
639            Self::gpu_link_create(options)
640        }
641    }
642
643    /// Add data to the link state.
644    fn platform_add_data(
645        &self,
646        input_type: CUjitInputType,
647        data: *const c_void,
648        size: usize,
649        name: *const std::ffi::c_char,
650    ) -> CudaResult<()> {
651        #[cfg(target_os = "macos")]
652        {
653            let _ = (input_type, data, size, name);
654            Ok(())
655        }
656
657        #[cfg(not(target_os = "macos"))]
658        {
659            Self::gpu_link_add_data(self.state, input_type, data, size, name)
660        }
661    }
662
663    /// Complete the link and produce a `LinkedModule`.
664    fn platform_complete(self) -> CudaResult<LinkedModule> {
665        #[cfg(target_os = "macos")]
666        {
667            self.synthetic_complete()
668        }
669
670        #[cfg(not(target_os = "macos"))]
671        {
672            self.gpu_link_complete()
673        }
674    }
675
676    /// Destroy the link state.
677    fn platform_destroy(state: *mut c_void) {
678        #[cfg(target_os = "macos")]
679        {
680            let _ = state;
681        }
682
683        #[cfg(not(target_os = "macos"))]
684        {
685            if !state.is_null() {
686                Self::gpu_link_destroy(state);
687            }
688        }
689    }
690
691    // -----------------------------------------------------------------------
692    // macOS synthetic implementation
693    // -----------------------------------------------------------------------
694
695    /// Produce a synthetic `LinkedModule` by concatenating all PTX and
696    /// binary inputs.
697    #[cfg(target_os = "macos")]
698    fn synthetic_complete(&self) -> CudaResult<LinkedModule> {
699        let mut cubin = Vec::new();
700
701        // Magic header to identify synthetic cubin.
702        cubin.extend_from_slice(b"OXICUDA_SYNTHETIC_CUBIN\0");
703
704        // Append all PTX sources.
705        for ptx in &self.ptx_sources {
706            cubin.extend_from_slice(ptx.as_bytes());
707            cubin.push(0); // null separator
708        }
709
710        // Append all binary sources.
711        for bin in &self.binary_sources {
712            cubin.extend_from_slice(bin);
713        }
714
715        let info_msg = format!(
716            "Synthetic link complete: {} input(s), {} bytes",
717            self.input_count,
718            cubin.len()
719        );
720
721        Ok(LinkedModule {
722            cubin_data: cubin,
723            info_log: info_msg,
724            error_log: String::new(),
725        })
726    }
727
728    // -----------------------------------------------------------------------
729    // GPU-only stubs (compiled out on macOS)
730    // -----------------------------------------------------------------------
731
732    /// Create link state via `cuLinkCreate_v2`.
733    ///
734    /// Returns `(state, info_buf, error_buf)`.  The buffers must be moved
735    /// into the [`Linker`] without reallocation, because the driver retains
736    /// raw pointers into them as the back-store for the JIT info / error
737    /// log options.
738    #[cfg(not(target_os = "macos"))]
739    fn gpu_link_create(options: &LinkerOptions) -> CudaResult<(*mut c_void, Vec<u8>, Vec<u8>)> {
740        let api = crate::loader::try_driver()?;
741        let f = api.cu_link_create.ok_or(CudaError::NotSupported)?;
742
743        let (mut keys, mut vals, info_buf, error_buf) = options.build_jit_options();
744        let num_options = keys.len() as u32;
745
746        let mut state_handle: CUlinkState = CUlinkState::default();
747
748        // SAFETY: `f` is the loaded `cuLinkCreate_v2` entry point.  `keys`
749        // and `vals` are parallel arrays of length `num_options` whose
750        // backing storage outlives the call.  `info_buf` / `error_buf`
751        // back the log-buffer pointers stored in `vals`; the caller of
752        // this fn keeps them alive for the lifetime of the link state.
753        check(unsafe {
754            f(
755                num_options,
756                keys.as_mut_ptr(),
757                vals.as_mut_ptr(),
758                &mut state_handle,
759            )
760        })?;
761
762        Ok((state_handle.0, info_buf, error_buf))
763    }
764
765    /// Add data via `cuLinkAddData_v2`.
766    #[cfg(not(target_os = "macos"))]
767    fn gpu_link_add_data(
768        state: *mut c_void,
769        input_type: CUjitInputType,
770        data: *const c_void,
771        size: usize,
772        name: *const std::ffi::c_char,
773    ) -> CudaResult<()> {
774        let api = crate::loader::try_driver()?;
775        let f = api.cu_link_add_data.ok_or(CudaError::NotSupported)?;
776
777        // The C signature accepts `data` as `*mut c_void` even though the
778        // payload is logically read-only; cast at the boundary.
779        // Per-call options are not required — the linker-wide options were
780        // supplied at `cuLinkCreate` time.
781        // SAFETY: `state` was returned by `cuLinkCreate_v2` and has not
782        // yet been destroyed.  `data` points to a buffer of `size` bytes
783        // owned by the caller (PTX/cubin/fatbin/object/library bytes).
784        // `name` is a NUL-terminated C string owned by the caller.
785        check(unsafe {
786            f(
787                CUlinkState(state),
788                input_type,
789                data as *mut c_void,
790                size,
791                name,
792                0,
793                std::ptr::null_mut(),
794                std::ptr::null_mut(),
795            )
796        })
797    }
798
799    /// Complete the link via `cuLinkComplete`.
800    ///
801    /// Reads the driver-owned cubin pointer and copies it into a `Vec<u8>`
802    /// before the underlying link state is destroyed by [`Drop`].
803    #[cfg(not(target_os = "macos"))]
804    fn gpu_link_complete(self) -> CudaResult<LinkedModule> {
805        let api = crate::loader::try_driver()?;
806        let f = api.cu_link_complete.ok_or(CudaError::NotSupported)?;
807
808        let mut cubin_ptr: *mut c_void = std::ptr::null_mut();
809        let mut cubin_size: usize = 0;
810
811        // SAFETY: `self.state` is a valid link state from `cuLinkCreate_v2`.
812        // `cubin_ptr` and `cubin_size` are fresh out-parameters.
813        let link_result =
814            check(unsafe { f(CUlinkState(self.state), &mut cubin_ptr, &mut cubin_size) });
815        if let Err(e) = link_result {
816            // Surface the JIT diagnostic log in the error so callers can
817            // inspect the ptxas output that led to the failure.
818            return Err(jit_failure(e, &self.info_buf, &self.error_buf));
819        }
820
821        // Copy the driver-owned cubin into our own buffer *before* Drop runs
822        // `cuLinkDestroy`, which invalidates `cubin_ptr`.
823        let cubin_data = if cubin_ptr.is_null() || cubin_size == 0 {
824            Vec::new()
825        } else {
826            // SAFETY: the driver guarantees `cubin_ptr` references a buffer
827            // of `cubin_size` bytes that remains valid until the link state
828            // is destroyed.
829            unsafe { std::slice::from_raw_parts(cubin_ptr.cast::<u8>(), cubin_size) }.to_vec()
830        };
831
832        let info_log = buf_to_string(&self.info_buf);
833        let error_log = buf_to_string(&self.error_buf);
834
835        // `self` is dropped at the end of this fn, which calls
836        // `cuLinkDestroy` and frees the driver-side cubin allocation —
837        // safe now that we've copied it out.
838        Ok(LinkedModule {
839            cubin_data,
840            info_log,
841            error_log,
842        })
843    }
844
845    /// Destroy the link state via `cuLinkDestroy`.
846    ///
847    /// Called from [`Drop`].  Errors are intentionally ignored — panicking
848    /// in a destructor is fatal, and a missing entry point or stale state
849    /// cannot be recovered from at this stage.
850    #[cfg(not(target_os = "macos"))]
851    fn gpu_link_destroy(state: *mut c_void) {
852        if state.is_null() {
853            return;
854        }
855        if let Ok(api) = crate::loader::try_driver() {
856            if let Some(f) = api.cu_link_destroy {
857                // SAFETY: `state` was returned by a successful
858                // `cuLinkCreate_v2` and has not been destroyed yet
859                // (this is the only place that calls `cuLinkDestroy`,
860                // and `Drop` runs at most once per `Linker`).
861                let _ = unsafe { f(CUlinkState(state)) };
862            }
863        }
864    }
865}
866
867impl Drop for Linker {
868    fn drop(&mut self) {
869        Self::platform_destroy(self.state);
870    }
871}
872
873impl std::fmt::Debug for Linker {
874    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
875        f.debug_struct("Linker")
876            .field("state", &format_args!("{:p}", self.state))
877            .field("input_count", &self.input_count)
878            .field("input_names", &self.input_names)
879            .field("options", &self.options)
880            .finish()
881    }
882}
883
884// ---------------------------------------------------------------------------
885// Convenience helpers
886// ---------------------------------------------------------------------------
887
/// Turns a C-style log buffer into an owned [`String`].
///
/// Only the bytes before the first NUL are used (the whole buffer when it
/// contains none).  Invalid UTF-8 is replaced lossily, and leading and
/// trailing whitespace is trimmed from the result.
#[cfg(any(not(target_os = "macos"), test))]
fn buf_to_string(buf: &[u8]) -> String {
    // `split` always yields at least one piece, even for an empty slice, so
    // the fallback is never taken; it only avoids an `unwrap`.
    let text = buf.split(|&byte| byte == 0).next().unwrap_or(buf);
    String::from_utf8_lossy(text).trim().to_owned()
}
895
896// =========================================================================
897// Tests
898// =========================================================================
899
900#[cfg(test)]
901mod tests {
902    use super::*;
903
    /// Minimal PTX module declaring the empty entry point `kernel_a`; only
    /// compiled for the macOS-gated tests in this module.
    #[cfg(target_os = "macos")]
    const SAMPLE_PTX_A: &str = r#"
        .version 7.0
        .target sm_70
        .address_size 64
        .visible .entry kernel_a() { ret; }
    "#;
911
    /// Minimal PTX module declaring the empty entry point `kernel_b`; only
    /// compiled for the macOS-gated tests in this module.
    #[cfg(target_os = "macos")]
    const SAMPLE_PTX_B: &str = r#"
        .version 7.0
        .target sm_70
        .address_size 64
        .visible .entry kernel_b() { ret; }
    "#;
919
920    // -- OptimizationLevel tests --
921
922    #[test]
923    fn optimization_level_values() {
924        assert_eq!(OptimizationLevel::O0.as_u32(), 0);
925        assert_eq!(OptimizationLevel::O1.as_u32(), 1);
926        assert_eq!(OptimizationLevel::O2.as_u32(), 2);
927        assert_eq!(OptimizationLevel::O3.as_u32(), 3);
928        assert_eq!(OptimizationLevel::O4.as_u32(), 4);
929    }
930
931    #[test]
932    fn optimization_level_default() {
933        let level = OptimizationLevel::default();
934        assert_eq!(level, OptimizationLevel::O4);
935    }
936
937    // -- FallbackStrategy tests --
938
939    #[test]
940    fn fallback_strategy_values() {
941        assert_eq!(FallbackStrategy::PreferPtx.as_u32(), 0);
942        assert_eq!(FallbackStrategy::PreferBinary.as_u32(), 1);
943    }
944
945    #[test]
946    fn fallback_strategy_default() {
947        let strategy = FallbackStrategy::default();
948        assert_eq!(strategy, FallbackStrategy::PreferPtx);
949    }
950
951    // -- LinkInputType tests --
952
953    #[test]
954    fn link_input_type_to_raw() {
955        assert_eq!(LinkInputType::Ptx.to_raw(), CUjitInputType::Ptx);
956        assert_eq!(LinkInputType::Cubin.to_raw(), CUjitInputType::Cubin);
957        assert_eq!(LinkInputType::Fatbin.to_raw(), CUjitInputType::Fatbin);
958        assert_eq!(LinkInputType::Object.to_raw(), CUjitInputType::Object);
959        assert_eq!(LinkInputType::Library.to_raw(), CUjitInputType::Library);
960    }
961
962    // -- LinkerOptions tests --
963
964    #[test]
965    fn linker_options_default() {
966        let opts = LinkerOptions::default();
967        assert!(opts.max_registers.is_none());
968        assert_eq!(opts.optimization_level, OptimizationLevel::O4);
969        assert!(opts.target_sm.is_none());
970        assert!(!opts.generate_debug_info);
971        assert!(!opts.generate_line_info);
972        assert!(!opts.log_verbose);
973        assert_eq!(opts.fallback_strategy, FallbackStrategy::PreferPtx);
974    }
975
976    #[test]
977    fn linker_options_custom() {
978        let opts = LinkerOptions {
979            max_registers: Some(32),
980            optimization_level: OptimizationLevel::O2,
981            target_sm: Some(75),
982            generate_debug_info: true,
983            generate_line_info: true,
984            log_verbose: true,
985            fallback_strategy: FallbackStrategy::PreferBinary,
986        };
987        assert_eq!(opts.max_registers, Some(32));
988        assert_eq!(opts.optimization_level, OptimizationLevel::O2);
989        assert_eq!(opts.target_sm, Some(75));
990        assert!(opts.generate_debug_info);
991        assert!(opts.generate_line_info);
992        assert!(opts.log_verbose);
993        assert_eq!(opts.fallback_strategy, FallbackStrategy::PreferBinary);
994    }
995
996    #[test]
997    fn linker_options_build_jit_options_minimal() {
998        let opts = LinkerOptions::default();
999        let (keys, vals, _info_buf, _error_buf) = opts.build_jit_options();
1000
1001        // Minimum options: info log (2), error log (2), opt level (1),
1002        // target from context (1), fallback (1) = 7
1003        assert_eq!(keys.len(), vals.len());
1004        assert!(keys.len() >= 7);
1005    }
1006
1007    #[test]
1008    fn linker_options_build_jit_options_full() {
1009        let opts = LinkerOptions {
1010            max_registers: Some(64),
1011            optimization_level: OptimizationLevel::O3,
1012            target_sm: Some(80),
1013            generate_debug_info: true,
1014            generate_line_info: true,
1015            log_verbose: true,
1016            fallback_strategy: FallbackStrategy::PreferBinary,
1017        };
1018        let (keys, vals, _info_buf, _error_buf) = opts.build_jit_options();
1019
1020        assert_eq!(keys.len(), vals.len());
1021        // info log (2) + error log (2) + opt level (1) + max regs (1)
1022        // + target (1) + debug (1) + line (1) + verbose (1) + fallback (1) = 11
1023        assert!(keys.len() >= 11);
1024    }
1025
1026    // -- Linker lifecycle tests (macOS synthetic mode) --
1027
1028    #[cfg(target_os = "macos")]
1029    #[test]
1030    fn linker_create_default() {
1031        let linker = Linker::new(LinkerOptions::default());
1032        assert!(linker.is_ok());
1033        let linker = match linker {
1034            Ok(l) => l,
1035            Err(e) => panic!("unexpected error: {e}"),
1036        };
1037        assert_eq!(linker.input_count(), 0);
1038        assert!(linker.input_names().is_empty());
1039    }
1040
1041    #[cfg(target_os = "macos")]
1042    #[test]
1043    fn linker_add_single_ptx() {
1044        let mut linker = match Linker::new(LinkerOptions::default()) {
1045            Ok(l) => l,
1046            Err(e) => panic!("unexpected error: {e}"),
1047        };
1048        let result = linker.add_ptx(SAMPLE_PTX_A, "module_a.ptx");
1049        assert!(result.is_ok());
1050        assert_eq!(linker.input_count(), 1);
1051        assert_eq!(linker.input_names(), &["module_a.ptx"]);
1052    }
1053
1054    #[cfg(target_os = "macos")]
1055    #[test]
1056    fn linker_add_multiple_ptx() {
1057        let mut linker = match Linker::new(LinkerOptions::default()) {
1058            Ok(l) => l,
1059            Err(e) => panic!("unexpected error: {e}"),
1060        };
1061        linker.add_ptx(SAMPLE_PTX_A, "a.ptx").ok();
1062        linker.add_ptx(SAMPLE_PTX_B, "b.ptx").ok();
1063        assert_eq!(linker.input_count(), 2);
1064        assert_eq!(linker.input_names(), &["a.ptx", "b.ptx"]);
1065    }
1066
1067    #[cfg(target_os = "macos")]
1068    #[test]
1069    fn linker_complete_with_ptx() {
1070        let mut linker = match Linker::new(LinkerOptions::default()) {
1071            Ok(l) => l,
1072            Err(e) => panic!("unexpected error: {e}"),
1073        };
1074        linker.add_ptx(SAMPLE_PTX_A, "a.ptx").ok();
1075        linker.add_ptx(SAMPLE_PTX_B, "b.ptx").ok();
1076
1077        let linked = linker.complete();
1078        assert!(linked.is_ok());
1079        let linked = match linked {
1080            Ok(l) => l,
1081            Err(e) => panic!("unexpected error: {e}"),
1082        };
1083
1084        assert!(linked.cubin_size() > 0);
1085        assert!(linked.cubin().starts_with(b"OXICUDA_SYNTHETIC_CUBIN\0"));
1086        assert!(!linked.info_log().is_empty());
1087        assert!(linked.error_log().is_empty());
1088    }
1089
1090    #[cfg(target_os = "macos")]
1091    #[test]
1092    fn linker_complete_empty_fails() {
1093        let linker = match Linker::new(LinkerOptions::default()) {
1094            Ok(l) => l,
1095            Err(e) => panic!("unexpected error: {e}"),
1096        };
1097        let result = linker.complete();
1098        assert!(result.is_err());
1099        assert_eq!(result.err(), Some(CudaError::InvalidValue));
1100    }
1101
1102    #[cfg(target_os = "macos")]
1103    #[test]
1104    fn linker_add_cubin() {
1105        let mut linker = match Linker::new(LinkerOptions::default()) {
1106            Ok(l) => l,
1107            Err(e) => panic!("unexpected error: {e}"),
1108        };
1109        let fake_cubin = vec![0x7f, 0x45, 0x4c, 0x46]; // ELF magic
1110        let result = linker.add_cubin(&fake_cubin, "test.cubin");
1111        assert!(result.is_ok());
1112        assert_eq!(linker.input_count(), 1);
1113    }
1114
1115    #[cfg(target_os = "macos")]
1116    #[test]
1117    fn linker_add_fatbin() {
1118        let mut linker = match Linker::new(LinkerOptions::default()) {
1119            Ok(l) => l,
1120            Err(e) => panic!("unexpected error: {e}"),
1121        };
1122        let fake_fatbin = vec![0xBA, 0xB0, 0xCA, 0xFE]; // fatbin magic
1123        let result = linker.add_fatbin(&fake_fatbin, "test.fatbin");
1124        assert!(result.is_ok());
1125        assert_eq!(linker.input_count(), 1);
1126    }
1127
1128    #[cfg(target_os = "macos")]
1129    #[test]
1130    fn linker_add_empty_cubin_fails() {
1131        let mut linker = match Linker::new(LinkerOptions::default()) {
1132            Ok(l) => l,
1133            Err(e) => panic!("unexpected error: {e}"),
1134        };
1135        let result = linker.add_cubin(&[], "empty.cubin");
1136        assert!(result.is_err());
1137        assert_eq!(result.err(), Some(CudaError::InvalidValue));
1138    }
1139
1140    #[cfg(target_os = "macos")]
1141    #[test]
1142    fn linker_add_empty_fatbin_fails() {
1143        let mut linker = match Linker::new(LinkerOptions::default()) {
1144            Ok(l) => l,
1145            Err(e) => panic!("unexpected error: {e}"),
1146        };
1147        let result = linker.add_fatbin(&[], "empty.fatbin");
1148        assert!(result.is_err());
1149        assert_eq!(result.err(), Some(CudaError::InvalidValue));
1150    }
1151
1152    #[cfg(target_os = "macos")]
1153    #[test]
1154    fn linker_mixed_inputs() {
1155        let mut linker = match Linker::new(LinkerOptions::default()) {
1156            Ok(l) => l,
1157            Err(e) => panic!("unexpected error: {e}"),
1158        };
1159        linker.add_ptx(SAMPLE_PTX_A, "a.ptx").ok();
1160        linker.add_cubin(&[1, 2, 3, 4], "b.cubin").ok();
1161        linker.add_ptx(SAMPLE_PTX_B, "c.ptx").ok();
1162
1163        assert_eq!(linker.input_count(), 3);
1164
1165        let linked = match linker.complete() {
1166            Ok(l) => l,
1167            Err(e) => panic!("unexpected error: {e}"),
1168        };
1169
1170        // The cubin should contain both PTX sources and the binary data.
1171        let cubin = linked.cubin();
1172        assert!(cubin.starts_with(b"OXICUDA_SYNTHETIC_CUBIN\0"));
1173        assert!(cubin.len() > 24); // header + content
1174    }
1175
1176    #[cfg(target_os = "macos")]
1177    #[test]
1178    fn linker_into_cubin() {
1179        let mut linker = match Linker::new(LinkerOptions::default()) {
1180            Ok(l) => l,
1181            Err(e) => panic!("unexpected error: {e}"),
1182        };
1183        linker.add_ptx(SAMPLE_PTX_A, "a.ptx").ok();
1184
1185        let linked = match linker.complete() {
1186            Ok(l) => l,
1187            Err(e) => panic!("unexpected error: {e}"),
1188        };
1189
1190        let size = linked.cubin_size();
1191        let raw = linked.into_cubin();
1192        assert_eq!(raw.len(), size);
1193    }
1194
1195    #[cfg(target_os = "macos")]
1196    #[test]
1197    fn linker_debug_format() {
1198        let linker = match Linker::new(LinkerOptions::default()) {
1199            Ok(l) => l,
1200            Err(e) => panic!("unexpected error: {e}"),
1201        };
1202        let debug = format!("{linker:?}");
1203        assert!(debug.contains("Linker"));
1204        assert!(debug.contains("input_count"));
1205    }
1206
1207    #[cfg(target_os = "macos")]
1208    #[test]
1209    fn linker_with_custom_options() {
1210        let opts = LinkerOptions {
1211            max_registers: Some(48),
1212            optimization_level: OptimizationLevel::O3,
1213            target_sm: Some(80),
1214            generate_debug_info: true,
1215            generate_line_info: true,
1216            log_verbose: true,
1217            fallback_strategy: FallbackStrategy::PreferBinary,
1218        };
1219        let mut linker = match Linker::new(opts) {
1220            Ok(l) => l,
1221            Err(e) => panic!("unexpected error: {e}"),
1222        };
1223
1224        linker.add_ptx(SAMPLE_PTX_A, "a.ptx").ok();
1225        let linked = match linker.complete() {
1226            Ok(l) => l,
1227            Err(e) => panic!("unexpected error: {e}"),
1228        };
1229        assert!(linked.cubin_size() > 0);
1230    }
1231
1232    #[cfg(target_os = "macos")]
1233    #[test]
1234    fn linker_add_object_and_library() {
1235        let mut linker = match Linker::new(LinkerOptions::default()) {
1236            Ok(l) => l,
1237            Err(e) => panic!("unexpected error: {e}"),
1238        };
1239        let result = linker.add_object(&[10, 20, 30], "test.o");
1240        assert!(result.is_ok());
1241        let result = linker.add_library(&[40, 50, 60], "test.a");
1242        assert!(result.is_ok());
1243        assert_eq!(linker.input_count(), 2);
1244    }
1245
1246    #[cfg(target_os = "macos")]
1247    #[test]
1248    fn linker_add_empty_object_fails() {
1249        let mut linker = match Linker::new(LinkerOptions::default()) {
1250            Ok(l) => l,
1251            Err(e) => panic!("unexpected error: {e}"),
1252        };
1253        assert_eq!(
1254            linker.add_object(&[], "empty.o").err(),
1255            Some(CudaError::InvalidValue)
1256        );
1257        assert_eq!(
1258            linker.add_library(&[], "empty.a").err(),
1259            Some(CudaError::InvalidValue)
1260        );
1261    }
1262
1263    // -- LinkedModule tests --
1264
1265    #[test]
1266    fn linked_module_accessors() {
1267        let module = LinkedModule {
1268            cubin_data: vec![1, 2, 3, 4, 5],
1269            info_log: "some info".to_string(),
1270            error_log: "some error".to_string(),
1271        };
1272        assert_eq!(module.cubin(), &[1, 2, 3, 4, 5]);
1273        assert_eq!(module.cubin_size(), 5);
1274        assert_eq!(module.info_log(), "some info");
1275        assert_eq!(module.error_log(), "some error");
1276    }
1277
1278    #[test]
1279    fn linked_module_into_cubin() {
1280        let module = LinkedModule {
1281            cubin_data: vec![10, 20, 30],
1282            info_log: String::new(),
1283            error_log: String::new(),
1284        };
1285        let data = module.into_cubin();
1286        assert_eq!(data, vec![10, 20, 30]);
1287    }
1288
1289    #[test]
1290    fn linked_module_clone() {
1291        let module = LinkedModule {
1292            cubin_data: vec![1, 2],
1293            info_log: "info".to_string(),
1294            error_log: String::new(),
1295        };
1296        let cloned = module.clone();
1297        assert_eq!(cloned.cubin(), module.cubin());
1298        assert_eq!(cloned.info_log(), module.info_log());
1299    }
1300
1301    // -- buf_to_string helper tests --
1302
1303    #[test]
1304    fn buf_to_string_basic() {
1305        let buf = b"hello\0world";
1306        assert_eq!(buf_to_string(buf), "hello");
1307    }
1308
1309    #[test]
1310    fn buf_to_string_no_null() {
1311        let buf = b"hello world";
1312        assert_eq!(buf_to_string(buf), "hello world");
1313    }
1314
1315    #[test]
1316    fn buf_to_string_empty() {
1317        let buf: &[u8] = &[];
1318        assert_eq!(buf_to_string(buf), "");
1319    }
1320
1321    #[test]
1322    fn buf_to_string_all_nulls() {
1323        let buf = &[0u8; 10];
1324        assert_eq!(buf_to_string(buf), "");
1325    }
1326
1327    // -- CUjitInputType FFI value tests --
1328
1329    #[test]
1330    fn cujit_input_type_values() {
1331        assert_eq!(CUjitInputType::Ptx as u32, 1);
1332        assert_eq!(CUjitInputType::Cubin as u32, 2);
1333        assert_eq!(CUjitInputType::Fatbin as u32, 3);
1334        assert_eq!(CUjitInputType::Object as u32, 4);
1335        assert_eq!(CUjitInputType::Library as u32, 5);
1336    }
1337
1338    // -- Cross-platform end-to-end smoke test --
1339
1340    /// Construct a linker, add a PTX input, and complete it.  This exercises
1341    /// the full `cuLink*` wiring on platforms where the driver is available
1342    /// (Linux/Windows with NVIDIA), and the synthetic implementation on
1343    /// macOS.  The test passes when:
1344    ///   * the operation succeeds end-to-end (real or synthetic driver), or
1345    ///   * any failure is a recognised, non-panicking [`CudaError`] variant
1346    ///     (typical on CI without a GPU: `NotSupported`, `NotInitialized`,
1347    ///     `NoDevice`, `UnsupportedPlatform`, etc.).
1348    ///
1349    /// The intent is to catch a `panic!` in any of the four newly-wired
1350    /// link entry points (`cuLinkCreate_v2`, `cuLinkAddData_v2`,
1351    /// `cuLinkComplete`, `cuLinkDestroy`) — implementations must always
1352    /// return a `CudaError` rather than aborting.
1353    #[test]
1354    fn linker_end_to_end_returns_sensible_result() {
1355        const PTX: &str = r#"
1356            .version 7.0
1357            .target sm_70
1358            .address_size 64
1359            .visible .entry kernel_smoke() { ret; }
1360        "#;
1361
1362        let linker = Linker::new(LinkerOptions::default());
1363        let mut linker = match linker {
1364            Ok(l) => l,
1365            Err(e) => {
1366                // Acceptable on systems without a CUDA driver.
1367                let _ = format!("{e}");
1368                return;
1369            }
1370        };
1371
1372        if let Err(e) = linker.add_ptx(PTX, "smoke.ptx") {
1373            // Acceptable when the driver is absent or rejects the PTX.
1374            let _ = format!("{e}");
1375            return;
1376        }
1377
1378        match linker.complete() {
1379            Ok(linked) => {
1380                // Real or synthetic — both produce a non-zero cubin.
1381                assert!(linked.cubin_size() > 0);
1382            }
1383            Err(e) => {
1384                // Acceptable failure modes — what matters is no panic.
1385                let _ = format!("{e}");
1386            }
1387        }
1388    }
1389}