// cust/context/legacy.rs

1//! Legacy (old) context management which preceded primary contexts.
2//!
3//! # CUDA context management
4//!
5//! Most CUDA functions require a context. A CUDA context is analogous to a CPU process - it's
6//! an isolated container for all runtime state, including configuration settings and the
7//! device/unified/page-locked memory allocations. Each context has a separate memory space, and
8//! pointers from one context do not work in another. Each context is associated with a single
9//! device. Although it is possible to have multiple contexts associated with a single device, this
10//! is strongly discouraged as it can cause a significant loss of performance.
11//!
12//! CUDA keeps a thread-local stack of contexts which the programmer can push to or pop from.
13//! The top context in that stack is known as the "current" context and it is used in most CUDA
14//! API calls. One context can be safely made current in multiple CPU threads.
15//!
16//! # Safety
17//!
18//! The CUDA context management API does not fit easily into Rust's safety guarantees.
19//!
20//! The thread-local stack (as well as the fact that any context can be on the stack for any number
21//! of threads) means there is no clear owner for a CUDA context, but it still has to be cleaned up.
22//! Also, the fact that a context can be current to multiple threads at once means that there can be
23//! multiple implicit references to a context which are not controlled by Rust.
24//!
25//! cust handles ownership by providing an owning [`Context`](struct.Context.html) struct and
26//! a non-owning [`UnownedContext`](struct.UnownedContext.html). When the `Context` is dropped, the
27//! backing context is destroyed. The context could be current on other threads, though. In this
28//! case, the context is still destroyed, and attempts to access the context on other threads will
29//! fail with an error. This is (mostly) safe, if a bit inconvenient. It's only mostly safe because
30//! other threads could be accessing that context while the destructor is running on this thread,
31//! which could result in undefined behavior.
32//!
33//! In short, Rust's thread-safety guarantees cannot fully protect use of the context management
34//! functions. The programmer must ensure that no other OS threads are using the `Context` when it
35//! is dropped.
36//!
37//! # Examples
38//!
//! For most common uses (one device, one OS thread) it should suffice to create a single context:
40//!
41//! ```
42//! use cust::device::Device;
43//! use cust::context::legacy::{Context, ContextFlags};
44//! # use std::error::Error;
45//! # fn main () -> Result<(), Box<dyn Error>> {
46//!
47//! cust::init(cust::CudaFlags::empty())?;
48//! let device = Device::get_device(0)?;
49//! let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
50//! // call cust functions which use the context
51//!
52//! // The context will be destroyed when dropped or it falls out of scope.
53//! drop(context);
54//! # Ok(())
55//! # }
56//! ```
57//!
58//! If you have multiple OS threads that each submit work to the same device, you can get a handle
59//! to the single context and pass it to each thread.
60//!
61//! ```
62//! # use cust::context::legacy::{Context, ContextFlags, CurrentContext};
63//! # use cust::device::Device;
64//! # use std::error::Error;
65//! # fn main() -> Result<(), Box<dyn Error>> {
66//! # cust::init(cust::CudaFlags::empty())?;
67//! # let device = Device::get_device(0)?;
68//! // As before
69//! let context =
70//!     Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
71//! let mut join_handles = vec![];
72//!
73//! for _ in 0..4 {
74//!     let unowned = context.get_unowned();
75//!     let join_handle = std::thread::spawn(move || {
76//!         CurrentContext::set_current(&unowned).unwrap();
77//!         // Call cust functions which use the context
78//!     });
79//!     join_handles.push(join_handle);
80//! }
81//! // We must ensure that the other threads are not using the context when it's destroyed.
82//! for handle in join_handles {
83//!     handle.join().unwrap();
84//! }
85//! // Now it's safe to drop the context.
86//! drop(context);
87//! # Ok(())
88//! # }
89//! ```
90//!
91//! If you have multiple devices, each device needs its own context.
92//!
93//! ```
94//! # use cust::device::Device;
95//! # use cust::context::legacy::{Context, ContextStack, ContextFlags, CurrentContext};
96//! # use std::error::Error;
97//! #
98//! # fn main() -> Result<(), Box<dyn Error>> {
99//! # cust::init(cust::CudaFlags::empty())?;
100//! // Create and pop contexts for each device
101//! let mut contexts = vec![];
102//! for device in Device::devices()? {
103//!     let device = device?;
104//!     let ctx =
105//!         Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
106//!     ContextStack::pop()?;
107//!     contexts.push(ctx);
108//! }
109//! CurrentContext::set_current(&contexts[0])?;
110//!
111//! // Call cust functions which will use the context
112//!
113//! # Ok(())
114//! # }
115//! ```
116
117use crate::context::ContextHandle;
118use crate::device::Device;
119use crate::error::{CudaResult, DropResult, ToResult};
120use crate::private::Sealed;
121use crate::sys::{self as cuda, CUcontext};
122use crate::CudaApiVersion;
123use std::mem;
124use std::mem::transmute;
125use std::ptr;
126
127/// This enumeration represents configuration settings for devices which share hardware resources
128/// between L1 cache and shared memory.
129///
130/// Note that this is only a preference - the driver will use the requested configuration if
131/// possible, but it is free to choose a different configuration if required to execute functions.
132///
133/// See
134/// [CurrentContext::get_cache_config](struct.CurrentContext.html#method.get_cache_config) and
135/// [CurrentContext::set_cache_config](struct.CurrentContext.html#method.set_cache_config) to get
136/// and set the cache config for the current context.
137#[repr(u32)]
138#[non_exhaustive]
139#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
140pub enum CacheConfig {
141    /// No preference for shared memory or L1 (default)
142    PreferNone = 0,
143    /// Prefer larger shared memory and smaller L1 cache
144    PreferShared = 1,
145    /// Prefer larger L1 cache and smaller shared memory
146    PreferL1 = 2,
147    /// Prefer equal-sized L1 cache and shared memory
148    PreferEqual = 3,
149}
150
151/// This enumeration represents the limited resources which can be accessed through
152/// [CurrentContext::get_resource_limit](struct.CurrentContext.html#method.get_resource_limit) and
153/// [CurrentContext::set_resource_limit](struct.CurrentContext.html#method.set_resource_limit).
154#[repr(u32)]
155#[non_exhaustive]
156#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
157pub enum ResourceLimit {
158    /// The size in bytes of each GPU thread stack
159    StackSize = 0,
160    /// The size in bytes of the FIFO used by the `printf()` device system call.
161    PrintfFifoSize = 1,
162    /// The size in bytes of the heap used by the `malloc()` and `free()` device system calls.
163    ///
164    /// Note that this is used for memory allocated within a kernel launch; it is not related to the
165    /// device memory allocated by the host.
166    MallocHeapSize = 2,
167    /// The maximum nesting depth of a grid at which a thread can safely call
168    /// `cudaDeviceSynchronize()` to wait on child grid launches to complete.
169    DeviceRuntimeSynchronizeDepth = 3,
170    /// The maximum number of outstanding device runtime launches that can be made from the current
171    /// context.
172    DeviceRuntimePendingLaunchCount = 4,
173    /// L2 cache fetch granularity
174    MaxL2FetchGranularity = 5,
175}
176
177/// This enumeration represents the options for configuring the shared memory bank size.
178///
179/// See
180/// [CurrentContext::get_shared_memory_config](struct.CurrentContext.html#method.get_shared_memory_config) and
181/// [CurrentContext::set_shared_memory_config](struct.CurrentContext.html#method.set_shared_memory_config) to get
182/// and set the cache config for the current context.
183#[repr(u32)]
184#[non_exhaustive]
185#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
186pub enum SharedMemoryConfig {
187    /// Set shared-memory bank size to the default.
188    DefaultBankSize = 0,
189    /// Set shared-memory bank width to four bytes
190    FourByteBankSize = 1,
191    /// Set shared-memory bank width to eight bytes
192    EightByteBankSize = 2,
193}
194
195bitflags::bitflags! {
196    /// Bit flags for initializing the CUDA context.
197    ///
198    /// If you're not sure which flags to use, `MAP_HOST | SCHED_AUTO` is a good default.
199    pub struct ContextFlags: u32 {
200        /// Instructs CUDA to actively spin when waiting for results from the GPU. This can decrease
201        /// latency when waiting for the GPU, but may lower the performance of other CPU threads
202        /// if they are performing work in parallel with the CUDA thread.
203        const SCHED_SPIN = 0x01;
204
205        /// Instructs CUDA to yield its thread when waiting for results from the GPU. This can
206        /// increase latency when waiting for the GPU, but can increase the performance of CPU
207        /// threads performing work in parallel with the GPU.
208        const SCHED_YIELD = 0x02;
209
210        /// Instructs CUDA to block the CPU thread on a synchronization primitive when waiting for
211        /// the GPU to finish work.
212        const SCHED_BLOCKING_SYNC = 0x04;
213
214        /// Instructs CUDA to automatically choose whether to yield to other OS threads while waiting
215        /// for the GPU, or to spin the OS thread. This is the default.
216        const SCHED_AUTO = 0x00;
217
218        /// Instructs CUDA to support mapped pinned allocations. This flag must be set in order to
219        /// use page-locked memory (see [LockedBuffer](../memory/struct.LockedBuffer.html])).
220        const MAP_HOST = 0x08;
221
222        /// Instruct CUDA not to reduce local memory after resizing local memory for a kernel. This
223        /// can prevent thrashing by local memory allocations when launching many kernels with high
224        /// local memory usage at the cost of potentially increased memory usage.
225        const LMEM_RESIZE_TO_MAX = 0x10;
226    }
227}
228
229/// Owned handle to a CUDA context.
230///
231/// The context will be destroyed when this goes out of scope. If this is the current context on
232/// the current OS thread, the next context on the stack (if any) will be made current. Note that
233/// the context will be destroyed even if other threads are still using it. Attempts to access the
234/// destroyed context from another thread will return an error.
235#[derive(Debug)]
236pub struct Context {
237    inner: CUcontext,
238}
239impl Context {
240    /// Create a CUDA context for the given device.
241    ///
242    /// # Example
243    ///
244    /// ```
245    /// # use cust::device::Device;
246    /// # use cust::context::legacy::{Context, ContextFlags};
247    /// # use std::error::Error;
248    /// #
249    /// # fn main () -> Result<(), Box<dyn Error>> {
250    /// cust::init(cust::CudaFlags::empty())?;
251    /// let device = Device::get_device(0)?;
252    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
253    /// # Ok(())
254    /// # }
255    /// ```
256    pub fn create_and_push(flags: ContextFlags, device: Device) -> CudaResult<Context> {
257        unsafe {
258            // CUDA only provides a create-and-push operation, but that makes it hard to provide
259            // lifetime guarantees so we create-and-push, then pop, then the programmer has to
260            // push again.
261            let mut ctx: CUcontext = ptr::null_mut();
262            cuda::cuCtxCreate_v2(&mut ctx as *mut CUcontext, flags.bits(), device.as_raw())
263                .to_result()?;
264            Ok(Context { inner: ctx })
265        }
266    }
267
268    /// Get the API version used to create this context.
269    ///
270    /// This is not necessarily the latest version supported by the driver.
271    ///
272    /// # Example
273    ///
274    /// ```
275    /// # use cust::device::Device;
276    /// # use cust::context::legacy::{Context, ContextFlags};
277    /// # use std::error::Error;
278    /// #
279    /// # fn main () -> Result<(), Box<dyn Error>> {
280    /// cust::init(cust::CudaFlags::empty())?;
281    /// let device = Device::get_device(0)?;
282    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
283    /// let version = context.get_api_version()?;
284    /// # Ok(())
285    /// # }
286    /// ```
287    pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
288        unsafe {
289            let mut api_version = 0u32;
290            cuda::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?;
291            Ok(CudaApiVersion {
292                version: api_version as i32,
293            })
294        }
295    }
296
297    /// Returns an non-owning handle to this context.
298    ///
299    /// This is useful for sharing a single context between threads (though see the module-level
300    /// documentation for safety details!).
301    ///
302    /// # Example
303    ////*  */
304    /// ```
305    /// # use cust::device::Device;
306    /// # use cust::context::legacy::{Context, ContextFlags};
307    /// # use std::error::Error;
308    /// #
309    /// # fn main() -> Result<(), Box<dyn Error>> {
310    /// # cust::init(cust::CudaFlags::empty())?;
311    /// # let device = Device::get_device(0)?;
312    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
313    /// let unowned = context.get_unowned();
314    /// # Ok(())
315    /// # }
316    /// ```
317    pub fn get_unowned(&self) -> UnownedContext {
318        UnownedContext { inner: self.inner }
319    }
320
321    /// Destroy a `Context`, returning an error.
322    ///
323    /// Destroying a context can return errors from previous asynchronous work. This function
324    /// destroys the given context and returns the error and the un-destroyed context on failure.
325    ///
326    /// # Example
327    ///
328    /// ```
329    /// # use cust::device::Device;
330    /// # use cust::context::legacy::{Context, ContextFlags};
331    /// # use std::error::Error;
332    /// #
333    /// # fn main () -> Result<(), Box<dyn Error>> {
334    /// # cust::init(cust::CudaFlags::empty())?;
335    /// # let device = Device::get_device(0)?;
336    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
337    /// match Context::drop(context) {
338    ///     Ok(()) => println!("Successfully destroyed"),
339    ///     Err((e, ctx)) => {
340    ///         println!("Failed to destroy context: {:?}", e);
341    ///         // Do something with ctx
342    ///     },
343    /// }
344    /// # Ok(())
345    /// # }
346    /// ```
347    pub fn drop(mut ctx: Context) -> DropResult<Context> {
348        if ctx.inner.is_null() {
349            return Ok(());
350        }
351
352        unsafe {
353            let inner = mem::replace(&mut ctx.inner, ptr::null_mut());
354            match cuda::cuCtxDestroy_v2(inner).to_result() {
355                Ok(()) => {
356                    mem::forget(ctx);
357                    Ok(())
358                }
359                Err(e) => Err((e, Context { inner })),
360            }
361        }
362    }
363}
364impl Drop for Context {
365    fn drop(&mut self) {
366        if self.inner.is_null() {
367            return;
368        }
369
370        unsafe {
371            let inner = mem::replace(&mut self.inner, ptr::null_mut());
372            cuda::cuCtxDestroy_v2(inner);
373        }
374    }
375}
376
377impl Sealed for Context {}
378impl ContextHandle for Context {
379    fn get_inner(&self) -> CUcontext {
380        self.inner
381    }
382}
383impl Sealed for UnownedContext {}
384impl ContextHandle for UnownedContext {
385    fn get_inner(&self) -> CUcontext {
386        self.inner
387    }
388}
389
390/// Non-owning handle to a CUDA context.
391#[derive(Debug, Clone)]
392pub struct UnownedContext {
393    inner: CUcontext,
394}
395unsafe impl Send for UnownedContext {}
396unsafe impl Sync for UnownedContext {}
397impl UnownedContext {
398    /// Get the API version used to create this context.
399    ///
400    /// This is not necessarily the latest version supported by the driver.
401    ///
402    /// # Example
403    ///
404    /// ```
405    /// # use cust::device::Device;
406    /// # use cust::context::legacy::{Context, ContextFlags};
407    /// # use std::error::Error;
408    /// #
409    /// # fn main () -> Result<(), Box<dyn Error>> {
410    /// # cust::init(cust::CudaFlags::empty())?;
411    /// # let device = Device::get_device(0)?;
412    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
413    /// let unowned = context.get_unowned();
414    /// let version = unowned.get_api_version()?;
415    /// #
416    /// # Ok(())
417    /// # }
418    /// ```
419    pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
420        unsafe {
421            let mut api_version = 0u32;
422            cuda::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?;
423            Ok(CudaApiVersion {
424                version: api_version as i32,
425            })
426        }
427    }
428}
429
430/// Type used to represent the thread-local context stack.
431#[derive(Debug)]
432pub struct ContextStack;
433impl ContextStack {
434    /// Pop the current context off the stack and return the handle. That context may then be made
435    /// current again (perhaps on a different CPU thread) by calling [push](#method.push).
436    ///
437    /// # Example
438    ///
439    /// ```
440    /// # use cust::device::Device;
441    /// # use cust::context::legacy ::{Context, ContextFlags, ContextStack};
442    /// # use std::error::Error;
443    /// #
444    /// # fn main () -> Result<(), Box<dyn Error>> {
445    /// # cust::init(cust::CudaFlags::empty())?;
446    /// # let device = Device::get_device(0)?;
447    /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
448    /// let unowned = ContextStack::pop()?;
449    /// #
450    /// # Ok(())
451    /// # }
452    /// ```
453    pub fn pop() -> CudaResult<UnownedContext> {
454        unsafe {
455            let mut ctx: CUcontext = ptr::null_mut();
456            cuda::cuCtxPopCurrent_v2(&mut ctx as *mut CUcontext).to_result()?;
457            Ok(UnownedContext { inner: ctx })
458        }
459    }
460
461    /// Push the given context to the top of the stack
462    ///
463    /// # Example
464    ///
465    /// ```
466    /// # use cust::device::Device;
467    /// # use cust::context::legacy::{Context, ContextFlags, ContextStack};
468    /// # use std::error::Error;
469    /// #
470    /// # fn main () -> Result<(), Box<dyn Error>> {
471    /// # cust::init(cust::CudaFlags::empty())?;
472    /// # let device = Device::get_device(0)?;
473    /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
474    /// let unowned = ContextStack::pop()?;
475    /// ContextStack::push(&unowned)?;
476    /// # Ok(())
477    /// # }
478    /// ```
479    pub fn push<C: ContextHandle>(ctx: &C) -> CudaResult<()> {
480        unsafe {
481            cuda::cuCtxPushCurrent_v2(ctx.get_inner()).to_result()?;
482            Ok(())
483        }
484    }
485}
486
487/// Struct representing a range of stream priorities.
488///
489/// By convention, lower numbers imply greater priorities. The range of meaningful stream priorities
490/// is given by `[greatest, least]` - that is (numerically), `greatest <= least`.
491#[derive(Debug, Clone, Hash, Eq, PartialEq)]
492pub struct StreamPriorityRange {
493    /// The least stream priority
494    pub least: i32,
495    /// The greatest stream priority
496    pub greatest: i32,
497}
498
499/// Type representing the top context in the thread-local stack.
500#[derive(Debug)]
501pub struct CurrentContext;
502impl CurrentContext {
503    /// Returns the preferred cache configuration for the current context.
504    ///
505    /// On devices where the L1 cache and shared memory use the same hardware resources, this
506    /// function returns the preferred cache configuration for the current context. For devices
507    /// where the size of the L1 cache and shared memory are fixed, this will always return
508    /// `CacheConfig::PreferNone`.
509    ///
510    /// # Example
511    ///
512    /// ```
513    /// # use cust::device::Device;
514    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
515    /// # use std::error::Error;
516    /// #
517    /// # fn main () -> Result<(), Box<dyn Error>> {
518    /// # cust::init(cust::CudaFlags::empty())?;
519    /// # let device = Device::get_device(0)?;
520    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
521    /// let cache_config = CurrentContext::get_cache_config()?;
522    /// # Ok(())
523    /// # }
524    /// ```
525    pub fn get_cache_config() -> CudaResult<CacheConfig> {
526        unsafe {
527            let mut config = CacheConfig::PreferNone;
528            cuda::cuCtxGetCacheConfig(&mut config as *mut CacheConfig as *mut cuda::CUfunc_cache)
529                .to_result()?;
530            Ok(config)
531        }
532    }
533
534    /// Return the device ID for the current context.
535    ///
536    /// # Example
537    ///
538    /// ```
539    /// # use cust::device::Device;
540    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
541    /// # use std::error::Error;
542    /// #
543    /// # fn main () -> Result<(), Box<dyn Error>> {
544    /// # cust::init(cust::CudaFlags::empty())?;
545    /// # let device = Device::get_device(0)?;
546    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
547    /// let device = CurrentContext::get_device()?;
548    /// # Ok(())
549    /// # }
550    /// ```
551    pub fn get_device() -> CudaResult<Device> {
552        unsafe {
553            let mut device = Device { device: 0 };
554            cuda::cuCtxGetDevice(&mut device.device as *mut cuda::CUdevice).to_result()?;
555            Ok(device)
556        }
557    }
558
559    /// Return the context flags for the current context.
560    ///
561    /// # Example
562    ///
563    /// ```
564    /// # use cust::device::Device;
565    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
566    /// # use std::error::Error;
567    /// #
568    /// # fn main () -> Result<(), Box<dyn Error>> {
569    /// # cust::init(cust::CudaFlags::empty())?;
570    /// # let device = Device::get_device(0)?;
571    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
572    /// let flags = CurrentContext::get_flags()?;
573    /// # Ok(())
574    /// # }
575    /// ```
576    pub fn get_flags() -> CudaResult<ContextFlags> {
577        unsafe {
578            let mut flags = 0u32;
579            cuda::cuCtxGetFlags(&mut flags as *mut u32).to_result()?;
580            Ok(ContextFlags::from_bits_truncate(flags))
581        }
582    }
583
584    /// Return resource limits for the current context.
585    ///
586    /// # Example
587    ///
588    /// ```
589    /// # use cust::device::Device;
590    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, ResourceLimit };
591    /// # use std::error::Error;
592    /// #
593    /// # fn main () -> Result<(), Box<dyn Error>> {
594    /// # cust::init(cust::CudaFlags::empty())?;
595    /// # let device = Device::get_device(0)?;
596    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
597    /// let stack_size = CurrentContext::get_resource_limit(ResourceLimit::StackSize)?;
598    /// # Ok(())
599    /// # }
600    /// ```
601    pub fn get_resource_limit(resource: ResourceLimit) -> CudaResult<usize> {
602        unsafe {
603            let mut limit: usize = 0;
604            cuda::cuCtxGetLimit(&mut limit as *mut usize, transmute(resource)).to_result()?;
605            Ok(limit)
606        }
607    }
608
609    /// Return resource limits for the current context.
610    ///
611    /// # Example
612    ///
613    /// ```
614    /// # use cust::device::Device;
615    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, ResourceLimit };
616    /// # use std::error::Error;
617    /// #
618    /// # fn main () -> Result<(), Box<dyn Error>> {
619    /// # cust::init(cust::CudaFlags::empty())?;
620    /// # let device = Device::get_device(0)?;
621    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
622    /// let shared_mem_config = CurrentContext::get_shared_memory_config()?;
623    /// # Ok(())
624    /// # }
625    /// ```
626    pub fn get_shared_memory_config() -> CudaResult<SharedMemoryConfig> {
627        unsafe {
628            let mut cfg = SharedMemoryConfig::DefaultBankSize;
629            cuda::cuCtxGetSharedMemConfig(
630                &mut cfg as *mut SharedMemoryConfig as *mut cuda::CUsharedconfig,
631            )
632            .to_result()?;
633            Ok(cfg)
634        }
635    }
636
637    /// Return the least and greatest stream priorities.
638    ///
639    /// If the program attempts to create a stream with a priority outside of this range, it will be
640    /// automatically clamped to within the valid range. If the device does not support stream
641    /// priorities, the returned range will contain zeroes.
642    ///
643    /// # Example
644    ///
645    /// ```
646    /// # use cust::device::Device;
647    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext};
648    /// # use std::error::Error;
649    /// #
650    /// # fn main () -> Result<(), Box<dyn Error>> {
651    /// # cust::init(cust::CudaFlags::empty())?;
652    /// # let device = Device::get_device(0)?;
653    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
654    /// let priority_range = CurrentContext::get_stream_priority_range()?;
655    /// # Ok(())
656    /// # }
657    /// ```
658    pub fn get_stream_priority_range() -> CudaResult<StreamPriorityRange> {
659        unsafe {
660            let mut range = StreamPriorityRange {
661                least: 0,
662                greatest: 0,
663            };
664            cuda::cuCtxGetStreamPriorityRange(
665                &mut range.least as *mut i32,
666                &mut range.greatest as *mut i32,
667            )
668            .to_result()?;
669            Ok(range)
670        }
671    }
672
673    /// Sets the preferred cache configuration for the current context.
674    ///
675    /// On devices where L1 cache and shared memory use the same hardware resources, this sets the
676    /// preferred cache configuration for the current context. This is only a preference. The
677    /// driver will use the requested configuration if possible, but is free to choose a different
678    /// configuration if required to execute the function.
679    ///
680    /// This setting does nothing on devices where the size of the L1 cache and shared memory are
681    /// fixed.
682    ///
683    /// # Example
684    ///
685    /// ```
686    /// # use cust::device::Device;
687    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, CacheConfig };
688    /// # use std::error::Error;
689    /// #
690    /// # fn main () -> Result<(), Box<dyn Error>> {
691    /// # cust::init(cust::CudaFlags::empty())?;
692    /// # let device = Device::get_device(0)?;
693    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
694    /// CurrentContext::set_cache_config(CacheConfig::PreferL1)?;
695    /// # Ok(())
696    /// # }
697    /// ```
698    pub fn set_cache_config(cfg: CacheConfig) -> CudaResult<()> {
699        unsafe { cuda::cuCtxSetCacheConfig(transmute(cfg)).to_result() }
700    }
701
702    /// Sets a requested resource limit for the current context.
703    ///
704    /// Note that this is only a request; the driver is free to modify the requested value to meet
705    /// hardware requirements. Each limit has some specific restrictions.
706    ///
707    /// * `StackSize`: Controls the stack size in bytes for each GPU thread
708    /// * `PrintfFifoSize`: Controls the size in bytes of the FIFO used by the `printf()` device
709    ///   system call. This cannot be changed after a kernel has been launched which uses the
710    ///   `printf()` function.
711    /// * `MallocHeapSize`: Controls the size in bytes of the heap used by the `malloc()` and `free()`
712    ///   device system calls. This cannot be changed aftr a kernel has been launched which uses the
713    ///   `malloc()` and `free()` system calls.
714    /// * `DeviceRuntimeSyncDepth`: Controls the maximum nesting depth of a grid at which a thread
715    ///   can safely call `cudaDeviceSynchronize()`. This cannot be changed after a kernel has been
716    ///   launched which uses the device runtime. When setting this limit, keep in mind that
717    ///   additional levels of sync depth require the driver to reserve large amounts of device
718    ///   memory which can no longer be used for device allocations.
719    /// * `DeviceRuntimePendingLaunchCount`: Controls the maximum number of outstanding device
720    ///    runtime launches that can be made from the current context. A grid is outstanding from
721    ///    the point of the launch up until the grid is known to have completed. Keep in mind that
722    ///    increasing this limit will require the driver to reserve larger amounts of device memory
723    ///    which can no longer be used for device allocations.
724    /// * `MaxL2FetchGranularity`: Controls the L2 fetch granularity. This is purely a performance
725    ///    hint and it can be ignored or clamped depending on the platform.
726    ///
727    /// # Example
728    ///
729    /// ```
730    /// # use cust::device::Device;
731    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, ResourceLimit };
732    /// # use std::error::Error;
733    /// #
734    /// # fn main () -> Result<(), Box<dyn Error>> {
735    /// # cust::init(cust::CudaFlags::empty())?;
736    /// # let device = Device::get_device(0)?;
737    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
738    /// CurrentContext::set_resource_limit(ResourceLimit::StackSize, 2048)?;
739    /// # Ok(())
740    /// # }
741    /// ```
742    pub fn set_resource_limit(resource: ResourceLimit, limit: usize) -> CudaResult<()> {
743        unsafe {
744            cuda::cuCtxSetLimit(transmute(resource), limit).to_result()?;
745            Ok(())
746        }
747    }
748
749    /// Sets the preferred shared memory configuration for the current context.
750    ///
751    /// On devices with configurable shared memory banks, this function will set the context's
752    /// shared memory bank size which is used for subsequent kernel launches.
753    ///
754    /// # Example
755    ///
756    /// ```
757    /// # use cust::device::Device;
758    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, SharedMemoryConfig };
759    /// # use std::error::Error;
760    /// #
761    /// # fn main () -> Result<(), Box<dyn Error>> {
762    /// # cust::init(cust::CudaFlags::empty())?;
763    /// # let device = Device::get_device(0)?;
764    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
765    /// CurrentContext::set_shared_memory_config(SharedMemoryConfig::DefaultBankSize)?;
766    /// # Ok(())
767    /// # }
768    /// ```
769    pub fn set_shared_memory_config(cfg: SharedMemoryConfig) -> CudaResult<()> {
770        unsafe { cuda::cuCtxSetSharedMemConfig(transmute(cfg)).to_result() }
771    }
772
773    /// Returns a non-owning handle to the current context.
774    ///
775    /// # Example
776    ///
777    /// ```
778    /// # use cust::device::Device;
779    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
780    /// # use std::error::Error;
781    /// #
782    /// # fn main () -> Result<(), Box<dyn Error>> {
783    /// # cust::init(cust::CudaFlags::empty())?;
784    /// # let device = Device::get_device(0)?;
785    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
786    /// let unowned = CurrentContext::get_current()?;
787    /// # Ok(())
788    /// # }
789    /// ```
790    pub fn get_current() -> CudaResult<UnownedContext> {
791        unsafe {
792            let mut ctx: CUcontext = ptr::null_mut();
793            cuda::cuCtxGetCurrent(&mut ctx as *mut CUcontext).to_result()?;
794            Ok(UnownedContext { inner: ctx })
795        }
796    }
797
798    /// Set the given context as the current context for this thread.
799    ///
800    /// If there is no context set for this thread, this pushes the given context onto the stack.
801    /// If there is a context set for this thread, this replaces the top context on the stack with
802    /// the given context.
803    ///
804    /// # Example
805    ///
806    /// ```
807    /// # use cust::device::Device;
808    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
809    /// # use std::error::Error;
810    /// #
811    /// # fn main () -> Result<(), Box<dyn Error>> {
812    /// # cust::init(cust::CudaFlags::empty())?;
813    /// # let device = Device::get_device(0)?;
814    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
815    /// CurrentContext::set_current(&context)?;
816    /// # Ok(())
817    /// # }
818    /// ```
819    pub fn set_current<C: ContextHandle>(c: &C) -> CudaResult<()> {
820        unsafe {
821            cuda::cuCtxSetCurrent(c.get_inner()).to_result()?;
822            Ok(())
823        }
824    }
825
826    /// Block to wait for a context's tasks to complete.
827    pub fn synchronize() -> CudaResult<()> {
828        unsafe {
829            cuda::cuCtxSynchronize().to_result()?;
830            Ok(())
831        }
832    }
833}