// cust/context/legacy.rs
1//! Legacy (old) context management which preceded primary contexts.
2//!
3//! # CUDA context management
4//!
5//! Most CUDA functions require a context. A CUDA context is analogous to a CPU process - it's
6//! an isolated container for all runtime state, including configuration settings and the
7//! device/unified/page-locked memory allocations. Each context has a separate memory space, and
8//! pointers from one context do not work in another. Each context is associated with a single
9//! device. Although it is possible to have multiple contexts associated with a single device, this
10//! is strongly discouraged as it can cause a significant loss of performance.
11//!
12//! CUDA keeps a thread-local stack of contexts which the programmer can push to or pop from.
13//! The top context in that stack is known as the "current" context and it is used in most CUDA
14//! API calls. One context can be safely made current in multiple CPU threads.
15//!
16//! # Safety
17//!
18//! The CUDA context management API does not fit easily into Rust's safety guarantees.
19//!
20//! The thread-local stack (as well as the fact that any context can be on the stack for any number
21//! of threads) means there is no clear owner for a CUDA context, but it still has to be cleaned up.
22//! Also, the fact that a context can be current to multiple threads at once means that there can be
23//! multiple implicit references to a context which are not controlled by Rust.
24//!
25//! cust handles ownership by providing an owning [`Context`](struct.Context.html) struct and
26//! a non-owning [`UnownedContext`](struct.UnownedContext.html). When the `Context` is dropped, the
27//! backing context is destroyed. The context could be current on other threads, though. In this
28//! case, the context is still destroyed, and attempts to access the context on other threads will
29//! fail with an error. This is (mostly) safe, if a bit inconvenient. It's only mostly safe because
30//! other threads could be accessing that context while the destructor is running on this thread,
31//! which could result in undefined behavior.
32//!
33//! In short, Rust's thread-safety guarantees cannot fully protect use of the context management
34//! functions. The programmer must ensure that no other OS threads are using the `Context` when it
35//! is dropped.
36//!
37//! # Examples
38//!
//! For most common uses (one device, one OS thread) it should suffice to create a single context:
40//!
41//! ```
42//! use cust::device::Device;
43//! use cust::context::legacy::{Context, ContextFlags};
44//! # use std::error::Error;
45//! # fn main () -> Result<(), Box<dyn Error>> {
46//!
47//! cust::init(cust::CudaFlags::empty())?;
48//! let device = Device::get_device(0)?;
49//! let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
50//! // call cust functions which use the context
51//!
//! // The context will be destroyed when it is dropped or falls out of scope.
53//! drop(context);
54//! # Ok(())
55//! # }
56//! ```
57//!
58//! If you have multiple OS threads that each submit work to the same device, you can get a handle
59//! to the single context and pass it to each thread.
60//!
61//! ```
62//! # use cust::context::legacy::{Context, ContextFlags, CurrentContext};
63//! # use cust::device::Device;
64//! # use std::error::Error;
65//! # fn main() -> Result<(), Box<dyn Error>> {
66//! # cust::init(cust::CudaFlags::empty())?;
67//! # let device = Device::get_device(0)?;
68//! // As before
69//! let context =
70//! Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
71//! let mut join_handles = vec![];
72//!
73//! for _ in 0..4 {
74//! let unowned = context.get_unowned();
75//! let join_handle = std::thread::spawn(move || {
76//! CurrentContext::set_current(&unowned).unwrap();
77//! // Call cust functions which use the context
78//! });
79//! join_handles.push(join_handle);
80//! }
81//! // We must ensure that the other threads are not using the context when it's destroyed.
82//! for handle in join_handles {
83//! handle.join().unwrap();
84//! }
85//! // Now it's safe to drop the context.
86//! drop(context);
87//! # Ok(())
88//! # }
89//! ```
90//!
91//! If you have multiple devices, each device needs its own context.
92//!
93//! ```
94//! # use cust::device::Device;
95//! # use cust::context::legacy::{Context, ContextStack, ContextFlags, CurrentContext};
96//! # use std::error::Error;
97//! #
98//! # fn main() -> Result<(), Box<dyn Error>> {
99//! # cust::init(cust::CudaFlags::empty())?;
100//! // Create and pop contexts for each device
101//! let mut contexts = vec![];
102//! for device in Device::devices()? {
103//! let device = device?;
104//! let ctx =
105//! Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
106//! ContextStack::pop()?;
107//! contexts.push(ctx);
108//! }
109//! CurrentContext::set_current(&contexts[0])?;
110//!
111//! // Call cust functions which will use the context
112//!
113//! # Ok(())
114//! # }
115//! ```
116
117use crate::context::ContextHandle;
118use crate::device::Device;
119use crate::error::{CudaResult, DropResult, ToResult};
120use crate::private::Sealed;
121use crate::sys::{self as cuda, CUcontext};
122use crate::CudaApiVersion;
123use std::mem;
124use std::mem::transmute;
125use std::ptr;
126
/// This enumeration represents configuration settings for devices which share hardware resources
/// between L1 cache and shared memory.
///
/// Note that this is only a preference - the driver will use the requested configuration if
/// possible, but it is free to choose a different configuration if required to execute functions.
///
/// See
/// [CurrentContext::get_cache_config](struct.CurrentContext.html#method.get_cache_config) and
/// [CurrentContext::set_cache_config](struct.CurrentContext.html#method.set_cache_config) to get
/// and set the cache config for the current context.
// NOTE: the explicit discriminants must stay in sync with the driver's
// `CUfunc_cache` values - `CurrentContext::get_cache_config` writes through a
// `*mut CUfunc_cache` cast of this type, and `set_cache_config` transmutes it.
#[repr(u32)]
#[non_exhaustive]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum CacheConfig {
    /// No preference for shared memory or L1 (default)
    PreferNone = 0,
    /// Prefer larger shared memory and smaller L1 cache
    PreferShared = 1,
    /// Prefer larger L1 cache and smaller shared memory
    PreferL1 = 2,
    /// Prefer equal-sized L1 cache and shared memory
    PreferEqual = 3,
}
150
/// This enumeration represents the limited resources which can be accessed through
/// [CurrentContext::get_resource_limit](struct.CurrentContext.html#method.get_resource_limit) and
/// [CurrentContext::set_resource_limit](struct.CurrentContext.html#method.set_resource_limit).
// NOTE: the explicit discriminants must stay in sync with the driver's
// `CUlimit` values - `CurrentContext::{get,set}_resource_limit` transmute this
// type to `CUlimit` when calling into the driver.
#[repr(u32)]
#[non_exhaustive]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum ResourceLimit {
    /// The size in bytes of each GPU thread stack
    StackSize = 0,
    /// The size in bytes of the FIFO used by the `printf()` device system call.
    PrintfFifoSize = 1,
    /// The size in bytes of the heap used by the `malloc()` and `free()` device system calls.
    ///
    /// Note that this is used for memory allocated within a kernel launch; it is not related to the
    /// device memory allocated by the host.
    MallocHeapSize = 2,
    /// The maximum nesting depth of a grid at which a thread can safely call
    /// `cudaDeviceSynchronize()` to wait on child grid launches to complete.
    DeviceRuntimeSynchronizeDepth = 3,
    /// The maximum number of outstanding device runtime launches that can be made from the current
    /// context.
    DeviceRuntimePendingLaunchCount = 4,
    /// L2 cache fetch granularity
    MaxL2FetchGranularity = 5,
}
176
/// This enumeration represents the options for configuring the shared memory bank size.
///
/// See
/// [CurrentContext::get_shared_memory_config](struct.CurrentContext.html#method.get_shared_memory_config) and
/// [CurrentContext::set_shared_memory_config](struct.CurrentContext.html#method.set_shared_memory_config) to get
/// and set the shared memory config for the current context.
// NOTE: the explicit discriminants must stay in sync with the driver's
// `CUsharedconfig` values - `CurrentContext::get_shared_memory_config` writes
// through a `*mut CUsharedconfig` cast and `set_shared_memory_config`
// transmutes this type.
#[repr(u32)]
#[non_exhaustive]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub enum SharedMemoryConfig {
    /// Set shared-memory bank size to the default.
    DefaultBankSize = 0,
    /// Set shared-memory bank width to four bytes
    FourByteBankSize = 1,
    /// Set shared-memory bank width to eight bytes
    EightByteBankSize = 2,
}
194
bitflags::bitflags! {
    /// Bit flags for initializing the CUDA context.
    ///
    /// If you're not sure which flags to use, `MAP_HOST | SCHED_AUTO` is a good default.
    // NOTE: these bit values mirror the driver's `CU_CTX_*` flags; the raw bits
    // are passed straight to `cuCtxCreate` in `Context::create_and_push`.
    pub struct ContextFlags: u32 {
        /// Instructs CUDA to actively spin when waiting for results from the GPU. This can decrease
        /// latency when waiting for the GPU, but may lower the performance of other CPU threads
        /// if they are performing work in parallel with the CUDA thread.
        const SCHED_SPIN = 0x01;

        /// Instructs CUDA to yield its thread when waiting for results from the GPU. This can
        /// increase latency when waiting for the GPU, but can increase the performance of CPU
        /// threads performing work in parallel with the GPU.
        const SCHED_YIELD = 0x02;

        /// Instructs CUDA to block the CPU thread on a synchronization primitive when waiting for
        /// the GPU to finish work.
        const SCHED_BLOCKING_SYNC = 0x04;

        /// Instructs CUDA to automatically choose whether to yield to other OS threads while waiting
        /// for the GPU, or to spin the OS thread. This is the default.
        const SCHED_AUTO = 0x00;

        /// Instructs CUDA to support mapped pinned allocations. This flag must be set in order to
        /// use page-locked memory (see [LockedBuffer](../memory/struct.LockedBuffer.html)).
        const MAP_HOST = 0x08;

        /// Instruct CUDA not to reduce local memory after resizing local memory for a kernel. This
        /// can prevent thrashing by local memory allocations when launching many kernels with high
        /// local memory usage at the cost of potentially increased memory usage.
        const LMEM_RESIZE_TO_MAX = 0x10;
    }
}
228
/// Owned handle to a CUDA context.
///
/// The context will be destroyed when this goes out of scope. If this is the current context on
/// the current OS thread, the next context on the stack (if any) will be made current. Note that
/// the context will be destroyed even if other threads are still using it. Attempts to access the
/// destroyed context from another thread will return an error.
#[derive(Debug)]
pub struct Context {
    // Raw driver handle. This is set to null by `Context::drop` once ownership
    // of the handle has been taken, which makes the `Drop` impl a no-op.
    inner: CUcontext,
}
impl Context {
    /// Create a CUDA context for the given device.
    ///
    /// The new context is also pushed onto the calling thread's context stack,
    /// making it the current context for this thread.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// cust::init(cust::CudaFlags::empty())?;
    /// let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn create_and_push(flags: ContextFlags, device: Device) -> CudaResult<Context> {
        unsafe {
            // cuCtxCreate creates the context *and* pushes it onto the calling
            // thread's stack in a single operation. The returned handle is
            // owned by the new `Context` and destroyed when it is dropped.
            let mut ctx: CUcontext = ptr::null_mut();
            cuda::cuCtxCreate_v2(&mut ctx as *mut CUcontext, flags.bits(), device.as_raw())
                .to_result()?;
            Ok(Context { inner: ctx })
        }
    }

    /// Get the API version used to create this context.
    ///
    /// This is not necessarily the latest version supported by the driver.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// cust::init(cust::CudaFlags::empty())?;
    /// let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let version = context.get_api_version()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
        unsafe {
            let mut api_version = 0u32;
            cuda::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?;
            Ok(CudaApiVersion {
                version: api_version as i32,
            })
        }
    }

    /// Returns a non-owning handle to this context.
    ///
    /// This is useful for sharing a single context between threads (though see the module-level
    /// documentation for safety details!).
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = context.get_unowned();
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_unowned(&self) -> UnownedContext {
        UnownedContext { inner: self.inner }
    }

    /// Destroy a `Context`, returning an error.
    ///
    /// Destroying a context can return errors from previous asynchronous work. This function
    /// destroys the given context and returns the error and the un-destroyed context on failure.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{Context, ContextFlags};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// match Context::drop(context) {
    ///     Ok(()) => println!("Successfully destroyed"),
    ///     Err((e, ctx)) => {
    ///         println!("Failed to destroy context: {:?}", e);
    ///         // Do something with ctx
    ///     },
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn drop(mut ctx: Context) -> DropResult<Context> {
        // The handle was already taken (e.g. by a previous failed drop);
        // nothing left to destroy.
        if ctx.inner.is_null() {
            return Ok(());
        }

        unsafe {
            // Take the handle out of `ctx` so that, on failure, we can hand
            // back a valid `Context`, and so the `Drop` impl (which checks for
            // null) will not try to destroy the context a second time.
            let inner = mem::replace(&mut ctx.inner, ptr::null_mut());
            match cuda::cuCtxDestroy_v2(inner).to_result() {
                Ok(()) => {
                    // `ctx.inner` is already null so its destructor would be a
                    // no-op; `forget` makes skipping it explicit.
                    mem::forget(ctx);
                    Ok(())
                }
                Err(e) => Err((e, Context { inner })),
            }
        }
    }
}
impl Drop for Context {
    /// Destroys the underlying CUDA context.
    ///
    /// Any error returned by `cuCtxDestroy` is deliberately discarded here,
    /// since panicking in a destructor is undesirable; use [`Context::drop`]
    /// to observe destruction errors instead.
    fn drop(&mut self) {
        // `inner` is null if ownership was already taken by `Context::drop`.
        if self.inner.is_null() {
            return;
        }

        unsafe {
            // Null out the handle first so a double-drop cannot destroy twice.
            let inner = mem::replace(&mut self.inner, ptr::null_mut());
            cuda::cuCtxDestroy_v2(inner);
        }
    }
}
376
// Both the owned and non-owning handle types expose their raw `CUcontext`
// through the sealed `ContextHandle` trait, so stack operations such as
// `ContextStack::push` and `CurrentContext::set_current` can accept either.
impl Sealed for Context {}
impl ContextHandle for Context {
    fn get_inner(&self) -> CUcontext {
        self.inner
    }
}
impl Sealed for UnownedContext {}
impl ContextHandle for UnownedContext {
    fn get_inner(&self) -> CUcontext {
        self.inner
    }
}
389
/// Non-owning handle to a CUDA context.
///
/// Cloning this struct only copies the raw handle; it does not extend the
/// lifetime of the underlying context, which is destroyed when the owning
/// [`Context`] is dropped (see the module-level documentation).
#[derive(Debug, Clone)]
pub struct UnownedContext {
    // Raw driver handle, shared with the owning `Context`; never destroyed here.
    inner: CUcontext,
}
// SAFETY: NOTE(review) - `CUcontext` is an opaque driver handle, and per the
// module docs CUDA allows one context to be made current on multiple CPU
// threads, so sharing/sending the raw handle is assumed sound. Confirm against
// the driver API's thread-safety guarantees if this type grows state.
unsafe impl Send for UnownedContext {}
unsafe impl Sync for UnownedContext {}
397impl UnownedContext {
398 /// Get the API version used to create this context.
399 ///
400 /// This is not necessarily the latest version supported by the driver.
401 ///
402 /// # Example
403 ///
404 /// ```
405 /// # use cust::device::Device;
406 /// # use cust::context::legacy::{Context, ContextFlags};
407 /// # use std::error::Error;
408 /// #
409 /// # fn main () -> Result<(), Box<dyn Error>> {
410 /// # cust::init(cust::CudaFlags::empty())?;
411 /// # let device = Device::get_device(0)?;
412 /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
413 /// let unowned = context.get_unowned();
414 /// let version = unowned.get_api_version()?;
415 /// #
416 /// # Ok(())
417 /// # }
418 /// ```
419 pub fn get_api_version(&self) -> CudaResult<CudaApiVersion> {
420 unsafe {
421 let mut api_version = 0u32;
422 cuda::cuCtxGetApiVersion(self.inner, &mut api_version as *mut u32).to_result()?;
423 Ok(CudaApiVersion {
424 version: api_version as i32,
425 })
426 }
427 }
428}
429
430/// Type used to represent the thread-local context stack.
431#[derive(Debug)]
432pub struct ContextStack;
433impl ContextStack {
434 /// Pop the current context off the stack and return the handle. That context may then be made
435 /// current again (perhaps on a different CPU thread) by calling [push](#method.push).
436 ///
437 /// # Example
438 ///
439 /// ```
440 /// # use cust::device::Device;
441 /// # use cust::context::legacy ::{Context, ContextFlags, ContextStack};
442 /// # use std::error::Error;
443 /// #
444 /// # fn main () -> Result<(), Box<dyn Error>> {
445 /// # cust::init(cust::CudaFlags::empty())?;
446 /// # let device = Device::get_device(0)?;
447 /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
448 /// let unowned = ContextStack::pop()?;
449 /// #
450 /// # Ok(())
451 /// # }
452 /// ```
453 pub fn pop() -> CudaResult<UnownedContext> {
454 unsafe {
455 let mut ctx: CUcontext = ptr::null_mut();
456 cuda::cuCtxPopCurrent_v2(&mut ctx as *mut CUcontext).to_result()?;
457 Ok(UnownedContext { inner: ctx })
458 }
459 }
460
461 /// Push the given context to the top of the stack
462 ///
463 /// # Example
464 ///
465 /// ```
466 /// # use cust::device::Device;
467 /// # use cust::context::legacy::{Context, ContextFlags, ContextStack};
468 /// # use std::error::Error;
469 /// #
470 /// # fn main () -> Result<(), Box<dyn Error>> {
471 /// # cust::init(cust::CudaFlags::empty())?;
472 /// # let device = Device::get_device(0)?;
473 /// # let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
474 /// let unowned = ContextStack::pop()?;
475 /// ContextStack::push(&unowned)?;
476 /// # Ok(())
477 /// # }
478 /// ```
479 pub fn push<C: ContextHandle>(ctx: &C) -> CudaResult<()> {
480 unsafe {
481 cuda::cuCtxPushCurrent_v2(ctx.get_inner()).to_result()?;
482 Ok(())
483 }
484 }
485}
486
/// Struct representing a range of stream priorities.
///
/// By convention, lower numbers imply greater priorities. The range of meaningful stream priorities
/// is given by `[greatest, least]` - that is (numerically), `greatest <= least`.
///
/// Returned by
/// [CurrentContext::get_stream_priority_range](struct.CurrentContext.html#method.get_stream_priority_range).
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct StreamPriorityRange {
    /// The least stream priority
    pub least: i32,
    /// The greatest stream priority
    pub greatest: i32,
}
498
/// Type representing the top context in the thread-local stack.
///
/// This is a unit struct; all of its associated functions operate on whichever
/// context is current for the calling thread rather than on stored state.
#[derive(Debug)]
pub struct CurrentContext;
impl CurrentContext {
    /// Returns the preferred cache configuration for the current context.
    ///
    /// On devices where the L1 cache and shared memory use the same hardware resources, this
    /// function returns the preferred cache configuration for the current context. For devices
    /// where the size of the L1 cache and shared memory are fixed, this will always return
    /// `CacheConfig::PreferNone`.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let cache_config = CurrentContext::get_cache_config()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_cache_config() -> CudaResult<CacheConfig> {
        unsafe {
            // `CacheConfig` is #[repr(u32)] with discriminants matching
            // `CUfunc_cache`, so the driver can write directly through this
            // pointer cast.
            let mut config = CacheConfig::PreferNone;
            cuda::cuCtxGetCacheConfig(&mut config as *mut CacheConfig as *mut cuda::CUfunc_cache)
                .to_result()?;
            Ok(config)
        }
    }

    /// Return the device ID for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let device = CurrentContext::get_device()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_device() -> CudaResult<Device> {
        unsafe {
            let mut device = Device { device: 0 };
            cuda::cuCtxGetDevice(&mut device.device as *mut cuda::CUdevice).to_result()?;
            Ok(device)
        }
    }

    /// Return the context flags for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let flags = CurrentContext::get_flags()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_flags() -> CudaResult<ContextFlags> {
        unsafe {
            let mut flags = 0u32;
            cuda::cuCtxGetFlags(&mut flags as *mut u32).to_result()?;
            // `from_bits_truncate` silently drops any driver-reported bits
            // that this crate does not model.
            Ok(ContextFlags::from_bits_truncate(flags))
        }
    }

    /// Return the requested resource limit for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let stack_size = CurrentContext::get_resource_limit(ResourceLimit::StackSize)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_resource_limit(resource: ResourceLimit) -> CudaResult<usize> {
        unsafe {
            let mut limit: usize = 0;
            // `ResourceLimit` is #[repr(u32)] with discriminants chosen to
            // match the driver's `CUlimit` values, making this transmute sound.
            cuda::cuCtxGetLimit(&mut limit as *mut usize, transmute(resource)).to_result()?;
            Ok(limit)
        }
    }

    /// Return the shared memory configuration for the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let shared_mem_config = CurrentContext::get_shared_memory_config()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_shared_memory_config() -> CudaResult<SharedMemoryConfig> {
        unsafe {
            // `SharedMemoryConfig` is #[repr(u32)] with discriminants matching
            // `CUsharedconfig`, so the driver can write directly through this
            // pointer cast.
            let mut cfg = SharedMemoryConfig::DefaultBankSize;
            cuda::cuCtxGetSharedMemConfig(
                &mut cfg as *mut SharedMemoryConfig as *mut cuda::CUsharedconfig,
            )
            .to_result()?;
            Ok(cfg)
        }
    }

    /// Return the least and greatest stream priorities.
    ///
    /// If the program attempts to create a stream with a priority outside of this range, it will be
    /// automatically clamped to within the valid range. If the device does not support stream
    /// priorities, the returned range will contain zeroes.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext};
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let priority_range = CurrentContext::get_stream_priority_range()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_stream_priority_range() -> CudaResult<StreamPriorityRange> {
        unsafe {
            let mut range = StreamPriorityRange {
                least: 0,
                greatest: 0,
            };
            cuda::cuCtxGetStreamPriorityRange(
                &mut range.least as *mut i32,
                &mut range.greatest as *mut i32,
            )
            .to_result()?;
            Ok(range)
        }
    }

    /// Sets the preferred cache configuration for the current context.
    ///
    /// On devices where L1 cache and shared memory use the same hardware resources, this sets the
    /// preferred cache configuration for the current context. This is only a preference. The
    /// driver will use the requested configuration if possible, but is free to choose a different
    /// configuration if required to execute the function.
    ///
    /// This setting does nothing on devices where the size of the L1 cache and shared memory are
    /// fixed.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, CacheConfig };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_cache_config(CacheConfig::PreferL1)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_cache_config(cfg: CacheConfig) -> CudaResult<()> {
        // `CacheConfig` discriminants match `CUfunc_cache` (see the enum).
        unsafe { cuda::cuCtxSetCacheConfig(transmute(cfg)).to_result() }
    }

    /// Sets a requested resource limit for the current context.
    ///
    /// Note that this is only a request; the driver is free to modify the requested value to meet
    /// hardware requirements. Each limit has some specific restrictions.
    ///
    /// * `StackSize`: Controls the stack size in bytes for each GPU thread
    /// * `PrintfFifoSize`: Controls the size in bytes of the FIFO used by the `printf()` device
    ///   system call. This cannot be changed after a kernel has been launched which uses the
    ///   `printf()` function.
    /// * `MallocHeapSize`: Controls the size in bytes of the heap used by the `malloc()` and `free()`
    ///   device system calls. This cannot be changed after a kernel has been launched which uses the
    ///   `malloc()` and `free()` system calls.
    /// * `DeviceRuntimeSynchronizeDepth`: Controls the maximum nesting depth of a grid at which a
    ///   thread can safely call `cudaDeviceSynchronize()`. This cannot be changed after a kernel has
    ///   been launched which uses the device runtime. When setting this limit, keep in mind that
    ///   additional levels of sync depth require the driver to reserve large amounts of device
    ///   memory which can no longer be used for device allocations.
    /// * `DeviceRuntimePendingLaunchCount`: Controls the maximum number of outstanding device
    ///   runtime launches that can be made from the current context. A grid is outstanding from
    ///   the point of the launch up until the grid is known to have completed. Keep in mind that
    ///   increasing this limit will require the driver to reserve larger amounts of device memory
    ///   which can no longer be used for device allocations.
    /// * `MaxL2FetchGranularity`: Controls the L2 fetch granularity. This is purely a performance
    ///   hint and it can be ignored or clamped depending on the platform.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, ResourceLimit };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_resource_limit(ResourceLimit::StackSize, 2048)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_resource_limit(resource: ResourceLimit, limit: usize) -> CudaResult<()> {
        unsafe {
            // `ResourceLimit` discriminants match `CUlimit` (see the enum).
            cuda::cuCtxSetLimit(transmute(resource), limit).to_result()?;
            Ok(())
        }
    }

    /// Sets the preferred shared memory configuration for the current context.
    ///
    /// On devices with configurable shared memory banks, this function will set the context's
    /// shared memory bank size which is used for subsequent kernel launches.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext, SharedMemoryConfig };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_shared_memory_config(SharedMemoryConfig::DefaultBankSize)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_shared_memory_config(cfg: SharedMemoryConfig) -> CudaResult<()> {
        // `SharedMemoryConfig` discriminants match `CUsharedconfig` (see the enum).
        unsafe { cuda::cuCtxSetSharedMemConfig(transmute(cfg)).to_result() }
    }

    /// Returns a non-owning handle to the current context.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// let unowned = CurrentContext::get_current()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_current() -> CudaResult<UnownedContext> {
        unsafe {
            let mut ctx: CUcontext = ptr::null_mut();
            cuda::cuCtxGetCurrent(&mut ctx as *mut CUcontext).to_result()?;
            Ok(UnownedContext { inner: ctx })
        }
    }

    /// Set the given context as the current context for this thread.
    ///
    /// If there is no context set for this thread, this pushes the given context onto the stack.
    /// If there is a context set for this thread, this replaces the top context on the stack with
    /// the given context.
    ///
    /// # Example
    ///
    /// ```
    /// # use cust::device::Device;
    /// # use cust::context::legacy::{ Context, ContextFlags, CurrentContext };
    /// # use std::error::Error;
    /// #
    /// # fn main () -> Result<(), Box<dyn Error>> {
    /// # cust::init(cust::CudaFlags::empty())?;
    /// # let device = Device::get_device(0)?;
    /// let context = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
    /// CurrentContext::set_current(&context)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn set_current<C: ContextHandle>(c: &C) -> CudaResult<()> {
        unsafe {
            cuda::cuCtxSetCurrent(c.get_inner()).to_result()?;
            Ok(())
        }
    }

    /// Block to wait for a context's tasks to complete.
    pub fn synchronize() -> CudaResult<()> {
        unsafe {
            cuda::cuCtxSynchronize().to_result()?;
            Ok(())
        }
    }
}