pub struct Ptx(_);
Expand description
An opaque structure representing a compiled PTX program output from compile_ptx() or compile_ptx_with_opts().
A Ptx can also be created with Ptx::from_file or Ptx::from_src.
Implementations§
source§impl Ptx
impl Ptx
sourcepub fn from_file<P: Into<PathBuf>>(path: P) -> Self
pub fn from_file<P: Into<PathBuf>>(path: P) -> Self
Creates a Ptx from a pre-compiled .ptx file.
Examples found in repository?
examples/03-launch-kernel.rs (line 10)
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
// Example: load a kernel from a pre-compiled PTX file, launch it over a
// three-element f32 array, and print the result next to the host-side `sin`.
// Any `DriverError` surfaced by a `?` below is returned from `main`.
fn main() -> Result<(), DriverError> {
// NOTE(review): `0` is presumably the device ordinal (first GPU) — confirm against `CudaDevice::new`.
let dev = CudaDevice::new(0)?;
// You can load a function from a pre-compiled PTX like so:
dev.load_ptx(Ptx::from_file("./examples/sin.ptx"), "sin", &["sin_kernel"])?;
// and then retrieve the function with `get_func`
// (same "sin" / "sin_kernel" names that were passed to `load_ptx` above).
let f = dev.get_func("sin", "sin_kernel").unwrap();
// Host input; the `f32::sin` mapping below fixes the element type to f32.
let a_host = [1.0, 2.0, 3.0];
// NOTE(review): `htod_copy` presumably copies the host data to the device — confirm.
let a_dev = dev.htod_copy(a_host.into())?;
// Second buffer, passed mutably to the kernel; presumably receives the output.
let mut b_dev = a_dev.clone();
// Element count; must agree with the length of `a_host`.
let n = 3;
let cfg = LaunchConfig::for_num_elems(n);
// NOTE(review): the launch is `unsafe`; the argument tuple presumably has to match
// the parameter list of `sin_kernel` in sin.ptx — confirm against the kernel source.
unsafe { f.launch(cfg, (&mut b_dev, &a_dev, n as i32)) }?;
// NOTE(review): `sync_reclaim` presumably synchronizes and hands the data back to the host — confirm.
let a_host_2 = dev.sync_reclaim(a_dev)?;
let b_host = dev.sync_reclaim(b_dev)?;
println!("Found {:?}", b_host);
println!("Expected {:?}", a_host.map(f32::sin));
// The input buffer should come back unchanged from its round trip.
assert_eq!(&a_host, a_host_2.as_slice());
Ok(())
}
More examples
examples/04-streams.rs (line 8)
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
// Example: the same `sin_kernel` launch as the basic example, but issued on a
// stream forked from the default stream and joined back before reading results.
// Any `DriverError` surfaced by a `?` below is returned from `main`.
fn main() -> Result<(), DriverError> {
// NOTE(review): `0` is presumably the device ordinal (first GPU) — confirm against `CudaDevice::new`.
let dev = CudaDevice::new(0)?;
// Load the pre-compiled PTX under the module name "sin", exposing "sin_kernel".
dev.load_ptx(Ptx::from_file("./examples/sin.ptx"), "sin", &["sin_kernel"])?;
// Element count; must agree with the length of `a_host`.
let n = 3;
let cfg = LaunchConfig::for_num_elems(n);
// Host input; the `f32::sin` mapping below fixes the element type to f32.
let a_host = [1.0, 2.0, 3.0];
let a_dev = dev.htod_copy(a_host.into())?;
// Second buffer, passed mutably to the kernel; presumably receives the output.
let mut b_dev = a_dev.clone();
// create a stream with `fork_default_stream()`
// This synchronizes with the default stream, so since
// we put this call **after** the `htod_copy` & `clone` above,
// cuda will complete those orders **before** work on this stream
// can start.
let stream = dev.fork_default_stream()?;
let f = dev.get_func("sin", "sin_kernel").unwrap();
// we launch it differently too
// NOTE(review): the launch is `unsafe`; the argument tuple presumably has to match
// the parameter list of `sin_kernel` in sin.ptx — confirm against the kernel source.
unsafe { f.launch_on_stream(&stream, cfg, (&mut b_dev, &a_dev, n as i32)) }?;
// and we must join with the default work stream in order for copies
// to work correctly.
// NOTE: this is actually async with respect to the host!
dev.wait_for(&stream)?;
let a_host_2 = dev.sync_reclaim(a_dev)?;
let b_host = dev.sync_reclaim(b_dev)?;
println!("Found {:?}", b_host);
println!("Expected {:?}", a_host.map(f32::sin));
// The input buffer should come back unchanged from its round trip.
assert_eq!(&a_host, a_host_2.as_slice());
Ok(())
}
Trait Implementations§
Auto Trait Implementations§
impl RefUnwindSafe for Ptx
impl Send for Ptx
impl Sync for Ptx
impl Unpin for Ptx
impl UnwindSafe for Ptx
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more