
#![warn(missing_debug_implementations)]

//! A wrapper crate around `kn-graph` and `kn-cuda-eval` that allows selecting whether to use a CPU or GPU at runtime
//! through the [Device] type.
//!
//! By default this crate only includes the [Device::Cpu] device, and only depends on `kn-graph`.
//! To enable the [Device::Cuda] device, enable the `cuda` cargo feature.
//! This adds a dependency on `kn-cuda-eval` and the cuda libraries.
//!
//! This crate is part of the [Kyanite](https://github.com/KarelPeeters/Kyanite) project; see its readme for more information.
//! See [system-requirements](https://github.com/KarelPeeters/Kyanite#system-requirements) for how to set up the cuda libraries.
//!
//! # Quick demo
//!
//! ```no_run
//! # use kn_cuda_sys::wrapper::handle::CudaDevice;
//! # use kn_graph::dtype::{DTensor, Tensor};
//! # use kn_graph::onnx::load_graph_from_onnx_path;
//! # use kn_graph::optimizer::optimize_graph;
//! # use kn_runtime::Device;
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // load and optimize a graph
//! let graph = load_graph_from_onnx_path("test.onnx", false)?;
//! let graph = optimize_graph(&graph, Default::default());
//!
//! // select a device, at runtime
//! let device_str = "cpu";
//! let device = match device_str {
//!     "auto" => Device::best(),
//!     "cpu" => Device::Cpu,
//!     "cuda" => Device::Cuda(CudaDevice::all().next().unwrap()),
//!     _ => panic!("unknown device"),
//! };
//!
//! // prepare the graph for execution
//! let batch_size = 8;
//! let mut prepared = device.prepare(graph, batch_size);
//!
//! // evaluate the graph with some inputs, get the outputs back
//! let inputs = [DTensor::F32(Tensor::zeros(vec![batch_size, 16]))];
//! let outputs: Vec<DTensor> = prepared.eval(&inputs);
//! # Ok(())
//! # }
//! ```

#[cfg(feature = "cuda")]
pub use kn_cuda_eval::executor::CudaExecutor;
#[cfg(feature = "cuda")]
pub use kn_cuda_sys::wrapper::handle::CudaDevice;
use kn_graph::cpu::cpu_eval_graph;
use kn_graph::dtype::DTensor;
use kn_graph::graph::Graph;

/// Whether the crate was compiled with cuda support.
///
/// This is independent of whether the current system actually has a cuda device available.
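///
/// # Example
///
/// A minimal sketch of explicit feature-based selection; note that [Device::best] already
/// performs an equivalent fallback on its own:
///
/// ```no_run
/// use kn_runtime::{compiled_with_cuda_support, Device};
///
/// // fall back to the CPU if this build has no cuda support at all
/// let device = if compiled_with_cuda_support() {
///     Device::best()
/// } else {
///     Device::Cpu
/// };
/// ```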
pub fn compiled_with_cuda_support() -> bool {
    #[cfg(feature = "cuda")]
    return true;
    #[cfg(not(feature = "cuda"))]
    return false;
}

/// A device that can be used to evaluate a graph.
#[derive(Debug)]
pub enum Device {
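    /// Evaluate graphs on the CPU, via `kn_graph`'s CPU evaluator.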
    Cpu,
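    /// Evaluate graphs on the given cuda device, via `kn-cuda-eval`.
    /// Only available if the `cuda` cargo feature is enabled.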
    #[cfg(feature = "cuda")]
    Cuda(CudaDevice),
}

/// A graph that has been prepared for evaluation on a device.
/// * For a CPU this is just the graph itself and a fixed batch size.
/// * For a GPU this is a fully planned execution with all device memory already allocated.
#[derive(Debug)]
pub enum PreparedGraph {
    Cpu { graph: Graph, batch_size: usize },
    #[cfg(feature = "cuda")]
    Cuda { executor: CudaExecutor },
}

impl Device {
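    /// Prepare the given graph for evaluation on this device, with a fixed batch size.
    ///
    /// A minimal sketch; the onnx path here is a placeholder:
    ///
    /// ```no_run
    /// # use kn_graph::onnx::load_graph_from_onnx_path;
    /// # use kn_runtime::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let graph = load_graph_from_onnx_path("model.onnx", false)?;
    /// let prepared = Device::Cpu.prepare(graph, 8);
    /// # Ok(())
    /// # }
    /// ```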
    pub fn prepare(&self, graph: Graph, batch_size: usize) -> PreparedGraph {
        match *self {
            Device::Cpu => PreparedGraph::Cpu { graph, batch_size },
            #[cfg(feature = "cuda")]
            Device::Cuda(device) => PreparedGraph::Cuda {
                executor: CudaExecutor::new(device, &graph, batch_size),
            },
        }
    }

    /// Returns the best available device.
    ///
    /// For now the algorithm used is very simple:
    /// * pick the first cuda device if any are available
    /// * otherwise use the CPU
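    ///
    /// For example, picking a device without any manual configuration:
    ///
    /// ```no_run
    /// # use kn_runtime::Device;
    /// let device = Device::best();
    /// ```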
    pub fn best() -> Device {
        if let Some(device) = Device::first_cuda() {
            return device;
        }

        Device::Cpu
    }

    /// Returns the first available cuda device if any.
    pub fn first_cuda() -> Option<Device> {
        #[cfg(feature = "cuda")]
        if let Some(device) = CudaDevice::all().next() {
            return Some(Device::Cuda(device));
        }

        None
    }
}

impl PreparedGraph {
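    /// Evaluate the prepared graph on the given inputs, returning the outputs.
    ///
    /// A minimal sketch, assuming a graph with a single f32 input of shape `[batch_size, 16]`
    /// like the one in the crate-level demo; the onnx path is a placeholder:
    ///
    /// ```no_run
    /// # use kn_graph::dtype::{DTensor, Tensor};
    /// # use kn_graph::onnx::load_graph_from_onnx_path;
    /// # use kn_runtime::Device;
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let graph = load_graph_from_onnx_path("model.onnx", false)?;
    /// let mut prepared = Device::Cpu.prepare(graph, 8);
    /// let outputs: Vec<DTensor> = prepared.eval(&[DTensor::F32(Tensor::zeros(vec![8, 16]))]);
    /// # Ok(())
    /// # }
    /// ```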
    pub fn eval(&mut self, inputs: &[DTensor]) -> Vec<DTensor> {
        match self {
            PreparedGraph::Cpu { graph, batch_size } => cpu_eval_graph(graph, *batch_size, inputs),
            #[cfg(feature = "cuda")]
            PreparedGraph::Cuda { executor } => executor.evaluate(inputs).to_owned(),
        }
    }
}