kn_runtime/lib.rs
#![warn(missing_debug_implementations)]

//! A wrapper crate around `kn-graph` and `kn-cuda-eval` that allows selecting whether to use a CPU or GPU at runtime
//! through the [Device] type.
//!
//! By default this crate only includes the [Device::Cpu] device and only depends on `kn-graph`.
//! To enable the [Device::Cuda] device, enable the `cuda` cargo feature.
//! This adds a dependency on `kn-cuda-eval` and the cuda libraries.
//!
//! This crate is part of the [Kyanite](https://github.com/KarelPeeters/Kyanite) project; see its readme for more information.
//! See [system-requirements](https://github.com/KarelPeeters/Kyanite#system-requirements) for how to set up the cuda libraries.
//!
//! # Quick demo
//!
//! ```no_run
//! # use kn_cuda_sys::wrapper::handle::CudaDevice;
//! # use kn_graph::dtype::{DTensor, Tensor};
//! # use kn_graph::onnx::load_graph_from_onnx_path;
//! # use kn_graph::optimizer::optimize_graph;
//! # use kn_runtime::Device;
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // load and optimize a graph
//! let graph = load_graph_from_onnx_path("test.onnx", false)?;
//! let graph = optimize_graph(&graph, Default::default());
//!
//! // select a device, at runtime
//! let device_str = "cpu";
//! let device = match device_str {
//!     "auto" => Device::best(),
//!     "cpu" => Device::Cpu,
//!     "cuda" => Device::Cuda(CudaDevice::all().next().unwrap()),
//!     _ => panic!("unknown device"),
//! };
//!
//! // prepare the graph for execution
//! let batch_size = 8;
//! let mut prepared = device.prepare(graph, batch_size);
//!
//! // evaluate the graph with some inputs, get the outputs back
//! let inputs = [DTensor::F32(Tensor::zeros(vec![batch_size, 16]))];
//! let outputs: Vec<DTensor> = prepared.eval(&inputs);
//! # Ok(())
//! # }
//! ```

#[cfg(feature = "cuda")]
pub use kn_cuda_eval::executor::CudaExecutor;
#[cfg(feature = "cuda")]
pub use kn_cuda_sys::wrapper::handle::CudaDevice;
use kn_graph::cpu::cpu_eval_graph;
use kn_graph::dtype::DTensor;
use kn_graph::graph::Graph;

/// Whether the crate was compiled with cuda support.
///
/// This is independent of whether the current system actually has a cuda device available.
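///
/// For example, a caller could use this to decide whether to offer a "cuda" option at all
/// (a minimal usage sketch):
///
/// ```
/// let devices = if kn_runtime::compiled_with_cuda_support() {
///     vec!["cpu", "cuda"]
/// } else {
///     vec!["cpu"]
/// };
/// assert!(devices.contains(&"cpu"));
/// ```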
pub fn compiled_with_cuda_support() -> bool {
    #[cfg(feature = "cuda")]
    return true;
    #[cfg(not(feature = "cuda"))]
    return false;
}

/// A device that can be used to evaluate a graph.
#[derive(Debug)]
pub enum Device {
    /// Evaluate graphs on the CPU, via `kn-graph`.
    Cpu,
    /// Evaluate graphs on the given cuda device, via `kn-cuda-eval`.
    /// Only available with the `cuda` cargo feature.
    #[cfg(feature = "cuda")]
    Cuda(CudaDevice),
}

/// A graph that has been prepared for evaluation on a device.
///
/// * For a CPU this is just the graph itself and a hardcoded batch size.
/// * For a GPU this is a fully planned execution plan with all necessary memory already allocated.
#[derive(Debug)]
pub enum PreparedGraph {
    Cpu { graph: Graph, batch_size: usize },
    #[cfg(feature = "cuda")]
    Cuda { executor: CudaExecutor },
}

impl Device {
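    /// Prepare the given graph for evaluation on this device, with a fixed batch size.
    ///
    /// For [Device::Cpu] this just stores the graph; for [Device::Cuda] it builds the full
    /// execution plan and allocates the required device memory.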
    pub fn prepare(&self, graph: Graph, batch_size: usize) -> PreparedGraph {
        match *self {
            Device::Cpu => PreparedGraph::Cpu { graph, batch_size },
            #[cfg(feature = "cuda")]
            Device::Cuda(device) => PreparedGraph::Cuda {
                executor: CudaExecutor::new(device, &graph, batch_size),
            },
        }
    }

    /// Returns the best available device.
    ///
    /// For now the algorithm used is very simple:
    /// * pick the first cuda device if any are available
    /// * otherwise use the CPU
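    ///
    /// For example (a minimal sketch):
    ///
    /// ```
    /// # use kn_runtime::Device;
    /// // falls back to Device::Cpu if no cuda device is found, so this never fails
    /// let device = Device::best();
    /// println!("evaluating on {:?}", device);
    /// ```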
    pub fn best() -> Device {
        if let Some(device) = Device::first_cuda() {
            return device;
        }

        Device::Cpu
    }

    /// Returns the first available cuda device if any.
    pub fn first_cuda() -> Option<Device> {
        #[cfg(feature = "cuda")]
        if let Some(device) = CudaDevice::all().next() {
            return Some(Device::Cuda(device));
        }

        None
    }
}

impl PreparedGraph {
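    /// Evaluate the graph with the given inputs, returning the outputs.
    ///
    /// The inputs must match the graph, with the batch size that was passed to
    /// [Device::prepare].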
    pub fn eval(&mut self, inputs: &[DTensor]) -> Vec<DTensor> {
        match self {
            PreparedGraph::Cpu { graph, batch_size } => cpu_eval_graph(graph, *batch_size, inputs),
            #[cfg(feature = "cuda")]
            PreparedGraph::Cuda { executor } => executor.evaluate(inputs).to_owned(),
        }
    }
}
127}