1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
#![warn(missing_debug_implementations)]
//! A CUDA GPU executor for neural network graphs from the `kn_graph` crate. The core type is [CudaExecutor](executor::CudaExecutor).
//!
//! This crate is part of the [Kyanite](https://github.com/KarelPeeters/Kyanite) project, see its readme for more information.
//! See [system-requirements](https://github.com/KarelPeeters/Kyanite#system-requirements) for instructions on setting up the CUDA libraries.
//!
//! # Quick demo
//!
//! ```no_run
//! # use kn_cuda_eval::executor::CudaExecutor;
//! # use kn_cuda_sys::wrapper::handle::Device;
//! # use kn_graph::dtype::{DTensor, Tensor};
//! # use kn_graph::ndarray::{Array, IxDyn};
//! # use kn_graph::onnx::load_graph_from_onnx_path;
//! # use kn_graph::optimizer::optimize_graph;
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // load and optimize the graph
//! let graph = load_graph_from_onnx_path("test.onnx", false)?;
//! let graph = optimize_graph(&graph, Default::default());
//!
//! // select a device
//! let device = Device::new(0);
//!
//! // build an executor
//! let batch_size = 8;
//! let mut executor = CudaExecutor::new(device, &graph, batch_size);
//!
//! // evaluate the graph with some inputs, get the outputs back
//! let inputs = [DTensor::F32(Tensor::zeros(vec![batch_size, 16]))];
//! let outputs: &[DTensor] = executor.evaluate(&inputs);
//! # Ok(())
//! # }
//! ```
/// Re-export of the [`Device`] type for convenience: often an explicit dependency on the `kn_cuda_sys` crate is not needed.
pub use kn_cuda_sys::wrapper::handle::Device;
/// The autokernel infrastructure and specific kernels.
pub mod autokernel;
/// On-device tensor data structure.
pub mod device_tensor;
/// The main executor type and the compiler for it.
pub mod executor;
/// A utility to automatically choose between CPU and GPU evaluation.
pub mod runtime;
/// Shape utilities.
pub mod shape;
/// Testing and debugging infrastructure.
pub mod tester;
/// Miscellaneous utilities.
pub mod util;
// NOTE(review): the summary below is vague; expand it once the module's contents are confirmed.
/// Tensor utility.
pub mod offset_tensor;
// Private implementation modules: declared without `pub`, so they are not part of the crate's public API.
mod planner;
mod step;