Skip to main content

rknn_runtime/
lib.rs

1//! Rust bindings for RKNN NPU inference on Rockchip SoCs.
2//!
3//! This crate allows you to load a `*.rknn` model, run it on the NPU, and read back
4//! the results. It wraps the C library `librknnmrt.so` and handles all the
5//! low-level details: zero-copy memory allocation, NPU-to-CPU cache sync,
6//! and the unusual NC1HWC2 tensor layout.
7//!
8//! # Usage
9//!
10//! ```rust,no_run
11//! use rknn_runtime::RknnModel;
12//!
13//! // Load a model file
14//! let model = RknnModel::load("model.rknn").unwrap();
15//!
16//! // Prepare input: raw RGB bytes in NHWC layout, no normalization needed.
17//! // The byte length must match the model's expected input size.
18//! # let rgb_data: Vec<u8> = vec![0u8; 320 * 320 * 3];
19//!
20//! // Run inference on the NPU
21//! model.run(&rgb_data).unwrap();
22//!
23//! // Read output as raw INT8 (zero-copy, no allocation)
24//! let raw: &[i8] = model.output_raw(0).unwrap();
25//!
26//! // ...or as dequantized f32 (allocates a new Vec)
27//! let floats: Vec<f32> = model.output_f32(0).unwrap();
28//! ```
29//!
30//! # NC1HWC2 output layout
31//!
32//! RKNN models (especially on RV1106) often output tensors in NC1HWC2 format
33//! instead of standard NCHW. Channels are packed into blocks of `c2`
34//! (typically 16). Use [`nc1hwc2_to_flat`] to convert this into a normal
35//! flat array before parsing:
36//!
37//! ```rust,ignore
38//! let flat = nc1hwc2_to_flat(raw, c1, h, w, c2, total_channels);
39//! let data = dequantize_affine(&flat, output.zp, output.scale);
40//! // Index a value as: data[channel * num_predictions + prediction_index]
41//! ```
42//!
43//! # Linking modes
44//!
45//! - **`dynamic`** (default) - loads `librknnmrt.so` at runtime via
46//!   [`libloading`](https://crates.io/crates/libloading). You can compile
47//!   on x86 without having the RKNN library installed.
48//! - **`static-link`** - links at compile time. Requires `librknnmrt.so`
49//!   to be present on the build machine.
50
51pub mod ffi;
52pub mod error;
53mod context;
54mod memory;
55pub mod inference;
56pub mod tensor;
57
58pub use error::Error;
59pub use inference::RknnModel;
60pub use tensor::{
61    TensorAttr, TensorFormat, TensorType, QuantType,
62    Nc1hwc2Layout,
63    dequantize_affine, nc1hwc2_to_flat,
64};