Skip to main content

iree_embedded/
lib.rs

1//! A safe, `no_std` Rust API for machine-learning inference on Cortex-M
2//! microcontrollers, built on [IREE](https://iree.dev)'s bare-metal C runtime.
3//!
4//! The runtime half of IREE (loading a compiled model and invoking it) is
5//! wrapped in six RAII types ([`Arena`], [`Instance`], [`Device`], [`Context`],
6//! [`Tensor`], [`Error`]) so leaks and double-frees are compile-time
7//! impossibilities and every fallible call returns a [`Result`] carrying the
8//! real IREE status message. See the repository for a complete firmware
9//! example and the model-compilation workflow.
10#![cfg_attr(not(test), no_std)]
11#![deny(missing_docs)]
12// `Error` inlines a 192-byte IREE status message buffer, so `Result` Err
13// variants are large. Deliberate: this is `no_std` with no global allocator,
14// and the most important error to report is allocator exhaustion, so the
15// message must not itself allocate. The fallible calls here are millisecond
16// FFI operations; a ~200-byte move on the error path is noise.
17#![allow(clippy::result_large_err)]
18
/// Embed a compiled `.vmfb` as a 64-byte-aligned `&'static [u8]`.
///
/// IREE's FlatBuffer verifier requires the module header to be aligned, and,
/// critically, the rodata segments (model weights) inside are only used
/// *in place* when they meet HAL buffer alignment (64 bytes). An underaligned
/// module silently falls back to staging copies through the device queue,
/// which costs RAM and deadlocks the bare-metal single-threaded HAL. A plain
/// `include_bytes!` (1-byte aligned) guarantees neither; use this for any
/// embedded model.
///
/// `$path` is forwarded unchanged to `include_bytes!`, so it follows that
/// macro's path rules. The expansion names `::core::include_bytes!`
/// explicitly, like every other path in this crate's macros, so it does not
/// depend on which macros happen to be in scope at the call site.
#[macro_export]
macro_rules! include_vmfb {
    ($path:expr) => {{
        // `align(64)` raises the alignment of the whole wrapper and `repr(C)`
        // places its only field at offset 0, so the first payload byte sits
        // on the 64-byte boundary.
        #[repr(C, align(64))]
        struct Aligned<T: ?Sized>(T);
        // `include_bytes!` yields `&[u8; N]`; dereferencing it moves the array
        // into the aligned wrapper, and the reference then unsizes to a slice.
        static ALIGNED: &Aligned<[u8]> = &Aligned(*::core::include_bytes!($path));
        &ALIGNED.0
    }};
}
37
/// Hand out a unique `&'static mut` to a static, at most once per call site.
///
/// The initialiser must be a const expression: the value is a real `static`,
/// living in `.bss`/`.data` like any other, so a multi-kilobyte arena costs
/// no stack to create (passing such a buffer *by value* through an
/// initialiser, as cell-based abstractions do, can overflow a small MCU
/// stack before the move is elided). A second take of the same call site
/// panics rather than aliasing the `&mut`.
///
/// # Panics
///
/// Panics on a second take of the same call site, and in any concurrent race
/// all but one taker panics.
///
/// # Target requirements
///
/// The guard uses an atomic swap, available on Cortex-M3 and above
/// (thumbv7/thumbv8); it is not available on thumbv6m (Cortex-M0/M0+).
///
/// ```
/// let heap: &'static mut [u8; 1024] = iree_embedded::singleton!([u8; 1024] = [0; 1024]);
/// heap[0] = 1;
/// ```
#[macro_export]
macro_rules! singleton {
    ($t:ty = $init:expr) => {{
        // Both statics are items local to this expansion, so every call site
        // gets its own guard flag and its own storage.
        static TAKEN: ::core::sync::atomic::AtomicBool =
            ::core::sync::atomic::AtomicBool::new(false);
        static mut SLOT: $t = $init;
        // `assert!` is named through `::core` like every other path in this
        // expansion, so the guard does not depend on the caller's macro scope.
        ::core::assert!(
            // The swap is an atomic read-modify-write, so exactly one caller
            // can ever observe `false`; all concurrent racers observe `true`
            // and hit the assert. `AcqRel` is deliberately conservative; the
            // uniqueness argument needs only the RMW's atomicity.
            !TAKEN.swap(true, ::core::sync::atomic::Ordering::AcqRel),
            "iree_embedded::singleton! taken more than once"
        );
        // SAFETY: the TAKEN swap lets this expression complete at most once,
        // so the returned &mut is the only reference to SLOT for the life of
        // the program.
        unsafe { &mut *::core::ptr::addr_of_mut!(SLOT) }
    }};
}
81
/// Name the query entry point of a statically linked IREE executable library
/// and produce it as a [`LibraryQueryFn`].
///
/// Running `iree-compile --iree-hal-target-backends=llvm-cpu` with
/// static-library output yields an object file together with a header that
/// names the library's query function (for example
/// `my_model_linked_library_query`). Link that object into the firmware, pass
/// the symbol to this macro, and hand the result to
/// [`Device::local_sync_static`](crate::Device::local_sync_static).
///
/// # Contract
///
/// `$sym` must be the query function of an IREE static library, the
/// `*_library_query` symbol that `iree-compile` writes into the generated
/// header next to the object file. This macro only declares the symbol; it
/// has no way to check it. Naming anything else misdeclares that symbol's
/// ABI, and invoking the device on it is undefined behaviour.
///
/// ```ignore
/// let device = Device::local_sync_static(
///     &arena,
///     &[iree_embedded::link_kernels!(my_model_linked_library_query)],
/// )?;
/// ```
#[macro_export]
macro_rules! link_kernels {
    ($sym:ident) => {{
        // Block-local import: keeps the declaration below short without
        // adding anything to the caller's namespace.
        use ::core::ffi::c_void;
        // Foreign declaration of the caller-named symbol, scoped to this
        // block expression.
        unsafe extern "C" {
            fn $sym(max_version: u32, environment: *const c_void) -> *const c_void;
        }
        // Turn the declared function item into the crate's pointer type.
        $sym as $crate::LibraryQueryFn
    }};
}
117
/// Supply the libc stubs that a bare-metal newlib link expects, matched to
/// this crate's allocation model.
///
/// The expansion defines `_sbrk` so that it always reports failure: the IREE
/// runtime allocates exclusively from the [`Arena`], so the libc heap must
/// never grow. Invoke it once, at module scope, in the firmware binary:
///
/// ```ignore
/// iree_embedded::libc_stubs!();
/// ```
#[macro_export]
macro_rules! libc_stubs {
    () => {
        /// libc heap stub: always fails, because the IREE runtime allocates
        /// from the arena and the libc heap can never grow.
        #[unsafe(no_mangle)]
        pub extern "C" fn _sbrk(_incr: isize) -> *mut ::core::ffi::c_void {
            // All-ones address, i.e. `(void *)-1`, whatever increment was
            // requested.
            usize::MAX as *mut ::core::ffi::c_void
        }
    };
}
139
140mod arena;
141mod context;
142mod device;
143mod instance;
144mod status;
145mod tensor;
146
147pub use arena::{Arena, LAST_ALLOC_FAIL_SIZE};
148pub use context::{Context, Function};
149pub use device::{Device, LibraryQueryFn};
150pub use instance::Instance;
151pub(crate) use status::check;
152pub use status::{Error, Result, StatusCode};
153pub use tensor::Tensor;