// ruvllm_esp32/lib.rs

1//! RuvLLM ESP32 - Tiny LLM Inference for Microcontrollers
2//!
3//! This crate provides a minimal inference engine designed for ESP32 and similar
4//! resource-constrained microcontrollers.
5//!
6//! # Constraints
7//! - ~520KB SRAM available
8//! - 4-16MB flash for model storage
9//! - No floating-point unit on base ESP32 (ESP32-S3 has one)
10//! - Single/dual core @ 240MHz
11//!
12//! # Features
13//! - INT8 quantized inference
14//! - Fixed-point arithmetic option
15//! - Tiny transformer blocks
16//! - Memory-mapped model loading
17//! - Optional ESP32-S3 SIMD acceleration
18
19#![cfg_attr(feature = "no_std", no_std)]
20
21#[cfg(feature = "no_std")]
22extern crate alloc;
23
24#[cfg(feature = "no_std")]
25use alloc::{vec, vec::Vec};
26
27pub mod micro_inference;
28pub mod quantized;
29pub mod model;
30pub mod attention;
31pub mod embedding;
32pub mod optimizations;
33pub mod ota;
34pub mod benchmark;
35pub mod diagnostics;
36pub mod models;
37
38#[cfg(feature = "federation")]
39pub mod federation;
40
41// RuVector integration (vector database capabilities)
42#[cfg(feature = "federation")]
43pub mod ruvector;
44
45// Re-exports
46pub use micro_inference::{MicroEngine, InferenceConfig, InferenceResult};
47pub use quantized::{QuantizedTensor, QuantizationType};
48pub use model::{TinyModel, ModelConfig};
49
50// Optimization re-exports
51pub use optimizations::{
52    BinaryVector, BinaryEmbedding, hamming_distance, hamming_similarity,
53    ProductQuantizer, PQCode,
54    SoftmaxLUT, ExpLUT, DistanceLUT,
55    MicroLoRA, LoRAConfig,
56    SparseAttention, AttentionPattern,
57    LayerPruner, PruningConfig,
58};
59
60// Federation re-exports (optional)
61#[cfg(feature = "federation")]
62pub use federation::{
63    FederationConfig, FederationMode, FederationSpeedup,
64    PipelineNode, PipelineConfig, PipelineRole,
65    FederationMessage, MessageType, ChipId,
66    FederationCoordinator, ClusterTopology,
67    MicroFastGRNN, MicroGRNNConfig,
68    SpeculativeDecoder, DraftVerifyConfig,
69};
70
/// Memory budget for ESP32 variants
///
/// Each variant captures the SRAM size and hardware capabilities that
/// matter when sizing a model for on-chip inference.
#[derive(Debug, Clone, Copy)]
pub enum Esp32Variant {
    /// Original ESP32: 520KB SRAM
    Esp32,
    /// ESP32-S2: 320KB SRAM
    Esp32S2,
    /// ESP32-S3: 512KB SRAM + vector instructions
    Esp32S3,
    /// ESP32-C3: 400KB SRAM, RISC-V
    Esp32C3,
    /// ESP32-C6: 512KB SRAM, RISC-V + WiFi 6
    Esp32C6,
}

impl Esp32Variant {
    /// Available SRAM in bytes
    pub const fn sram_bytes(&self) -> usize {
        // Sizes expressed in KiB, converted once below.
        let kib = match self {
            Self::Esp32 => 520,
            Self::Esp32S2 => 320,
            Self::Esp32S3 | Self::Esp32C6 => 512,
            Self::Esp32C3 => 400,
        };
        kib * 1024
    }

    /// Whether variant has hardware floating point
    ///
    /// NOTE(review): only the S3 reports `true` here, yet the base ESP32's
    /// LX6 cores do ship a single-precision FPU — this looks like a
    /// deliberate "integer-only on everything but S3" policy; confirm
    /// before relying on it for f32 code paths.
    pub const fn has_fpu(&self) -> bool {
        matches!(self, Self::Esp32S3)
    }

    /// Whether variant has vector/SIMD extensions
    pub const fn has_simd(&self) -> bool {
        match self {
            Self::Esp32S3 => true,
            _ => false,
        }
    }

    /// Recommended max model size (leaving ~200KB for runtime)
    pub const fn max_model_ram(&self) -> usize {
        // Reserve a fixed runtime budget; saturate so small-SRAM parts
        // report 0 instead of underflowing.
        const RUNTIME_RESERVE: usize = 200 * 1024;
        self.sram_bytes().saturating_sub(RUNTIME_RESERVE)
    }
}
119
/// Error types for ESP32 inference
///
/// Uses `&'static str` payloads so errors carry context without
/// requiring heap allocation.
#[derive(Debug, Clone)]
pub enum Error {
    /// Model too large for available memory
    ModelTooLarge { required: usize, available: usize },
    /// Invalid model format
    InvalidModel(&'static str),
    /// Quantization error
    QuantizationError(&'static str),
    /// Buffer overflow
    BufferOverflow,
    /// Inference failed
    InferenceFailed(&'static str),
    /// Feature not supported on this variant
    UnsupportedFeature(&'static str),
}

impl core::fmt::Display for Error {
    // core::fmt (not std) keeps this usable under no_std builds.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        use Error::*;
        match self {
            ModelTooLarge { required, available } => write!(
                f,
                "Model too large: requires {} bytes, only {} available",
                required, available
            ),
            InvalidModel(msg) => write!(f, "Invalid model: {}", msg),
            QuantizationError(msg) => write!(f, "Quantization error: {}", msg),
            InferenceFailed(msg) => write!(f, "Inference failed: {}", msg),
            UnsupportedFeature(msg) => write!(f, "Unsupported feature: {}", msg),
            BufferOverflow => f.write_str("Buffer overflow"),
        }
    }
}
151
// `std::error::Error` requires `std`, which is unavailable on-target, so this
// impl is gated behind the host-test feature (host builds/tests only).
#[cfg(feature = "host-test")]
impl std::error::Error for Error {}
154
155pub type Result<T> = core::result::Result<T, Error>;
156
/// Prelude for common imports
///
/// `use ruvllm_esp32::prelude::*;` pulls in the inference engine,
/// quantization, model, variant, and error types most callers need.
pub mod prelude {
    pub use crate::{
        MicroEngine, InferenceConfig, InferenceResult,
        QuantizedTensor, QuantizationType,
        TinyModel, ModelConfig,
        Esp32Variant, Error, Result,
    };
}
165}