// spn_native/inference/mod.rs

1//! Native LLM inference module.
2//!
3//! This module provides local model inference via mistral.rs when the
4//! `inference` feature is enabled.
5//!
6//! # Architecture
7//!
8//! ```text
9//! ┌─────────────────────────────────────────────────────────────────────────────┐
10//! │  Inference Module                                                           │
11//! ├─────────────────────────────────────────────────────────────────────────────┤
12//! │                                                                             │
13//! │  InferenceBackend (trait)                                                   │
14//! │  ├── load(path, config)      Load GGUF model into memory                    │
15//! │  ├── unload()                Unload model from memory                       │
16//! │  ├── is_loaded()             Check if model is loaded                       │
17//! │  ├── model_info()            Get metadata about loaded model                │
18//! │  ├── infer(prompt, opts)     Generate response (non-streaming)              │
19//! │  └── infer_stream(...)       Generate response (streaming)                  │
20//! │                                                                             │
21//! │  NativeRuntime (struct)                                                     │
22//! │  └── Implements InferenceBackend using mistral.rs                           │
23//! │                                                                             │
24//! └─────────────────────────────────────────────────────────────────────────────┘
25//! ```
26//!
27//! # Example
28//!
29//! ```ignore
30//! use spn_native::inference::{NativeRuntime, InferenceBackend};
31//! use spn_core::{LoadConfig, ChatOptions};
32//! use std::path::PathBuf;
33//!
34//! #[tokio::main]
35//! async fn main() -> anyhow::Result<()> {
36//!     let mut runtime = NativeRuntime::new();
37//!
38//!     // Load a GGUF model
39//!     let model_path = PathBuf::from("~/.spn/models/qwen3-8b-q4_k_m.gguf");
40//!     runtime.load(model_path, LoadConfig::default()).await?;
41//!
42//!     // Run inference
43//!     let response = runtime.infer(
44//!         "What is 2+2?",
45//!         ChatOptions::default().with_temperature(0.7)
46//!     ).await?;
47//!
48//!     println!("{}", response.content);
49//!     Ok(())
50//! }
51//! ```
52
53mod runtime;
54mod traits;
55
56pub use runtime::NativeRuntime;
57pub use traits::{DynInferenceBackend, InferenceBackend};
58
59// Re-export types commonly used with inference
60pub use spn_core::{ChatOptions, ChatResponse, LoadConfig, ModelInfo};