// spn_native/inference/mod.rs
//! Native LLM inference module.
//!
//! This module provides local model inference via mistral.rs when the
//! `inference` feature is enabled.
//!
//! # Architecture
//!
//! ```text
//! ┌──────────────────────────────────────────────────────────────────┐
//! │                        Inference Module                          │
//! ├──────────────────────────────────────────────────────────────────┤
//! │                                                                  │
//! │  InferenceBackend (trait)                                        │
//! │  ├── load(path, config)   Load GGUF model into memory            │
//! │  ├── unload()             Unload model from memory               │
//! │  ├── is_loaded()          Check if model is loaded               │
//! │  ├── model_info()         Get metadata about loaded model        │
//! │  ├── infer(prompt, opts)  Generate response (non-streaming)      │
//! │  └── infer_stream(...)    Generate response (streaming)          │
//! │                                                                  │
//! │  NativeRuntime (struct)                                          │
//! │  └── Implements InferenceBackend using mistral.rs                │
//! │                                                                  │
//! └──────────────────────────────────────────────────────────────────┘
//! ```
//!
//! # Example
//!
//! ```ignore
//! use spn_native::inference::{NativeRuntime, InferenceBackend};
//! use spn_core::{LoadConfig, ChatOptions};
//! use std::path::PathBuf;
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     let mut runtime = NativeRuntime::new();
//!
//!     // Load a GGUF model
//!     let model_path = PathBuf::from("~/.spn/models/qwen3-8b-q4_k_m.gguf");
//!     runtime.load(model_path, LoadConfig::default()).await?;
//!
//!     // Run inference
//!     let response = runtime.infer(
//!         "What is 2+2?",
//!         ChatOptions::default().with_temperature(0.7)
//!     ).await?;
//!
//!     println!("{}", response.content);
//!     Ok(())
//! }
//! ```
52
53mod runtime;
54mod traits;
55
56pub use runtime::NativeRuntime;
57pub use traits::{DynInferenceBackend, InferenceBackend};
58
59// Re-export types commonly used with inference
60pub use spn_core::{ChatOptions, ChatResponse, LoadConfig, ModelInfo};