spn_native/lib.rs
1//! Native model inference and storage for the SuperNovae ecosystem.
2//!
3//! This crate provides:
4//! - [`HuggingFaceStorage`]: Download models from HuggingFace Hub
5//! - [`detect_available_ram_gb`]: Platform-specific RAM detection
6//! - [`default_model_dir`]: Default storage location (~/.spn/models)
7//! - [`inference::NativeRuntime`]: Local LLM inference via mistral.rs (feature: `inference`)
8//!
9//! # Architecture
10//!
11//! ```text
12//! ┌─────────────────────────────────────────────────────────────────────────────┐
13//! │ spn-native │
14//! │ ├── HuggingFaceStorage Download GGUF models from HuggingFace Hub │
15//! │ ├── detect_available_ram_gb() Platform-specific RAM detection │
16//! │ ├── default_model_dir() Default storage path (~/.spn/models) │
17//! │ └── NativeRuntime (inference) mistral.rs inference integration │
18//! └─────────────────────────────────────────────────────────────────────────────┘
19//! ```
20//!
21//! # Features
22//!
23//! - `progress`: Enable terminal progress bars for downloads
24//! - `inference`: Enable local LLM inference via mistral.rs
25//! - `native`: Alias for `inference`
26//! - `full`: All features
27//!
28//! # Example: Download
29//!
30//! ```ignore
31//! use spn_native::{HuggingFaceStorage, default_model_dir, detect_available_ram_gb};
32//! use spn_core::{find_model, auto_select_quantization, DownloadRequest};
33//!
34//! #[tokio::main]
35//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
36//! // Detect RAM and select quantization
37//! let ram_gb = detect_available_ram_gb();
38//! let model = find_model("qwen3:8b").unwrap();
39//! let quant = auto_select_quantization(model, ram_gb);
40//!
41//! // Create storage and download
42//! let storage = HuggingFaceStorage::new(default_model_dir());
43//! let request = DownloadRequest::curated(model).with_quantization(quant);
44//!
45//! let result = storage.download(&request, |progress| {
46//! println!("{}: {:.1}%", progress.status, progress.percent());
47//! }).await?;
48//!
49//! println!("Downloaded to: {:?}", result.path);
50//! Ok(())
51//! }
52//! ```
53//!
54//! # Example: Inference (requires `inference` feature)
55//!
56//! ```ignore
57//! use spn_native::inference::{NativeRuntime, InferenceBackend};
58//! use spn_core::{LoadConfig, ChatOptions};
59//!
60//! #[tokio::main]
61//! async fn main() -> anyhow::Result<()> {
62//! let mut runtime = NativeRuntime::new();
63//!
64//! // Load a downloaded model
65//! runtime.load("~/.spn/models/qwen3-8b-q4_k_m.gguf".into(), LoadConfig::default()).await?;
66//!
67//! // Run inference
68//! let response = runtime.infer("What is 2+2?", ChatOptions::default()).await?;
69//! println!("{}", response.message.content);
70//!
71//! Ok(())
72//! }
73//! ```
74
75#![forbid(unsafe_code)]
76#![warn(missing_docs)]
77#![warn(clippy::all)]
78// Allow certain patterns during development
79#![allow(clippy::module_inception)]
80
81mod error;
82mod platform;
83mod storage;
84
85// Feature-gated inference module
86pub mod inference;
87
88pub use error::{NativeError, Result};
89pub use platform::{default_model_dir, detect_available_ram_gb};
90pub use storage::HuggingFaceStorage;
91
92// Re-export inference types at crate root for convenience
93pub use inference::{DynInferenceBackend, InferenceBackend, NativeRuntime};
94
95// Re-export core types for convenience
96pub use spn_core::{
97 auto_select_quantization, find_model, resolve_model, BackendError, ChatOptions, ChatResponse,
98 DownloadRequest, DownloadResult, KnownModel, LoadConfig, ModelArchitecture, ModelInfo,
99 ModelStorage, ModelType, ProgressCallback, PullProgress, Quantization, ResolvedModel,
100};
101
/// Extract quantization type from a filename.
///
/// Performs a case-insensitive substring search against a fixed table of
/// known quantization tags and returns the first hit, uppercased.
///
/// Supports: Q2_K, Q3_K_S, Q3_K_M, Q3_K_L, Q4_K_S, Q4_K_M, Q5_K_S, Q5_K_M, Q6_K, Q8_0, F16, F32
///
/// # Example
///
/// ```
/// use spn_native::extract_quantization;
///
/// assert_eq!(extract_quantization("model-q4_k_m.gguf"), Some("Q4_K_M".to_string()));
/// assert_eq!(extract_quantization("model-Q8_0.gguf"), Some("Q8_0".to_string()));
/// assert_eq!(extract_quantization("model.gguf"), None);
/// ```
#[must_use]
pub fn extract_quantization(filename: &str) -> Option<String> {
    // Longer (more specific) tags come first so e.g. "q3_k_m" is reported
    // before the shorter "q2_k"/"q6_k" style tags could ever shadow it.
    const QUANT_TAGS: [&str; 12] = [
        "q3_k_s", "q3_k_m", "q3_k_l", // Q3 variants
        "q4_k_s", "q4_k_m", // Q4 variants
        "q5_k_s", "q5_k_m", // Q5 variants
        "q2_k", "q6_k", "q8_0", // Single variants
        "f16", "f32", // Float variants
    ];

    let normalized = filename.to_lowercase();
    QUANT_TAGS
        .iter()
        .find(|tag| normalized.contains(*tag))
        .map(|tag| tag.to_uppercase())
}
131}