// spn_native/lib.rs
//! Native model inference and storage for the SuperNovae ecosystem.
//!
//! This crate provides:
//! - [`HuggingFaceStorage`]: Download models from HuggingFace Hub
//! - [`detect_available_ram_gb`]: Platform-specific RAM detection
//! - [`default_model_dir`]: Default storage location (~/.spn/models)
//! - [`inference::NativeRuntime`]: Local LLM inference via mistral.rs (feature: `inference`)
//!
//! # Architecture
//!
//! ```text
//! ┌─────────────────────────────────────────────────────────────────────────────┐
//! │  spn-native                                                                 │
//! │  ├── HuggingFaceStorage     Download GGUF models from HuggingFace Hub       │
//! │  ├── detect_available_ram_gb()  Platform-specific RAM detection             │
//! │  ├── default_model_dir()        Default storage path (~/.spn/models)        │
//! │  └── NativeRuntime (inference)  mistral.rs inference integration            │
//! └─────────────────────────────────────────────────────────────────────────────┘
//! ```
//!
//! # Features
//!
//! - `progress`: Enable terminal progress bars for downloads
//! - `inference`: Enable local LLM inference via mistral.rs
//! - `native`: Alias for `inference`
//! - `full`: All features
//!
//! # Example: Download
//!
//! ```ignore
//! use spn_native::{HuggingFaceStorage, default_model_dir, detect_available_ram_gb};
//! use spn_core::{find_model, auto_select_quantization, DownloadRequest};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Detect RAM and select quantization
//!     let ram_gb = detect_available_ram_gb();
//!     let model = find_model("qwen3:8b").unwrap();
//!     let quant = auto_select_quantization(model, ram_gb);
//!
//!     // Create storage and download
//!     let storage = HuggingFaceStorage::new(default_model_dir());
//!     let request = DownloadRequest::curated(model).with_quantization(quant);
//!
//!     let result = storage.download(&request, |progress| {
//!         println!("{}: {:.1}%", progress.status, progress.percent());
//!     }).await?;
//!
//!     println!("Downloaded to: {:?}", result.path);
//!     Ok(())
//! }
//! ```
//!
//! # Example: Inference (requires `inference` feature)
//!
//! ```ignore
//! use spn_native::inference::{NativeRuntime, InferenceBackend};
//! use spn_core::{LoadConfig, ChatOptions};
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     let mut runtime = NativeRuntime::new();
//!
//!     // Load a downloaded model
//!     runtime.load("~/.spn/models/qwen3-8b-q4_k_m.gguf".into(), LoadConfig::default()).await?;
//!
//!     // Run inference
//!     let response = runtime.infer("What is 2+2?", ChatOptions::default()).await?;
//!     println!("{}", response.message.content);
//!
//!     Ok(())
//! }
//! ```

75#![forbid(unsafe_code)]
76#![warn(missing_docs)]
77#![warn(clippy::all)]
78// Allow certain patterns during development
79#![allow(clippy::module_inception)]
80
81mod error;
82mod platform;
83mod storage;
84
85// Feature-gated inference module
86pub mod inference;
87
88pub use error::{NativeError, Result};
89pub use platform::{default_model_dir, detect_available_ram_gb};
90pub use storage::HuggingFaceStorage;
91
92// Re-export inference types at crate root for convenience
93pub use inference::{DynInferenceBackend, InferenceBackend, NativeRuntime};
94
95// Re-export core types for convenience
96pub use spn_core::{
97    auto_select_quantization, find_model, resolve_model, BackendError, ChatOptions, ChatResponse,
98    DownloadRequest, DownloadResult, KnownModel, LoadConfig, ModelArchitecture, ModelInfo,
99    ModelStorage, ModelType, ProgressCallback, PullProgress, Quantization, ResolvedModel,
100};
/// Extract quantization type from a filename (case-insensitive).
///
/// Supports: Q2_K, Q3_K_S, Q3_K_M, Q3_K_L, Q4_K_S, Q4_K_M, Q5_K_S, Q5_K_M,
/// Q6_K, Q8_0, BF16, F16, F32
///
/// Returns the canonical upper-case name of the first matching pattern,
/// or `None` if the filename contains no recognized quantization tag.
///
/// # Example
///
/// ```
/// use spn_native::extract_quantization;
///
/// assert_eq!(extract_quantization("model-q4_k_m.gguf"), Some("Q4_K_M".to_string()));
/// assert_eq!(extract_quantization("model-Q8_0.gguf"), Some("Q8_0".to_string()));
/// assert_eq!(extract_quantization("model.gguf"), None);
/// ```
#[must_use]
pub fn extract_quantization(filename: &str) -> Option<String> {
    let lower = filename.to_lowercase();
    // Order from most specific to least specific: "bf16" must precede "f16"
    // so a bf16 file is not misreported as F16 (bug fix: the old list had no
    // "bf16" entry, so "model-bf16.gguf" matched the "f16" substring).
    const QUANTS: [&str; 13] = [
        "q3_k_s", "q3_k_m", "q3_k_l", // Q3 variants
        "q4_k_s", "q4_k_m", // Q4 variants
        "q5_k_s", "q5_k_m", // Q5 variants
        "q2_k", "q6_k", "q8_0", // Single variants
        "bf16", "f16", "f32", // Float variants
    ];
    QUANTS
        .iter()
        .find(|quant| lower.contains(*quant))
        .map(|quant| quant.to_uppercase())
}
131}