1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
//! Native LLM inference module
//!
//! This module provides local GGUF model inference via mistral.rs.
//!
//! # Architecture
//!
//! ```text
//! ┌─────────────────────────────────────────────────────────────────────────────┐
//! │  Nika Native Inference                                                      │
//! ├─────────────────────────────────────────────────────────────────────────────┤
//! │                                                                             │
//! │  NativeRuntime (local implementation)                                       │
//! │  ├── load(path, config)     Load GGUF model into memory                     │
//! │  ├── unload()               Unload model from memory                        │
//! │  ├── is_loaded()            Check if model is loaded                        │
//! │  ├── model_info()           Get metadata about loaded model                 │
//! │  ├── infer(prompt, opts)    Generate response (non-streaming)               │
//! │  └── infer_stream(...)      Generate response (streaming)                   │
//! │                                                                             │
//! └─────────────────────────────────────────────────────────────────────────────┘
//! ```
//!
//! # Example
//!
//! ```ignore
//! use nika::provider::native::NativeRuntime;
//! use nika::core::backend::LoadConfig;
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     let mut runtime = NativeRuntime::new();
//!
//!     // Load a GGUF model
//!     runtime.load("~/.cache/huggingface/models/qwen3-8b-q4_k_m.gguf".into(), LoadConfig::default()).await?;
//!
//!     // Non-streaming inference
//!     let response = runtime.infer("What is 2+2?", Default::default()).await?;
//!     println!("{}", response.message.content);
//!
//!     // Streaming inference
//!     use futures::StreamExt;
//!     let mut stream = runtime.infer_stream("Explain Rust", Default::default()).await?;
//!     while let Some(chunk) = stream.next().await {
//!         print!("{}", chunk?);
//!     }
//!
//!     Ok(())
//! }
//! ```
// Local modules
// Re-export main types
pub use NativeError;
pub use NativeRuntime;
pub use ;
// Re-export backend types from core
pub use crate;
// Re-export storage types from core
pub use crate;