pub struct NativeRuntime { /* private fields */ }
Expand description
Native runtime for local LLM inference.
Uses mistral.rs for high-performance inference on GGUF models. Supports CPU and GPU (Metal on macOS, CUDA on Linux) acceleration.
§Example
ⓘ
use spn_native::inference::NativeRuntime;
use spn_core::LoadConfig;
let mut runtime = NativeRuntime::new()?;
runtime.load("model.gguf".into(), LoadConfig::default()).await?;
let response = runtime.infer("Hello!", Default::default()).await?;
Implementations§
Source§impl NativeRuntime
impl NativeRuntime
Sourcepub fn new() -> Self
pub fn new() -> Self
Create a new native runtime.
The runtime is created without a model loaded. Call load() to
load a model before running inference.
Sourcepub fn model_path(&self) -> Option<&PathBuf>
pub fn model_path(&self) -> Option<&PathBuf>
Get the path to the currently loaded model.
Sourcepub fn config(&self) -> Option<&LoadConfig>
pub fn config(&self) -> Option<&LoadConfig>
Get the load configuration for the current model.
Trait Implementations§
Source§impl Clone for NativeRuntime
impl Clone for NativeRuntime
Source§impl Debug for NativeRuntime
impl Debug for NativeRuntime
Source§impl Default for NativeRuntime
impl Default for NativeRuntime
Source§impl InferenceBackend for NativeRuntime
Available on crate feature inference only.
impl InferenceBackend for NativeRuntime
Available on crate feature inference only.
Source§async fn load(
&mut self,
_model_path: PathBuf,
_config: LoadConfig,
) -> Result<(), NativeError>
async fn load( &mut self, _model_path: PathBuf, _config: LoadConfig, ) -> Result<(), NativeError>
Load a model from disk. Read more
Source§async fn infer(
&self,
_prompt: &str,
_options: ChatOptions,
) -> Result<ChatResponse, NativeError>
async fn infer( &self, _prompt: &str, _options: ChatOptions, ) -> Result<ChatResponse, NativeError>
Generate a response (non-streaming). Read more
Source§async fn infer_stream(
&self,
_prompt: &str,
_options: ChatOptions,
) -> Result<impl Stream<Item = Result<String, NativeError>> + Send, NativeError>
async fn infer_stream( &self, _prompt: &str, _options: ChatOptions, ) -> Result<impl Stream<Item = Result<String, NativeError>> + Send, NativeError>
Generate a response (streaming). Read more
Auto Trait Implementations§
impl Freeze for NativeRuntime
impl RefUnwindSafe for NativeRuntime
impl Send for NativeRuntime
impl Sync for NativeRuntime
impl Unpin for NativeRuntime
impl UnsafeUnpin for NativeRuntime
impl UnwindSafe for NativeRuntime
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> DynInferenceBackend for Twhere
T: InferenceBackend + 'static,
impl<T> DynInferenceBackend for Twhere
T: InferenceBackend + 'static,
Source§fn load_dyn(
&mut self,
model_path: PathBuf,
config: LoadConfig,
) -> Pin<Box<dyn Future<Output = Result<(), NativeError>> + Send + '_>>
fn load_dyn( &mut self, model_path: PathBuf, config: LoadConfig, ) -> Pin<Box<dyn Future<Output = Result<(), NativeError>> + Send + '_>>
Load a model from disk (boxed future for object safety).
Source§fn unload_dyn(
&mut self,
) -> Pin<Box<dyn Future<Output = Result<(), NativeError>> + Send + '_>>
fn unload_dyn( &mut self, ) -> Pin<Box<dyn Future<Output = Result<(), NativeError>> + Send + '_>>
Unload the model from memory (boxed future for object safety).
Source§fn is_loaded_dyn(&self) -> bool
fn is_loaded_dyn(&self) -> bool
Check if a model is currently loaded.
Source§fn model_info_dyn(&self) -> Option<ModelInfo>
fn model_info_dyn(&self) -> Option<ModelInfo>
Get metadata about the loaded model (cloned for object safety).
Source§fn infer_dyn(
&self,
prompt: String,
options: ChatOptions,
) -> Pin<Box<dyn Future<Output = Result<ChatResponse, NativeError>> + Send + '_>>
fn infer_dyn( &self, prompt: String, options: ChatOptions, ) -> Pin<Box<dyn Future<Output = Result<ChatResponse, NativeError>> + Send + '_>>
Generate a response (boxed future for object safety). Read more
Source§fn infer_stream_dyn(
&self,
_prompt: String,
_options: ChatOptions,
) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<String, NativeError>> + Send>>, NativeError>> + Send + '_>>
fn infer_stream_dyn( &self, _prompt: String, _options: ChatOptions, ) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<String, NativeError>> + Send>>, NativeError>> + Send + '_>>
Generate a streaming response (boxed stream for object safety). Read more