Skip to main content

ModelRuntime

Trait ModelRuntime 

Source
pub trait ModelRuntime: Send + Sync {
    // Required methods
    fn supported_format(&self) -> ModelFormat;
    fn initialize<'life0, 'async_trait>(
        &'life0 mut self,
        config: RuntimeConfig,
    ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    fn is_ready<'life0, 'async_trait>(
        &'life0 self,
    ) -> Pin<Box<dyn Future<Output = bool> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    fn health_check<'life0, 'async_trait>(
        &'life0 self,
    ) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    fn base_url(&self) -> String;
    fn generate<'life0, 'async_trait>(
        &'life0 self,
        request: InferenceRequest,
    ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    fn generate_stream<'life0, 'async_trait>(
        &'life0 self,
        request: InferenceRequest,
    ) -> Pin<Box<dyn Future<Output = Result<Box<dyn Stream<Item = Result<String, Error>> + Send + Unpin>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    fn shutdown<'life0, 'async_trait>(
        &'life0 mut self,
    ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    fn metadata(&self) -> RuntimeMetadata;

    // Provided method
    fn completions_url(&self) -> String { ... }
}
Expand description

Model runtime trait. All runtime adapters must implement this trait.

Required Methods§

Source

fn supported_format(&self) -> ModelFormat

Get the format this runtime supports

Source

fn initialize<'life0, 'async_trait>( &'life0 mut self, config: RuntimeConfig, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Initialize the runtime (start server process, load model, etc.)

Source

fn is_ready<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = bool> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Check whether the runtime is ready for inference

Source

fn health_check<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Get the runtime's health status as a string

Source

fn base_url(&self) -> String

Get the base URL for the inference API (e.g., `http://127.0.0.1:8001`)

Source

fn generate<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Perform inference (non-streaming)

Source

fn generate_stream<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<Box<dyn Stream<Item = Result<String, Error>> + Send + Unpin>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Perform streaming inference

Source

fn shutdown<'life0, 'async_trait>( &'life0 mut self, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Shutdown the runtime (stop server, cleanup resources)

Source

fn metadata(&self) -> RuntimeMetadata

Get runtime metadata

Provided Methods§

Source

fn completions_url(&self) -> String

Get the URL of the OpenAI-compatible chat completions endpoint

Implementors§