pub trait ModelRuntime: Send + Sync {
// Required methods
fn supported_format(&self) -> ModelFormat;
fn initialize<'life0, 'async_trait>(
&'life0 mut self,
config: RuntimeConfig,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn is_ready<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = bool> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn health_check<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn base_url(&self) -> String;
fn generate<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn generate_stream<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn Stream<Item = Result<String, Error>> + Send + Unpin>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn metadata(&self) -> RuntimeMetadata;
// Provided method
fn completions_url(&self) -> String { ... }
}Expand description
Model runtime trait - all runtime adapters must implement this
Required Methods§
Sourcefn supported_format(&self) -> ModelFormat
fn supported_format(&self) -> ModelFormat
Get the format this runtime supports
Sourcefn initialize<'life0, 'async_trait>(
&'life0 mut self,
config: RuntimeConfig,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn initialize<'life0, 'async_trait>(
&'life0 mut self,
config: RuntimeConfig,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Initialize the runtime (start server process, load model, etc.)
Sourcefn is_ready<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = bool> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn is_ready<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = bool> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Check if runtime is ready for inference
Sourcefn health_check<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn health_check<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Get health status
Sourcefn base_url(&self) -> String
fn base_url(&self) -> String
Get the base URL for inference API (e.g., “http://127.0.0.1:8001”)
Sourcefn generate<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn generate<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Perform inference (non-streaming)
Sourcefn generate_stream<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn Stream<Item = Result<String, Error>> + Send + Unpin>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn generate_stream<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<Box<dyn Stream<Item = Result<String, Error>> + Send + Unpin>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Perform streaming inference
Sourcefn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn shutdown<'life0, 'async_trait>(
&'life0 mut self,
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Shutdown the runtime (stop server, cleanup resources)
Sourcefn metadata(&self) -> RuntimeMetadata
fn metadata(&self) -> RuntimeMetadata
Get runtime metadata
Provided Methods§
Sourcefn completions_url(&self) -> String
fn completions_url(&self) -> String
Get the OpenAI-compatible chat completions endpoint