Skip to main content

AdvancedInferenceEngine

Trait AdvancedInferenceEngine 

Source
/// Advanced engine capabilities layered on top of [`LlmInferenceEngine`].
///
/// This is an opt-in extension for LLM engines that support batching,
/// speculation, runtime reconfiguration, and diagnostics. Every implementor
/// must also implement the `LlmInferenceEngine` supertrait.
///
/// All methods return a pinned, boxed, `Send` future rather than being
/// declared `async fn`.
// NOTE(review): the `'life0` / `'async_trait` lifetimes look like the output
// of the `#[async_trait]` macro — confirm against the original source file.
pub trait AdvancedInferenceEngine: LlmInferenceEngine {
    // Required methods

    /// Execute batch inference.
    ///
    /// Takes ownership of `requests`. The future resolves to an outer
    /// `Result` wrapping a `Vec` of per-item `Result<InferenceResponse>`
    /// values, so a failure of the whole call is distinguishable from a
    /// failure of an individual item.
    // NOTE(review): presumably one output entry per input request, in the
    // same order — confirm with implementors.
    fn infer_batch<'life0, 'async_trait>(
        &'life0 self,
        requests: Vec<InferenceRequest>,
    ) -> Pin<Box<dyn Future<Output = Result<Vec<Result<InferenceResponse>>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Execute speculative inference.
    ///
    /// Runs a single `request` under the supplied `speculation_config` and
    /// resolves to a `Result<InferenceResponse>`.
    fn infer_speculative<'life0, 'async_trait>(
        &'life0 self,
        request: InferenceRequest,
        speculation_config: SpeculationConfig,
    ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Warm up the engine with sample requests.
    ///
    /// Requires exclusive (`&mut self`) access and resolves to a
    /// `Result<WarmupResult>`.
    fn warmup<'life0, 'async_trait>(
        &'life0 mut self,
        warmup_requests: Vec<InferenceRequest>,
    ) -> Pin<Box<dyn Future<Output = Result<WarmupResult>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Configure the engine at runtime with the supplied `config`.
    ///
    /// Requires exclusive (`&mut self`) access and resolves to `Result<()>`.
    fn reconfigure<'life0, 'async_trait>(
        &'life0 mut self,
        config: EngineConfig,
    ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Get detailed diagnostics.
    ///
    /// Unlike the other methods, the future resolves directly to a
    /// `DiagnosticsReport` rather than a `Result`, so the signature has no
    /// error channel.
    fn diagnostics<'life0, 'async_trait>(
        &'life0 self,
    ) -> Pin<Box<dyn Future<Output = DiagnosticsReport> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Export engine state for debugging.
    ///
    /// Takes `&self` and resolves to a `Result<EngineState>`.
    fn export_state<'life0, 'async_trait>(
        &'life0 self,
    ) -> Pin<Box<dyn Future<Output = Result<EngineState>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;

    /// Import engine state for debugging/testing.
    ///
    /// Takes ownership of `state`, requires exclusive (`&mut self`) access,
    /// and resolves to `Result<()>`.
    // NOTE(review): presumably accepts a value produced by `export_state` —
    // confirm whether round-tripping is guaranteed.
    fn import_state<'life0, 'async_trait>(
        &'life0 mut self,
        state: EngineState,
    ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
}
Expand description

Advanced engine capabilities: an opt-in extension for LLM engines that support batching, speculation, runtime reconfiguration, and diagnostics.

Required Methods§

Source

fn infer_batch<'life0, 'async_trait>( &'life0 self, requests: Vec<InferenceRequest>, ) -> Pin<Box<dyn Future<Output = Result<Vec<Result<InferenceResponse>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Execute batch inference.

Source

fn infer_speculative<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, speculation_config: SpeculationConfig, ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Execute speculative inference.

Source

fn warmup<'life0, 'async_trait>( &'life0 mut self, warmup_requests: Vec<InferenceRequest>, ) -> Pin<Box<dyn Future<Output = Result<WarmupResult>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Warm up engine with sample requests.

Source

fn reconfigure<'life0, 'async_trait>( &'life0 mut self, config: EngineConfig, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Reconfigure the engine at runtime.

Source

fn diagnostics<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = DiagnosticsReport> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Get detailed diagnostics.

Source

fn export_state<'life0, 'async_trait>( &'life0 self, ) -> Pin<Box<dyn Future<Output = Result<EngineState>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Export engine state for debugging.

Source

fn import_state<'life0, 'async_trait>( &'life0 mut self, state: EngineState, ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Import engine state for debugging/testing.

Dyn Compatibility§

This trait is dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety".

Implementors§