Trait LlmInferenceEngine

Source

/// LLM text-generation engine.
///
/// Extends [`InferenceEngine`] with two request entry points: a one-shot call
/// and a streaming call. Both take the [`InferenceRequest`] by value and
/// return a boxed, pinned, `Send` future.
///
/// NOTE(review): the `'life0` / `'async_trait` lifetimes look like the
/// expansion of `#[async_trait]` over `async fn` methods — confirm against the
/// trait's source before hand-writing an implementation in this desugared form.
pub trait LlmInferenceEngine: InferenceEngine {
    // Required methods
    /// Execute a single inference request.
    ///
    /// The returned future resolves to `Result<InferenceResponse>`. It may
    /// borrow `self` (lifetime `'life0`), so the engine must outlive the
    /// future.
    fn infer<'life0, 'async_trait>(
        &'life0 self,
        request: InferenceRequest,
    ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
    /// Execute a streaming inference request.
    ///
    /// The returned future resolves to a `Result` wrapping a boxed, pinned,
    /// `Send` stream whose items are `Result<StreamChunk>`. Failure can
    /// therefore be reported at two levels: when the outer future resolves,
    /// and per item while the stream is consumed.
    fn infer_stream<'life0, 'async_trait>(
        &'life0 self,
        request: InferenceRequest,
    ) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait;
}

Expand description

LLM text-generation engine.

Implemented by `ContinuousBatchEngine` (the production path) and `DefaultInferenceEngine` (the legacy reference path). Backs the `/v1/chat/completions` and `/v1/completions` endpoints.

Required Methods§

Source

fn infer<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Execute a single inference request.

Source

fn infer_stream<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Execute a streaming inference request.

Dyn Compatibility§

This trait is dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety".

LlmInferenceEngine

Trait LlmInferenceEngine Copy item path

Required Methods§

fn infer<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait,

fn infer_stream<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait,

Dyn Compatibility§

Implementors§

Trait LlmInferenceEngine

fn infer<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

fn infer_stream<'life0, 'async_trait>( &'life0 self, request: InferenceRequest, ) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,