pub trait LlmInferenceEngine: InferenceEngine {
// Required methods
fn infer<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn infer_stream<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
}Expand description
LLM text-generation engine.
Implemented by ContinuousBatchEngine (the production path) and
DefaultInferenceEngine (legacy reference path). Backs
/v1/chat/completions and /v1/completions.
Required Methods§
Sourcefn infer<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn infer<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<InferenceResponse>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Execute single inference request.
Sourcefn infer_stream<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
fn infer_stream<'life0, 'async_trait>(
&'life0 self,
request: InferenceRequest,
) -> Pin<Box<dyn Future<Output = Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
Execute streaming inference request.
Dyn Compatibility§
This trait is dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety".