pub struct Client<CTXEXT, FENSLLM, CUSG> {
pub ensemble_llm_fetcher: Arc<CachingFetcher<CTXEXT, FENSLLM>>,
pub usage_handler: Arc<CUSG>,
pub upstream_client: Client,
pub backoff_current_interval: Duration,
pub backoff_initial_interval: Duration,
pub backoff_randomization_factor: f64,
pub backoff_multiplier: f64,
pub backoff_max_interval: Duration,
pub backoff_max_elapsed_time: Duration,
}
Client for creating chat completions.
Handles Ensemble LLM fetching, upstream provider selection with fallbacks, retry logic with exponential backoff, and usage tracking.
Fields
ensemble_llm_fetcher: Arc<CachingFetcher<CTXEXT, FENSLLM>> — Caching fetcher for Ensemble LLM definitions.
usage_handler: Arc<CUSG> — Handler for tracking usage after completion.
upstream_client: Client — Client for communicating with upstream providers.
backoff_current_interval: Duration — Current backoff interval for retry logic.
backoff_initial_interval: Duration — Initial backoff interval for retry logic.
backoff_randomization_factor: f64 — Randomization factor for backoff jitter.
backoff_multiplier: f64 — Multiplier for exponential backoff growth.
backoff_max_interval: Duration — Maximum backoff interval.
backoff_max_elapsed_time: Duration — Maximum total time to spend on retries.
Implementations§
Source§impl<CTXEXT, FENSLLM, CUSG> Client<CTXEXT, FENSLLM, CUSG>
impl<CTXEXT, FENSLLM, CUSG> Client<CTXEXT, FENSLLM, CUSG>
Sourcepub fn new(
ensemble_llm_fetcher: Arc<CachingFetcher<CTXEXT, FENSLLM>>,
usage_handler: Arc<CUSG>,
upstream_client: Client,
backoff_current_interval: Duration,
backoff_initial_interval: Duration,
backoff_randomization_factor: f64,
backoff_multiplier: f64,
backoff_max_interval: Duration,
backoff_max_elapsed_time: Duration,
) -> Self
pub fn new( ensemble_llm_fetcher: Arc<CachingFetcher<CTXEXT, FENSLLM>>, usage_handler: Arc<CUSG>, upstream_client: Client, backoff_current_interval: Duration, backoff_initial_interval: Duration, backoff_randomization_factor: f64, backoff_multiplier: f64, backoff_max_interval: Duration, backoff_max_elapsed_time: Duration, ) -> Self
Creates a new chat completions client.
Source§impl<CTXEXT, FENSLLM, CUSG> Client<CTXEXT, FENSLLM, CUSG> where
CTXEXT: ContextExt + Send + Sync + 'static,
FENSLLM: Fetcher<CTXEXT> + Send + Sync + 'static,
CUSG: UsageHandler<CTXEXT> + Send + Sync + 'static,
impl<CTXEXT, FENSLLM, CUSG> Client<CTXEXT, FENSLLM, CUSG> where
CTXEXT: ContextExt + Send + Sync + 'static,
FENSLLM: Fetcher<CTXEXT> + Send + Sync + 'static,
CUSG: UsageHandler<CTXEXT> + Send + Sync + 'static,
Sourcepub async fn create_unary_for_chat_handle_usage(
self: Arc<Self>,
ctx: Context<CTXEXT>,
request: Arc<ChatCompletionCreateParams>,
) -> Result<ChatCompletion, Error>
pub async fn create_unary_for_chat_handle_usage( self: Arc<Self>, ctx: Context<CTXEXT>, request: Arc<ChatCompletionCreateParams>, ) -> Result<ChatCompletion, Error>
Creates a unary chat completion, tracking usage after completion.
Internally streams the response and aggregates chunks into a single response.
Sourcepub async fn create_streaming_for_chat_handle_usage(
self: Arc<Self>,
ctx: Context<CTXEXT>,
request: Arc<ChatCompletionCreateParams>,
) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
pub async fn create_streaming_for_chat_handle_usage( self: Arc<Self>, ctx: Context<CTXEXT>, request: Arc<ChatCompletionCreateParams>, ) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
Creates a streaming chat completion, tracking usage after the stream ends.
Sourcepub async fn create_streaming_for_vector_handle_usage(
self: Arc<Self>,
ctx: Context<CTXEXT>,
request: Arc<VectorCompletionCreateParams>,
vector_pfx_indices: Vec<Arc<Vec<(String, usize)>>>,
ensemble_llm: EnsembleLlmWithFallbacksAndCount,
) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
pub async fn create_streaming_for_vector_handle_usage( self: Arc<Self>, ctx: Context<CTXEXT>, request: Arc<VectorCompletionCreateParams>, vector_pfx_indices: Vec<Arc<Vec<(String, usize)>>>, ensemble_llm: EnsembleLlmWithFallbacksAndCount, ) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
Creates a streaming completion for vector voting, tracking usage after the stream ends.
Used internally by vector completions to generate LLM votes.
Source§impl<CTXEXT, FENSLLM, CUSG> Client<CTXEXT, FENSLLM, CUSG>
impl<CTXEXT, FENSLLM, CUSG> Client<CTXEXT, FENSLLM, CUSG>
Sourcepub async fn create_streaming_for_chat(
&self,
ctx: Context<CTXEXT>,
request: Arc<ChatCompletionCreateParams>,
) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
pub async fn create_streaming_for_chat( &self, ctx: Context<CTXEXT>, request: Arc<ChatCompletionCreateParams>, ) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
Creates a streaming chat completion without usage tracking.
Handles model validation, Ensemble LLM fetching, fallback logic, and retry with exponential backoff.
Sourcepub async fn create_streaming_for_vector(
&self,
ctx: Context<CTXEXT>,
request: Arc<VectorCompletionCreateParams>,
vector_pfx_indices: Vec<Arc<Vec<(String, usize)>>>,
ensemble_llm: EnsembleLlmWithFallbacksAndCount,
) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
pub async fn create_streaming_for_vector( &self, ctx: Context<CTXEXT>, request: Arc<VectorCompletionCreateParams>, vector_pfx_indices: Vec<Arc<Vec<(String, usize)>>>, ensemble_llm: EnsembleLlmWithFallbacksAndCount, ) -> Result<impl Stream<Item = Result<ChatCompletionChunk, Error>> + Send + Unpin + 'static, Error>
Creates a streaming completion for vector voting without usage tracking.
Used internally by vector completions. Handles fallback logic and retry with exponential backoff.
Trait Implementations§
Auto Trait Implementations§
impl<CTXEXT, FENSLLM, CUSG> Freeze for Client<CTXEXT, FENSLLM, CUSG>
impl<CTXEXT, FENSLLM, CUSG> !RefUnwindSafe for Client<CTXEXT, FENSLLM, CUSG>
impl<CTXEXT, FENSLLM, CUSG> Send for Client<CTXEXT, FENSLLM, CUSG>
impl<CTXEXT, FENSLLM, CUSG> Sync for Client<CTXEXT, FENSLLM, CUSG>
impl<CTXEXT, FENSLLM, CUSG> Unpin for Client<CTXEXT, FENSLLM, CUSG>
impl<CTXEXT, FENSLLM, CUSG> !UnwindSafe for Client<CTXEXT, FENSLLM, CUSG>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more