VLLMClient

Struct VLLMClient 

Source
pub struct VLLMClient { /* private fields */ }
Expand description

vLLM/OpenAI-compatible client

Implementations§

Source§

impl VLLMClient

Source

pub fn new(config: LLMServiceConfig) -> Result<Self>

Create a new vLLM client from the given configuration

Source

pub fn from_env() -> Result<Self>

Create a new vLLM client from environment variables

Source

pub fn metrics(&self) -> MetricsSnapshot

Get current API metrics

Source

pub async fn embed_batch_optimized<F>( &self, texts: &[String], batch_size: usize, progress_callback: Option<F>, ) -> Result<Vec<Vec<f32>>>
where F: Fn(usize, usize) + Send + Sync,

Embed texts with optimized batching

Splits large batches into optimal chunks for better throughput and parallel processing. Progress updates are reported through the optional callback, when one is provided.

Source

pub async fn embed_batch_parallel( &self, texts: &[String], batch_size: usize, max_concurrent: usize, ) -> Result<Vec<Vec<f32>>>

Embed texts in parallel with multiple concurrent batches

Uses tokio to process multiple batches concurrently for maximum throughput. Useful for embedding large document collections.

Trait Implementations§

Source§

impl LLMClient for VLLMClient

Source§

fn chat_completion<'life0, 'async_trait>( &'life0 self, messages: Vec<ChatMessage>, ) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait,

Generate chat completion
Source§

fn embed<'life0, 'life1, 'async_trait>( &'life0 self, text: &'life1 str, ) -> Pin<Box<dyn Future<Output = Result<Vec<f32>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Generate embeddings for text
Source§

fn embed_batch<'life0, 'life1, 'async_trait>( &'life0 self, texts: &'life1 [String], ) -> Pin<Box<dyn Future<Output = Result<Vec<Vec<f32>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Generate embeddings for multiple texts
Source§

fn embedding_dimensions(&self) -> usize

Get embedding dimensions
Source§

fn model_name(&self) -> &str

Get model name

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more