pub struct VLLMClient { /* private fields */ }
A vLLM/OpenAI-compatible client.
Implementations
impl VLLMClient
pub fn new(config: LLMServiceConfig) -> Result<Self>
Creates a new vLLM client from the given configuration.
pub fn metrics(&self) -> MetricsSnapshot
Returns the current API metrics.
pub async fn embed_batch_optimized<F>(
    &self,
    texts: &[String],
    batch_size: usize,
    progress_callback: Option<F>,
) -> Result<Vec<Vec<f32>>>
Embeds texts with optimized batching.
Splits large batches into optimal chunks for better throughput and parallel processing. Progress updates are reported through the optional progress_callback.
pub async fn embed_batch_parallel(
    &self,
    texts: &[String],
    batch_size: usize,
    max_concurrent: usize,
) -> Result<Vec<Vec<f32>>>
Embeds texts in parallel using multiple concurrent batches.
Uses tokio to process multiple batches concurrently for maximum throughput. Useful for embedding large document collections.
Trait Implementations
impl LLMClient for VLLMClient
fn chat_completion<'life0, 'async_trait>(
    &'life0 self,
    messages: Vec<ChatMessage>,
) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
Generates a chat completion.
fn embed<'life0, 'life1, 'async_trait>(
    &'life0 self,
    text: &'life1 str,
) -> Pin<Box<dyn Future<Output = Result<Vec<f32>>> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
    'life1: 'async_trait,
Generates embeddings for a text.
fn embed_batch<'life0, 'life1, 'async_trait>(
    &'life0 self,
    texts: &'life1 [String],
) -> Pin<Box<dyn Future<Output = Result<Vec<Vec<f32>>>> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
    'life1: 'async_trait,
Generates embeddings for multiple texts.
fn embedding_dimensions(&self) -> usize
Returns the embedding dimensions.
fn model_name(&self) -> &str
Returns the model name.
Auto Trait Implementations
impl Freeze for VLLMClient
impl !RefUnwindSafe for VLLMClient
impl Send for VLLMClient
impl Sync for VLLMClient
impl Unpin for VLLMClient
impl !UnwindSafe for VLLMClient
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.