pub struct LLMWorker { /* private fields */ }

Implementations§
Source§impl LLMWorker
impl LLMWorker
Sourcepub fn new(shared_state: Arc<SharedState>) -> Self
pub fn new(shared_state: Arc<SharedState>) -> Self
Create with shared state (legacy constructor)
Sourcepub fn new_with_backend(backend_url: String) -> Self
pub fn new_with_backend(backend_url: String) -> Self
Create with explicit backend URL
Sourcepub fn set_runtime_manager(&self, runtime_manager: Arc<RuntimeManager>)
pub fn set_runtime_manager(&self, runtime_manager: Arc<RuntimeManager>)
Set the runtime manager
Sourcepub async fn is_runtime_ready(&self) -> bool
pub async fn is_runtime_ready(&self) -> bool
Check if runtime is ready for inference
Sourcepub async fn generate_response(
&self,
_session_id: String,
context: Vec<Message>,
) -> Result<String>
pub async fn generate_response( &self, _session_id: String, context: Vec<Message>, ) -> Result<String>
Generate a complete (non-streaming) response from the LLM.
Sourcepub async fn stream_response(
&self,
messages: Vec<Message>,
max_tokens: u32,
temperature: f32,
) -> Result<impl Stream<Item = Result<String, Error>>>
pub async fn stream_response( &self, messages: Vec<Message>, max_tokens: u32, temperature: f32, ) -> Result<impl Stream<Item = Result<String, Error>>>
Stream response tokens from the LLM as Server-Sent Events. Returns a stream of SSE-formatted strings ready to send to the client.
Sourcepub async fn batch_process(
&self,
prompts: Vec<(String, Vec<Message>)>,
) -> Result<Vec<String>>
pub async fn batch_process( &self, prompts: Vec<(String, Vec<Message>)>, ) -> Result<Vec<String>>
Batch process multiple prompts (non-streaming)
Sourcepub async fn initialize_model(&self, model_path: &str) -> Result<()>
pub async fn initialize_model(&self, model_path: &str) -> Result<()>
Initialize LLM model (no-op for HTTP proxy mode)
Sourcepub async fn generate_embeddings(
&self,
texts: Vec<String>,
) -> Result<Vec<Vec<f32>>>
pub async fn generate_embeddings( &self, texts: Vec<String>, ) -> Result<Vec<Vec<f32>>>
Generate embeddings for one or more text inputs via llama-server’s /v1/embeddings endpoint. This reuses the vectors llama.cpp already computes during inference — no separate model needed. Returns a Vec of embedding vectors (one per input string).
Auto Trait Implementations§
impl !Freeze for LLMWorker
impl !RefUnwindSafe for LLMWorker
impl Send for LLMWorker
impl Sync for LLMWorker
impl Unpin for LLMWorker
impl UnsafeUnpin for LLMWorker
impl !UnwindSafe for LLMWorker
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more