pub struct Client { /* private fields */ }
The llama.cpp completion client.
Client loads a GGUF model on a dedicated inference thread and exposes it
through Rig’s CompletionClient trait. Construct one with
Client::builder, or — for backward-compatible positional construction —
Client::from_gguf.
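For example, a minimal construction might look like the sketch below. It assumes ClientBuilder is finished with a build() method returning Result<Client, LoadError>; see ClientBuilder for the exact builder methods and defaults.

// Build a client with the builder's defaults for context size, sampling,
// fitting, KV cache, and checkpointing (path is a placeholder).
let client = Client::builder("models/my-model.gguf").build()?;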
§Lifecycle
The worker thread owns the LlamaModel, LlamaContext, and (when the
mtmd feature is on) the multimodal projector. It only releases that
memory when it exits, which happens in two cases:
- On Client::reload, the worker drops the old model and loads the new one in place; the Client itself is not dropped and the worker thread is reused. The caller blocks on the reload result.
- On Client::drop, the worker thread is signalled and joined. See impl Drop for Client for the exact semantics, including how a long in-flight generation is cancelled so the dropping thread does not have to wait for it to finish naturally.
Implementations§
impl Client
pub fn builder(model_path: impl Into<String>) -> ClientBuilder
Start a ClientBuilder for a GGUF model at model_path.
pub fn from_gguf(
model_path: impl Into<String>,
n_ctx: u32,
sampling_params: SamplingParams,
fit_params: FitParams,
kv_cache_params: KvCacheParams,
checkpoint_params: CheckpointParams,
) -> Result<Self, LoadError>
Load a GGUF model with automatic GPU/CPU layer fitting and start the inference worker thread.
llama.cpp will probe available device memory and determine the optimal layer distribution automatically.
Prefer Client::builder for new code — this constructor is kept for
backward compatibility with the positional 0.1.x API and forwards
directly to the builder.
§Arguments
- model_path — Path to a .gguf model file.
- n_ctx — Desired context window size in tokens.
- sampling_params — Sampling parameters for token generation.
- fit_params — Configuration for the fitting algorithm.
- kv_cache_params — KV cache data-type configuration (defaults to F16/F16).
- checkpoint_params — Tunables for the in-memory state-checkpoint cache used to preserve KV/recurrent state across chat turns for hybrid models.
§Errors
Returns a LoadError if the backend fails to initialise, automatic
fitting fails, or the model cannot be loaded.
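As a rough usage sketch (it assumes SamplingParams, FitParams, KvCacheParams, and CheckpointParams implement Default; the real defaults live on those types):

let client = Client::from_gguf(
    "models/my-model.gguf",        // placeholder path to a .gguf file
    8192,                          // n_ctx: desired context window in tokens
    SamplingParams::default(),
    FitParams::default(),
    KvCacheParams::default(),      // F16/F16 KV cache by default
    CheckpointParams::default(),
)?;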
pub fn from_gguf_with_mmproj(
model_path: impl Into<String>,
mmproj_path: impl Into<String>,
n_ctx: u32,
sampling_params: SamplingParams,
fit_params: FitParams,
kv_cache_params: KvCacheParams,
checkpoint_params: CheckpointParams,
) -> Result<Self, LoadError>
Load a GGUF vision model with a multimodal projector and automatic GPU/CPU layer fitting.
This constructor enables multimodal (vision) inference. The mmproj_path should point
to a GGUF multimodal projector file (mmproj) that corresponds to the vision model.
Prefer Client::builder with ClientBuilder::mmproj for new code.
§Arguments
- model_path — Path to a .gguf vision model file.
- mmproj_path — Path to the corresponding multimodal projector .gguf file.
- n_ctx — Desired context window size in tokens.
- sampling_params — Sampling parameters for token generation.
- fit_params — Configuration for the fitting algorithm.
- kv_cache_params — KV cache data-type configuration (defaults to F16/F16).
§Errors
Returns a LoadError if the backend fails to initialise, the model
cannot be loaded, or the multimodal projector cannot be initialised.
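A vision-model sketch under the same Default assumptions, with placeholder paths:

let client = Client::from_gguf_with_mmproj(
    "models/my-vision-model.gguf",
    "models/my-vision-model-mmproj.gguf", // projector matching the vision model
    8192,
    SamplingParams::default(),
    FitParams::default(),
    KvCacheParams::default(),
    CheckpointParams::default(),
)?;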
pub fn reload(
&self,
model_path: String,
mmproj_path: Option<String>,
n_ctx: u32,
sampling: SamplingParams,
fit_params: FitParams,
kv_cache_params: KvCacheParams,
checkpoint_params: CheckpointParams,
) -> Result<(), LoadError>
Reload the worker thread with a new model without destroying the backend.
This swaps the model in-place on the existing inference thread, avoiding the
LlamaBackend singleton re-initialization race that occurs when dropping and
recreating a Client.
§Errors
Returns LoadError::WorkerNotRunning if the inference worker is no
longer accepting commands, or any of the load-stage variants if the
new model fails to come up.
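A hot-swap sketch (placeholder paths; the parameter structs are again assumed to implement Default):

// Swap to a different model on the same worker thread. The call blocks
// until the new model is up or a LoadError is returned.
client.reload(
    "models/other-model.gguf".to_string(),
    None,                          // mmproj_path: no multimodal projector
    4096,
    SamplingParams::default(),
    FitParams::default(),
    KvCacheParams::default(),
    CheckpointParams::default(),
)?;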
Trait Implementations§
impl CompletionClient for Client
type CompletionModel = Model
fn completion_model(&self, model: impl Into<String>) -> Self::CompletionModel
fn agent(&self, model: impl Into<String>) -> AgentBuilder<Self::CompletionModel>
fn extractor<T>(
    &self,
    model: impl Into<String>,
) -> ExtractorBuilder<Self::CompletionModel, T>
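Through this impl the Client drops into Rig's usual high-level flow. A sketch, assuming Rig's standard AgentBuilder and Prompt APIs (import paths vary between rig versions, and the model name presumably serves only as a label here, since the GGUF file is chosen at construction time):

use rig::client::CompletionClient; // path may differ by rig version
use rig::completion::Prompt;

let agent = client
    .agent("local-gguf")                        // placeholder model label
    .preamble("You are a concise assistant.")
    .build();
let answer = agent.prompt("Hello!").await?;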
impl Drop for Client
fn drop(&mut self)
Tear down the worker thread synchronously.
Drop blocks until the worker thread has fully exited and the
LlamaModel / LlamaContext (and LlamaBackend device handles, plus
the multimodal projector when the mtmd feature is on) are released.
This is intentional: the caller almost always wants to allocate a
replacement Client immediately after dropping this one, and a
non-blocking drop would briefly hold 2× the model’s RAM/VRAM and risk
OOM. Client::reload reuses the same worker and avoids this whole
path; prefer it over drop-and-recreate when you can.
To keep the wait short even when a long generation is mid-flight,
Drop flips the shared cancel flag before signalling shutdown. The
worker polls the flag at every prompt-prefill chunk boundary and
every sampled token, so an in-flight Request returns within a
single decode step. The worst-case wait is therefore one decode step, not the rest of the generation.
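Conceptually, the polling side of that contract looks something like this sketch (names and structure are illustrative, not the crate's actual internals):

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

// Drop sets the flag; the worker's generation loop polls it at every
// prefill chunk boundary and every sampled token.
fn generate(cancel: &Arc<AtomicBool>, max_tokens: usize) {
    for _ in 0..max_tokens {
        if cancel.load(Ordering::Relaxed) {
            return; // bail within one decode step
        }
        // ... decode one chunk / sample one token ...
    }
}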
try_send(Shutdown) is best-effort: if the bounded command queue is
full at this instant, the Shutdown command isn’t enqueued — but the
in-flight request still bails on the cancel flag, and the worker’s
per-iteration cancel check at the top of its command loop also exits
the thread before pulling more queued commands.
Model clones outliving the Client keep the channel sender count
above zero; their send calls naturally fail with SendError once
the receiver is dropped on worker exit, so they don’t prevent
shutdown.