Struct InferenceEngine

Source

pub struct InferenceEngine {
    pub client: Client,
    pub api_url: String,
    pub base_url: String,
    pub species: String,
    pub snark: u8,
    pub kv_semaphore: Semaphore,
    pub model: RwLock<String>,
    pub context_length: AtomicUsize,
    pub economics: Arc<Mutex<SessionEconomics>>,
    pub worker_model: Option<String>,
    pub gemma_native_formatting: Arc<AtomicBool>,
    pub cancel_token: Arc<AtomicBool>,
}

Fields§

§client: Client

§api_url: String

§base_url: String

Root URL of the LLM provider (e.g. http://localhost:1234). All non-completions endpoints (models list, health, embeddings) are derived from this.

§species: String

§snark: u8

§kv_semaphore: Semaphore

§model: RwLock<String>

The model ID currently loaded in LM Studio (auto-detected on boot).

§context_length: AtomicUsize

Context window length in tokens (auto-detected from LM Studio, default 32768).

§economics: Arc<Mutex<SessionEconomics>>

§worker_model: Option<String>

Optional model ID for worker-level tasks (Swarms / research).

§gemma_native_formatting: Arc<AtomicBool>

Opt-in Gemma-native request shaping. Off by default.

§cancel_token: Arc<AtomicBool>

Global cancellation token for hard-interrupting the inference stream.

Implementations§

Source §

impl InferenceEngine

Source

pub fn new( api_url: String, species: String, snark: u8, ) -> Result<Self, Box<dyn Error>>

Source

pub async fn health_check(&self) -> bool

Returns true if LM Studio is reachable.

Source

pub async fn get_loaded_model(&self) -> Option<String>

Query /api/v0/models and return the first loaded chat model id. Uses /api/v0/models (not /v1/models) because the OpenAI-compat endpoint omits the type field, making it impossible to distinguish embedding models from chat models. Falls back to /v1/models with a name heuristic if /api/v0/models is unavailable. Returns Some("") when LM Studio is reachable but no chat model is loaded so callers can distinguish “offline” (None) from “no chat model” (Some("")).

Source

pub async fn get_embedding_model(&self) -> Option<String>

Returns the ID of the first loaded embedding model, if any. Uses /api/v0/models which includes type and state fields. The OpenAI-compat /v1/models endpoint omits type so cannot be used here. Accepts any non-empty state (not just “loaded”) to handle LM Studio variants where the embed model may report a different state string at startup.

Source

pub async fn detect_context_length(&self) -> usize

Detect the loaded model’s context window size. Tries LM Studio’s /api/v0/models endpoint first and prefers the loaded model’s live loaded_context_length, then falls back to older context_length / max_context_length style fields. Falls back to a heuristic from the model name, then 32K.

Source

pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)>

Source

pub fn build_system_prompt( &self, snark: u8, chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, mcp_tools: &[McpTool], ) -> String

Source

pub fn build_system_prompt_legacy( &self, snark: u8, _chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, ) -> String

Source

pub async fn call_with_tools( &self, messages: &[ChatMessage], tools: &[ToolDefinition], model_override: Option<&str>, ) -> Result<(Option<String>, Option<Vec<ToolCallResponse>>, Option<TokenUsage>, Option<String>), String>

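Send messages to the model. On success returns a 4-tuple whose first two elements are (text_content, tool_calls); exactly one of those two will be Some on a successful response. The third element is the optional TokenUsage for the call, and the fourth is an additional optional String whose meaning is not described on this page. On failure returns Err carrying a String.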

Source

pub async fn stream_messages( &self, messages: &[ChatMessage], tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>

Stream a conversation (no tools). Emits Token/Done/Error events.

Source

pub async fn stream_generation( &self, prompt: &str, snark: u8, chaos: u8, brief: bool, professional: bool, tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>

Single-turn streaming (legacy helper used by startup sequence).

Source

pub async fn generate_task_worker( &self, prompt: &str, professional: bool, ) -> Result<String, String>

Runs a task using the worker_model if set, otherwise falls back to the main model.

Source

pub async fn generate_task( &self, prompt: &str, professional: bool, ) -> Result<String, String>

Source

pub async fn generate_task_with_temp( &self, prompt: &str, temp: f32, professional: bool, ) -> Result<String, String>

Source

pub async fn generate_task_with_model( &self, prompt: &str, temp: f32, professional: bool, model: &str, ) -> Result<String, String>

Source

pub fn snip_history( &self, turns: &[ChatMessage], max_tokens_estimate: usize, keep_recent: usize, ) -> Vec<ChatMessage>

Prune middle turns when context grows too large, keeping system + recent N.

Auto Trait Implementations§

§

impl !Freeze for InferenceEngine

§

impl !RefUnwindSafe for InferenceEngine

§

impl Send for InferenceEngine

§

impl Sync for InferenceEngine

§

impl Unpin for InferenceEngine

§

impl UnsafeUnpin for InferenceEngine

§

impl !UnwindSafe for InferenceEngine

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<S> FromSample<S> for S

Source §

fn from_sample_(s: S) -> S

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> IntoEither for T

Source §

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source §

impl<F, T> IntoSample<T> for F
where T: FromSample<F>,

Source §

fn into_sample(self) -> T

Source §

impl<T> PolicyExt for T
where T: ?Sized,

Source §

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more

Source §

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more

Source §

impl<T> Same for T

Source §

type Output = T

Should always be Self

Source §

impl<T, U> ToSample<U> for T
where U: FromSample<T>,

Source §

fn to_sample_(self) -> U

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

fn vzip(self) -> V

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

Struct InferenceEngine Copy item path

Fields§

Implementations§

impl InferenceEngine

pub fn new( api_url: String, species: String, snark: u8, ) -> Result<Self, Box<dyn Error>>

pub fn set_gemma_native_formatting(&self, enabled: bool)

pub fn gemma_native_formatting_enabled(&self) -> bool

pub fn current_model(&self) -> String

pub fn current_context_length(&self) -> usize

pub fn set_runtime_profile(&self, model: &str, context_length: usize)

pub async fn health_check(&self) -> bool

pub async fn get_loaded_model(&self) -> Option<String>

pub async fn get_embedding_model(&self) -> Option<String>

pub async fn detect_context_length(&self) -> usize

pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)>

pub fn build_system_prompt( &self, snark: u8, chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, mcp_tools: &[McpTool], ) -> String

pub fn build_system_prompt_legacy( &self, snark: u8, _chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, ) -> String

pub async fn call_with_tools( &self, messages: &[ChatMessage], tools: &[ToolDefinition], model_override: Option<&str>, ) -> Result<(Option<String>, Option<Vec<ToolCallResponse>>, Option<TokenUsage>, Option<String>), String>

pub async fn stream_messages( &self, messages: &[ChatMessage], tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>

pub async fn stream_generation( &self, prompt: &str, snark: u8, chaos: u8, brief: bool, professional: bool, tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>

pub async fn generate_task_worker( &self, prompt: &str, professional: bool, ) -> Result<String, String>

pub async fn generate_task( &self, prompt: &str, professional: bool, ) -> Result<String, String>

pub async fn generate_task_with_temp( &self, prompt: &str, temp: f32, professional: bool, ) -> Result<String, String>

pub async fn generate_task_with_model( &self, prompt: &str, temp: f32, professional: bool, model: &str, ) -> Result<String, String>

pub fn snip_history( &self, turns: &[ChatMessage], max_tokens_estimate: usize, keep_recent: usize, ) -> Vec<ChatMessage>

Auto Trait Implementations§

impl !Freeze for InferenceEngine

impl !RefUnwindSafe for InferenceEngine

impl Send for InferenceEngine

impl Sync for InferenceEngine

impl Unpin for InferenceEngine

impl UnsafeUnpin for InferenceEngine

impl !UnwindSafe for InferenceEngine

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<S> FromSample<S> for S

fn from_sample_(s: S) -> S

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<F, T> IntoSample<T> for F where T: FromSample<F>,

fn into_sample(self) -> T

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

type Output = T

impl<T, U> ToSample<U> for T where U: FromSample<T>,

fn to_sample_(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<S, T> Duplex<S> for T where T: FromSample<S> + ToSample<S>,

Struct InferenceEngine

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<F, T> IntoSample<T> for F
where T: FromSample<F>,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T, U> ToSample<U> for T
where U: FromSample<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<S, T> Duplex<S> for T
where T: FromSample<S> + ToSample<S>,