Skip to main content

InferenceEngine

Struct InferenceEngine 

Source
pub struct InferenceEngine {
    pub client: Client,
    pub api_url: String,
    pub base_url: String,
    pub species: String,
    pub snark: u8,
    pub kv_semaphore: Semaphore,
    pub model: RwLock<String>,
    pub context_length: AtomicUsize,
    pub economics: Arc<Mutex<SessionEconomics>>,
    pub worker_model: Option<String>,
    pub gemma_native_formatting: Arc<AtomicBool>,
    pub cancel_token: Arc<AtomicBool>,
}

Fields§

§client: Client

§api_url: String

§base_url: String

Root URL of the LLM provider (e.g. http://localhost:1234). All non-completions endpoints (models list, health, embeddings) are derived from this.

§species: String

§snark: u8

§kv_semaphore: Semaphore

§model: RwLock<String>

The model ID currently loaded in LM Studio (auto-detected on boot).

§context_length: AtomicUsize

Context window length in tokens (auto-detected from LM Studio, default 32768).

§economics: Arc<Mutex<SessionEconomics>>

§worker_model: Option<String>

Optional model ID for worker-level tasks (Swarms / research).

§gemma_native_formatting: Arc<AtomicBool>

Opt-in Gemma-native request shaping. Off by default.

§cancel_token: Arc<AtomicBool>

Global cancellation token for hard-interrupting the inference stream.

Implementations§

Source§

impl InferenceEngine

Source

pub fn new( api_url: String, species: String, snark: u8, ) -> Result<Self, Box<dyn Error>>

Source

pub fn set_gemma_native_formatting(&self, enabled: bool)

Source

pub fn gemma_native_formatting_enabled(&self) -> bool

Source

pub fn current_model(&self) -> String

Source

pub fn current_context_length(&self) -> usize

Source

pub fn set_runtime_profile(&self, model: &str, context_length: usize)

Source

pub async fn health_check(&self) -> bool

Returns true if LM Studio is reachable.

Source

pub async fn get_loaded_model(&self) -> Option<String>

Query /api/v0/models and return the first loaded chat model id. Uses /api/v0/models (not /v1/models) because the OpenAI-compat endpoint omits the type field, making it impossible to distinguish embedding models from chat models. Falls back to /v1/models with a name heuristic if /api/v0/models is unavailable. Returns Some("") when LM Studio is reachable but no chat model is loaded, so callers can distinguish “offline” (None) from “no chat model” (Some("")).

Source

pub async fn get_embedding_model(&self) -> Option<String>

Returns the ID of the first loaded embedding model, if any. Uses /api/v0/models which includes type and state fields. The OpenAI-compat /v1/models endpoint omits type so cannot be used here. Accepts any non-empty state (not just “loaded”) to handle LM Studio variants where the embed model may report a different state string at startup.

Source

pub async fn detect_context_length(&self) -> usize

Detect the loaded model’s context window size. Tries LM Studio’s /api/v0/models endpoint first and prefers the loaded model’s live loaded_context_length, then falls back to older context_length / max_context_length style fields. Falls back to a heuristic from the model name, then 32K.

Source

pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)>

Source

pub fn build_system_prompt( &self, snark: u8, chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, mcp_tools: &[McpTool], ) -> String

Source

pub fn build_system_prompt_legacy( &self, snark: u8, _chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, ) -> String

Source

pub async fn call_with_tools( &self, messages: &[ChatMessage], tools: &[ToolDefinition], model_override: Option<&str>, ) -> Result<(Option<String>, Option<Vec<ToolCallResponse>>, Option<TokenUsage>, Option<String>), String>

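Send messages to the model. Returns a tuple whose first two elements are (text_content, tool_calls); exactly one of those two will be Some on a successful response. The tuple additionally carries an optional TokenUsage and a further optional String, as shown in the signature.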

Source

pub async fn stream_messages( &self, messages: &[ChatMessage], tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>

Stream a conversation (no tools). Emits Token/Done/Error events.

Source

pub async fn stream_generation( &self, prompt: &str, snark: u8, chaos: u8, brief: bool, professional: bool, tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>

Single-turn streaming (legacy helper used by startup sequence).

Source

pub async fn generate_task_worker( &self, prompt: &str, professional: bool, ) -> Result<String, String>

Runs a task using the worker_model if set, otherwise falls back to the main model.

Source

pub async fn generate_task( &self, prompt: &str, professional: bool, ) -> Result<String, String>

Source

pub async fn generate_task_with_temp( &self, prompt: &str, temp: f32, professional: bool, ) -> Result<String, String>

Source

pub async fn generate_task_with_model( &self, prompt: &str, temp: f32, professional: bool, model: &str, ) -> Result<String, String>

Source

pub fn snip_history( &self, turns: &[ChatMessage], max_tokens_estimate: usize, keep_recent: usize, ) -> Vec<ChatMessage>

Prune middle turns when context grows too large, keeping system + recent N.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<S> FromSample<S> for S

Source§

fn from_sample_(s: S) -> S

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<F, T> IntoSample<T> for F
where T: FromSample<F>,

Source§

fn into_sample(self) -> T

Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> ToSample<U> for T
where U: FromSample<T>,

Source§

fn to_sample_(self) -> U

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<S, T> Duplex<S> for T
where T: FromSample<S> + ToSample<S>,