pub struct InferenceEngine {
pub provider: Arc<RwLock<Box<dyn ModelProvider>>>,
pub cached_model: Arc<RwLock<String>>,
pub cached_context: Arc<AtomicUsize>,
pub base_url: String,
pub species: String,
pub snark: u8,
pub kv_semaphore: Semaphore,
pub economics: Arc<Mutex<SessionEconomics>>,
pub worker_model: Option<String>,
pub gemma_native_formatting: Arc<AtomicBool>,
pub cancel_token: Arc<AtomicBool>,
}
Fields§
§provider: Arc<RwLock<Box<dyn ModelProvider>>>
§cached_model: Arc<RwLock<String>>
§cached_context: Arc<AtomicUsize>
§base_url: String
§species: String
§snark: u8
§kv_semaphore: Semaphore
§economics: Arc<Mutex<SessionEconomics>>
§worker_model: Option<String>
Optional model ID for worker-level tasks (Swarms / research).
§gemma_native_formatting: Arc<AtomicBool>
Opt-in Gemma-native request shaping. Off by default.
§cancel_token: Arc<AtomicBool>
Global cancellation token for hard-interrupting the inference stream.
Implementations§
Source§impl InferenceEngine
impl InferenceEngine
pub fn new( api_url: String, species: String, snark: u8, ) -> Result<Self, Box<dyn Error>>
pub fn set_gemma_native_formatting(&self, enabled: bool)
pub async fn health_check(&self) -> bool
pub async fn provider_name(&self) -> String
pub async fn get_loaded_model(&self) -> Option<String>
pub async fn get_embedding_model(&self) -> Option<String>
pub async fn load_model(&self, model_id: &str) -> Result<(), String>
pub async fn load_model_with_context( &self, model_id: &str, context_length: Option<usize>, ) -> Result<(), String>
pub async fn load_embedding_model(&self, model_id: &str) -> Result<(), String>
pub async fn list_provider_models( &self, kind: ProviderModelKind, loaded_only: bool, ) -> Result<Vec<String>, String>
pub async fn unload_model( &self, model_id: Option<&str>, all: bool, ) -> Result<String, String>
pub async fn unload_embedding_model( &self, model_id: Option<&str>, ) -> Result<String, String>
pub async fn prewarm(&self) -> Result<(), String>
pub async fn detect_context_length(&self) -> usize
pub async fn set_runtime_profile(&self, model: &str, context_length: usize)
pub async fn refresh_runtime_profile(&self) -> Option<(String, usize, bool)>
pub fn build_system_prompt( &self, snark: u8, chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, environment_summary: Option<&str>, mcp_tools: &[McpTool], ) -> String
pub fn build_system_prompt_legacy( &self, snark: u8, _chaos: u8, brief: bool, professional: bool, tools: &[ToolDefinition], reasoning_history: Option<&str>, environment_summary: Option<&str>, ) -> String
pub fn current_model(&self) -> String
pub fn current_context_length(&self) -> usize
pub fn is_compact_context_window(&self) -> bool
pub fn gemma_native_formatting_enabled(&self) -> bool
pub async fn call_with_tools( &self, messages: &[ChatMessage], tools: &[ToolDefinition], model_override: Option<&str>, ) -> Result<(Option<String>, Option<Vec<ToolCallResponse>>, Option<TokenUsage>, Option<String>), String>
Source
pub async fn stream_messages(
&self,
messages: &[ChatMessage],
tx: Sender<InferenceEvent>,
) -> Result<(), Box<dyn Error>>
pub async fn stream_messages( &self, messages: &[ChatMessage], tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>
Stream a conversation (no tools). Emits Token/Done/Error events.
Source
pub async fn stream_generation(
&self,
prompt: &str,
snark: u8,
chaos: u8,
brief: bool,
professional: bool,
tx: Sender<InferenceEvent>,
) -> Result<(), Box<dyn Error>>
pub async fn stream_generation( &self, prompt: &str, snark: u8, chaos: u8, brief: bool, professional: bool, tx: Sender<InferenceEvent>, ) -> Result<(), Box<dyn Error>>
Single-turn streaming (legacy helper used by startup sequence).
Source
pub async fn generate_task_worker(
&self,
prompt: &str,
professional: bool,
) -> Result<String, String>
pub async fn generate_task_worker( &self, prompt: &str, professional: bool, ) -> Result<String, String>
Runs a task using the worker_model if set, otherwise falls back to the main model.
pub async fn generate_task( &self, prompt: &str, professional: bool, ) -> Result<String, String>
pub async fn generate_task_with_temp( &self, prompt: &str, temp: f32, professional: bool, ) -> Result<String, String>
pub async fn generate_task_with_model( &self, prompt: &str, _temp: f32, professional: bool, model: &str, ) -> Result<String, String>
Source
pub fn snip_history(
&self,
turns: &[ChatMessage],
max_tokens_estimate: usize,
keep_recent: usize,
) -> Vec<ChatMessage>
pub fn snip_history( &self, turns: &[ChatMessage], max_tokens_estimate: usize, keep_recent: usize, ) -> Vec<ChatMessage>
Prune middle turns when context grows too large, keeping system + recent N.
Auto Trait Implementations§
impl !Freeze for InferenceEngine
impl !RefUnwindSafe for InferenceEngine
impl Send for InferenceEngine
impl Sync for InferenceEngine
impl Unpin for InferenceEngine
impl UnsafeUnpin for InferenceEngine
impl !UnwindSafe for InferenceEngine
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<S> FromSample<S> for S
impl<S> FromSample<S> for S
fn from_sample_(s: S) -> S
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more