use async_trait::async_trait;
use ferrum_types::{EngineConfig, InferenceRequest, InferenceResponse, Result, StreamChunk};
use futures::Stream;
use std::pin::Pin;
/// Base contract shared by the engine traits declared in this file.
///
/// Implementors must be `Send + Sync`. This trait only covers configuration
/// access, metrics, status/health reporting and shutdown; the inference entry
/// points themselves live on the sub-traits that extend it.
#[async_trait]
pub trait InferenceEngine: Send + Sync {
    /// Borrows the [`EngineConfig`] held by this engine.
    fn config(&self) -> &EngineConfig;

    /// Returns an owned [`ferrum_types::EngineMetrics`] value.
    ///
    /// This is a synchronous call, unlike [`status`](Self::status) and
    /// [`health_check`](Self::health_check).
    fn metrics(&self) -> ferrum_types::EngineMetrics;

    /// Asynchronously reports the engine's [`ferrum_types::EngineStatus`].
    async fn status(&self) -> ferrum_types::EngineStatus;

    /// Asynchronously reports the engine's [`ferrum_types::HealthStatus`].
    ///
    /// The return type is not a `Result`, so any failure has to be expressed
    /// through the returned `HealthStatus` value itself.
    async fn health_check(&self) -> ferrum_types::HealthStatus;

    /// Asks the engine to shut down.
    ///
    /// Takes `&self`, so it can be invoked through a shared handle.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn shutdown(&self) -> Result<()>;
}
/// Text-generation engine: an [`InferenceEngine`] that can answer an
/// [`InferenceRequest`] either in one piece or as a stream of chunks.
#[async_trait]
pub trait LlmInferenceEngine: InferenceEngine {
    /// Runs a single request and resolves to the complete
    /// [`InferenceResponse`].
    ///
    /// The request is taken by value.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn infer(&self, request: InferenceRequest) -> Result<InferenceResponse>;

    /// Runs a single request and resolves to a stream of [`StreamChunk`]s.
    ///
    /// The returned stream is a pinned, boxed, `Send` trait object. Each item
    /// is itself a [`Result`], so errors can surface both when the stream is
    /// created and while it is being polled.
    ///
    /// # Errors
    ///
    /// The outer [`Result`] is an error when the stream could not be
    /// produced at all.
    async fn infer_stream(
        &self,
        request: InferenceRequest,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>;
}
/// Embedding engine: an [`InferenceEngine`] that maps inputs to `f32` vectors.
#[async_trait]
pub trait EmbedEngine: InferenceEngine {
    /// Reports the embedding dimension as a `usize`.
    ///
    /// NOTE(review): presumably the length of the vectors returned by
    /// [`embed_text`](Self::embed_text) and
    /// [`embed_image`](Self::embed_image) — confirm with implementors.
    fn embedding_dim(&self) -> usize;

    /// Embeds the given text and resolves to a vector of `f32` values.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn embed_text(&self, text: &str) -> Result<Vec<f32>>;

    /// Embeds an image identified by `image` and resolves to a vector of
    /// `f32` values.
    ///
    /// NOTE(review): `image` is a plain `&str`; whether it is a file path, a
    /// URL or encoded image data is not visible from this declaration —
    /// confirm against implementors.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn embed_image(&self, image: &str) -> Result<Vec<f32>>;
}
/// Transcription engine: an [`InferenceEngine`] that turns audio input into a
/// `String`.
#[async_trait]
pub trait TranscribeEngine: InferenceEngine {
    /// Transcribes the input found at `path` and resolves to the resulting
    /// text.
    ///
    /// `language` is optional; how `None` is treated is left to the
    /// implementor.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn transcribe_file(
        &self,
        path: &str,
        language: Option<&str>,
    ) -> Result<String>;

    /// Transcribes an in-memory byte buffer and resolves to the resulting
    /// text.
    ///
    /// NOTE(review): the expected encoding/container of `data` is not visible
    /// from this declaration — confirm against implementors.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn transcribe_bytes(
        &self,
        data: &[u8],
        language: Option<&str>,
    ) -> Result<String>;
}
/// Text-to-speech engine: an [`InferenceEngine`] that turns text into `f32`
/// sample buffers.
#[async_trait]
pub trait TtsEngine: InferenceEngine {
    /// Reports the sample rate as a `u32`.
    ///
    /// NOTE(review): presumably in Hz and describing the output of
    /// [`synthesize_speech`](Self::synthesize_speech) — confirm with
    /// implementors.
    fn tts_sample_rate(&self) -> u32;

    /// Synthesizes speech for `text` and resolves to a list of `f32` buffers.
    ///
    /// `language` is optional; how `None` is treated is left to the
    /// implementor.
    ///
    /// NOTE(review): the nested `Vec<Vec<f32>>` together with `chunk_frames`
    /// suggests the audio is returned as a sequence of chunks sized by
    /// `chunk_frames`, but neither the unit of `chunk_frames` nor the layout
    /// of the inner buffers is visible from this declaration — confirm.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn synthesize_speech(
        &self,
        text: &str,
        language: Option<&str>,
        chunk_frames: usize,
    ) -> Result<Vec<Vec<f32>>>;
}
/// Optional extensions on top of [`LlmInferenceEngine`]: batching,
/// speculative inference, diagnostics, state transfer, warm-up and live
/// reconfiguration.
///
/// The methods are grouped by receiver. Those taking `&mut self` require
/// exclusive access to the engine and therefore cannot be called through a
/// shared handle (for example an `Arc`) without additional synchronization.
#[async_trait]
pub trait AdvancedInferenceEngine: LlmInferenceEngine {
    // ---- shared-access (`&self`) operations -------------------------------

    /// Runs several requests in one call.
    ///
    /// The result is two-layered: the outer [`Result`] covers the call as a
    /// whole, while the inner vector holds one [`Result`] per response.
    ///
    /// NOTE(review): presumably the inner vector has one entry per request,
    /// in request order — confirm with implementors.
    async fn infer_batch(
        &self,
        requests: Vec<InferenceRequest>,
    ) -> Result<Vec<Result<InferenceResponse>>>;

    /// Runs a single request using the supplied
    /// [`ferrum_types::SpeculationConfig`] and resolves to the complete
    /// [`InferenceResponse`].
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn infer_speculative(
        &self,
        request: InferenceRequest,
        speculation_config: ferrum_types::SpeculationConfig,
    ) -> Result<InferenceResponse>;

    /// Asynchronously produces a [`ferrum_types::DiagnosticsReport`].
    ///
    /// The return type is not a `Result`, so problems have to be expressed
    /// inside the report itself.
    async fn diagnostics(&self) -> ferrum_types::DiagnosticsReport;

    /// Exports the engine's state as a [`ferrum_types::EngineState`] value.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn export_state(&self) -> Result<ferrum_types::EngineState>;

    // ---- exclusive-access (`&mut self`) operations ------------------------

    /// Loads a [`ferrum_types::EngineState`] into this engine.
    ///
    /// The state is taken by value.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn import_state(&mut self, state: ferrum_types::EngineState) -> Result<()>;

    /// Warms the engine up with the given requests and resolves to a
    /// [`ferrum_types::WarmupResult`].
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn warmup(
        &mut self,
        warmup_requests: Vec<InferenceRequest>,
    ) -> Result<ferrum_types::WarmupResult>;

    /// Applies a new [`EngineConfig`] to this engine.
    ///
    /// The configuration is taken by value.
    ///
    /// # Errors
    ///
    /// Returns the crate-level error carried by [`Result`] when the
    /// implementor reports a failure.
    async fn reconfigure(&mut self, config: EngineConfig) -> Result<()>;
}
// Public aliases that make selected `ferrum_types` items nameable from this
// module. Each alias denotes exactly the same type as its target.

/// Alias for [`ferrum_types::SpeculationConfig`].
pub type SpeculationConfig = ferrum_types::SpeculationConfig;
/// Alias for [`ferrum_types::HardwareConstraints`].
pub type HardwareConstraints = ferrum_types::HardwareConstraints;
/// Alias for [`ferrum_types::RequestCharacteristics`].
pub type RequestCharacteristics = ferrum_types::RequestCharacteristics;
/// Alias for [`ferrum_types::LatencyRequirements`].
pub type LatencyRequirements = ferrum_types::LatencyRequirements;