// ferrum_interfaces/engine.rs
use async_trait::async_trait;
use ferrum_types::{EngineConfig, InferenceRequest, InferenceResponse, Result, StreamChunk};
use futures::Stream;
use std::pin::Pin;

12#[async_trait]
14pub trait InferenceEngine: Send + Sync {
15 async fn infer(&self, request: InferenceRequest) -> Result<InferenceResponse>;
17
18 async fn infer_stream(
20 &self,
21 request: InferenceRequest,
22 ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>;
23
24 async fn status(&self) -> ferrum_types::EngineStatus;
26
27 async fn shutdown(&self) -> Result<()>;
29
30 fn config(&self) -> &EngineConfig;
32
33 fn metrics(&self) -> ferrum_types::EngineMetrics;
35
36 async fn health_check(&self) -> ferrum_types::HealthStatus;
38}
39
40#[async_trait]
42pub trait AdvancedInferenceEngine: InferenceEngine {
43 async fn infer_batch(
45 &self,
46 requests: Vec<InferenceRequest>,
47 ) -> Result<Vec<Result<InferenceResponse>>>;
48
49 async fn infer_speculative(
51 &self,
52 request: InferenceRequest,
53 speculation_config: ferrum_types::SpeculationConfig,
54 ) -> Result<InferenceResponse>;
55
56 async fn warmup(
58 &mut self,
59 warmup_requests: Vec<InferenceRequest>,
60 ) -> Result<ferrum_types::WarmupResult>;
61
62 async fn reconfigure(&mut self, config: EngineConfig) -> Result<()>;
64
65 async fn diagnostics(&self) -> ferrum_types::DiagnosticsReport;
67
68 async fn export_state(&self) -> Result<ferrum_types::EngineState>;
70
71 async fn import_state(&mut self, state: ferrum_types::EngineState) -> Result<()>;
73}
74
75pub type SpeculationConfig = ferrum_types::SpeculationConfig;
77
78pub type HardwareConstraints = ferrum_types::HardwareConstraints;
80
81pub type RequestCharacteristics = ferrum_types::RequestCharacteristics;
83
84pub type LatencyRequirements = ferrum_types::LatencyRequirements;