ferrum_interfaces/
engine.rs1use async_trait::async_trait;
8use ferrum_types::{EngineConfig, InferenceRequest, InferenceResponse, Result, StreamChunk};
9use futures::Stream;
10use std::pin::Pin;
11
12#[async_trait]
14pub trait InferenceEngine: Send + Sync {
15 async fn infer(&self, request: InferenceRequest) -> Result<InferenceResponse>;
17
18 async fn infer_stream(
20 &self,
21 request: InferenceRequest,
22 ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>;
23
24 async fn status(&self) -> ferrum_types::EngineStatus;
26
27 async fn shutdown(&self) -> Result<()>;
29
30 fn config(&self) -> &EngineConfig;
32
33 fn metrics(&self) -> ferrum_types::EngineMetrics;
35
36 async fn health_check(&self) -> ferrum_types::HealthStatus;
38
39 async fn embed_text(&self, _text: &str) -> Result<Vec<f32>> {
41 Err(ferrum_types::FerrumError::model(
42 "This engine does not support text embedding",
43 ))
44 }
45
46 async fn embed_image(&self, _image: &str) -> Result<Vec<f32>> {
48 Err(ferrum_types::FerrumError::model(
49 "This engine does not support image embedding",
50 ))
51 }
52
53 fn embedding_dim(&self) -> usize {
55 0
56 }
57}
58
59#[async_trait]
61pub trait AdvancedInferenceEngine: InferenceEngine {
62 async fn infer_batch(
64 &self,
65 requests: Vec<InferenceRequest>,
66 ) -> Result<Vec<Result<InferenceResponse>>>;
67
68 async fn infer_speculative(
70 &self,
71 request: InferenceRequest,
72 speculation_config: ferrum_types::SpeculationConfig,
73 ) -> Result<InferenceResponse>;
74
75 async fn warmup(
77 &mut self,
78 warmup_requests: Vec<InferenceRequest>,
79 ) -> Result<ferrum_types::WarmupResult>;
80
81 async fn reconfigure(&mut self, config: EngineConfig) -> Result<()>;
83
84 async fn diagnostics(&self) -> ferrum_types::DiagnosticsReport;
86
87 async fn export_state(&self) -> Result<ferrum_types::EngineState>;
89
90 async fn import_state(&mut self, state: ferrum_types::EngineState) -> Result<()>;
92}
93
94pub type SpeculationConfig = ferrum_types::SpeculationConfig;
96
97pub type HardwareConstraints = ferrum_types::HardwareConstraints;
99
100pub type RequestCharacteristics = ferrum_types::RequestCharacteristics;
102
103pub type LatencyRequirements = ferrum_types::LatencyRequirements;