ferrum_interfaces/
engine.rs1use async_trait::async_trait;
8use ferrum_types::{EngineConfig, InferenceRequest, InferenceResponse, Result, StreamChunk};
9use futures::Stream;
10use std::pin::Pin;
11
12#[async_trait]
14pub trait InferenceEngine: Send + Sync {
15 async fn infer(&self, request: InferenceRequest) -> Result<InferenceResponse>;
17
18 async fn infer_stream(
20 &self,
21 request: InferenceRequest,
22 ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>;
23
24 async fn status(&self) -> ferrum_types::EngineStatus;
26
27 async fn shutdown(&self) -> Result<()>;
29
30 fn config(&self) -> &EngineConfig;
32
33 fn metrics(&self) -> ferrum_types::EngineMetrics;
35
36 async fn health_check(&self) -> ferrum_types::HealthStatus;
38
39 async fn embed_text(&self, _text: &str) -> Result<Vec<f32>> {
41 Err(ferrum_types::FerrumError::model(
42 "This engine does not support text embedding",
43 ))
44 }
45
46 async fn embed_image(&self, _image: &str) -> Result<Vec<f32>> {
48 Err(ferrum_types::FerrumError::model(
49 "This engine does not support image embedding",
50 ))
51 }
52
53 fn embedding_dim(&self) -> usize {
55 0
56 }
57
58 async fn transcribe_file(&self, _path: &str, _language: Option<&str>) -> Result<String> {
60 Err(ferrum_types::FerrumError::model(
61 "This engine does not support audio transcription",
62 ))
63 }
64
65 async fn transcribe_bytes(&self, _data: &[u8], _language: Option<&str>) -> Result<String> {
67 Err(ferrum_types::FerrumError::model(
68 "This engine does not support audio transcription",
69 ))
70 }
71}
72
73#[async_trait]
75pub trait AdvancedInferenceEngine: InferenceEngine {
76 async fn infer_batch(
78 &self,
79 requests: Vec<InferenceRequest>,
80 ) -> Result<Vec<Result<InferenceResponse>>>;
81
82 async fn infer_speculative(
84 &self,
85 request: InferenceRequest,
86 speculation_config: ferrum_types::SpeculationConfig,
87 ) -> Result<InferenceResponse>;
88
89 async fn warmup(
91 &mut self,
92 warmup_requests: Vec<InferenceRequest>,
93 ) -> Result<ferrum_types::WarmupResult>;
94
95 async fn reconfigure(&mut self, config: EngineConfig) -> Result<()>;
97
98 async fn diagnostics(&self) -> ferrum_types::DiagnosticsReport;
100
101 async fn export_state(&self) -> Result<ferrum_types::EngineState>;
103
104 async fn import_state(&mut self, state: ferrum_types::EngineState) -> Result<()>;
106}
107
108pub type SpeculationConfig = ferrum_types::SpeculationConfig;
110
111pub type HardwareConstraints = ferrum_types::HardwareConstraints;
113
114pub type RequestCharacteristics = ferrum_types::RequestCharacteristics;
116
117pub type LatencyRequirements = ferrum_types::LatencyRequirements;