ferrum_interfaces/
engine.rs1use async_trait::async_trait;
8use ferrum_types::{EngineConfig, InferenceRequest, InferenceResponse, Result, StreamChunk};
9use futures::Stream;
10use std::pin::Pin;
11
12#[async_trait]
14pub trait InferenceEngine: Send + Sync {
15 async fn infer(&self, request: InferenceRequest) -> Result<InferenceResponse>;
17
18 async fn infer_stream(
20 &self,
21 request: InferenceRequest,
22 ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>;
23
24 async fn status(&self) -> ferrum_types::EngineStatus;
26
27 async fn shutdown(&self) -> Result<()>;
29
30 fn config(&self) -> &EngineConfig;
32
33 fn metrics(&self) -> ferrum_types::EngineMetrics;
35
36 async fn health_check(&self) -> ferrum_types::HealthStatus;
38
39 async fn embed_text(&self, _text: &str) -> Result<Vec<f32>> {
41 Err(ferrum_types::FerrumError::model(
42 "This engine does not support text embedding",
43 ))
44 }
45
46 async fn embed_image(&self, _image: &str) -> Result<Vec<f32>> {
48 Err(ferrum_types::FerrumError::model(
49 "This engine does not support image embedding",
50 ))
51 }
52
53 fn embedding_dim(&self) -> usize {
55 0
56 }
57
58 async fn transcribe_file(&self, _path: &str, _language: Option<&str>) -> Result<String> {
60 Err(ferrum_types::FerrumError::model(
61 "This engine does not support audio transcription",
62 ))
63 }
64
65 async fn transcribe_bytes(&self, _data: &[u8], _language: Option<&str>) -> Result<String> {
67 Err(ferrum_types::FerrumError::model(
68 "This engine does not support audio transcription",
69 ))
70 }
71
72 async fn synthesize_speech(
76 &self,
77 _text: &str,
78 _language: Option<&str>,
79 _chunk_frames: usize,
80 ) -> Result<Vec<Vec<f32>>> {
81 Err(ferrum_types::FerrumError::model(
82 "This engine does not support speech synthesis",
83 ))
84 }
85
86 fn tts_sample_rate(&self) -> u32 {
88 24000
89 }
90}
91
92#[async_trait]
94pub trait AdvancedInferenceEngine: InferenceEngine {
95 async fn infer_batch(
97 &self,
98 requests: Vec<InferenceRequest>,
99 ) -> Result<Vec<Result<InferenceResponse>>>;
100
101 async fn infer_speculative(
103 &self,
104 request: InferenceRequest,
105 speculation_config: ferrum_types::SpeculationConfig,
106 ) -> Result<InferenceResponse>;
107
108 async fn warmup(
110 &mut self,
111 warmup_requests: Vec<InferenceRequest>,
112 ) -> Result<ferrum_types::WarmupResult>;
113
114 async fn reconfigure(&mut self, config: EngineConfig) -> Result<()>;
116
117 async fn diagnostics(&self) -> ferrum_types::DiagnosticsReport;
119
120 async fn export_state(&self) -> Result<ferrum_types::EngineState>;
122
123 async fn import_state(&mut self, state: ferrum_types::EngineState) -> Result<()>;
125}
126
127pub type SpeculationConfig = ferrum_types::SpeculationConfig;
129
130pub type HardwareConstraints = ferrum_types::HardwareConstraints;
132
133pub type RequestCharacteristics = ferrum_types::RequestCharacteristics;
135
136pub type LatencyRequirements = ferrum_types::LatencyRequirements;