// ferrum_interfaces/engine.rs

//! Inference engine interface with streaming and batch support
//!
//! This module provides the top-level inference engine interface that
//! orchestrates all other components: tokenizer, model executor, scheduler,
//! and sampler.

use std::pin::Pin;

use async_trait::async_trait;
use ferrum_types::{EngineConfig, InferenceRequest, InferenceResponse, Result, StreamChunk};
use futures::Stream;

/// Core inference engine trait.
///
/// Implementors orchestrate the full request lifecycle (tokenization,
/// scheduling, model execution, sampling) behind a single request/response
/// surface. Multimodal hooks — text/image embedding, audio transcription, and
/// speech synthesis — ship with default "not supported" implementations, so a
/// text-only engine only needs to implement the required methods.
#[async_trait]
pub trait InferenceEngine: Send + Sync {
    /// Execute a single inference request and return the complete response.
    async fn infer(&self, request: InferenceRequest) -> Result<InferenceResponse>;

    /// Execute a streaming inference request.
    ///
    /// Returns a boxed stream of incremental [`StreamChunk`]s. Each item is
    /// itself a `Result`, so mid-stream failures can be surfaced to the
    /// consumer without tearing down the whole stream type.
    async fn infer_stream(
        &self,
        request: InferenceRequest,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>>;

    /// Get current engine status.
    async fn status(&self) -> ferrum_types::EngineStatus;

    /// Shutdown engine gracefully.
    async fn shutdown(&self) -> Result<()>;

    /// Get engine configuration.
    fn config(&self) -> &EngineConfig;

    /// Get engine metrics (snapshot; synchronous, unlike `status`).
    fn metrics(&self) -> ferrum_types::EngineMetrics;

    /// Health check.
    async fn health_check(&self) -> ferrum_types::HealthStatus;

    /// Embed raw text string → float vector (engine handles tokenization).
    ///
    /// Default: returns a "not supported" model error; override in engines
    /// that expose an embedding model.
    async fn embed_text(&self, _text: &str) -> Result<Vec<f32>> {
        Err(ferrum_types::FerrumError::model(
            "This engine does not support text embedding",
        ))
    }

    /// Embed image (file path or base64) → float vector. Default: not supported.
    async fn embed_image(&self, _image: &str) -> Result<Vec<f32>> {
        Err(ferrum_types::FerrumError::model(
            "This engine does not support image embedding",
        ))
    }

    /// Get embedding dimension. Default: 0 (not an embedding model).
    ///
    /// Callers can use a zero return as the "embeddings unavailable" signal
    /// without having to probe `embed_text` for an error.
    fn embedding_dim(&self) -> usize {
        0
    }

    /// Transcribe audio file → text. Default: not supported.
    ///
    /// `_language` is an optional language hint; `None` presumably means
    /// auto-detect — confirm with concrete implementations.
    async fn transcribe_file(&self, _path: &str, _language: Option<&str>) -> Result<String> {
        Err(ferrum_types::FerrumError::model(
            "This engine does not support audio transcription",
        ))
    }

    /// Transcribe audio bytes (WAV) → text. Default: not supported.
    async fn transcribe_bytes(&self, _data: &[u8], _language: Option<&str>) -> Result<String> {
        Err(ferrum_types::FerrumError::model(
            "This engine does not support audio transcription",
        ))
    }

    /// Synthesize speech → PCM audio chunks (streaming).
    ///
    /// Returns one `Vec<f32>` of PCM samples per chunk, ordered by chunk
    /// index (the index is implicit in the outer `Vec` position);
    /// `_chunk_frames` controls the per-chunk frame count. Sample rate is
    /// reported by [`Self::tts_sample_rate`]. Default: not supported.
    async fn synthesize_speech(
        &self,
        _text: &str,
        _language: Option<&str>,
        _chunk_frames: usize,
    ) -> Result<Vec<Vec<f32>>> {
        Err(ferrum_types::FerrumError::model(
            "This engine does not support speech synthesis",
        ))
    }

    /// Get TTS sample rate in Hz (default 24000).
    fn tts_sample_rate(&self) -> u32 {
        24000
    }
}

/// Advanced engine capabilities.
///
/// Extends [`InferenceEngine`] with batch and speculative execution, warmup,
/// runtime reconfiguration, and debugging facilities. Methods that mutate
/// engine-wide state (`warmup`, `reconfigure`, `import_state`) take
/// `&mut self`, so they require exclusive access to the engine.
#[async_trait]
pub trait AdvancedInferenceEngine: InferenceEngine {
    /// Execute batch inference.
    ///
    /// Returns one `Result` per request, so a single failing request does not
    /// fail the whole batch. Results presumably correspond to `requests` by
    /// position — confirm against implementations.
    async fn infer_batch(
        &self,
        requests: Vec<InferenceRequest>,
    ) -> Result<Vec<Result<InferenceResponse>>>;

    /// Execute speculative inference (speculative decoding) for a single
    /// request, using the supplied speculation configuration.
    async fn infer_speculative(
        &self,
        request: InferenceRequest,
        speculation_config: ferrum_types::SpeculationConfig,
    ) -> Result<InferenceResponse>;

    /// Warm up engine with sample requests (e.g. to prime caches/kernels
    /// before serving traffic), returning a summary of the warmup run.
    async fn warmup(
        &mut self,
        warmup_requests: Vec<InferenceRequest>,
    ) -> Result<ferrum_types::WarmupResult>;

    /// Configure engine at runtime, replacing the current [`EngineConfig`].
    async fn reconfigure(&mut self, config: EngineConfig) -> Result<()>;

    /// Get detailed diagnostics.
    async fn diagnostics(&self) -> ferrum_types::DiagnosticsReport;

    /// Export engine state for debugging.
    async fn export_state(&self) -> Result<ferrum_types::EngineState>;

    /// Import engine state for debugging/testing. Counterpart to
    /// [`Self::export_state`].
    async fn import_state(&mut self, state: ferrum_types::EngineState) -> Result<()>;
}

/// Speculation configuration for speculative decoding.
///
/// These aliases re-export `ferrum_types` definitions so downstream crates
/// can name them via this interface crate without depending on
/// `ferrum_types` directly.
pub type SpeculationConfig = ferrum_types::SpeculationConfig;

/// Hardware constraints alias.
pub type HardwareConstraints = ferrum_types::HardwareConstraints;

/// Request characteristics alias.
pub type RequestCharacteristics = ferrum_types::RequestCharacteristics;

/// Latency requirements alias.
pub type LatencyRequirements = ferrum_types::LatencyRequirements;