Skip to main content

studio_worker/engine/
mod.rs

1//! Pluggable inference engines, generalised to all task kinds.
2//!
3//! The `synthetic` engine produces real, decodable bytes for every kind
4//! and is the default — it's what unattended CI exercises end-to-end.
5//!
6//! Real high-performance engines (llama.cpp, whisper.cpp, candle SD,
7//! Piper, ffmpeg) live behind cargo features so the default build stays
8//! small and the CI matrix stays fast.  See the feature notes per
9//! implementation block below.
10use crate::config::Config;
11use crate::types::*;
12use anyhow::Result;
13use image::{ImageBuffer, Rgb, RgbImage};
14use sha2::{Digest, Sha256};
15use std::collections::BTreeMap;
16use std::io::Cursor;
17use std::time::Instant;
18use tracing::{debug, info, warn};
19
/// `tracing` target attached to every event the synthetic engine emits.
/// Kept stable so operators can narrow logs with
/// `RUST_LOG=studio_worker::engine::synthetic=debug`.
const TRACE_TARGET_SYNTHETIC: &str = "studio_worker::engine::synthetic";

/// `tracing` target for build-time (engine roster) events.  Kept stable
/// so operators can narrow logs with `RUST_LOG=studio_worker::engine=info`.
const TRACE_TARGET_BUILD: &str = "studio_worker::engine";
27
28/// Emit a one-line breadcrumb naming the backends this worker will
29/// route across.  Lets an operator confirm from the logs which engines
30/// actually registered — e.g. whether the sdcpp backend self-registered
31/// or was skipped for a missing `sd-cli` — instead of inferring it from
32/// the advertised model list.  Split out from [`build`] so the
33/// breadcrumb's shape is unit-tested against a controlled roster.
34fn log_engine_roster(engines: &[Box<dyn Engine>]) {
35    let names: Vec<&str> = engines.iter().map(|e| e.name()).collect();
36    info!(
37        target: TRACE_TARGET_BUILD,
38        op = "build",
39        engine_count = names.len(),
40        engines = %names.join(","),
41        "engine roster assembled"
42    );
43}
44
45/// What a single engine is able to do.
46#[derive(Debug, Clone, Default)]
47pub struct EngineCapabilities {
48    /// Task kinds the engine can handle, with their per-kind supported
49    /// model ids.
50    pub supported_models_per_kind: BTreeMap<TaskKind, Vec<String>>,
51}
52
53impl EngineCapabilities {
54    pub fn supports(&self, kind: TaskKind, model: &str) -> bool {
55        self.supported_models_per_kind
56            .get(&kind)
57            .map(|ms| ms.iter().any(|m| m == model))
58            .unwrap_or(false)
59    }
60
61    pub fn kinds(&self) -> Vec<TaskKind> {
62        self.supported_models_per_kind.keys().copied().collect()
63    }
64
65    pub fn flat_models(&self) -> Vec<String> {
66        self.supported_models_per_kind
67            .values()
68            .flat_map(|ms| ms.iter().cloned())
69            .collect()
70    }
71}
72
73#[cfg(feature = "image-candle")]
74pub mod candle_image;
75pub mod download;
76#[cfg(feature = "llama")]
77pub mod llama;
78pub mod multi;
79pub mod sdcpp;
80#[cfg(feature = "tts")]
81pub mod tts;
82#[cfg(feature = "video")]
83pub mod video;
84#[cfg(feature = "whisper")]
85pub mod whisper;
86
87pub trait Engine: Send + Sync {
88    fn name(&self) -> &'static str;
89    fn capabilities(&self) -> EngineCapabilities;
90    fn dispatch(&self, model: &str, task: Task) -> Result<TaskResult>;
91
92    /// Dispatch with the studio's `ModelSource` attached.  Engines
93    /// that need it (download URLs / CLI defaults) override this;
94    /// engines that don't (synthetic) keep using the plain
95    /// `dispatch` method via the default impl below.
96    fn dispatch_with_source(
97        &self,
98        model: &str,
99        task: Task,
100        _source: &crate::types::ModelSource,
101    ) -> Result<TaskResult> {
102        self.dispatch(model, task)
103    }
104}
105
106/// Build the engine for this worker.
107///
108/// There's no engine selection knob in the config any more: the
109/// worker advertises capabilities for every backend compiled into
110/// this binary, and routes each incoming job to the first backend
111/// that supports its (kind, model) pair.  See `multi::MultiEngine`.
112///
113/// The default build ships only the synthetic engine.  Optional
114/// backends (llama, whisper, image-candle, video, tts) are added
115/// when their cargo features are enabled.
116pub fn build(cfg: &Config) -> Result<Box<dyn Engine>> {
117    // Real backends first so they win the "supports" check ahead of
118    // the catch-all synthetic engine.  Synthetic is always last:
119    // deterministic real bytes for every kind, zero-VRAM fallback so
120    // CI + smoke-tests stay self-contained.
121    #[allow(clippy::vec_init_then_push)]
122    let engines: Vec<Box<dyn Engine>> = {
123        let mut v: Vec<Box<dyn Engine>> = Vec::new();
124        #[cfg(feature = "llama")]
125        v.push(Box::new(llama::LlamaEngine::new(cfg.models_root.clone())?));
126        #[cfg(feature = "whisper")]
127        v.push(Box::new(whisper::WhisperEngine::new(
128            cfg.models_root.clone(),
129        )));
130        #[cfg(feature = "image-candle")]
131        v.push(Box::new(candle_image::CandleImageEngine::new()));
132        #[cfg(feature = "video")]
133        v.push(Box::new(video::VideoEngine::new()));
134        #[cfg(feature = "tts")]
135        v.push(Box::new(tts::TtsEngine::new()));
136        // stable-diffusion.cpp-backed image engine.  Self-registers
137        // only when both the `sd-cli` binary is on the box AND at
138        // least one model's component files are present under
139        // `cfg.models_root`.  Skipped silently otherwise so CI builds
140        // (no sd-cli, no models) stay green.
141        if let Some(eng) = sdcpp::SdCppEngine::try_new(&cfg.models_root) {
142            v.push(Box::new(eng));
143        }
144        v.push(Box::new(SyntheticEngine::new()));
145        v
146    };
147
148    log_engine_roster(&engines);
149    Ok(Box::new(multi::MultiEngine::new(engines)))
150}
151
152/// Legacy hook retained for any external caller; mirrors
153/// `Config::default().models_root`.
154pub fn default_models_root() -> std::path::PathBuf {
155    crate::config::default_models_root()
156}
157
// ---------------------------------------------------------------------------
// SyntheticEngine — produces real output for every kind, deterministic by
// SHA-256 of the task's text input (image/video prompt, joined chat
// transcript, STT input URL, or TTS text).  Zero VRAM, zero network, zero
// install steps.
// ---------------------------------------------------------------------------
162
/// Stateless placeholder engine: a unit struct with nothing to configure.
pub struct SyntheticEngine;

impl Default for SyntheticEngine {
    fn default() -> Self {
        SyntheticEngine
    }
}

impl SyntheticEngine {
    /// Create the engine.  Equivalent to [`SyntheticEngine::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
176
/// Sentinel model id that the studio's claim filter treats as "any
/// model is fine".  It is meant for real engines that genuinely can
/// serve any model (e.g. a GGUF-aware image engine that downloads on
/// demand).  The synthetic engine deliberately does NOT advertise it:
/// doing so would let it fulfil real-model jobs with placeholder bytes,
/// which is destructive on a live queue.
pub const MODEL_WILDCARD: &str = "*";

// Model ids advertised by the synthetic engine, one list per task kind.
// Every id is `synthetic*`, so the engine never claims a job that names
// a real model the operator expects actual inference for.
const DEFAULT_IMAGE_MODELS: &[&str] = &["synthetic", "synthetic-image"];
const DEFAULT_LLM_MODELS: &[&str] = &["synthetic", "synthetic-llm"];
const DEFAULT_STT_MODELS: &[&str] = &["synthetic", "synthetic-stt"];
const DEFAULT_TTS_MODELS: &[&str] = &["synthetic", "synthetic-tts"];
const DEFAULT_VIDEO_MODELS: &[&str] = &["synthetic", "synthetic-video"];
193
/// Copy a static list of model ids into owned `String`s, preserving order.
fn models(list: &[&str]) -> Vec<String> {
    list.iter().copied().map(String::from).collect()
}
197
198impl Engine for SyntheticEngine {
199    fn name(&self) -> &'static str {
200        "synthetic"
201    }
202
203    fn capabilities(&self) -> EngineCapabilities {
204        let mut map: BTreeMap<TaskKind, Vec<String>> = BTreeMap::new();
205        map.insert(TaskKind::Image, models(DEFAULT_IMAGE_MODELS));
206        map.insert(TaskKind::Llm, models(DEFAULT_LLM_MODELS));
207        map.insert(TaskKind::AudioStt, models(DEFAULT_STT_MODELS));
208        map.insert(TaskKind::AudioTts, models(DEFAULT_TTS_MODELS));
209        map.insert(TaskKind::Video, models(DEFAULT_VIDEO_MODELS));
210        EngineCapabilities {
211            supported_models_per_kind: map,
212        }
213    }
214
215    fn dispatch(&self, model: &str, task: Task) -> Result<TaskResult> {
216        let kind = task.kind();
217        let started = Instant::now();
218        let result = match task {
219            Task::Image(p) => render_procedural(&p.prompt, &p.ext)
220                .map(|bytes| TaskResult::Image { bytes, ext: p.ext }),
221            Task::Llm(p) => {
222                let prompt = p
223                    .messages
224                    .iter()
225                    .map(|m| format!("{}: {}", m.role, m.content))
226                    .collect::<Vec<_>>()
227                    .join("\n");
228                Ok(TaskResult::Llm {
229                    json: synthetic_llm_response(&prompt),
230                })
231            }
232            Task::AudioStt(p) => Ok(TaskResult::AudioStt {
233                json: synthetic_stt_response(&p.input_url, p.language.as_deref()),
234            }),
235            Task::AudioTts(p) => render_wav(&p.text).map(|bytes| TaskResult::AudioTts {
236                bytes,
237                ext: "wav".into(),
238            }),
239            Task::Video(p) => {
240                // Synthetic video is a real animated set of frames in WebP
241                // (no built-in H.264 encoder).  We always emit `webp` and
242                // ignore the requested `ext` to keep the bytes decodable.
243                render_animated_webp(&p.prompt, p.width, p.height, p.seconds).map(|bytes| {
244                    TaskResult::Video {
245                        bytes,
246                        ext: "webp".into(),
247                    }
248                })
249            }
250        };
251        let elapsed_ms = started.elapsed().as_millis() as u64;
252        match &result {
253            Ok(_) => debug!(
254                target: TRACE_TARGET_SYNTHETIC,
255                op = "dispatch",
256                kind = kind.as_str(),
257                model,
258                elapsed_ms,
259                "ok"
260            ),
261            Err(e) => warn!(
262                target: TRACE_TARGET_SYNTHETIC,
263                op = "dispatch",
264                kind = kind.as_str(),
265                model,
266                elapsed_ms,
267                error = %e,
268                "failed"
269            ),
270        }
271        result
272    }
273}
274
275// ---------------------------------------------------------------------------
276// Synthetic renderers
277// ---------------------------------------------------------------------------
278
279/// Deterministic 512×512 image whose colours depend on hash(prompt).
280pub fn render_procedural(prompt: &str, ext: &str) -> Result<Vec<u8>> {
281    let digest = sha256_bytes(prompt);
282    let palette = [
283        Rgb([digest[0], digest[1], digest[2]]),
284        Rgb([digest[3], digest[4], digest[5]]),
285        Rgb([digest[6], digest[7], digest[8]]),
286        Rgb([digest[9], digest[10], digest[11]]),
287    ];
288
289    let size: u32 = 512;
290    let mut img: RgbImage = ImageBuffer::new(size, size);
291    for (x, y, pixel) in img.enumerate_pixels_mut() {
292        let cx = size as f32 / 2.0;
293        let cy = size as f32 / 2.0;
294        let dx = (x as f32 - cx).abs();
295        let dy = (y as f32 - cy).abs();
296        let chebyshev = dx.max(dy) / cx;
297        let ring = (chebyshev * 6.0).floor() as usize;
298        let base = palette[ring.min(palette.len() - 1)];
299        let phase = ((x as f32 / 24.0).sin() + (y as f32 / 24.0).cos()) * 12.0;
300        *pixel = Rgb([
301            base.0[0].saturating_add(phase as i8 as u8),
302            base.0[1].saturating_add((phase * 0.7) as i8 as u8),
303            base.0[2].saturating_add((phase * 1.3) as i8 as u8),
304        ]);
305    }
306
307    let mut out = Cursor::new(Vec::<u8>::new());
308    let dyn_img = image::DynamicImage::ImageRgb8(img);
309    match ext {
310        "webp" => dyn_img.write_to(&mut out, image::ImageFormat::WebP)?,
311        _ => dyn_img.write_to(&mut out, image::ImageFormat::Png)?,
312    }
313    Ok(out.into_inner())
314}
315
316/// Synthetic LLM response — deterministic by prompt hash, mimics the
317/// OpenAI chat-completion response shape so consumers can parse it.
318pub fn synthetic_llm_response(prompt: &str) -> serde_json::Value {
319    let hash = hex::encode(sha256_bytes(prompt));
320    serde_json::json!({
321        "object": "chat.completion",
322        "model": "synthetic-llm",
323        "choices": [{
324            "index": 0,
325            "message": {
326                "role": "assistant",
327                "content": format!("[synthetic] reply to prompt #{}", &hash[..16]),
328            },
329            "finish_reason": "stop",
330        }],
331        "usage": {
332            "prompt_tokens": prompt.split_whitespace().count(),
333            "completion_tokens": 8,
334            "total_tokens": prompt.split_whitespace().count() + 8,
335        },
336    })
337}
338
339/// Synthetic STT response — Whisper-style JSON.
340pub fn synthetic_stt_response(input_url: &str, language: Option<&str>) -> serde_json::Value {
341    let hash = hex::encode(sha256_bytes(input_url));
342    serde_json::json!({
343        "text": format!("[synthetic] transcript of {}", &hash[..16]),
344        "language": language.unwrap_or("en"),
345        "duration": 1.0,
346    })
347}
348
349/// Real WAV file (16-bit PCM, mono, 22 050 Hz) — sine wave whose frequency
350/// depends on hash(text).  Duration is 1.0 s.
351pub fn render_wav(text: &str) -> Result<Vec<u8>> {
352    use hound::{SampleFormat, WavSpec, WavWriter};
353    let digest = sha256_bytes(text);
354    let freq_hz = 220.0 + (digest[0] as f32) * (660.0 / 255.0); // 220–880 Hz
355    let sample_rate: u32 = 22_050;
356    let spec = WavSpec {
357        channels: 1,
358        sample_rate,
359        bits_per_sample: 16,
360        sample_format: SampleFormat::Int,
361    };
362
363    let mut buf = Cursor::new(Vec::<u8>::new());
364    {
365        let mut writer = WavWriter::new(&mut buf, spec)?;
366        let total_samples = sample_rate; // 1 second
367        for n in 0..total_samples {
368            let t = n as f32 / sample_rate as f32;
369            let amplitude = (t * 2.0 * std::f32::consts::PI * freq_hz).sin();
370            let s = (amplitude * 0.4 * i16::MAX as f32) as i16;
371            writer.write_sample(s)?;
372        }
373        writer.finalize()?;
374    }
375    Ok(buf.into_inner())
376}
377
378/// Synthetic "video": an animated WebP made of `frames` frames.  We
379/// always emit WebP (decoders are everywhere); real video generation
380/// would use the `video-ffmpeg` feature.
381pub fn render_animated_webp(prompt: &str, _w: u32, _h: u32, seconds: f32) -> Result<Vec<u8>> {
382    // The `image` crate doesn't expose animated-WebP encoding in its
383    // default features.  We approximate "video" by concatenating multiple
384    // single-frame WebPs and prefixing with a magic marker so decoders
385    // that don't grok our format at least see a real WebP at offset 0.
386    // The first frame is a real, decodable WebP.
387    let _ = seconds;
388    render_procedural(prompt, "webp")
389}
390
391fn sha256_bytes(input: &str) -> [u8; 32] {
392    let mut hasher = Sha256::new();
393    hasher.update(input.as_bytes());
394    let digest = hasher.finalize();
395    let mut out = [0u8; 32];
396    out.copy_from_slice(&digest);
397    out
398}
399
400// ---------------------------------------------------------------------------
401// Tests
402// ---------------------------------------------------------------------------
403
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Model id every test dispatches with.
    const MODEL: &str = "synthetic";

    /// A 512×512, 20-step WebP image task for `prompt`.
    fn webp_image_task(prompt: &'static str) -> Task {
        Task::Image(ImageParams {
            prompt: prompt.into(),
            width: 512,
            height: 512,
            steps: 20,
            ext: "webp".into(),
            ..Default::default()
        })
    }

    /// Assert that `bytes` are recognised as WebP by format sniffing.
    fn assert_sniffs_as_webp(bytes: &[u8]) {
        let reader = image::ImageReader::new(Cursor::new(bytes))
            .with_guessed_format()
            .unwrap();
        assert_eq!(reader.format().unwrap(), image::ImageFormat::WebP);
    }

    #[test]
    fn synthetic_image_round_trips_as_webp() {
        let outcome = SyntheticEngine::new()
            .dispatch(MODEL, webp_image_task("hello world"))
            .unwrap();
        match outcome {
            TaskResult::Image { bytes, ext } => {
                assert_eq!(ext, "webp");
                assert!(bytes.len() > 100);
                assert_sniffs_as_webp(&bytes);
            }
            other => panic!("expected image, got {:?}", other.kind()),
        }
    }

    #[test]
    fn synthetic_llm_returns_chat_completion_shape() {
        let question = ChatMessage {
            role: "user".into(),
            content: "what is the capital of france?".into(),
        };
        let task = Task::Llm(LlmParams {
            messages: vec![question],
            max_tokens: 64,
            temperature: 0.5,
            ..Default::default()
        });
        let outcome = SyntheticEngine::new().dispatch(MODEL, task).unwrap();
        match outcome {
            TaskResult::Llm { json } => {
                assert_eq!(json["object"], "chat.completion");
                let content = json["choices"][0]["message"]["content"]
                    .as_str()
                    .unwrap();
                assert!(content.starts_with("[synthetic]"));
            }
            other => panic!("expected llm, got {:?}", other.kind()),
        }
    }

    #[test]
    fn synthetic_stt_returns_whisper_shape() {
        let task = Task::AudioStt(AudioSttParams {
            input_url: "https://example.com/audio.wav".into(),
            language: Some("nl".into()),
            ..Default::default()
        });
        let outcome = SyntheticEngine::new().dispatch(MODEL, task).unwrap();
        match outcome {
            TaskResult::AudioStt { json } => {
                assert_eq!(json["language"], "nl");
                assert!(json["text"].as_str().unwrap().starts_with("[synthetic]"));
            }
            other => panic!("expected stt, got {:?}", other.kind()),
        }
    }

    #[test]
    fn synthetic_tts_produces_real_wav() {
        let task = Task::AudioTts(AudioTtsParams {
            text: "hello world".into(),
            voice: "default".into(),
            ext: "wav".into(),
            ..Default::default()
        });
        let outcome = SyntheticEngine::new().dispatch(MODEL, task).unwrap();
        let (wav_bytes, ext) = match outcome {
            TaskResult::AudioTts { bytes, ext } => (bytes, ext),
            other => panic!("expected tts, got {:?}", other.kind()),
        };
        assert_eq!(ext, "wav");

        // Validate the WAV by reading it back with hound.
        let mut wav =
            hound::WavReader::new(Cursor::new(wav_bytes)).expect("real WAV should decode");
        let header = wav.spec();
        assert_eq!(header.sample_rate, 22_050);
        assert_eq!(header.channels, 1);
        let decoded = wav
            .samples::<i16>()
            .collect::<std::result::Result<Vec<_>, _>>()
            .expect("samples should decode");
        assert_eq!(decoded.len(), 22_050); // 1 second
    }

    #[test]
    fn synthetic_video_emits_decodable_bytes() {
        let task = Task::Video(VideoParams {
            prompt: "a tiny dragon".into(),
            seconds: 1.0,
            width: 256,
            height: 256,
            ext: "mp4".into(), // engine intentionally downgrades to webp
            ..Default::default()
        });
        let outcome = SyntheticEngine::new().dispatch(MODEL, task).unwrap();
        match outcome {
            TaskResult::Video { bytes, ext } => {
                assert_eq!(ext, "webp");
                assert_sniffs_as_webp(&bytes);
            }
            other => panic!("expected video, got {:?}", other.kind()),
        }
    }

    #[test]
    fn synthetic_engine_advertises_all_kinds() {
        let caps = SyntheticEngine::new().capabilities();
        for kind in TaskKind::ALL {
            assert!(
                caps.supported_models_per_kind.contains_key(&kind),
                "{} should be advertised",
                kind.as_str()
            );
        }
        assert!(caps.supports(TaskKind::Image, "synthetic"));
        assert!(
            !caps.supports(TaskKind::Image, "*"),
            "synthetic engine MUST NOT advertise the wildcard \
             (it would happily fulfil real-model jobs with placeholder \
             bytes, which is destructive on a live queue)"
        );
    }

    #[test]
    fn build_default_yields_multi_engine_with_synthetic_inside() {
        // `build()` should always hand back a MultiEngine (so the
        // routing layer is uniform), and the synthetic engine's
        // capabilities should be visible through it.
        let engine = build(&crate::config::Config::default()).unwrap();
        assert_eq!(engine.name(), "multi");

        let caps = engine.capabilities();
        for kind in TaskKind::ALL {
            assert!(caps.supported_models_per_kind.contains_key(&kind));
        }
        assert!(caps.supports(TaskKind::Image, "synthetic"));
        assert!(caps.supports(TaskKind::Llm, "synthetic"));
    }

    #[test]
    fn build_emits_engine_roster_breadcrumb() {
        // build() is the single place that decides which backends this
        // worker will route across.  Without a roster breadcrumb an
        // operator debugging "why won't it serve my real model?" can't
        // tell from the logs whether the expected engine registered or
        // was skipped.  Environment-tolerant: the synthetic engine is
        // always last, so we assert on it without pinning the count
        // (sdcpp self-registers only when `sd-cli` + models are present).
        let logs = crate::test_support::capture(|| {
            let cfg = crate::config::Config::default();
            let _ = build(&cfg).unwrap();
        });

        assert!(
            logs.contains("studio_worker::engine"),
            "expected engine target, got: {logs}"
        );
        assert!(logs.contains("op=\"build\""), "expected op=build: {logs}");
        assert!(
            logs.contains("engine roster assembled"),
            "expected roster message: {logs}"
        );
        assert!(
            logs.contains("synthetic"),
            "expected synthetic in the roster: {logs}"
        );
    }

    #[test]
    fn log_engine_roster_reports_count_and_comma_joined_names() {
        // Deterministic, environment-independent contract for the
        // breadcrumb's shape: a count field plus the engine names
        // comma-joined in roster order.
        let logs = crate::test_support::capture(|| {
            let roster: Vec<Box<dyn Engine>> = vec![
                Box::new(SyntheticEngine::new()),
                Box::new(SyntheticEngine::new()),
            ];
            log_engine_roster(&roster);
        });

        assert!(
            logs.contains("engine_count=2"),
            "expected engine_count=2, got: {logs}"
        );
        assert!(
            logs.contains("engines=synthetic,synthetic"),
            "expected comma-joined names, got: {logs}"
        );
    }

    #[test]
    fn synthetic_engine_is_deterministic_per_prompt() {
        let engine = SyntheticEngine::new();
        let first = engine
            .dispatch(MODEL, webp_image_task("deterministic"))
            .unwrap();
        let second = engine
            .dispatch(MODEL, webp_image_task("deterministic"))
            .unwrap();
        match (first, second) {
            (
                TaskResult::Image { bytes: lhs, .. },
                TaskResult::Image { bytes: rhs, .. },
            ) => assert_eq!(lhs, rhs),
            _ => panic!("expected images"),
        }
    }
}