1use crate::config::Config;
11use crate::types::*;
12use anyhow::Result;
13use image::{ImageBuffer, Rgb, RgbImage};
14use sha2::{Digest, Sha256};
15use std::collections::BTreeMap;
16use std::io::Cursor;
17use std::time::Instant;
18use tracing::{debug, info, warn};
19
/// Tracing target attached to the per-dispatch events emitted by the synthetic engine.
const TRACE_TARGET_SYNTHETIC: &str = "studio_worker::engine::synthetic";

/// Tracing target attached to the engine-roster event emitted while building the engine stack.
const TRACE_TARGET_BUILD: &str = "studio_worker::engine";
27
28fn log_engine_roster(engines: &[Box<dyn Engine>]) {
35 let names: Vec<&str> = engines.iter().map(|e| e.name()).collect();
36 info!(
37 target: TRACE_TARGET_BUILD,
38 op = "build",
39 engine_count = names.len(),
40 engines = %names.join(","),
41 "engine roster assembled"
42 );
43}
44
/// The models an engine is able to serve, grouped by task kind.
#[derive(Debug, Clone, Default)]
pub struct EngineCapabilities {
    /// Model names accepted for each task kind. A kind that has no entry in the map is
    /// treated as unsupported by [`EngineCapabilities::supports`].
    pub supported_models_per_kind: BTreeMap<TaskKind, Vec<String>>,
}
52
53impl EngineCapabilities {
54 pub fn supports(&self, kind: TaskKind, model: &str) -> bool {
55 self.supported_models_per_kind
56 .get(&kind)
57 .map(|ms| ms.iter().any(|m| m == model))
58 .unwrap_or(false)
59 }
60
61 pub fn kinds(&self) -> Vec<TaskKind> {
62 self.supported_models_per_kind.keys().copied().collect()
63 }
64
65 pub fn flat_models(&self) -> Vec<String> {
66 self.supported_models_per_kind
67 .values()
68 .flat_map(|ms| ms.iter().cloned())
69 .collect()
70 }
71}
72
// Submodules of the engine layer. Modules behind `#[cfg(feature = ...)]` are compiled only
// when the matching Cargo feature is enabled; `llama` is additionally left out on Windows
// targets. `download`, `multi` and `sdcpp` are always compiled.
#[cfg(feature = "image-candle")]
pub mod candle_image;
pub mod download;
#[cfg(all(feature = "llama", not(target_os = "windows")))]
pub mod llama;
pub mod multi;
pub mod sdcpp;
#[cfg(feature = "tts")]
pub mod tts;
#[cfg(feature = "video")]
pub mod video;
#[cfg(feature = "whisper")]
pub mod whisper;
88
/// A backend able to execute tasks for a set of models.
///
/// Implementations must be `Send + Sync` so they can be shared behind `Box<dyn Engine>`.
pub trait Engine: Send + Sync {
    /// Static identifier of the engine; used as its entry in the engine-roster log line.
    fn name(&self) -> &'static str;

    /// Reports which models this engine accepts for each task kind.
    fn capabilities(&self) -> EngineCapabilities;

    /// Executes `task` for the given `model` and returns its result, or an error.
    fn dispatch(&self, model: &str, task: Task) -> Result<TaskResult>;

    /// Variant of [`Engine::dispatch`] that additionally receives the model's source.
    ///
    /// The default implementation ignores `_source` and simply forwards to
    /// [`Engine::dispatch`]; engines that need the source override this method.
    fn dispatch_with_source(
        &self,
        model: &str,
        task: Task,
        _source: &crate::types::ModelSource,
    ) -> Result<TaskResult> {
        self.dispatch(model, task)
    }
}
107
108pub fn build(cfg: &Config) -> Result<Box<dyn Engine>> {
119 #[allow(clippy::vec_init_then_push)]
124 let engines: Vec<Box<dyn Engine>> = {
125 let mut v: Vec<Box<dyn Engine>> = Vec::new();
126 #[cfg(all(feature = "llama", not(target_os = "windows")))]
127 v.push(Box::new(llama::LlamaEngine::new(cfg.models_root.clone())?));
128 #[cfg(feature = "whisper")]
129 v.push(Box::new(whisper::WhisperEngine::new(
130 cfg.models_root.clone(),
131 )));
132 #[cfg(feature = "image-candle")]
133 v.push(Box::new(candle_image::CandleImageEngine::new()));
134 #[cfg(feature = "video")]
135 v.push(Box::new(video::VideoEngine::new()));
136 #[cfg(feature = "tts")]
137 v.push(Box::new(tts::TtsEngine::new()));
138 if let Some(eng) = sdcpp::SdCppEngine::try_new(&cfg.models_root) {
144 v.push(Box::new(eng));
145 }
146 v.push(Box::new(SyntheticEngine::new()));
147 v
148 };
149
150 log_engine_roster(&engines);
151 Ok(Box::new(multi::MultiEngine::new(engines)))
152}
153
/// Returns the default models root directory.
///
/// Thin re-export: the value comes straight from `crate::config::default_models_root`.
pub fn default_models_root() -> std::path::PathBuf {
    crate::config::default_models_root()
}
159
/// Stateless engine that fabricates deterministic placeholder results for every task kind.
///
/// The type is a zero-sized unit struct, so it is trivially `Copy`; `Debug` is derived so the
/// public type can be used in diagnostics, and `Default` is derived instead of hand-written.
#[derive(Debug, Clone, Copy, Default)]
pub struct SyntheticEngine;

impl SyntheticEngine {
    /// Creates the engine. Equivalent to `SyntheticEngine::default()`.
    pub fn new() -> Self {
        Self
    }
}
178
/// Model name that stands for "any model".
///
/// None of the synthetic model lists below contain it, so the synthetic engine never
/// advertises the wildcard.
pub const MODEL_WILDCARD: &str = "*";

/// Model names the synthetic engine advertises for image tasks.
const DEFAULT_IMAGE_MODELS: &[&str] = &["synthetic", "synthetic-image"];
/// Model names the synthetic engine advertises for LLM tasks.
const DEFAULT_LLM_MODELS: &[&str] = &["synthetic", "synthetic-llm"];
/// Model names the synthetic engine advertises for speech-to-text tasks.
const DEFAULT_STT_MODELS: &[&str] = &["synthetic", "synthetic-stt"];
/// Model names the synthetic engine advertises for text-to-speech tasks.
const DEFAULT_TTS_MODELS: &[&str] = &["synthetic", "synthetic-tts"];
/// Model names the synthetic engine advertises for video tasks.
const DEFAULT_VIDEO_MODELS: &[&str] = &["synthetic", "synthetic-video"];
195
/// Copies a slice of borrowed model names into a vector of owned `String`s, keeping order.
fn models(list: &[&str]) -> Vec<String> {
    let mut owned = Vec::with_capacity(list.len());
    for name in list {
        owned.push(String::from(*name));
    }
    owned
}
199
200impl Engine for SyntheticEngine {
201 fn name(&self) -> &'static str {
202 "synthetic"
203 }
204
205 fn capabilities(&self) -> EngineCapabilities {
206 let mut map: BTreeMap<TaskKind, Vec<String>> = BTreeMap::new();
207 map.insert(TaskKind::Image, models(DEFAULT_IMAGE_MODELS));
208 map.insert(TaskKind::Llm, models(DEFAULT_LLM_MODELS));
209 map.insert(TaskKind::AudioStt, models(DEFAULT_STT_MODELS));
210 map.insert(TaskKind::AudioTts, models(DEFAULT_TTS_MODELS));
211 map.insert(TaskKind::Video, models(DEFAULT_VIDEO_MODELS));
212 EngineCapabilities {
213 supported_models_per_kind: map,
214 }
215 }
216
217 fn dispatch(&self, model: &str, task: Task) -> Result<TaskResult> {
218 let kind = task.kind();
219 let started = Instant::now();
220 let result = match task {
221 Task::Image(p) => render_procedural(&p.prompt, &p.ext)
222 .map(|bytes| TaskResult::Image { bytes, ext: p.ext }),
223 Task::Llm(p) => {
224 let prompt = p
225 .messages
226 .iter()
227 .map(|m| format!("{}: {}", m.role, m.content))
228 .collect::<Vec<_>>()
229 .join("\n");
230 Ok(TaskResult::Llm {
231 json: synthetic_llm_response(&prompt),
232 })
233 }
234 Task::AudioStt(p) => Ok(TaskResult::AudioStt {
235 json: synthetic_stt_response(&p.input_url, p.language.as_deref()),
236 }),
237 Task::AudioTts(p) => render_wav(&p.text).map(|bytes| TaskResult::AudioTts {
238 bytes,
239 ext: "wav".into(),
240 }),
241 Task::Video(p) => {
242 render_animated_webp(&p.prompt, p.width, p.height, p.seconds).map(|bytes| {
246 TaskResult::Video {
247 bytes,
248 ext: "webp".into(),
249 }
250 })
251 }
252 };
253 let elapsed_ms = started.elapsed().as_millis() as u64;
254 match &result {
255 Ok(_) => debug!(
256 target: TRACE_TARGET_SYNTHETIC,
257 op = "dispatch",
258 kind = kind.as_str(),
259 model,
260 elapsed_ms,
261 "ok"
262 ),
263 Err(e) => warn!(
264 target: TRACE_TARGET_SYNTHETIC,
265 op = "dispatch",
266 kind = kind.as_str(),
267 model,
268 elapsed_ms,
269 error = %e,
270 "failed"
271 ),
272 }
273 result
274 }
275}
276
277pub fn render_procedural(prompt: &str, ext: &str) -> Result<Vec<u8>> {
283 let digest = sha256_bytes(prompt);
284 let palette = [
285 Rgb([digest[0], digest[1], digest[2]]),
286 Rgb([digest[3], digest[4], digest[5]]),
287 Rgb([digest[6], digest[7], digest[8]]),
288 Rgb([digest[9], digest[10], digest[11]]),
289 ];
290
291 let size: u32 = 512;
292 let mut img: RgbImage = ImageBuffer::new(size, size);
293 for (x, y, pixel) in img.enumerate_pixels_mut() {
294 let cx = size as f32 / 2.0;
295 let cy = size as f32 / 2.0;
296 let dx = (x as f32 - cx).abs();
297 let dy = (y as f32 - cy).abs();
298 let chebyshev = dx.max(dy) / cx;
299 let ring = (chebyshev * 6.0).floor() as usize;
300 let base = palette[ring.min(palette.len() - 1)];
301 let phase = ((x as f32 / 24.0).sin() + (y as f32 / 24.0).cos()) * 12.0;
302 *pixel = Rgb([
303 base.0[0].saturating_add(phase as i8 as u8),
304 base.0[1].saturating_add((phase * 0.7) as i8 as u8),
305 base.0[2].saturating_add((phase * 1.3) as i8 as u8),
306 ]);
307 }
308
309 let mut out = Cursor::new(Vec::<u8>::new());
310 let dyn_img = image::DynamicImage::ImageRgb8(img);
311 match ext {
312 "webp" => dyn_img.write_to(&mut out, image::ImageFormat::WebP)?,
313 _ => dyn_img.write_to(&mut out, image::ImageFormat::Png)?,
314 }
315 Ok(out.into_inner())
316}
317
318pub fn synthetic_llm_response(prompt: &str) -> serde_json::Value {
321 let hash = hex::encode(sha256_bytes(prompt));
322 serde_json::json!({
323 "object": "chat.completion",
324 "model": "synthetic-llm",
325 "choices": [{
326 "index": 0,
327 "message": {
328 "role": "assistant",
329 "content": format!("[synthetic] reply to prompt #{}", &hash[..16]),
330 },
331 "finish_reason": "stop",
332 }],
333 "usage": {
334 "prompt_tokens": prompt.split_whitespace().count(),
335 "completion_tokens": 8,
336 "total_tokens": prompt.split_whitespace().count() + 8,
337 },
338 })
339}
340
341pub fn synthetic_stt_response(input_url: &str, language: Option<&str>) -> serde_json::Value {
343 let hash = hex::encode(sha256_bytes(input_url));
344 serde_json::json!({
345 "text": format!("[synthetic] transcript of {}", &hash[..16]),
346 "language": language.unwrap_or("en"),
347 "duration": 1.0,
348 })
349}
350
351pub fn render_wav(text: &str) -> Result<Vec<u8>> {
354 use hound::{SampleFormat, WavSpec, WavWriter};
355 let digest = sha256_bytes(text);
356 let freq_hz = 220.0 + (digest[0] as f32) * (660.0 / 255.0); let sample_rate: u32 = 22_050;
358 let spec = WavSpec {
359 channels: 1,
360 sample_rate,
361 bits_per_sample: 16,
362 sample_format: SampleFormat::Int,
363 };
364
365 let mut buf = Cursor::new(Vec::<u8>::new());
366 {
367 let mut writer = WavWriter::new(&mut buf, spec)?;
368 let total_samples = sample_rate; for n in 0..total_samples {
370 let t = n as f32 / sample_rate as f32;
371 let amplitude = (t * 2.0 * std::f32::consts::PI * freq_hz).sin();
372 let s = (amplitude * 0.4 * i16::MAX as f32) as i16;
373 writer.write_sample(s)?;
374 }
375 writer.finalize()?;
376 }
377 Ok(buf.into_inner())
378}
379
/// Produces the placeholder bytes used for video tasks.
///
/// Despite the name, the current body returns the single still WebP frame that
/// `render_procedural` generates for `prompt`; `_w`, `_h` and `seconds` are accepted but
/// do not affect the output.
///
/// # Errors
///
/// Propagates any error from `render_procedural`.
pub fn render_animated_webp(prompt: &str, _w: u32, _h: u32, seconds: f32) -> Result<Vec<u8>> {
    // Explicitly mark `seconds` as intentionally unused.
    let _ = seconds;
    render_procedural(prompt, "webp")
}
392
393fn sha256_bytes(input: &str) -> [u8; 32] {
394 let mut hasher = Sha256::new();
395 hasher.update(input.as_bytes());
396 let digest = hasher.finalize();
397 let mut out = [0u8; 32];
398 out.copy_from_slice(&digest);
399 out
400}
401
// Unit tests for the synthetic engine, the engine builder and the roster log line.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    // An image task requesting "webp" must come back labelled "webp" and sniff as WebP.
    #[test]
    fn synthetic_image_round_trips_as_webp() {
        let engine = SyntheticEngine::new();
        let task = Task::Image(ImageParams {
            prompt: "hello world".into(),
            width: 512,
            height: 512,
            steps: 20,
            ext: "webp".into(),
            ..Default::default()
        });
        let result = engine.dispatch("synthetic", task).unwrap();
        let (bytes, ext) = match result {
            TaskResult::Image { bytes, ext } => (bytes, ext),
            other => panic!("expected image, got {:?}", other.kind()),
        };
        assert_eq!(ext, "webp");
        assert!(bytes.len() > 100);
        let reader = image::ImageReader::new(Cursor::new(&bytes))
            .with_guessed_format()
            .unwrap();
        assert_eq!(reader.format().unwrap(), image::ImageFormat::WebP);
    }

    // The LLM placeholder must look like a chat completion with a "[synthetic]" reply.
    #[test]
    fn synthetic_llm_returns_chat_completion_shape() {
        let engine = SyntheticEngine::new();
        let task = Task::Llm(LlmParams {
            messages: vec![ChatMessage {
                role: "user".into(),
                content: "what is the capital of france?".into(),
            }],
            max_tokens: 64,
            temperature: 0.5,
            ..Default::default()
        });
        let result = engine.dispatch("synthetic", task).unwrap();
        let json = match result {
            TaskResult::Llm { json } => json,
            other => panic!("expected llm, got {:?}", other.kind()),
        };
        assert_eq!(json["object"], "chat.completion");
        assert!(json["choices"][0]["message"]["content"]
            .as_str()
            .unwrap()
            .starts_with("[synthetic]"));
    }

    // The STT placeholder must echo the requested language and carry a "[synthetic]" text.
    #[test]
    fn synthetic_stt_returns_whisper_shape() {
        let engine = SyntheticEngine::new();
        let task = Task::AudioStt(AudioSttParams {
            input_url: "https://example.com/audio.wav".into(),
            language: Some("nl".into()),
            ..Default::default()
        });
        let result = engine.dispatch("synthetic", task).unwrap();
        let json = match result {
            TaskResult::AudioStt { json } => json,
            other => panic!("expected stt, got {:?}", other.kind()),
        };
        assert_eq!(json["language"], "nl");
        assert!(json["text"].as_str().unwrap().starts_with("[synthetic]"));
    }

    // The TTS placeholder must decode as a mono 22 050 Hz WAV containing 22 050 samples.
    #[test]
    fn synthetic_tts_produces_real_wav() {
        let engine = SyntheticEngine::new();
        let task = Task::AudioTts(AudioTtsParams {
            text: "hello world".into(),
            voice: "default".into(),
            ext: "wav".into(),
            ..Default::default()
        });
        let result = engine.dispatch("synthetic", task).unwrap();
        let (bytes, ext) = match result {
            TaskResult::AudioTts { bytes, ext } => (bytes, ext),
            other => panic!("expected tts, got {:?}", other.kind()),
        };
        assert_eq!(ext, "wav");
        let mut reader = hound::WavReader::new(Cursor::new(bytes)).expect("real WAV should decode");
        let spec = reader.spec();
        assert_eq!(spec.sample_rate, 22_050);
        assert_eq!(spec.channels, 1);
        let samples = reader
            .samples::<i16>()
            .collect::<std::result::Result<Vec<_>, _>>()
            .expect("samples should decode");
        assert_eq!(samples.len(), 22_050);
    }

    // A video task requesting "mp4" still comes back labelled "webp" with WebP bytes.
    #[test]
    fn synthetic_video_emits_decodable_bytes() {
        let engine = SyntheticEngine::new();
        let task = Task::Video(VideoParams {
            prompt: "a tiny dragon".into(),
            seconds: 1.0,
            width: 256,
            height: 256,
            ext: "mp4".into(),
            ..Default::default()
        });
        let result = engine.dispatch("synthetic", task).unwrap();
        let (bytes, ext) = match result {
            TaskResult::Video { bytes, ext } => (bytes, ext),
            other => panic!("expected video, got {:?}", other.kind()),
        };
        assert_eq!(ext, "webp");
        let reader = image::ImageReader::new(Cursor::new(&bytes))
            .with_guessed_format()
            .unwrap();
        assert_eq!(reader.format().unwrap(), image::ImageFormat::WebP);
    }

    // Every task kind must be advertised, and the wildcard model must not be.
    #[test]
    fn synthetic_engine_advertises_all_kinds() {
        let engine = SyntheticEngine::new();
        let caps = engine.capabilities();
        for k in TaskKind::ALL {
            assert!(
                caps.supported_models_per_kind.contains_key(&k),
                "{} should be advertised",
                k.as_str()
            );
        }
        assert!(caps.supports(TaskKind::Image, "synthetic"));
        assert!(
            !caps.supports(TaskKind::Image, "*"),
            "synthetic engine MUST NOT advertise the wildcard \
             (it would happily fulfil real-model jobs with placeholder \
             bytes, which is destructive on a live queue)"
        );
    }

    // `build` with the default config must return the "multi" engine, whose capabilities
    // include every kind and the "synthetic" model.
    #[test]
    fn build_default_yields_multi_engine_with_synthetic_inside() {
        let cfg = crate::config::Config::default();
        let eng = build(&cfg).unwrap();
        assert_eq!(eng.name(), "multi");
        let caps = eng.capabilities();
        for k in TaskKind::ALL {
            assert!(caps.supported_models_per_kind.contains_key(&k));
        }
        assert!(caps.supports(TaskKind::Image, "synthetic"));
        assert!(caps.supports(TaskKind::Llm, "synthetic"));
    }

    // `build` must emit the roster log line.
    // NOTE(review): `test_support::capture` presumably returns the log output produced
    // while the closure runs as a string — confirm against `crate::test_support`.
    #[test]
    fn build_emits_engine_roster_breadcrumb() {
        let logs = crate::test_support::capture(|| {
            let cfg = crate::config::Config::default();
            let _ = build(&cfg).unwrap();
        });
        assert!(
            logs.contains("studio_worker::engine"),
            "expected engine target, got: {logs}"
        );
        assert!(logs.contains("op=\"build\""), "expected op=build: {logs}");
        assert!(
            logs.contains("engine roster assembled"),
            "expected roster message: {logs}"
        );
        assert!(
            logs.contains("synthetic"),
            "expected synthetic in the roster: {logs}"
        );
    }

    // The roster line must report the engine count and the comma-joined engine names.
    #[test]
    fn log_engine_roster_reports_count_and_comma_joined_names() {
        let logs = crate::test_support::capture(|| {
            let engines: Vec<Box<dyn Engine>> = vec![
                Box::new(SyntheticEngine::new()),
                Box::new(SyntheticEngine::new()),
            ];
            log_engine_roster(&engines);
        });
        assert!(
            logs.contains("engine_count=2"),
            "expected engine_count=2, got: {logs}"
        );
        assert!(
            logs.contains("engines=synthetic,synthetic"),
            "expected comma-joined names, got: {logs}"
        );
    }

    // Dispatching the same image prompt twice must yield byte-identical output.
    #[test]
    fn synthetic_engine_is_deterministic_per_prompt() {
        let engine = SyntheticEngine::new();
        let task = || {
            Task::Image(ImageParams {
                prompt: "deterministic".into(),
                width: 512,
                height: 512,
                steps: 20,
                ext: "webp".into(),
                ..Default::default()
            })
        };
        let a = engine.dispatch("synthetic", task()).unwrap();
        let b = engine.dispatch("synthetic", task()).unwrap();
        match (a, b) {
            (TaskResult::Image { bytes: a, .. }, TaskResult::Image { bytes: b, .. }) => {
                assert_eq!(a, b);
            }
            _ => panic!("expected images"),
        }
    }
}