use std::path::{Path, PathBuf};
use std::process::Command;
use crate::schema::ModelSchema;
use crate::tasks::generate_video::{GenerateVideoRequest, GenerateVideoResult, VideoMode};
use crate::InferenceError;
/// Python module path of the upstream Wan 2 generator entrypoint; invoked
/// below as `python -m mlx_video.models.wan_2.generate`.
const WAN2_MODULE: &str = "mlx_video.models.wan_2.generate";
/// Resolve the Python interpreter used to launch the external generator.
///
/// Resolution order:
/// 1. the `CAR_MLX_VIDEO_PYTHON` environment variable, taken verbatim;
/// 2. `$HOME/.car/visual-runtime/bin/python`, if that file exists;
/// 3. bare `python3`, left for the OS to resolve through `PATH`.
fn python_binary() -> PathBuf {
    if let Ok(explicit) = std::env::var("CAR_MLX_VIDEO_PYTHON") {
        return PathBuf::from(explicit);
    }
    let managed = std::env::var("HOME").ok().map(|home| {
        [home.as_str(), ".car", "visual-runtime", "bin", "python"]
            .iter()
            .collect::<PathBuf>()
    });
    match managed {
        Some(candidate) if candidate.exists() => candidate,
        _ => PathBuf::from("python3"),
    }
}
/// Heuristic check: does `schema` describe a Yume/Wan 2 family model?
///
/// True when the schema's family name starts with `yume`, or when any of its
/// tags is one of the Wan 2 marker tags.
pub fn is_wan_family(schema: &ModelSchema) -> bool {
    const WAN_TAGS: [&str; 4] = ["wan2.2", "ti2v", "world-model", "image-to-video"];
    if schema.family.starts_with("yume") {
        return true;
    }
    schema.tags.iter().any(|tag| WAN_TAGS.contains(&tag.as_str()))
}
pub fn generate_wan_video(
schema: &ModelSchema,
model_dir: &Path,
req: &GenerateVideoRequest,
) -> Result<GenerateVideoResult, InferenceError> {
req.validate().map_err(InferenceError::InferenceFailed)?;
let output_path = req
.output_path
.clone()
.unwrap_or_else(|| "output.mp4".to_string());
let mode = req.effective_mode();
match mode {
VideoMode::T2v | VideoMode::I2v => {}
VideoMode::AudioVideo | VideoMode::AudioRefVideo => {
return Err(InferenceError::UnsupportedMode {
mode: "audio_video",
backend: "mlx-video-wan2",
reason: "Yume/Wan2.2 TI2V exposes text-to-video and image-to-video, not audio-conditioned video",
});
}
VideoMode::Extend => {
return Err(InferenceError::UnsupportedMode {
mode: "extend",
backend: "mlx-video-wan2",
reason: "the upstream Wan 2 MLX generator does not expose video extension",
});
}
VideoMode::Retake => {
return Err(InferenceError::UnsupportedMode {
mode: "retake",
backend: "mlx-video-wan2",
reason:
"the upstream Wan 2 MLX generator does not expose retake/inpaint generation",
});
}
}
let python = python_binary();
let mut cmd = Command::new(&python);
cmd.arg("-m")
.arg(WAN2_MODULE)
.arg("--model-dir")
.arg(model_dir)
.arg("--prompt")
.arg(&req.prompt)
.arg("--output-path")
.arg(&output_path);
if let Some(w) = req.width {
cmd.arg("--width").arg(w.to_string());
}
if let Some(h) = req.height {
cmd.arg("--height").arg(h.to_string());
}
if let Some(f) = req.num_frames {
cmd.arg("--num-frames").arg(f.to_string());
}
if let Some(s) = req.steps {
cmd.arg("--steps").arg(s.to_string());
}
if let Some(g) = req.guidance {
cmd.arg("--guide-scale").arg(g.to_string());
}
if let Some(seed) = req.seed {
cmd.arg("--seed").arg(seed.to_string());
}
if let VideoMode::I2v = mode {
let path = req.image_path.as_deref().ok_or_else(|| {
InferenceError::InferenceFailed("i2v requested but image_path is empty".to_string())
})?;
cmd.arg("--image").arg(path);
}
if req.negative_prompt.is_some() || req.fps.is_some() {
tracing::warn!(
model = %schema.id,
"mlx-video Wan 2 adapter ignores negative_prompt/fps because the upstream Yume entrypoint does not document those flags"
);
}
tracing::info!(
model = %schema.id,
?mode,
prompt = %req.prompt,
output = %output_path,
"external mlx-video Wan 2: invoking"
);
let output = cmd.output().map_err(|e| {
InferenceError::InferenceFailed(format!(
"failed to spawn `{}` for {WAN2_MODULE}: {e}",
python.display()
))
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
let stdout = String::from_utf8_lossy(&output.stdout);
return Err(InferenceError::InferenceFailed(format!(
"mlx-video Wan 2 exited with status {}: stderr={stderr} stdout={stdout}. \
Install or upgrade the visual runtime so `{WAN2_MODULE}` is importable.",
output.status
)));
}
Ok(GenerateVideoResult {
video_path: output_path,
media_type: "video/mp4".to_string(),
model_used: Some(format!("external-mlx-video:{}", schema.id)),
})
}