use std::process::Command;
use crate::tasks::generate_video::{GenerateVideoRequest, GenerateVideoResult, VideoMode};
use crate::InferenceError;
// Name of the external CLI executable this backend shells out to; it is
// probed via `--help` and must be discoverable on PATH.
const CLI_BINARY: &str = "ltx-2-mlx";
// Model identifier passed via `--model` when the request does not name one.
const DEFAULT_MODEL: &str = "dgrauet/ltx-2.3-mlx-q4";
/// Probes whether the external `ltx-2-mlx` CLI can be invoked.
///
/// Runs `ltx-2-mlx --help` with stdout/stderr discarded and reports `true`
/// only when the process both spawns and exits successfully.
pub fn is_available() -> bool {
    let probe = Command::new(CLI_BINARY)
        .arg("--help")
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status();
    // Any spawn failure (binary missing, not executable, …) counts as
    // "not available" rather than an error.
    matches!(probe, Ok(status) if status.success())
}
/// Generates a video by invoking the external `ltx-2-mlx` CLI.
///
/// Validates `req`, assembles the CLI argument list from the effective mode
/// and the request's optional knobs, runs the binary, and maps any failure
/// into [`InferenceError::InferenceFailed`]. `Extend` and `Retake` are
/// rejected up front because the upstream CLI does not expose them.
///
/// # Errors
///
/// Returns `InferenceError::InferenceFailed` on validation failure, missing
/// conditioning paths, spawn failure, or a non-zero CLI exit status, and
/// `InferenceError::UnsupportedMode` for `Extend`/`Retake`.
pub fn generate_video(req: &GenerateVideoRequest) -> Result<GenerateVideoResult, InferenceError> {
    req.validate().map_err(InferenceError::InferenceFailed)?;

    let output_path = req
        .output_path
        .clone()
        .unwrap_or_else(|| "output.mp4".to_string());
    let mode = req.effective_mode();
    // The `a2v` subcommand is used only when audio should actually condition
    // generation; with `audio_passthrough` the audio is mux-only.
    let audio_conditioning = mode == VideoMode::AudioRefVideo && !req.audio_passthrough;

    let subcommand = if audio_conditioning { "a2v" } else { "generate" };
    let model = req.model.as_deref().unwrap_or(DEFAULT_MODEL);

    let mut cmd = Command::new(CLI_BINARY);
    cmd.arg(subcommand)
        .arg("--prompt")
        .arg(&req.prompt)
        .arg("--output")
        .arg(&output_path)
        .arg("--model")
        .arg(model);

    // Forward optional numeric knobs only when the caller supplied them.
    if let Some(width) = req.width {
        cmd.arg("--width").arg(width.to_string());
    }
    if let Some(height) = req.height {
        cmd.arg("--height").arg(height.to_string());
    }
    if let Some(frames) = req.num_frames {
        cmd.arg("--frames").arg(frames.to_string());
    }
    if let Some(steps) = req.steps {
        if audio_conditioning {
            // `a2v` is a two-stage pipeline: map the single `steps` value to
            // stage 1 and disable stage 2.
            cmd.arg("--stage1-steps").arg(steps.to_string());
            cmd.arg("--stage2-steps").arg("0");
        } else {
            cmd.arg("--steps").arg(steps.to_string());
        }
    }
    if let Some(guidance) = req.guidance {
        cmd.arg("--cfg-scale").arg(guidance.to_string());
    }
    if let Some(seed) = req.seed {
        cmd.arg("--seed").arg(seed.to_string());
    }

    // Mode-specific conditioning inputs.
    match mode {
        // Plain text-to-video (with or without generated audio) takes no
        // extra inputs.
        VideoMode::T2v | VideoMode::AudioVideo => {}
        VideoMode::I2v => {
            let Some(image) = req.image_path.as_deref() else {
                return Err(InferenceError::InferenceFailed(
                    "i2v requested but image_path is empty".to_string(),
                ));
            };
            cmd.arg("--image").arg(image);
        }
        VideoMode::AudioRefVideo => {
            if audio_conditioning {
                let Some(audio) = req.audio_path.as_deref() else {
                    return Err(InferenceError::InferenceFailed(
                        "audio_ref_video requested but audio_path is empty".to_string(),
                    ));
                };
                cmd.arg("--audio").arg(audio);
                // A reference image is optional for a2v.
                if let Some(image) = req.image_path.as_deref() {
                    cmd.arg("--image").arg(image);
                }
            } else {
                tracing::warn!(
                    audio_path = ?req.audio_path,
                    "external ltx-2-mlx: audio_path is mux-only because audio_passthrough=true; \
                    generating text-only video via `generate`"
                );
            }
        }
        VideoMode::Extend => {
            return Err(InferenceError::UnsupportedMode {
                mode: "extend",
                backend: "external-ltx-2-mlx",
                reason: "not exposed by the upstream Python CLI's argparse surface",
            });
        }
        VideoMode::Retake => {
            return Err(InferenceError::UnsupportedMode {
                mode: "retake",
                backend: "external-ltx-2-mlx",
                reason: "not exposed by the upstream Python CLI's argparse surface",
            });
        }
    }

    tracing::info!(?mode, prompt = %req.prompt, output = %output_path, "external ltx-2-mlx: invoking");

    // Run to completion, capturing both output streams for diagnostics.
    let output = cmd.output().map_err(|e| {
        InferenceError::InferenceFailed(format!(
            "failed to spawn `{CLI_BINARY}`: {e}. \
            Install the companion Python port with `uv sync` in a clone of \
            https://github.com/dgrauet/ltx-2-mlx and add its venv bin to PATH."
        ))
    })?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        let stdout = String::from_utf8_lossy(&output.stdout);
        return Err(InferenceError::InferenceFailed(format!(
            "ltx-2-mlx exited with status {}: stderr={stderr} stdout={stdout}",
            output.status
        )));
    }

    Ok(GenerateVideoResult {
        video_path: output_path,
        media_type: "video/mp4".to_string(),
        model_used: Some(format!("external:{model}")),
    })
}