use std::process::Command;
use crate::tasks::generate_video::{GenerateVideoRequest, GenerateVideoResult, VideoMode};
use crate::InferenceError;
/// Name of the external executable this backend launches; it is looked up
/// through `Command::new`, so it has to be resolvable from the process `PATH`.
const CLI_BINARY: &str = "ltx-2-mlx";
/// Model identifier passed via `--model` when the request does not name one.
// NOTE(review): looks like a Hugging Face repo id — confirm against the CLI docs.
const DEFAULT_MODEL: &str = "dgrauet/ltx-2.3-mlx-q4";
/// Reports whether the external CLI can be launched on this machine.
///
/// Runs `<CLI_BINARY> --help` with stdout and stderr discarded and returns
/// `true` only when the process both starts and exits successfully. A spawn
/// failure (for example, the binary is not on `PATH`) yields `false`.
pub fn is_available() -> bool {
    use std::process::Stdio;

    let probe = Command::new(CLI_BINARY)
        .arg("--help")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();

    // Spawn errors and non-zero exits are both treated as "not available".
    matches!(probe, Ok(exit) if exit.success())
}
/// Generates a video by running the external `ltx-2-mlx` CLI and waiting for
/// it to finish.
///
/// The request is validated, translated into a `generate` invocation of the
/// CLI, and executed synchronously with stdout/stderr captured. Optional
/// request fields (`width`, `height`, `num_frames`, `steps`, `guidance`,
/// `seed`) are forwarded only when set. When `output_path` is absent the
/// video is written to `output.mp4`; when `model` is absent `DEFAULT_MODEL`
/// is used.
///
/// Mode handling:
/// - `T2v` / `AudioVideo`: no extra arguments.
/// - `I2v`: forwards `image_path` as `--image`.
/// - `AudioRefVideo`: logs a warning; nothing audio-related is passed to the CLI.
/// - `Extend` / `Retake`: rejected as unsupported by this backend.
///
/// On success the result carries the output path, the `video/mp4` media type
/// and the model name prefixed with `external:`. Success is judged by the
/// CLI's exit status only.
///
/// # Errors
///
/// - `InferenceError::InferenceFailed` when request validation fails, when
///   `I2v` is requested without an `image_path`, when the CLI cannot be
///   spawned, or when it exits with a non-success status (the message then
///   includes the captured stderr and stdout).
/// - `InferenceError::UnsupportedMode` for `Extend` and `Retake`.
pub fn generate_video(req: &GenerateVideoRequest) -> Result<GenerateVideoResult, InferenceError> {
    req.validate().map_err(InferenceError::InferenceFailed)?;

    let mode = req.effective_mode();

    // Settle everything mode-specific first: unsupported modes bail out here,
    // and the only mode that contributes an argument (`I2v`) yields its image.
    let image_arg = match mode {
        VideoMode::Extend => {
            return Err(InferenceError::UnsupportedMode {
                mode: "extend",
                backend: "external-ltx-2-mlx",
                reason: "not exposed by the upstream Python CLI's argparse surface",
            });
        }
        VideoMode::Retake => {
            return Err(InferenceError::UnsupportedMode {
                mode: "retake",
                backend: "external-ltx-2-mlx",
                reason: "not exposed by the upstream Python CLI's argparse surface",
            });
        }
        VideoMode::I2v => match req.image_path.as_deref() {
            Some(image) => Some(image),
            None => {
                return Err(InferenceError::InferenceFailed(
                    "i2v requested but image_path is empty".to_string(),
                ));
            }
        },
        VideoMode::AudioRefVideo => {
            tracing::warn!(
                audio_path = ?req.audio_path,
                "external ltx-2-mlx: audio_path is INFORMATIONAL ONLY — \
                 generated frames are pure text-to-video and not conditioned \
                 on the audio bytes. Caller muxes the song downstream. (#130)"
            );
            None
        }
        VideoMode::T2v | VideoMode::AudioVideo => None,
    };

    let model = req.model.as_deref().unwrap_or(DEFAULT_MODEL);
    let output_path = match &req.output_path {
        Some(path) => path.clone(),
        None => "output.mp4".to_string(),
    };

    // Mandatory arguments.
    let mut cmd = Command::new(CLI_BINARY);
    cmd.args(["generate", "--prompt"]).arg(&req.prompt);
    cmd.arg("--output").arg(&output_path);
    cmd.arg("--model").arg(model);

    // Optional tuning knobs, emitted in a fixed order and only when present.
    let optional_flags = [
        ("--width", req.width.map(|v| v.to_string())),
        ("--height", req.height.map(|v| v.to_string())),
        ("--frames", req.num_frames.map(|v| v.to_string())),
        ("--steps", req.steps.map(|v| v.to_string())),
        ("--cfg-scale", req.guidance.map(|v| v.to_string())),
        ("--seed", req.seed.map(|v| v.to_string())),
    ];
    for (flag, value) in optional_flags {
        if let Some(value) = value {
            cmd.arg(flag).arg(value);
        }
    }

    // The conditioning image goes last, after all optional flags.
    if let Some(image) = image_arg {
        cmd.arg("--image").arg(image);
    }

    tracing::info!(?mode, prompt = %req.prompt, output = %output_path, "external ltx-2-mlx: invoking");

    // Blocks until the child exits; stdout and stderr are captured in memory.
    let output = match cmd.output() {
        Ok(finished) => finished,
        Err(e) => {
            return Err(InferenceError::InferenceFailed(format!(
                "failed to spawn `{CLI_BINARY}`: {e}. \
                 Install the companion Python port with `uv sync` in a clone of \
                 https://github.com/dgrauet/ltx-2-mlx and add its venv bin to PATH."
            )));
        }
    };

    if output.status.success() {
        return Ok(GenerateVideoResult {
            video_path: output_path,
            media_type: "video/mp4".to_string(),
            model_used: Some(format!("external:{}", model)),
        });
    }

    // Surface both streams so the caller can see what the CLI complained about.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);
    Err(InferenceError::InferenceFailed(format!(
        "ltx-2-mlx exited with status {}: stderr={stderr} stdout={stdout}",
        output.status
    )))
}