use serde::{Deserialize, Serialize};
/// Serde fallback for the `format` field of `GenerateVideoRequest`.
///
/// Returns the container name `"mp4"`; also used by the request's `Default`
/// implementation so both construction paths agree.
fn default_video_format() -> String {
    String::from("mp4")
}
/// Generation mode of a video request.
///
/// Serialized in `snake_case` (for example `AudioRefVideo` is written as
/// `"audio_ref_video"`). The per-mode input rules summarized on each variant
/// are the ones enforced by `GenerateVideoRequest::validate`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VideoMode {
/// Accepts neither `image_path` nor `video_path`; this is the mode inferred
/// when no media path is supplied (presumably "text to video").
T2v,
/// Requires `image_path` and rejects `video_path` (presumably "image to video").
I2v,
/// Rejects both `image_path` and `video_path`. Never inferred by
/// `effective_mode`; it has to be requested explicitly.
AudioVideo,
/// Requires `audio_path`; rejects `video_path`, `image_path` and every
/// retake/extend frame field.
AudioRefVideo,
/// Requires `video_path`; rejects `image_path` and the retake bounds.
Extend,
/// Requires `video_path` plus both retake bounds with start < end; rejects
/// `image_path` and the extend-specific fields.
Retake,
}
/// Request payload describing one video generation job.
///
/// When deserializing, `prompt` is the only field without a fallback:
/// `format` falls back to `default_video_format()` and every other field to
/// its `Default` value (`None` / `false`). Fields marked
/// `skip_serializing_if = "Option::is_none"` are omitted from the serialized
/// output while they are `None`; the remaining `Option` fields are always
/// written.
///
/// Deserialization does not check cross-field consistency; call
/// `validate()` for that.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateVideoRequest {
/// Text prompt. Mandatory in the serialized form (no serde default).
pub prompt: String,
/// Optional model name. NOTE(review): how `None` is resolved to a concrete
/// model is not visible in this file.
#[serde(default)]
pub model: Option<String>,
/// Optional negative prompt.
#[serde(default)]
pub negative_prompt: Option<String>,
/// Optional output width (presumably pixels — confirm with the backend).
#[serde(default)]
pub width: Option<u32>,
/// Optional output height (presumably pixels — confirm with the backend).
#[serde(default)]
pub height: Option<u32>,
/// Optional number of frames to generate.
#[serde(default)]
pub num_frames: Option<u32>,
/// Optional step count (presumably sampler/denoising steps — confirm).
#[serde(default)]
pub steps: Option<u32>,
/// Optional guidance value (presumably a guidance scale — confirm).
#[serde(default)]
pub guidance: Option<f32>,
/// Optional guidance value for audio (semantics not visible here — confirm).
#[serde(default)]
pub audio_guidance: Option<f32>,
/// Optional seed.
#[serde(default)]
pub seed: Option<u64>,
/// Optional frames-per-second value.
#[serde(default)]
pub fps: Option<u32>,
/// Optional destination path for the result.
#[serde(default)]
pub output_path: Option<String>,
/// Output format; `"mp4"` when absent from the payload.
#[serde(default = "default_video_format")]
pub format: String,
/// Optional image input. Its presence makes `effective_mode` infer
/// `VideoMode::I2v` when no `video_path` is set.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub image_path: Option<String>,
/// Optional video input. Its presence makes `effective_mode` infer
/// `VideoMode::Extend`, or `VideoMode::Retake` when a retake bound is set.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub video_path: Option<String>,
/// Optional audio input. On its own (no image, no video) it makes
/// `effective_mode` infer `VideoMode::AudioRefVideo`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub audio_path: Option<String>,
/// Opt-in flag read by `requires_audio_passthrough_opt_in`; defaults to `false`.
#[serde(default)]
pub audio_passthrough: bool,
/// Extend-only field; `validate` rejects it without `video_path` and in
/// retake mode. NOTE(review): exact frame semantics are not visible here.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub extend_after_frame: Option<u32>,
/// Extend-only field; `validate` rejects it without `video_path` and in
/// retake mode. NOTE(review): exact frame semantics are not visible here.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub extend_context_frames: Option<u32>,
/// Start of the retake range. Must be set together with
/// `retake_end_frame` and be strictly smaller than it.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub retake_start_frame: Option<u32>,
/// End of the retake range. Must be set together with
/// `retake_start_frame` and be strictly greater than it.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub retake_end_frame: Option<u32>,
/// Explicit mode. When `None`, `effective_mode` infers the mode from the
/// populated path and frame fields.
#[serde(default)]
pub mode: Option<VideoMode>,
}
impl GenerateVideoRequest {
    /// Reports whether the request still lacks the `audio_passthrough` opt-in.
    ///
    /// Returns `true` only when an `audio_path` is present, the request
    /// resolves to [`VideoMode::AudioRefVideo`], and `audio_passthrough` is
    /// `false`. In every other situation the answer is `false`.
    pub fn requires_audio_passthrough_opt_in(&self) -> bool {
        // Either condition alone is enough to skip the gate.
        if self.audio_passthrough || self.audio_path.is_none() {
            return false;
        }
        matches!(self.effective_mode(), VideoMode::AudioRefVideo)
    }

    /// Resolves the mode this request runs in.
    ///
    /// An explicit `mode` always wins. Without one the mode is inferred from
    /// the populated inputs, in this priority order:
    ///
    /// 1. `video_path` plus at least one retake bound -> `Retake`
    /// 2. `video_path` alone -> `Extend`
    /// 3. `image_path` (with or without `audio_path`) -> `I2v`
    /// 4. `audio_path` alone -> `AudioRefVideo`
    /// 5. nothing -> `T2v`
    ///
    /// `AudioVideo` is never inferred; it has to be requested explicitly.
    pub fn effective_mode(&self) -> VideoMode {
        self.mode.unwrap_or_else(|| {
            let any_retake_bound =
                self.retake_start_frame.is_some() || self.retake_end_frame.is_some();
            let inputs = (
                self.video_path.is_some(),
                self.image_path.is_some(),
                self.audio_path.is_some(),
            );
            match inputs {
                (true, _, _) if any_retake_bound => VideoMode::Retake,
                (true, _, _) => VideoMode::Extend,
                (false, true, _) => VideoMode::I2v,
                (false, false, true) => VideoMode::AudioRefVideo,
                (false, false, false) => VideoMode::T2v,
            }
        })
    }

    /// Checks that the populated fields are consistent with each other and
    /// with the effective mode.
    ///
    /// Mode-independent checks run first (retake bounds must come as a pair;
    /// extend and retake fields need a `video_path`), followed by the rules of
    /// the mode returned by [`Self::effective_mode`].
    ///
    /// # Errors
    ///
    /// Returns the message of the first violated rule. The check order is
    /// significant: it decides which message is reported when several rules
    /// are violated at once.
    pub fn validate(&self) -> Result<(), String> {
        /// Turns a violated rule into an `Err` carrying `message`.
        fn reject_if(violated: bool, message: &str) -> Result<(), String> {
            if violated {
                Err(message.to_string())
            } else {
                Ok(())
            }
        }

        let has_image = self.image_path.is_some();
        let has_video = self.video_path.is_some();
        let has_audio = self.audio_path.is_some();
        let has_after = self.extend_after_frame.is_some();
        let has_context = self.extend_context_frames.is_some();
        let has_start = self.retake_start_frame.is_some();
        let has_end = self.retake_end_frame.is_some();
        let any_retake_bound = has_start || has_end;

        // Mode-independent checks.
        reject_if(
            has_start != has_end,
            "retake_start_frame and retake_end_frame must be set together",
        )?;
        reject_if(
            has_after && !has_video,
            "extend_after_frame requires video_path (set mode=extend)",
        )?;
        reject_if(
            has_context && !has_video,
            "extend_context_frames requires video_path (set mode=extend)",
        )?;
        reject_if(
            any_retake_bound && !has_video,
            "retake_{start,end}_frame requires video_path (set mode=retake)",
        )?;

        // Mode-specific checks.
        match self.effective_mode() {
            VideoMode::T2v => {
                reject_if(
                    has_image,
                    "mode=t2v set explicitly but image_path was also provided; \
                     omit mode or set mode=i2v",
                )?;
                reject_if(
                    has_video,
                    "mode=t2v does not accept video_path; \
                     set mode=extend or mode=retake",
                )?;
            }
            VideoMode::I2v => {
                reject_if(!has_image, "mode=i2v requires image_path")?;
                reject_if(
                    has_video,
                    "mode=i2v does not accept video_path; use mode=extend or mode=retake",
                )?;
            }
            VideoMode::AudioVideo => {
                reject_if(
                    has_image,
                    "mode=audio_video does not accept image_path; \
                     image conditioning is not wired for joint audio+video synthesis",
                )?;
                reject_if(has_video, "mode=audio_video does not accept video_path")?;
            }
            VideoMode::AudioRefVideo => {
                reject_if(!has_audio, "mode=audio_ref_video requires audio_path")?;
                reject_if(has_video, "mode=audio_ref_video does not accept video_path")?;
                reject_if(
                    has_image,
                    "mode=audio_ref_video does not currently accept image_path: \
                     the combination produces severe artifacts on the native LTX \
                     backend (#130). Use mode=i2v for image-anchored generation \
                     (audio is then a downstream mux concern), or omit image_path \
                     for text+audio",
                )?;
                reject_if(
                    any_retake_bound || has_after || has_context,
                    "mode=audio_ref_video does not accept frame-range fields; \
                     retake/extend semantics don't apply",
                )?;
            }
            VideoMode::Extend => {
                reject_if(!has_video, "mode=extend requires video_path")?;
                reject_if(
                    any_retake_bound,
                    "mode=extend does not accept retake_start_frame / retake_end_frame; \
                     use mode=retake",
                )?;
                reject_if(
                    has_image,
                    "mode=extend does not accept image_path; extend is video-conditioned",
                )?;
            }
            VideoMode::Retake => {
                reject_if(!has_video, "mode=retake requires video_path")?;
                // Both bounds must be present and strictly ordered.
                match self.retake_start_frame.zip(self.retake_end_frame) {
                    Some((first, last)) => reject_if(
                        first >= last,
                        "mode=retake requires retake_start_frame < retake_end_frame",
                    )?,
                    None => {
                        return Err(
                            "mode=retake requires both retake_start_frame and retake_end_frame"
                                .to_string(),
                        );
                    }
                }
                reject_if(
                    has_image,
                    "mode=retake does not accept image_path; retake is video-conditioned",
                )?;
                reject_if(
                    has_after,
                    "mode=retake does not accept extend_after_frame; use mode=extend",
                )?;
                reject_if(
                    has_context,
                    "mode=retake does not accept extend_context_frames; use mode=extend",
                )?;
            }
        }

        Ok(())
    }
}
impl Default for GenerateVideoRequest {
    /// Builds an empty request: blank prompt, the `format` produced by
    /// `default_video_format()`, `audio_passthrough` off, and every optional
    /// field unset.
    fn default() -> Self {
        // Same fallback serde applies when `format` is missing from a payload.
        let format = default_video_format();

        Self {
            // Non-optional fields.
            prompt: String::new(),
            format,
            audio_passthrough: false,

            // Generation parameters.
            model: None,
            negative_prompt: None,
            width: None,
            height: None,
            num_frames: None,
            steps: None,
            guidance: None,
            audio_guidance: None,
            seed: None,
            fps: None,
            output_path: None,

            // Media inputs and mode selection.
            image_path: None,
            video_path: None,
            audio_path: None,
            mode: None,

            // Frame-range controls for extend / retake.
            extend_after_frame: None,
            extend_context_frames: None,
            retake_start_frame: None,
            retake_end_frame: None,
        }
    }
}
/// Result payload of a video generation request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateVideoResult {
/// Path of the video this result refers to (presumably the generated
/// output file — confirm with the producer of this type).
pub video_path: String,
/// Media type of the result. NOTE(review): looks like a MIME type string;
/// the exact format is not visible in this file.
pub media_type: String,
/// Name of the model that was used, when reported. Defaults to `None` when
/// absent from the payload and is omitted from serialization while `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_used: Option<String>,
}
// Unit tests for `GenerateVideoRequest`: mode inference, validation rules and
// the audio passthrough gate.
#[cfg(test)]
mod tests {
use super::*;
// Builds a default request carrying only the given prompt.
fn req(prompt: &str) -> GenerateVideoRequest {
GenerateVideoRequest {
prompt: prompt.into(),
..GenerateVideoRequest::default()
}
}
// A payload that only carries prompt, image_path and format still
// deserializes: the omitted fields take their defaults, the mode is inferred
// as I2v and validation passes.
#[test]
fn legacy_payload_deserializes_with_new_fields_defaulted() {
let json = r#"{"prompt":"a cat","image_path":"/tmp/cat.png","format":"mp4"}"#;
let r: GenerateVideoRequest = serde_json::from_str(json).unwrap();
assert_eq!(r.effective_mode(), VideoMode::I2v);
assert!(r.video_path.is_none());
assert!(r.extend_after_frame.is_none());
assert!(r.retake_start_frame.is_none());
r.validate().unwrap();
}
// video_path with no retake bounds infers Extend.
#[test]
fn effective_mode_infers_extend_from_video_path_alone() {
let r = GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
..req("continue walking")
};
assert_eq!(r.effective_mode(), VideoMode::Extend);
r.validate().unwrap();
}
// video_path plus a retake range infers Retake.
#[test]
fn effective_mode_infers_retake_from_range_on_video() {
let r = GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
retake_start_frame: Some(10),
retake_end_frame: Some(20),
..req("replace this span with a zoom")
};
assert_eq!(r.effective_mode(), VideoMode::Retake);
r.validate().unwrap();
}
// An explicit mode is not replaced by inference: T2v with an image_path is
// rejected instead of silently becoming I2v.
#[test]
fn explicit_mode_overrides_inference() {
let r = GenerateVideoRequest {
image_path: Some("/tmp/a.png".into()),
mode: Some(VideoMode::T2v),
..req("x")
};
assert!(r.validate().is_err());
}
// Explicit Extend without video_path is invalid.
#[test]
fn extend_requires_video_path() {
let r = GenerateVideoRequest {
mode: Some(VideoMode::Extend),
..req("x")
};
assert!(r.validate().is_err());
}
// Retake needs both bounds and start < end: a lone start and an empty range
// (start == end) are rejected, a proper range passes.
#[test]
fn retake_requires_both_frame_bounds_in_order() {
let base = GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
mode: Some(VideoMode::Retake),
..req("x")
};
assert!(GenerateVideoRequest {
retake_start_frame: Some(0),
..base.clone()
}
.validate()
.is_err());
assert!(GenerateVideoRequest {
retake_start_frame: Some(10),
retake_end_frame: Some(10),
..base.clone()
}
.validate()
.is_err());
GenerateVideoRequest {
retake_start_frame: Some(10),
retake_end_frame: Some(20),
..base
}
.validate()
.unwrap();
}
// Explicit Extend refuses retake bounds.
#[test]
fn extend_rejects_retake_range_fields() {
let r = GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
mode: Some(VideoMode::Extend),
retake_start_frame: Some(0),
retake_end_frame: Some(5),
..req("x")
};
assert!(r.validate().is_err());
}
// Inferred Retake refuses extend_after_frame.
#[test]
fn retake_rejects_extend_after_frame() {
let r = GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
retake_start_frame: Some(0),
retake_end_frame: Some(5),
extend_after_frame: Some(10),
..req("x")
};
assert!(r.validate().is_err());
}
// Extend-only fields without a video_path are rejected.
#[test]
fn stray_extend_fields_without_video_path_are_rejected() {
assert!(GenerateVideoRequest {
extend_after_frame: Some(10),
..req("x")
}
.validate()
.is_err());
assert!(GenerateVideoRequest {
extend_context_frames: Some(8),
..req("x")
}
.validate()
.is_err());
}
// Retake bounds without a video_path are rejected.
#[test]
fn stray_retake_bounds_without_video_path_are_rejected() {
assert!(GenerateVideoRequest {
retake_start_frame: Some(0),
retake_end_frame: Some(5),
..req("x")
}
.validate()
.is_err());
}
// Supplying only one of the two retake bounds is rejected, whichever one it is.
#[test]
fn retake_bounds_must_be_set_together() {
assert!(GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
retake_start_frame: Some(0),
..req("x")
}
.validate()
.is_err());
assert!(GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
retake_end_frame: Some(5),
..req("x")
}
.validate()
.is_err());
}
// extend_context_frames is fine for (inferred) Extend but not for Retake.
#[test]
fn extend_accepts_context_frames_but_retake_rejects_it() {
GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
extend_context_frames: Some(8),
..req("x")
}
.validate()
.unwrap();
assert!(GenerateVideoRequest {
video_path: Some("/tmp/clip.mp4".into()),
retake_start_frame: Some(0),
retake_end_frame: Some(5),
extend_context_frames: Some(8),
..req("x")
}
.validate()
.is_err());
}
// Explicit AudioVideo refuses both image_path and video_path.
#[test]
fn audio_video_still_rejects_image_and_video() {
assert!(GenerateVideoRequest {
image_path: Some("/tmp/a.png".into()),
mode: Some(VideoMode::AudioVideo),
..req("x")
}
.validate()
.is_err());
assert!(GenerateVideoRequest {
video_path: Some("/tmp/a.mp4".into()),
mode: Some(VideoMode::AudioVideo),
..req("x")
}
.validate()
.is_err());
}
// audio_path with no image or video infers AudioRefVideo and validates.
#[test]
fn audio_ref_video_infers_from_audio_path_alone() {
let audio_only = GenerateVideoRequest {
audio_path: Some("/tmp/ref.wav".into()),
audio_passthrough: false,
..req("x")
};
assert_eq!(audio_only.effective_mode(), VideoMode::AudioRefVideo);
audio_only.validate().unwrap();
}
// image + audio infers I2v (not AudioRefVideo); forcing AudioRefVideo with an
// image yields the dedicated image_path error message.
#[test]
fn audio_ref_video_rejects_image_plus_audio_combination() {
let image_and_audio = GenerateVideoRequest {
image_path: Some("/tmp/anchor.png".into()),
audio_path: Some("/tmp/ref.wav".into()),
audio_passthrough: false,
..req("x")
};
assert_eq!(image_and_audio.effective_mode(), VideoMode::I2v);
let explicit = GenerateVideoRequest {
mode: Some(VideoMode::AudioRefVideo),
image_path: Some("/tmp/anchor.png".into()),
audio_path: Some("/tmp/ref.wav".into()),
audio_passthrough: false,
..req("x")
};
let err = explicit.validate().unwrap_err();
assert!(
err.contains("does not currently accept image_path"),
"expected #130 error message, got: {err}"
);
}
// Explicit AudioRefVideo needs audio_path and refuses video_path.
#[test]
fn audio_ref_video_requires_audio_and_rejects_video_path() {
assert!(GenerateVideoRequest {
mode: Some(VideoMode::AudioRefVideo),
..req("x")
}
.validate()
.is_err());
assert!(GenerateVideoRequest {
mode: Some(VideoMode::AudioRefVideo),
audio_path: Some("/tmp/ref.wav".into()),
audio_passthrough: false,
video_path: Some("/tmp/clip.mp4".into()),
..req("x")
}
.validate()
.is_err());
}
// The gate fires for AudioRefVideo with audio_path and no opt-in.
#[test]
fn passthrough_gate_fires_for_audio_ref_video_without_opt_in() {
let r = GenerateVideoRequest {
mode: Some(VideoMode::AudioRefVideo),
audio_path: Some("/tmp/song.wav".into()),
audio_passthrough: false,
..req("x")
};
assert!(r.requires_audio_passthrough_opt_in());
}
// The gate stays quiet once audio_passthrough is set.
#[test]
fn passthrough_gate_skipped_when_opt_in_set() {
let r = GenerateVideoRequest {
mode: Some(VideoMode::AudioRefVideo),
audio_path: Some("/tmp/song.wav".into()),
audio_passthrough: true,
..req("x")
};
assert!(!r.requires_audio_passthrough_opt_in());
}
// The gate stays quiet when there is no audio_path at all.
#[test]
fn passthrough_gate_skipped_when_audio_path_absent() {
let r = GenerateVideoRequest {
mode: Some(VideoMode::AudioRefVideo),
audio_path: None,
audio_passthrough: false,
..req("x")
};
assert!(!r.requires_audio_passthrough_opt_in());
}
// The gate only applies to AudioRefVideo; AudioVideo does not trigger it.
#[test]
fn passthrough_gate_skipped_for_audio_video_mode() {
let r = GenerateVideoRequest {
mode: Some(VideoMode::AudioVideo),
audio_path: Some("/tmp/song.wav".into()),
audio_passthrough: false,
..req("x")
};
assert!(!r.requires_audio_passthrough_opt_in());
}
}