1use crate::error::{SpatialError, SpatialResult};
2use crate::stereo::generate_stereo_pair;
3use crate::SpatialConfig;
4use image::{DynamicImage, ImageBuffer, RgbImage};
5use std::path::Path;
6use std::process::Stdio;
7use tokio::io::{AsyncReadExt, AsyncWriteExt};
8use tokio::process::Command;
9use tokio::sync::mpsc;
10
11#[derive(Clone, Debug)]
12pub struct VideoProgress {
13 pub current_frame: u32,
14 pub total_frames: u32,
15 pub stage: String,
16 pub percent: f64,
17}
18
19impl VideoProgress {
20 pub fn new(current_frame: u32, total_frames: u32, stage: String) -> Self {
21 let percent = if total_frames > 0 {
22 (current_frame as f64 / total_frames as f64 * 100.0).min(100.0)
23 } else {
24 0.0
25 };
26 Self {
27 current_frame,
28 total_frames,
29 stage,
30 percent,
31 }
32 }
33}
34
35#[derive(Clone, Debug)]
36pub struct VideoMetadata {
37 pub width: u32,
38 pub height: u32,
39 pub fps: f64,
40 pub total_frames: u32,
41 pub duration: f64,
42 pub has_audio: bool,
43}
44
45pub type ProgressCallback = Box<dyn Fn(VideoProgress) + Send + Sync>;
46
47pub async fn get_video_metadata(input_path: &Path) -> SpatialResult<VideoMetadata> {
48 let input_str = input_path
49 .to_str()
50 .ok_or_else(|| SpatialError::Other("Invalid input path encoding".to_string()))?;
51
52 let output = Command::new("ffprobe")
53 .args([
54 "-v", "error",
55 "-select_streams", "v:0",
56 "-show_entries", "stream=width,height,r_frame_rate,nb_frames,duration",
57 "-show_entries", "format=duration",
58 "-of", "json",
59 input_str,
60 ])
61 .output()
62 .await
63 .map_err(|e| {
64 SpatialError::Other(format!(
65 "Failed to run ffprobe (is ffmpeg installed?): {}",
66 e
67 ))
68 })?;
69
70 if !output.status.success() {
71 let stderr = String::from_utf8_lossy(&output.stderr);
72 return Err(SpatialError::Other(format!("ffprobe failed: {}", stderr)));
73 }
74
75 let stdout = String::from_utf8_lossy(&output.stdout);
76 let json: serde_json::Value = serde_json::from_str(&stdout)
77 .map_err(|e| SpatialError::Other(format!("Failed to parse ffprobe JSON: {}", e)))?;
78
79 let stream = json["streams"]
80 .as_array()
81 .and_then(|s| s.first())
82 .ok_or_else(|| SpatialError::Other("No video stream found".to_string()))?;
83
84 let width = stream["width"]
85 .as_u64()
86 .ok_or_else(|| SpatialError::Other("Failed to parse width".to_string()))? as u32;
87 let height = stream["height"]
88 .as_u64()
89 .ok_or_else(|| SpatialError::Other("Failed to parse height".to_string()))? as u32;
90
91 let fps = stream["r_frame_rate"]
92 .as_str()
93 .map(|s| {
94 if let Some((num, den)) = s.split_once('/') {
95 let n: f64 = num.parse().unwrap_or(30.0);
96 let d: f64 = den.parse().unwrap_or(1.0);
97 n / d
98 } else {
99 s.parse().unwrap_or(30.0)
100 }
101 })
102 .unwrap_or(30.0);
103
104 let duration = stream["duration"]
105 .as_str()
106 .and_then(|s| s.parse::<f64>().ok())
107 .or_else(|| {
108 json["format"]["duration"]
109 .as_str()
110 .and_then(|s| s.parse::<f64>().ok())
111 })
112 .unwrap_or(0.0);
113
114 let total_frames = stream["nb_frames"]
115 .as_str()
116 .and_then(|s| s.parse::<u32>().ok())
117 .unwrap_or_else(|| (duration * fps).round() as u32);
118
119 let audio_output = Command::new("ffprobe")
120 .args([
121 "-v", "error",
122 "-select_streams", "a:0",
123 "-show_entries", "stream=codec_type",
124 "-of", "csv=p=0",
125 input_str,
126 ])
127 .output()
128 .await
129 .map_err(|e| SpatialError::Other(format!("Failed to check audio: {}", e)))?;
130
131 let has_audio = String::from_utf8_lossy(&audio_output.stdout)
132 .trim()
133 .contains("audio");
134
135 Ok(VideoMetadata {
136 width,
137 height,
138 fps,
139 total_frames,
140 duration,
141 has_audio,
142 })
143}
144
145async fn extract_frames(
146 input_path: &Path,
147 metadata: &VideoMetadata,
148) -> SpatialResult<mpsc::Receiver<Vec<u8>>> {
149 let (tx, rx) = mpsc::channel::<Vec<u8>>(10);
150
151 let width = metadata.width;
152 let height = metadata.height;
153 let frame_size = (width * height * 3) as usize;
154
155 let input_path = input_path.to_path_buf();
156
157 tokio::spawn(async move {
158 let mut child = Command::new("ffmpeg")
159 .args([
160 "-i",
161 input_path.to_str().unwrap(),
162 "-f",
163 "rawvideo",
164 "-pix_fmt",
165 "rgb24",
166 "-vsync",
167 "0",
168 "-",
169 ])
170 .stdout(Stdio::piped())
171 .stderr(Stdio::null())
172 .spawn()
173 .expect("Failed to spawn ffmpeg");
174
175 let stdout = child.stdout.take().expect("Failed to capture stdout");
176 let mut reader = tokio::io::BufReader::new(stdout);
177 let mut frame_buffer = vec![0u8; frame_size];
178
179 loop {
180 match reader.read_exact(&mut frame_buffer).await {
181 Ok(_) => {
182 if tx.send(frame_buffer.clone()).await.is_err() {
183 break;
184 }
185 }
186 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break,
187 Err(_) => break,
188 }
189 }
190
191 let _ = child.wait().await;
192 });
193
194 Ok(rx)
195}
196
197fn frame_to_image(data: &[u8], width: u32, height: u32) -> SpatialResult<DynamicImage> {
198 let rgb_image = RgbImage::from_raw(width, height, data.to_vec()).ok_or_else(|| {
199 SpatialError::ImageError(format!(
200 "Failed to create image from frame data ({}x{})",
201 width, height
202 ))
203 })?;
204 Ok(DynamicImage::ImageRgb8(rgb_image))
205}
206
207async fn encode_stereo_video(
208 output_path: std::path::PathBuf,
209 metadata: VideoMetadata,
210 mut rx: mpsc::Receiver<(DynamicImage, DynamicImage)>,
211) -> SpatialResult<()> {
212 let width = metadata.width;
213 let height = metadata.height;
214 let fps = metadata.fps;
215
216 let output_width = width * 2;
217 let output_height = height;
218
219 let mut child = Command::new("ffmpeg")
220 .args([
221 "-f",
222 "rawvideo",
223 "-pix_fmt",
224 "rgb24",
225 "-s",
226 &format!("{}x{}", output_width, output_height),
227 "-r",
228 &format!("{}", fps),
229 "-i",
230 "-",
231 "-c:v",
232 "libx264",
233 "-preset",
234 "medium",
235 "-crf",
236 "23",
237 "-pix_fmt",
238 "yuv420p",
239 "-y",
240 output_path.to_str().unwrap(),
241 ])
242 .stdin(Stdio::piped())
243 .stdout(Stdio::null())
244 .stderr(Stdio::null())
245 .spawn()
246 .map_err(|e| SpatialError::Other(format!("Failed to spawn ffmpeg encoder: {}", e)))?;
247
248 let mut stdin = child.stdin.take().expect("Failed to capture stdin");
249
250 while let Some((left, right)) = rx.recv().await {
251 let mut sbs_image = ImageBuffer::new(output_width, output_height);
252
253 let left_rgb = left.to_rgb8();
254 for y in 0..height {
255 for x in 0..width {
256 let pixel = left_rgb.get_pixel(x, y);
257 sbs_image.put_pixel(x, y, *pixel);
258 }
259 }
260
261 let right_rgb = right.to_rgb8();
262 for y in 0..height {
263 for x in 0..width {
264 let pixel = right_rgb.get_pixel(x, y);
265 sbs_image.put_pixel(width + x, y, *pixel);
266 }
267 }
268
269 stdin
270 .write_all(&sbs_image.into_raw())
271 .await
272 .map_err(|e| SpatialError::IoError(format!("Failed to write frame: {}", e)))?;
273 }
274
275 drop(stdin);
276
277 let status = child
278 .wait()
279 .await
280 .map_err(|e| SpatialError::Other(format!("ffmpeg encoding failed: {}", e)))?;
281
282 if !status.success() {
283 return Err(SpatialError::Other(
284 "ffmpeg encoding exited with error".to_string(),
285 ));
286 }
287
288 Ok(())
289}
290
291fn is_spatial_cli_available() -> bool {
292 std::process::Command::new("spatial")
293 .arg("--version")
294 .output()
295 .map(|o| o.status.success())
296 .unwrap_or(false)
297}
298
299async fn encode_mvhevc_video(
300 sbs_path: &Path,
301 output_path: &Path,
302 input_path: &Path,
303 metadata: &VideoMetadata,
304) -> SpatialResult<()> {
305 let sbs_str = sbs_path.to_str()
306 .ok_or_else(|| SpatialError::Other("Invalid SBS path".to_string()))?;
307 let output_str = output_path.to_str()
308 .ok_or_else(|| SpatialError::Other("Invalid output path".to_string()))?;
309
310 let mut args = vec![
311 "make",
312 "--input", sbs_str,
313 "--output", output_str,
314 "--format", "sbs",
315 "--cdist", "65",
316 "--hfov", "90",
317 "--hadjust", "0",
318 "--projection", "rect",
319 "--overwrite",
320 ];
321
322 if !metadata.has_audio {
323 args.push("--no-audio");
324 }
325
326 let output = Command::new("spatial")
327 .args(&args)
328 .output()
329 .await
330 .map_err(|e| SpatialError::Other(format!("Failed to run spatial CLI: {}", e)))?;
331
332 if !output.status.success() {
333 let stderr = String::from_utf8_lossy(&output.stderr);
334 return Err(SpatialError::Other(format!("spatial make failed: {}", stderr)));
335 }
336
337 if metadata.has_audio {
338 let input_str = input_path.to_str()
339 .ok_or_else(|| SpatialError::Other("Invalid input path".to_string()))?;
340
341 let with_audio_path = output_path.with_extension("tmp.mov");
342 let with_audio_str = with_audio_path.to_str()
343 .ok_or_else(|| SpatialError::Other("Invalid temp path".to_string()))?;
344
345 let mux_output = Command::new("ffmpeg")
346 .args([
347 "-i", output_str,
348 "-i", input_str,
349 "-c:v", "copy",
350 "-c:a", "aac",
351 "-map", "0:v:0",
352 "-map", "1:a:0",
353 "-y", with_audio_str,
354 ])
355 .output()
356 .await
357 .map_err(|e| SpatialError::Other(format!("Failed to mux audio: {}", e)))?;
358
359 if mux_output.status.success() {
360 let _ = tokio::fs::remove_file(output_path).await;
361 tokio::fs::rename(&with_audio_path, output_path).await
362 .map_err(|e| SpatialError::IoError(format!("Failed to rename muxed file: {}", e)))?;
363 }
364 }
365
366 Ok(())
367}
368
369pub async fn process_video(
370 input_path: &Path,
371 output_path: &Path,
372 config: SpatialConfig,
373 progress_cb: Option<ProgressCallback>,
374) -> SpatialResult<()> {
375 if !input_path.exists() {
376 return Err(SpatialError::IoError(format!(
377 "Input file not found: {:?}",
378 input_path
379 )));
380 }
381
382 let metadata = get_video_metadata(input_path).await?;
383 let use_spatial = is_spatial_cli_available();
384
385 let sbs_path = if use_spatial {
386 let temp_dir = std::env::temp_dir();
387 temp_dir.join(format!(
388 "spatial_maker_sbs_{}.mov",
389 std::time::SystemTime::now()
390 .duration_since(std::time::UNIX_EPOCH)
391 .unwrap_or_default()
392 .as_millis()
393 ))
394 } else {
395 output_path.to_path_buf()
396 };
397
398 crate::model::ensure_model_exists::<fn(u64, u64)>(&config.encoder_size, None).await?;
399
400 #[cfg(all(target_os = "macos", feature = "coreml"))]
401 let estimator = {
402 let model_path = crate::model::find_model(&config.encoder_size)?;
403 let model_str = model_path.to_str().ok_or_else(|| {
404 SpatialError::ModelError("Invalid model path encoding".to_string())
405 })?;
406 std::sync::Arc::new(crate::depth_coreml::CoreMLDepthEstimator::new(model_str)?)
407 };
408
409 let mut frame_rx = extract_frames(input_path, &metadata).await?;
410
411 let (processed_tx, processed_rx) = mpsc::channel::<(DynamicImage, DynamicImage)>(10);
412
413 let encode_handle = tokio::spawn(encode_stereo_video(
414 sbs_path.clone(),
415 metadata.clone(),
416 processed_rx,
417 ));
418
419 let mut frame_count = 0u32;
420 let total_frames = metadata.total_frames;
421
422 if let Some(ref cb) = progress_cb {
423 cb(VideoProgress::new(0, total_frames, "extracting".to_string()));
424 }
425
426 while let Some(frame_data) = frame_rx.recv().await {
427 let frame = frame_to_image(&frame_data, metadata.width, metadata.height)?;
428
429 frame_count += 1;
430 if let Some(ref cb) = progress_cb {
431 if frame_count % 10 == 0 || frame_count == total_frames {
432 cb(VideoProgress::new(
433 frame_count,
434 total_frames,
435 "processing".to_string(),
436 ));
437 }
438 }
439
440 #[cfg(all(target_os = "macos", feature = "coreml"))]
441 let depth_map = estimator.estimate(&frame)?;
442
443 #[cfg(not(all(target_os = "macos", feature = "coreml")))]
444 let depth_map = {
445 #[cfg(feature = "onnx")]
446 {
447 let model_path = crate::model::find_model(&config.encoder_size)?;
448 let est = crate::depth::OnnxDepthEstimator::new(model_path.to_str().unwrap())?;
449 est.estimate(&frame)?
450 }
451 #[cfg(not(feature = "onnx"))]
452 {
453 return Err(SpatialError::ConfigError(
454 "No depth backend enabled. Enable 'coreml' or 'onnx' feature.".to_string(),
455 ));
456 }
457 };
458
459 let (left, right) = generate_stereo_pair(&frame, &depth_map, config.max_disparity)?;
460
461 if processed_tx.send((left, right)).await.is_err() {
462 return Err(SpatialError::Other(
463 "Encoder stopped unexpectedly".to_string(),
464 ));
465 }
466 }
467
468 drop(processed_tx);
469
470 if let Some(ref cb) = progress_cb {
471 cb(VideoProgress::new(
472 total_frames,
473 total_frames,
474 "encoding".to_string(),
475 ));
476 }
477
478 encode_handle
479 .await
480 .map_err(|e| SpatialError::Other(format!("Encoding task failed: {}", e)))??;
481
482 if use_spatial {
483 if let Some(ref cb) = progress_cb {
484 cb(VideoProgress::new(
485 total_frames,
486 total_frames,
487 "packaging".to_string(),
488 ));
489 }
490
491 let result = encode_mvhevc_video(&sbs_path, output_path, input_path, &metadata).await;
492 let _ = tokio::fs::remove_file(&sbs_path).await;
493 result?;
494 }
495
496 if let Some(ref cb) = progress_cb {
497 cb(VideoProgress::new(
498 total_frames,
499 total_frames,
500 "complete".to_string(),
501 ));
502 }
503
504 Ok(())
505}