1use anyhow::{Result, bail};
5use bytes::BytesMut;
6
7use crate::frame::{PixelFormat, VideoFrame};
8
9pub fn scale_frame(
10 frame: &VideoFrame,
11 target_width: u32,
12 target_height: u32,
13) -> Result<VideoFrame> {
14 if frame.width == target_width && frame.height == target_height {
15 return Ok(frame.clone());
16 }
17
18 match frame.format {
19 PixelFormat::Yuv420p => scale_frame_8bit(frame, target_width, target_height),
20 PixelFormat::Yuv420p10le => scale_frame_10bit(frame, target_width, target_height),
25 _ => bail!(
26 "scaling only implemented for Yuv420p / Yuv420p10le; got {:?}",
27 frame.format
28 ),
29 }
30}
31
32fn scale_frame_8bit(
33 frame: &VideoFrame,
34 target_width: u32,
35 target_height: u32,
36) -> Result<VideoFrame> {
37 let src_w = frame.width as usize;
38 let src_h = frame.height as usize;
39 let dst_w = target_width as usize;
40 let dst_h = target_height as usize;
41
42 let src_y_size = src_w * src_h;
43 let dst_y_size = dst_w * dst_h;
44 let dst_uv_size = dst_y_size / 4;
45
46 let mut out = BytesMut::with_capacity(dst_y_size + dst_uv_size * 2);
47
48 let y_plane = &frame.data[..src_y_size];
50 out.extend(bilinear_scale_plane(y_plane, src_w, src_h, dst_w, dst_h));
51
52 let u_offset = src_y_size;
54 let u_plane = &frame.data[u_offset..u_offset + src_y_size / 4];
55 out.extend(bilinear_scale_plane(
56 u_plane,
57 src_w / 2,
58 src_h / 2,
59 dst_w / 2,
60 dst_h / 2,
61 ));
62
63 let v_offset = u_offset + src_y_size / 4;
65 let v_plane = &frame.data[v_offset..v_offset + src_y_size / 4];
66 out.extend(bilinear_scale_plane(
67 v_plane,
68 src_w / 2,
69 src_h / 2,
70 dst_w / 2,
71 dst_h / 2,
72 ));
73
74 Ok(VideoFrame::new(
75 out.freeze(),
76 target_width,
77 target_height,
78 frame.format,
79 frame.color_space,
80 frame.pts,
81 ))
82}
83
84fn scale_frame_10bit(
93 frame: &VideoFrame,
94 target_width: u32,
95 target_height: u32,
96) -> Result<VideoFrame> {
97 let src_w = frame.width as usize;
98 let src_h = frame.height as usize;
99 let dst_w = target_width as usize;
100 let dst_h = target_height as usize;
101
102 let bytes_per_sample = 2usize;
103 let src_y_size_samples = src_w * src_h;
104 let src_y_size_bytes = src_y_size_samples * bytes_per_sample;
105 let src_c_size_samples = (src_w / 2) * (src_h / 2);
106 let src_c_size_bytes = src_c_size_samples * bytes_per_sample;
107
108 if frame.data.len() < src_y_size_bytes + 2 * src_c_size_bytes {
109 bail!(
110 "10-bit frame data too short for {}x{}: {} bytes",
111 src_w,
112 src_h,
113 frame.data.len()
114 );
115 }
116
117 let dst_y_size_samples = dst_w * dst_h;
118 let dst_c_size_samples = (dst_w / 2) * (dst_h / 2);
119 let dst_total_bytes = (dst_y_size_samples + 2 * dst_c_size_samples) * bytes_per_sample;
120
121 let y_plane = super::read_u16le(&frame.data[..src_y_size_bytes]);
123 let u_plane =
124 super::read_u16le(&frame.data[src_y_size_bytes..src_y_size_bytes + src_c_size_bytes]);
125 let v_plane = super::read_u16le(
126 &frame.data
127 [src_y_size_bytes + src_c_size_bytes..src_y_size_bytes + 2 * src_c_size_bytes],
128 );
129
130 let y_dst = bilinear_scale_plane_u16(&y_plane, src_w, src_h, dst_w, dst_h);
132 let u_dst =
133 bilinear_scale_plane_u16(&u_plane, src_w / 2, src_h / 2, dst_w / 2, dst_h / 2);
134 let v_dst =
135 bilinear_scale_plane_u16(&v_plane, src_w / 2, src_h / 2, dst_w / 2, dst_h / 2);
136
137 let mut out = BytesMut::with_capacity(dst_total_bytes);
138 super::write_u16le(&mut out, &y_dst);
139 super::write_u16le(&mut out, &u_dst);
140 super::write_u16le(&mut out, &v_dst);
141
142 Ok(VideoFrame::new(
143 out.freeze(),
144 target_width,
145 target_height,
146 frame.format,
147 frame.color_space,
148 frame.pts,
149 ))
150}
151
152pub fn bilinear_scale_plane_u16_scalar(
158 src: &[u16],
159 src_w: usize,
160 src_h: usize,
161 dst_w: usize,
162 dst_h: usize,
163) -> Vec<u16> {
164 let mut dst = vec![0u16; dst_w * dst_h];
165 let x_ratio = src_w as f64 / dst_w as f64;
166 let y_ratio = src_h as f64 / dst_h as f64;
167
168 for dy in 0..dst_h {
169 let sy = (dy as f64 * y_ratio).min((src_h - 1) as f64);
170 let y0 = sy as usize;
171 let y1 = (y0 + 1).min(src_h - 1);
172 let fy = sy - y0 as f64;
173
174 for dx in 0..dst_w {
175 let sx = (dx as f64 * x_ratio).min((src_w - 1) as f64);
176 let x0 = sx as usize;
177 let x1 = (x0 + 1).min(src_w - 1);
178 let fx = sx - x0 as f64;
179
180 let p00 = src[y0 * src_w + x0] as f64;
181 let p10 = src[y0 * src_w + x1] as f64;
182 let p01 = src[y1 * src_w + x0] as f64;
183 let p11 = src[y1 * src_w + x1] as f64;
184
185 let val = p00 * (1.0 - fx) * (1.0 - fy)
186 + p10 * fx * (1.0 - fy)
187 + p01 * (1.0 - fx) * fy
188 + p11 * fx * fy;
189
190 dst[dy * dst_w + dx] = val.round().clamp(0.0, 1023.0) as u16;
196 }
197 }
198 dst
199}
200
201pub fn bilinear_scale_plane_u16(
216 src: &[u16],
217 src_w: usize,
218 src_h: usize,
219 dst_w: usize,
220 dst_h: usize,
221) -> Vec<u16> {
222 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
223 {
224 if std::is_x86_feature_detected!("avx2") && dst_w >= 16 {
228 return unsafe {
230 bilinear_scale_plane_u16_avx2(src, src_w, src_h, dst_w, dst_h)
231 };
232 }
233 }
234 bilinear_scale_plane_u16_scalar(src, src_w, src_h, dst_w, dst_h)
235}
236
237#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
255#[target_feature(enable = "avx2")]
256unsafe fn bilinear_scale_plane_u16_avx2(
257 src: &[u16],
258 src_w: usize,
259 src_h: usize,
260 dst_w: usize,
261 dst_h: usize,
262) -> Vec<u16> {
263 unsafe {
264 #[cfg(target_arch = "x86")]
265 use std::arch::x86::*;
266 #[cfg(target_arch = "x86_64")]
267 use std::arch::x86_64::*;
268
269 let mut dst = vec![0u16; dst_w * dst_h];
270
271 let x_step = ((src_w as u64) << 32) / (dst_w as u64);
275 let y_step = ((src_h as u64) << 32) / (dst_h as u64);
276
277 let mut x0s: Vec<u32> = vec![0; dst_w];
279 let mut x1s: Vec<u32> = vec![0; dst_w];
280 let mut fxs_q15: Vec<i16> = vec![0; dst_w];
281 let mut one_minus_fxs_q15: Vec<i16> = vec![0; dst_w];
282 for dx in 0..dst_w {
283 let sx_32_32 = (dx as u64) * x_step;
284 let x0_full = (sx_32_32 >> 32) as usize;
285 let x0 = x0_full.min(src_w - 1);
286 let fx_q16 = ((sx_32_32 >> 16) & 0xFFFF) as u32;
287 let fx_q15 = ((fx_q16 as i32) >> 1).min(32767) as i16;
289 if x0 >= src_w - 1 {
290 x0s[dx] = (src_w - 1) as u32;
291 x1s[dx] = (src_w - 1) as u32;
292 fxs_q15[dx] = 0;
293 one_minus_fxs_q15[dx] = 32767;
294 } else {
295 x0s[dx] = x0 as u32;
296 x1s[dx] = (x0 + 1) as u32;
297 fxs_q15[dx] = fx_q15;
298 one_minus_fxs_q15[dx] = 32767 - fx_q15;
299 }
300 }
301
302 let v_max = _mm256_set1_epi16(1023);
303 let v_zero = _mm256_setzero_si256();
304
305 for dy in 0..dst_h {
306 let sy_32_32 = (dy as u64) * y_step;
307 let y0_full = (sy_32_32 >> 32) as usize;
308 let y0 = y0_full.min(src_h - 1);
309 let fy_q16 = ((sy_32_32 >> 16) & 0xFFFF) as u32;
310 let y1 = (y0 + 1).min(src_h - 1);
311 let fy_q15 = ((fy_q16 as i32) >> 1).min(32767) as i16;
312 let one_minus_fy_q15 = 32767i16 - fy_q15;
313
314 let row0 = y0 * src_w;
315 let row1 = y1 * src_w;
316 let dst_row = dy * dst_w;
317
318 let v_fy = _mm256_set1_epi16(fy_q15);
319 let v_one_minus_fy = _mm256_set1_epi16(one_minus_fy_q15);
320
321 let mut dx = 0usize;
322 while dx + 16 <= dst_w {
323 let mut p00_buf = [0u16; 16];
329 let mut p10_buf = [0u16; 16];
330 let mut p01_buf = [0u16; 16];
331 let mut p11_buf = [0u16; 16];
332 for i in 0..16 {
333 let x0 = x0s[dx + i] as usize;
334 let x1 = x1s[dx + i] as usize;
335 p00_buf[i] = *src.get_unchecked(row0 + x0);
336 p10_buf[i] = *src.get_unchecked(row0 + x1);
337 p01_buf[i] = *src.get_unchecked(row1 + x0);
338 p11_buf[i] = *src.get_unchecked(row1 + x1);
339 }
340
341 let p00 = _mm256_loadu_si256(p00_buf.as_ptr() as *const _);
346 let p10 = _mm256_loadu_si256(p10_buf.as_ptr() as *const _);
347 let p01 = _mm256_loadu_si256(p01_buf.as_ptr() as *const _);
348 let p11 = _mm256_loadu_si256(p11_buf.as_ptr() as *const _);
349
350 let v_fx = _mm256_loadu_si256(fxs_q15.as_ptr().add(dx) as *const _);
352 let v_one_minus_fx =
353 _mm256_loadu_si256(one_minus_fxs_q15.as_ptr().add(dx) as *const _);
354
355 let top = _mm256_add_epi16(
366 _mm256_mulhrs_epi16(p00, v_one_minus_fx),
367 _mm256_mulhrs_epi16(p10, v_fx),
368 );
369 let bottom = _mm256_add_epi16(
370 _mm256_mulhrs_epi16(p01, v_one_minus_fx),
371 _mm256_mulhrs_epi16(p11, v_fx),
372 );
373
374 let out_i16 = _mm256_add_epi16(
376 _mm256_mulhrs_epi16(top, v_one_minus_fy),
377 _mm256_mulhrs_epi16(bottom, v_fy),
378 );
379
380 let clamped =
385 _mm256_min_epi16(_mm256_max_epi16(out_i16, v_zero), v_max);
386
387 _mm256_storeu_si256(
388 dst.as_mut_ptr().add(dst_row + dx) as *mut _,
389 clamped,
390 );
391
392 dx += 16;
393 }
394
395 while dx < dst_w {
400 let x0 = x0s[dx] as usize;
401 let x1 = x1s[dx] as usize;
402 let fx = fxs_q15[dx] as f64 / 32768.0;
403 let fy = fy_q15 as f64 / 32768.0;
404
405 let p00 = src[row0 + x0] as f64;
406 let p10 = src[row0 + x1] as f64;
407 let p01 = src[row1 + x0] as f64;
408 let p11 = src[row1 + x1] as f64;
409
410 let val = p00 * (1.0 - fx) * (1.0 - fy)
411 + p10 * fx * (1.0 - fy)
412 + p01 * (1.0 - fx) * fy
413 + p11 * fx * fy;
414 dst[dst_row + dx] = val.round().clamp(0.0, 1023.0) as u16;
415 dx += 1;
416 }
417 }
418
419 dst
420 }
421}
422
423pub fn bilinear_scale_plane(
425 src: &[u8],
426 src_w: usize,
427 src_h: usize,
428 dst_w: usize,
429 dst_h: usize,
430) -> Vec<u8> {
431 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
432 {
433 if std::is_x86_feature_detected!("avx2") && dst_w >= 16 {
437 return unsafe { bilinear_scale_plane_avx2(src, src_w, src_h, dst_w, dst_h) };
439 }
440 }
441 bilinear_scale_plane_scalar(src, src_w, src_h, dst_w, dst_h)
442}
443
444pub fn bilinear_scale_plane_scalar(
445 src: &[u8],
446 src_w: usize,
447 src_h: usize,
448 dst_w: usize,
449 dst_h: usize,
450) -> Vec<u8> {
451 let mut dst = vec![0u8; dst_w * dst_h];
452 let x_ratio = src_w as f64 / dst_w as f64;
453 let y_ratio = src_h as f64 / dst_h as f64;
454
455 for dy in 0..dst_h {
456 let sy = (dy as f64 * y_ratio).min((src_h - 1) as f64);
457 let y0 = sy as usize;
458 let y1 = (y0 + 1).min(src_h - 1);
459 let fy = sy - y0 as f64;
460
461 for dx in 0..dst_w {
462 let sx = (dx as f64 * x_ratio).min((src_w - 1) as f64);
463 let x0 = sx as usize;
464 let x1 = (x0 + 1).min(src_w - 1);
465 let fx = sx - x0 as f64;
466
467 let p00 = src[y0 * src_w + x0] as f64;
468 let p10 = src[y0 * src_w + x1] as f64;
469 let p01 = src[y1 * src_w + x0] as f64;
470 let p11 = src[y1 * src_w + x1] as f64;
471
472 let val = p00 * (1.0 - fx) * (1.0 - fy)
473 + p10 * fx * (1.0 - fy)
474 + p01 * (1.0 - fx) * fy
475 + p11 * fx * fy;
476
477 dst[dy * dst_w + dx] = val.round() as u8;
478 }
479 }
480 dst
481}
482
483#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
484#[target_feature(enable = "avx2")]
485unsafe fn bilinear_scale_plane_avx2(
486 src: &[u8],
487 src_w: usize,
488 src_h: usize,
489 dst_w: usize,
490 dst_h: usize,
491) -> Vec<u8> {
492 unsafe {
493 #[cfg(target_arch = "x86")]
494 use std::arch::x86::*;
495 #[cfg(target_arch = "x86_64")]
496 use std::arch::x86_64::*;
497
498 let mut dst = vec![0u8; dst_w * dst_h];
499
500 let x_step = ((src_w as u64) << 32) / (dst_w as u64); let y_step = ((src_h as u64) << 32) / (dst_h as u64);
509
510 let mut x0s: Vec<u32> = vec![0; dst_w];
512 let mut fxs: Vec<u16> = vec![0; dst_w];
513 for dx in 0..dst_w {
514 let sx_32_32 = (dx as u64) * x_step; let x0_full = (sx_32_32 >> 32) as usize;
516 let x0 = x0_full.min(src_w - 1);
517 let fx_q16 = ((sx_32_32 >> 16) & 0xFFFF) as u16; if x0 >= src_w - 1 {
520 x0s[dx] = (src_w - 1) as u32;
521 fxs[dx] = 0;
522 } else {
523 x0s[dx] = x0 as u32;
524 fxs[dx] = fx_q16;
525 }
526 }
527
528 let mut fx_q15: Vec<i16> = vec![0; dst_w];
540 let mut one_minus_fx_q15: Vec<i16> = vec![0; dst_w];
541 for dx in 0..dst_w {
542 let fxq15 = (fxs[dx] as i32 >> 1).min(32767) as i16;
544 fx_q15[dx] = fxq15;
545 one_minus_fx_q15[dx] = 32767 - fxq15;
546 }
547
548 for dy in 0..dst_h {
549 let sy_32_32 = (dy as u64) * y_step;
550 let y0_full = (sy_32_32 >> 32) as usize;
551 let y0 = y0_full.min(src_h - 1);
552 let fy_q16 = ((sy_32_32 >> 16) & 0xFFFF) as u32;
553 let y1 = (y0 + 1).min(src_h - 1);
554 let fy_q15 = ((fy_q16 as i32) >> 1).min(32767) as i16;
555 let one_minus_fy_q15 = 32767i16 - fy_q15;
556
557 let row0 = y0 * src_w;
558 let row1 = y1 * src_w;
559 let dst_row = dy * dst_w;
560
561 let v_fy = _mm256_set1_epi16(fy_q15);
565 let v_one_minus_fy = _mm256_set1_epi16(one_minus_fy_q15);
566
567 let mut dx = 0usize;
568 while dx + 16 <= dst_w {
569 let mut p00_buf = [0u8; 16];
572 let mut p10_buf = [0u8; 16];
573 let mut p01_buf = [0u8; 16];
574 let mut p11_buf = [0u8; 16];
575 for i in 0..16 {
576 let x0 = x0s[dx + i] as usize;
577 let x1 = (x0 + 1).min(src_w - 1);
578 p00_buf[i] = *src.get_unchecked(row0 + x0);
579 p10_buf[i] = *src.get_unchecked(row0 + x1);
580 p01_buf[i] = *src.get_unchecked(row1 + x0);
581 p11_buf[i] = *src.get_unchecked(row1 + x1);
582 }
583
584 let p00 = _mm256_cvtepu8_epi16(
586 _mm_loadu_si128(p00_buf.as_ptr() as *const _),
587 );
588 let p10 = _mm256_cvtepu8_epi16(
589 _mm_loadu_si128(p10_buf.as_ptr() as *const _),
590 );
591 let p01 = _mm256_cvtepu8_epi16(
592 _mm_loadu_si128(p01_buf.as_ptr() as *const _),
593 );
594 let p11 = _mm256_cvtepu8_epi16(
595 _mm_loadu_si128(p11_buf.as_ptr() as *const _),
596 );
597
598 let p00 = _mm256_slli_epi16::<7>(p00);
602 let p10 = _mm256_slli_epi16::<7>(p10);
603 let p01 = _mm256_slli_epi16::<7>(p01);
604 let p11 = _mm256_slli_epi16::<7>(p11);
605
606 let v_fx = _mm256_loadu_si256(fx_q15.as_ptr().add(dx) as *const _);
608 let v_one_minus_fx =
609 _mm256_loadu_si256(one_minus_fx_q15.as_ptr().add(dx) as *const _);
610
611 let top = _mm256_add_epi16(
613 _mm256_mulhrs_epi16(p00, v_one_minus_fx),
614 _mm256_mulhrs_epi16(p10, v_fx),
615 );
616 let bottom = _mm256_add_epi16(
617 _mm256_mulhrs_epi16(p01, v_one_minus_fx),
618 _mm256_mulhrs_epi16(p11, v_fx),
619 );
620
621 let out_q7 = _mm256_add_epi16(
626 _mm256_mulhrs_epi16(top, v_one_minus_fy),
627 _mm256_mulhrs_epi16(bottom, v_fy),
628 );
629 let rounded = _mm256_add_epi16(out_q7, _mm256_set1_epi16(64));
631 let shifted = _mm256_srai_epi16::<7>(rounded);
632
633 let packed = _mm256_packus_epi16(shifted, shifted);
635 let packed = _mm256_permute4x64_epi64::<0b00_00_10_00>(packed);
638 _mm_storeu_si128(
639 dst.as_mut_ptr().add(dst_row + dx) as *mut _,
640 _mm256_castsi256_si128(packed),
641 );
642
643 dx += 16;
644 }
645
646 while dx < dst_w {
648 let x0 = x0s[dx] as usize;
649 let x1 = (x0 + 1).min(src_w - 1);
650 let fx = fxs[dx] as f64 / 65536.0;
651 let fy = fy_q16 as f64 / 65536.0;
652
653 let p00 = src[row0 + x0] as f64;
654 let p10 = src[row0 + x1] as f64;
655 let p01 = src[row1 + x0] as f64;
656 let p11 = src[row1 + x1] as f64;
657
658 let val = p00 * (1.0 - fx) * (1.0 - fy)
659 + p10 * fx * (1.0 - fy)
660 + p01 * (1.0 - fx) * fy
661 + p11 * fx * fy;
662 dst[dst_row + dx] = val.round() as u8;
663 dx += 1;
664 }
665 }
666
667 dst
668 }
669}