/// Magic byte sequence for the PFV format: the ASCII text `PFVIDEO`
/// followed by a single NUL byte (8 bytes in total).
pub const PFV_MAGIC: &[u8] = b"PFVIDEO\0";

/// Version number of the PFV format (presumably stored alongside
/// [`PFV_MAGIC`]; confirm against the reader/writer code).
pub const PFV_VERSION: u32 = 211;
3
4use crate::{dct::{DctQuantizedMatrix8x8, DctMatrix8x8, FP_BITS}, plane::VideoPlane};
5
6#[cfg(feature = "multithreading")]
7use rayon::prelude::*;
8
9#[derive(Clone, Copy)]
10pub struct EncodedMacroBlock {
11 pub subblocks: [DctQuantizedMatrix8x8;4]
12}
13
14#[derive(Clone, Copy)]
15pub struct DeltaEncodedMacroBlock {
16 pub motion_x: i8,
17 pub motion_y: i8,
18 pub subblocks: Option<[DctQuantizedMatrix8x8;4]>
19}
20
/// A decoded 16x16 block of 8-bit samples.
///
/// Samples are stored row by row with a stride of 16, so the sample at
/// column `x`, row `y` lives at index `y * 16 + x`.
pub struct MacroBlock {
    /// The 256 samples of the block in row-major order.
    pub pixels: [u8; 256],
}
24
25pub struct EncodedIFrame {
26 pub y: EncodedIPlane,
27 pub u: EncodedIPlane,
28 pub v: EncodedIPlane,
29}
30
31pub struct EncodedPFrame {
32 pub y: EncodedPPlane,
33 pub u: EncodedPPlane,
34 pub v: EncodedPPlane,
35}
36
37pub struct EncodedIPlane {
38 pub width: usize,
39 pub height: usize,
40 pub blocks_wide: usize,
41 pub blocks_high: usize,
42 pub blocks: Vec<EncodedMacroBlock>,
43}
44
45pub struct EncodedPPlane {
46 pub width: usize,
47 pub height: usize,
48 pub blocks_wide: usize,
49 pub blocks_high: usize,
50 pub blocks: Vec<DeltaEncodedMacroBlock>,
51}
52
/// A rectangular grid of signed 16-bit values stored in row-major order.
///
/// The value at column `x`, row `y` lives at `deltas[y * width + x]`.
pub struct DeltaBlock {
    /// Number of columns.
    pub width: usize,
    /// Number of rows.
    pub height: usize,
    /// The `width * height` values, row by row.
    pub deltas: Vec<i16>,
}

impl DeltaBlock {
    /// Creates a `width` x `height` block with every value set to zero.
    pub fn new(width: usize, height: usize) -> DeltaBlock {
        DeltaBlock { width, height, deltas: vec![0; width * height] }
    }

    /// Copies an `sw` x `sh` rectangle out of `src` into `self`.
    ///
    /// The rectangle's top-left corner is at (`sx`, `sy`) in `src` and is
    /// written with its top-left corner at (`dx`, `dy`) in `self`.
    ///
    /// # Panics
    ///
    /// Panics if the rectangle does not lie completely inside `src` or if the
    /// destination rectangle does not lie completely inside `self`. Without
    /// this check a rectangle that is too wide would silently spill over into
    /// the following row instead of failing.
    pub fn blit(
        &mut self,
        src: &DeltaBlock,
        dx: usize,
        dy: usize,
        sx: usize,
        sy: usize,
        sw: usize,
        sh: usize,
    ) {
        assert!(
            sx + sw <= src.width && sy + sh <= src.height,
            "source rectangle {}x{} at ({}, {}) exceeds {}x{} block",
            sw, sh, sx, sy, src.width, src.height
        );
        assert!(
            dx + sw <= self.width && dy + sh <= self.height,
            "destination rectangle {}x{} at ({}, {}) exceeds {}x{} block",
            sw, sh, dx, dy, self.width, self.height
        );

        for row in 0..sh {
            let src_start = (sy + row) * src.width + sx;
            let dst_start = (dy + row) * self.width + dx;

            self.deltas[dst_start..dst_start + sw]
                .copy_from_slice(&src.deltas[src_start..src_start + sw]);
        }
    }

    /// Returns a new `sw` x `sh` block holding a copy of the rectangle whose
    /// top-left corner is at (`sx`, `sy`).
    ///
    /// # Panics
    ///
    /// Panics if the rectangle does not lie completely inside `self`.
    pub fn get_slice(&self, sx: usize, sy: usize, sw: usize, sh: usize) -> DeltaBlock {
        let mut slice = DeltaBlock::new(sw, sh);
        slice.blit(self, 0, 0, sx, sy, sw, sh);
        slice
    }
}
82
83impl MacroBlock {
84 pub fn new() -> MacroBlock {
85 MacroBlock { pixels: [0;256] }
86 }
87
88 pub fn blit_subblock(self: &mut MacroBlock, src: &[u8;64], dx: usize, dy: usize) {
89 for row in 0..8 {
90 let dest_row = row + dy;
91 let src_offset = row * 8;
92 let dst_offset = (dest_row * 16) + dx;
93
94 self.pixels[dst_offset..(dst_offset + 8)].copy_from_slice(&src[src_offset..(src_offset + 8)]);
95 }
96 }
97
98 pub fn apply_residuals(self: &mut MacroBlock, from: &MacroBlock) {
99 for (delta, pixel) in self.pixels.iter_mut().zip(from.pixels) {
100 let d = (*delta as i16 - 128) * 2;
101 let p = pixel as i16;
102 *delta = (p + d).clamp(0, 255) as u8;
103 }
104 }
105}
106
107impl VideoPlane {
108 fn calc_residuals(from: &VideoPlane, to: &VideoPlane) -> DeltaBlock {
109 debug_assert!(from.width == to.width && from.height == to.height);
110
111 let mut residuals = DeltaBlock::new(from.width, from.height);
112
113 for (f, t) in residuals.deltas.iter_mut().zip(&from.pixels) {
114 *f = *t as i16;
115 }
116
117 for (f, t) in residuals.deltas.iter_mut().zip(&to.pixels) {
118 let delta = *f as i16 - *t as i16;
119 *f = delta.clamp(-255, 255);
120 }
121
122 residuals
123 }
124
125 fn calc_error(from: &VideoPlane, to: &VideoPlane, ref_lms: f32) -> f32 {
126 assert!(from.width == to.width && from.height == to.height);
127
128 let mut sum = 0.0;
129
130 for (_, (a, b)) in from.pixels.iter().zip(&to.pixels).enumerate() {
131 let diff = *a as f32 - *b as f32;
132 sum += diff * diff;
133 if sum >= ref_lms {
134 return sum;
135 }
136 }
137
138 return sum;
139 }
140
141 fn encode_block(src: &VideoPlane, q_table: &[i32;64]) -> EncodedMacroBlock {
142 debug_assert!(src.width == 16 && src.height == 16);
143
144 let subblocks = [
146 VideoPlane::encode_subblock(&src.get_slice(0, 0, 8, 8), q_table),
147 VideoPlane::encode_subblock(&src.get_slice(8, 0, 8, 8), q_table),
148 VideoPlane::encode_subblock(&src.get_slice(0, 8, 8, 8), q_table),
149 VideoPlane::encode_subblock(&src.get_slice(8, 8, 8, 8), q_table)];
150
151 EncodedMacroBlock { subblocks: subblocks }
152 }
153
154 fn block_search(src: &VideoPlane, refplane: &VideoPlane, cx: i32, cy: i32, stepsize: i32) -> (i32, i32, f32, VideoPlane) {
155 let mut best_dx = 0;
156 let mut best_dy = 0;
157 let mut best_err = f32::INFINITY;
158 let mut best_slice = VideoPlane::new(16, 16);
159
160 {
162 let slice = refplane.get_slice(cx as usize, cy as usize, 16, 16);
163 best_slice.pixels.copy_from_slice(&slice.pixels);
164 best_err = VideoPlane::calc_error(src, &slice, best_err);
165 }
166
167 for my in -1..2 {
169
170 let offsy = cy + (my * stepsize);
171 if offsy < 0 || offsy > refplane.height as i32 - 16 {
172 continue;
173 }
174
175 for mx in -1..2 {
176 if my == 0 && mx == 0 {
177 continue;
179 }
180
181 let offsx = cx + (mx * stepsize);
182 if offsx < 0 || offsx > refplane.width as i32 - 16 {
183 continue;
184 }
185
186 let slice = refplane.get_slice(offsx as usize, offsy as usize, 16, 16);
187 let err = VideoPlane::calc_error(src, &slice, best_err);
188
189 if err < best_err {
190 best_slice.pixels.copy_from_slice(&slice.pixels);
191 best_err = err;
192 best_dx = mx * stepsize;
193 best_dy = my * stepsize;
194 }
195 }
196 }
197
198 if stepsize > 1 {
199 let (dx2, dy2, err2, slice2) = VideoPlane::block_search(src, refplane, cx + best_dx, cy + best_dy, stepsize / 2);
200 return (best_dx + dx2, best_dy + dy2, err2, slice2);
201 } else {
202 return (best_dx, best_dy, best_err, best_slice);
203 }
204 }
205
206 fn encode_block_delta(src: &VideoPlane, refplane: &VideoPlane, bx: usize, by: usize, q_table: &[i32;64], px_err: f32) -> DeltaEncodedMacroBlock {
207 debug_assert!(src.width == 16 && src.height == 16);
208
209 let min_err = px_err * px_err * 256.0;
210
211 let (best_dx, best_dy, best_err, prev_block) = VideoPlane::block_search(src, refplane, bx as i32, by as i32, 8);
213
214 let sx = bx as i32 + best_dx;
215 let sy = by as i32 + best_dy;
216
217 assert!(sx >= 0 && sx <= refplane.width as i32 - 16);
218 assert!(sy >= 0 && sy <= refplane.height as i32 - 16);
219
220 if best_err <= min_err {
222 DeltaEncodedMacroBlock { motion_x: best_dx as i8, motion_y: best_dy as i8, subblocks: None }
223 } else {
224 let delta_block = VideoPlane::calc_residuals(src, &prev_block);
226
227 let subblocks = [
229 VideoPlane::encode_subblock_delta(&delta_block.get_slice(0, 0, 8, 8), q_table),
230 VideoPlane::encode_subblock_delta(&delta_block.get_slice(8, 0, 8, 8), q_table),
231 VideoPlane::encode_subblock_delta(&delta_block.get_slice(0, 8, 8, 8), q_table),
232 VideoPlane::encode_subblock_delta(&delta_block.get_slice(8, 8, 8, 8), q_table)];
233
234 DeltaEncodedMacroBlock { motion_x: best_dx as i8, motion_y: best_dy as i8, subblocks: Some(subblocks) }
235 }
236 }
237
238 fn decode_block(src: &EncodedMacroBlock, q_table: &[i32;64]) -> MacroBlock {
239 let subblocks = [
240 VideoPlane::decode_subblock(&src.subblocks[0], q_table),
241 VideoPlane::decode_subblock(&src.subblocks[1], q_table),
242 VideoPlane::decode_subblock(&src.subblocks[2], q_table),
243 VideoPlane::decode_subblock(&src.subblocks[3], q_table)];
244
245 let mut block = MacroBlock::new();
246 block.blit_subblock(&subblocks[0], 0, 0);
247 block.blit_subblock(&subblocks[1], 8, 0);
248 block.blit_subblock(&subblocks[2], 0, 8);
249 block.blit_subblock(&subblocks[3], 8, 8);
250
251 block
252 }
253
254 fn decode_block_delta(src: &DeltaEncodedMacroBlock, refplane: &VideoPlane, bx: usize, by: usize, q_table: &[i32;64]) -> MacroBlock {
255 let sx = bx as i32 + src.motion_x as i32;
256 let sy = by as i32 + src.motion_y as i32;
257
258 debug_assert!(sx >= 0 && sx <= refplane.width as i32 - 16);
259 debug_assert!(sy >= 0 && sy <= refplane.height as i32 - 16);
260
261 let prev_block = refplane.get_block(sx as usize, sy as usize);
262
263 match src.subblocks {
264 Some(subblocks) => {
265 let subblocks = [
266 VideoPlane::decode_subblock(&subblocks[0], q_table),
267 VideoPlane::decode_subblock(&subblocks[1], q_table),
268 VideoPlane::decode_subblock(&subblocks[2], q_table),
269 VideoPlane::decode_subblock(&subblocks[3], q_table)];
270
271 let mut block = MacroBlock::new();
272 block.blit_subblock(&subblocks[0], 0, 0);
273 block.blit_subblock(&subblocks[1], 8, 0);
274 block.blit_subblock(&subblocks[2], 0, 8);
275 block.blit_subblock(&subblocks[3], 8, 8);
276
277 block.apply_residuals(&prev_block);
278
279 return block;
280 }
281 None => {
282 return prev_block;
283 }
284 };
285 }
286
287 fn encode_subblock(src: &VideoPlane, q_table: &[i32;64]) -> DctQuantizedMatrix8x8 {
288 assert!(src.width == 8 && src.height == 8);
289
290 let mut dct = DctMatrix8x8::new();
291 let cell_px: Vec<i32> = src.pixels.iter().map(|x| ((*x as i32) - 128) << FP_BITS).collect();
292 dct.m.copy_from_slice(&cell_px);
293
294 dct.dct_transform_rows();
295 dct.dct_transform_columns();
296
297 dct.encode(q_table)
298 }
299
300 fn encode_subblock_delta(src: &DeltaBlock, q_table: &[i32;64]) -> DctQuantizedMatrix8x8 {
301 assert!(src.width == 8 && src.height == 8);
302
303 let mut dct = DctMatrix8x8::new();
304 let cell_px: Vec<i32> = src.deltas.iter().map(|x| (*x as i32 / 2) << FP_BITS).collect();
305 dct.m.copy_from_slice(&cell_px);
306
307 dct.dct_transform_rows();
308 dct.dct_transform_columns();
309
310 dct.encode(q_table)
311 }
312
313 fn decode_subblock(src: &DctQuantizedMatrix8x8, q_table: &[i32;64]) -> [u8;64] {
314 let mut dct = DctMatrix8x8::decode(src, q_table);
315 dct.dct_inverse_transform_columns();
316 dct.dct_inverse_transform_rows();
317
318 let mut result = [0;64];
319
320 for (idx, px) in dct.m.iter().enumerate() {
321 result[idx] = ((*px >> FP_BITS) + 128).clamp(0, 255) as u8;
322 }
323
324 result
325 }
326
327 pub fn get_block(self: &VideoPlane, sx: usize, sy: usize) -> MacroBlock {
328 let mut dest: MacroBlock = MacroBlock { pixels: [0;256] };
329
330 for row in 0..16 {
331 let src_row = row + sy;
332 let src_offset = (src_row * self.width) + sx;
333 let dst_offset = row * 16;
334
335 dest.pixels[dst_offset..(dst_offset + 16)].copy_from_slice(&self.pixels[src_offset..(src_offset + 16)]);
336 }
337
338 dest
339 }
340
341 pub fn blit_block(self: &mut VideoPlane, block: &MacroBlock, dx: usize, dy: usize) {
342 for row in 0..16 {
343 let dest_row = row + dy;
344 let src_offset = row * 16;
345 let dst_offset = (dest_row * self.width) + dx;
346
347 self.pixels[dst_offset..(dst_offset + 16)].copy_from_slice(&block.pixels[src_offset..(src_offset + 16)]);
348 }
349 }
350
351 pub fn encode_plane(self: &VideoPlane, q_table: &[i32;64], clear_color: u8, #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> EncodedIPlane {
352 let pad_width: usize = self.width + (16 - (self.width % 16)) % 16;
353 let pad_height = self.height + (16 - (self.height % 16)) % 16;
354 let mut img_copy = VideoPlane::new(pad_width, pad_height);
355 img_copy.pixels.fill(clear_color);
356 img_copy.blit(self, 0, 0, 0, 0, self.width, self.height);
357
358 let blocks_wide = pad_width / 16;
359 let blocks_high = pad_height / 16;
360
361 let mut blocks: Vec<VideoPlane> = Vec::with_capacity(blocks_wide * blocks_high);
362
363 for block_y in 0..blocks_high {
365 for block_x in 0..blocks_wide {
366 let mut block = VideoPlane::new(16, 16);
367 block.blit(&img_copy, 0, 0, block_x * 16, block_y * 16, 16, 16);
368 blocks.push(block);
369 }
370 }
371
372 #[cfg(feature = "multithreading")]
374 let enc_result: Vec<_> = tp.install(|| {
375 blocks.par_iter().map(|x| {
376 VideoPlane::encode_block(x, q_table)
377 }).collect()
378 });
379
380 #[cfg(not(feature = "multithreading"))]
381 let enc_result: Vec<_> = blocks.iter().map(|x| {
382 VideoPlane::encode_block(x, q_table)
383 }).collect();
384
385 EncodedIPlane { width: pad_width, height: pad_height, blocks_wide: blocks_wide, blocks_high: blocks_high, blocks: enc_result }
386 }
387
388 pub fn encode_plane_delta(self: &VideoPlane, refplane: &VideoPlane, q_table: &[i32;64], px_err: f32, clear_color: u8, #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> EncodedPPlane {
389 let pad_width: usize = self.width + (16 - (self.width % 16)) % 16;
390 let pad_height = self.height + (16 - (self.height % 16)) % 16;
391 let mut img_copy = VideoPlane::new(pad_width, pad_height);
392 img_copy.pixels.fill(clear_color);
393 img_copy.blit(self, 0, 0, 0, 0, self.width, self.height);
394
395 let blocks_wide = pad_width / 16;
396 let blocks_high = pad_height / 16;
397
398 let mut blocks: Vec<_> = Vec::with_capacity(blocks_wide * blocks_high);
399
400 for block_y in 0..blocks_high {
402 for block_x in 0..blocks_wide {
403 let mut block = VideoPlane::new(16, 16);
404 block.blit(&img_copy, 0, 0, block_x * 16, block_y * 16, 16, 16);
405 blocks.push((block, block_x * 16, block_y * 16));
406 }
407 }
408
409 #[cfg(feature = "multithreading")]
411 let enc_result: Vec<_> = tp.install(|| {blocks.par_iter().map(|(block, bx, by)| {
412 VideoPlane::encode_block_delta(block, refplane, *bx, *by, q_table, px_err)
413 }).collect()});
414
415 #[cfg(not(feature = "multithreading"))]
416 let enc_result: Vec<_> = blocks.iter().map(|(block, bx, by)| {
417 VideoPlane::encode_block_delta(block, refplane, *bx, *by, q_table, px_err)
418 }).collect();
419
420 EncodedPPlane { width: pad_width, height: pad_height, blocks_wide: blocks_wide, blocks_high: blocks_high, blocks: enc_result }
421 }
422
423 pub fn decode_plane(src: &EncodedIPlane, q_table: &[i32;64], #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> VideoPlane {
424 let mut plane = VideoPlane::new(src.blocks_wide * 16, src.blocks_high * 16);
425
426 let total_blocks = src.blocks_wide * src.blocks_high;
427
428 #[cfg(feature = "multithreading")]
429 let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
430 VideoPlane::decode_block(&src.blocks[x], q_table)
431 }).collect()});
432
433 #[cfg(not(feature = "multithreading"))]
434 let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
435 VideoPlane::decode_block(&src.blocks[x], q_table)
436 }).collect();
437
438 for block_y in 0..src.blocks_high {
439 for block_x in 0..src.blocks_wide {
440 let block = &results[block_x + (block_y * src.blocks_wide)];
441 plane.blit_block(block, block_x * 16, block_y * 16);
442 }
443 }
444
445 plane
446 }
447
448 pub fn decode_plane_delta(src: &EncodedPPlane, refplane: &VideoPlane, q_table: &[i32;64], #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> VideoPlane {
449 let mut plane = VideoPlane::new(src.blocks_wide * 16, src.blocks_high * 16);
450
451 let total_blocks = src.blocks_wide * src.blocks_high;
452
453 #[cfg(feature = "multithreading")]
454 let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
455 let bx = x % src.blocks_wide;
456 let by = x / src.blocks_wide;
457 VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
458 }).collect()});
459
460 #[cfg(not(feature = "multithreading"))]
461 let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
462 let bx = x % src.blocks_wide;
463 let by = x / src.blocks_wide;
464 VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
465 }).collect();
466
467 for block_y in 0..src.blocks_high {
468 for block_x in 0..src.blocks_wide {
469 let block = &results[block_x + (block_y * src.blocks_wide)];
470 plane.blit_block(block, block_x * 16, block_y * 16);
471 }
472 }
473
474 plane
475 }
476
477 pub fn decode_plane_into(src: &EncodedIPlane, q_table: &[i32;64], target: &mut VideoPlane, #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) {
478 let total_blocks = src.blocks_wide * src.blocks_high;
479
480 #[cfg(feature = "multithreading")]
481 let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
482 VideoPlane::decode_block(&src.blocks[x], q_table)
483 }).collect()});
484
485 #[cfg(not(feature = "multithreading"))]
486 let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
487 VideoPlane::decode_block(&src.blocks[x], q_table)
488 }).collect();
489
490 for block_y in 0..src.blocks_high {
491 for block_x in 0..src.blocks_wide {
492 let block = &results[block_x + (block_y * src.blocks_wide)];
493 target.blit_block(block, block_x * 16, block_y * 16);
494 }
495 }
496 }
497
498 pub fn decode_plane_delta_into(src: &EncodedPPlane, refplane: &mut VideoPlane, q_table: &[i32;64], #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) {
499 let total_blocks = src.blocks_wide * src.blocks_high;
500
501 #[cfg(feature = "multithreading")]
502 let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
503 let bx = x % src.blocks_wide;
504 let by = x / src.blocks_wide;
505 VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
506 }).collect()});
507
508 #[cfg(not(feature = "multithreading"))]
509 let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
510 let bx = x % src.blocks_wide;
511 let by = x / src.blocks_wide;
512 VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
513 }).collect();
514
515 for block_y in 0..src.blocks_high {
516 for block_x in 0..src.blocks_wide {
517 let block = &results[block_x + (block_y * src.blocks_wide)];
518 refplane.blit_block(block, block_x * 16, block_y * 16);
519 }
520 }
521 }
522
523 pub fn reduce(self: &VideoPlane) -> VideoPlane {
524 let mut new_slice = VideoPlane::new(self.width / 2, self.height / 2);
525
526 for iy in 0..new_slice.height {
527 for ix in 0..new_slice.width {
528 let sx = ix * 2;
529 let sy = iy * 2;
530
531 new_slice.pixels[ix + (iy * new_slice.width)] = self.pixels[sx + (sy * self.width)];
532 }
533 }
534
535 new_slice
536 }
537
538 pub fn double(self: &VideoPlane) -> VideoPlane {
539 let mut new_slice = VideoPlane::new(self.width * 2, self.height * 2);
540
541 for iy in 0..self.height {
542 for ix in 0..self.width {
543 let dx = ix * 2;
544 let dy = iy * 2;
545 let d_idx = dx + (dy * new_slice.width);
546 let px = self.pixels[ix + (iy * self.width)];
547
548 new_slice.pixels[d_idx] = px;
549 new_slice.pixels[d_idx + 1] = px;
550 new_slice.pixels[d_idx + new_slice.width] = px;
551 new_slice.pixels[d_idx + new_slice.width + 1] = px;
552 }
553 }
554
555 new_slice
556 }
557}