pfv_rs/
common.rs

/// Magic byte sequence: the ASCII text `PFVIDEO` followed by a single NUL byte (8 bytes total).
pub const PFV_MAGIC: &[u8] = b"PFVIDEO\0";

/// Version number declared by this module.
pub const PFV_VERSION: u32 = 211;
3
4use crate::{dct::{DctQuantizedMatrix8x8, DctMatrix8x8, FP_BITS}, plane::VideoPlane};
5
6#[cfg(feature = "multithreading")]
7use rayon::prelude::*;
8
9#[derive(Clone, Copy)]
10pub struct EncodedMacroBlock {
11    pub subblocks: [DctQuantizedMatrix8x8;4]
12}
13
14#[derive(Clone, Copy)]
15pub struct DeltaEncodedMacroBlock {
16    pub motion_x: i8,
17    pub motion_y: i8,
18    pub subblocks: Option<[DctQuantizedMatrix8x8;4]>
19}
20
/// A block of 256 bytes, addressed by the code in this file as 16 rows of 16 pixels.
pub struct MacroBlock {
    /// Row-major pixel storage (row stride of 16).
    pub pixels: [u8; 256],
}
24
25pub struct EncodedIFrame {
26    pub y: EncodedIPlane,
27    pub u: EncodedIPlane,
28    pub v: EncodedIPlane,
29}
30
31pub struct EncodedPFrame {
32    pub y: EncodedPPlane,
33    pub u: EncodedPPlane,
34    pub v: EncodedPPlane,
35}
36
37pub struct EncodedIPlane {
38    pub width: usize,
39    pub height: usize,
40    pub blocks_wide: usize,
41    pub blocks_high: usize,
42    pub blocks: Vec<EncodedMacroBlock>,
43}
44
45pub struct EncodedPPlane {
46    pub width: usize,
47    pub height: usize,
48    pub blocks_wide: usize,
49    pub blocks_high: usize,
50    pub blocks: Vec<DeltaEncodedMacroBlock>,
51}
52
/// A rectangular grid of signed 16-bit values stored row by row.
pub struct DeltaBlock {
    /// Number of values per row.
    pub width: usize,
    /// Number of rows.
    pub height: usize,
    /// Row-major storage holding `width * height` values.
    pub deltas: Vec<i16>,
}

impl DeltaBlock {
    /// Creates a `width` x `height` block with every value set to zero.
    pub fn new(width: usize, height: usize) -> DeltaBlock {
        let deltas = vec![0; width * height];
        DeltaBlock { width, height, deltas }
    }

    /// Copies an `sw` x `sh` rectangle out of `src`, whose top-left corner is (`sx`, `sy`),
    /// into `self` with its top-left corner at (`dx`, `dy`).
    ///
    /// # Panics
    ///
    /// Panics if any copied row segment falls outside either buffer.
    pub fn blit(&mut self, src: &DeltaBlock, dx: usize, dy: usize, sx: usize, sy: usize, sw: usize, sh: usize) {
        for line in 0..sh {
            let read_at = (line + sy) * src.width + sx;
            let write_at = (line + dy) * self.width + dx;

            self.deltas[write_at..write_at + sw].copy_from_slice(&src.deltas[read_at..read_at + sw]);
        }
    }

    /// Returns a new `sw` x `sh` block holding a copy of the rectangle at (`sx`, `sy`).
    pub fn get_slice(&self, sx: usize, sy: usize, sw: usize, sh: usize) -> DeltaBlock {
        let mut region = DeltaBlock::new(sw, sh);
        region.blit(self, 0, 0, sx, sy, sw, sh);
        region
    }
}
82
83impl MacroBlock {
84    pub fn new() -> MacroBlock {
85        MacroBlock { pixels: [0;256] }
86    }
87
88    pub fn blit_subblock(self: &mut MacroBlock, src: &[u8;64], dx: usize, dy: usize) {
89        for row in 0..8 {
90            let dest_row = row + dy;
91            let src_offset = row * 8;
92            let dst_offset = (dest_row * 16) + dx;
93
94            self.pixels[dst_offset..(dst_offset + 8)].copy_from_slice(&src[src_offset..(src_offset + 8)]);
95        }
96    }
97
98    pub fn apply_residuals(self: &mut MacroBlock, from: &MacroBlock) {
99        for (delta, pixel) in self.pixels.iter_mut().zip(from.pixels) {
100            let d = (*delta as i16 - 128) * 2;
101            let p = pixel as i16;
102            *delta = (p + d).clamp(0, 255) as u8;
103        }
104    }
105}
106
107impl VideoPlane {
108    fn calc_residuals(from: &VideoPlane, to: &VideoPlane) -> DeltaBlock {
109        debug_assert!(from.width == to.width && from.height == to.height);
110
111        let mut residuals = DeltaBlock::new(from.width, from.height);
112        
113        for (f, t) in residuals.deltas.iter_mut().zip(&from.pixels) {
114            *f = *t as i16;
115        }
116
117        for (f, t) in residuals.deltas.iter_mut().zip(&to.pixels) {
118            let delta = *f as i16 - *t as i16;
119            *f = delta.clamp(-255, 255);
120        }
121
122        residuals
123    }
124
125    fn calc_error(from: &VideoPlane, to: &VideoPlane, ref_lms: f32) -> f32 {
126        assert!(from.width == to.width && from.height == to.height);
127
128        let mut sum = 0.0;
129
130        for (_, (a, b)) in from.pixels.iter().zip(&to.pixels).enumerate() {
131            let diff = *a as f32 - *b as f32;
132            sum += diff * diff;
133            if sum >= ref_lms {
134                return sum;
135            }
136        }
137
138        return sum;
139    }
140
141    fn encode_block(src: &VideoPlane, q_table: &[i32;64]) -> EncodedMacroBlock {
142        debug_assert!(src.width == 16 && src.height == 16);
143
144        // split into 4 subblocks and encode each one
145        let subblocks = [
146            VideoPlane::encode_subblock(&src.get_slice(0, 0, 8, 8), q_table),
147            VideoPlane::encode_subblock(&src.get_slice(8, 0, 8, 8), q_table),
148            VideoPlane::encode_subblock(&src.get_slice(0, 8, 8, 8), q_table),
149            VideoPlane::encode_subblock(&src.get_slice(8, 8, 8, 8), q_table)];
150
151        EncodedMacroBlock { subblocks: subblocks }
152    }
153
154    fn block_search(src: &VideoPlane, refplane: &VideoPlane, cx: i32, cy: i32, stepsize: i32) -> (i32, i32, f32, VideoPlane) {
155        let mut best_dx = 0;
156        let mut best_dy = 0;
157        let mut best_err = f32::INFINITY;
158        let mut best_slice = VideoPlane::new(16, 16);
159
160        // test center point first
161        {
162            let slice = refplane.get_slice(cx as usize, cy as usize, 16, 16);
163            best_slice.pixels.copy_from_slice(&slice.pixels);
164            best_err = VideoPlane::calc_error(src, &slice, best_err);
165        }
166
167        // search 8 locations around center point at multiples of step size
168        for my in -1..2 {
169
170            let offsy = cy + (my * stepsize);
171            if offsy < 0 || offsy > refplane.height as i32 - 16 {
172                continue;
173            }
174
175            for mx in -1..2 {
176                if my == 0 && mx == 0 {
177                    // we already tested (0, 0) - skip
178                    continue;
179                }
180
181                let offsx = cx + (mx * stepsize);
182                if offsx < 0 || offsx > refplane.width as i32 - 16 {
183                    continue;
184                }
185
186                let slice = refplane.get_slice(offsx as usize, offsy as usize, 16, 16);
187                let err = VideoPlane::calc_error(src, &slice, best_err);
188
189                if err < best_err {
190                    best_slice.pixels.copy_from_slice(&slice.pixels);
191                    best_err = err;
192                    best_dx = mx * stepsize;
193                    best_dy = my * stepsize;
194                }
195            }
196        }
197
198        if stepsize > 1 {
199            let (dx2, dy2, err2, slice2) = VideoPlane::block_search(src, refplane, cx + best_dx, cy + best_dy, stepsize / 2);
200            return (best_dx + dx2, best_dy + dy2, err2, slice2);
201        } else {
202            return (best_dx, best_dy, best_err, best_slice);
203        }
204    }
205
206    fn encode_block_delta(src: &VideoPlane, refplane: &VideoPlane, bx: usize, by: usize, q_table: &[i32;64], px_err: f32) -> DeltaEncodedMacroBlock {
207        debug_assert!(src.width == 16 && src.height == 16);
208
209        let min_err = px_err * px_err * 256.0;
210
211        // four step search around block pos to find delta which minimizes error
212        let (best_dx, best_dy, best_err, prev_block) = VideoPlane::block_search(src, refplane, bx as i32, by as i32, 8);
213
214        let sx = bx as i32 + best_dx;
215        let sy = by as i32 + best_dy;
216
217        assert!(sx >= 0 && sx <= refplane.width as i32 - 16);
218        assert!(sy >= 0 && sy <= refplane.height as i32 - 16);
219
220        // if the best delta is small enough, skip coefficients
221        if best_err <= min_err {
222            DeltaEncodedMacroBlock { motion_x: best_dx as i8, motion_y: best_dy as i8, subblocks: None }
223        } else {
224            // generate delta values
225            let delta_block = VideoPlane::calc_residuals(src, &prev_block);
226
227            // split into 4 subblocks and encode each one
228            let subblocks = [
229                VideoPlane::encode_subblock_delta(&delta_block.get_slice(0, 0, 8, 8), q_table),
230                VideoPlane::encode_subblock_delta(&delta_block.get_slice(8, 0, 8, 8), q_table),
231                VideoPlane::encode_subblock_delta(&delta_block.get_slice(0, 8, 8, 8), q_table),
232                VideoPlane::encode_subblock_delta(&delta_block.get_slice(8, 8, 8, 8), q_table)];
233
234            DeltaEncodedMacroBlock { motion_x: best_dx as i8, motion_y: best_dy as i8, subblocks: Some(subblocks) }
235        }
236    }
237    
238    fn decode_block(src: &EncodedMacroBlock, q_table: &[i32;64]) -> MacroBlock {
239        let subblocks = [
240            VideoPlane::decode_subblock(&src.subblocks[0], q_table),
241            VideoPlane::decode_subblock(&src.subblocks[1], q_table),
242            VideoPlane::decode_subblock(&src.subblocks[2], q_table),
243            VideoPlane::decode_subblock(&src.subblocks[3], q_table)];
244
245        let mut block = MacroBlock::new();
246        block.blit_subblock(&subblocks[0], 0, 0);
247        block.blit_subblock(&subblocks[1], 8, 0);
248        block.blit_subblock(&subblocks[2], 0, 8);
249        block.blit_subblock(&subblocks[3], 8, 8);
250
251        block
252    }
253
254    fn decode_block_delta(src: &DeltaEncodedMacroBlock, refplane: &VideoPlane, bx: usize, by: usize, q_table: &[i32;64]) -> MacroBlock {
255        let sx = bx as i32 + src.motion_x as i32;
256        let sy = by as i32 + src.motion_y as i32;
257
258        debug_assert!(sx >= 0 && sx <= refplane.width as i32 - 16);
259        debug_assert!(sy >= 0 && sy <= refplane.height as i32 - 16);
260
261        let prev_block = refplane.get_block(sx as usize, sy as usize);
262
263        match src.subblocks {
264            Some(subblocks) => {
265                let subblocks = [
266                    VideoPlane::decode_subblock(&subblocks[0], q_table),
267                    VideoPlane::decode_subblock(&subblocks[1], q_table),
268                    VideoPlane::decode_subblock(&subblocks[2], q_table),
269                    VideoPlane::decode_subblock(&subblocks[3], q_table)];
270
271                let mut block = MacroBlock::new();
272                block.blit_subblock(&subblocks[0], 0, 0);
273                block.blit_subblock(&subblocks[1], 8, 0);
274                block.blit_subblock(&subblocks[2], 0, 8);
275                block.blit_subblock(&subblocks[3], 8, 8);
276
277                block.apply_residuals(&prev_block);
278
279                return block;
280            }
281            None => {
282                return prev_block;
283            }
284        };
285    }
286
287    fn encode_subblock(src: &VideoPlane, q_table: &[i32;64]) -> DctQuantizedMatrix8x8 {
288        assert!(src.width == 8 && src.height == 8);
289
290        let mut dct = DctMatrix8x8::new();
291        let cell_px: Vec<i32> = src.pixels.iter().map(|x| ((*x as i32) - 128) << FP_BITS).collect();
292        dct.m.copy_from_slice(&cell_px);
293
294        dct.dct_transform_rows();
295        dct.dct_transform_columns();
296
297        dct.encode(q_table)
298    }
299
300    fn encode_subblock_delta(src: &DeltaBlock, q_table: &[i32;64]) -> DctQuantizedMatrix8x8 {
301        assert!(src.width == 8 && src.height == 8);
302
303        let mut dct = DctMatrix8x8::new();
304        let cell_px: Vec<i32> = src.deltas.iter().map(|x| (*x as i32 / 2) << FP_BITS).collect();
305        dct.m.copy_from_slice(&cell_px);
306
307        dct.dct_transform_rows();
308        dct.dct_transform_columns();
309
310        dct.encode(q_table)
311    }
312
313    fn decode_subblock(src: &DctQuantizedMatrix8x8, q_table: &[i32;64]) -> [u8;64] {
314        let mut dct = DctMatrix8x8::decode(src, q_table);
315        dct.dct_inverse_transform_columns();
316        dct.dct_inverse_transform_rows();
317
318        let mut result = [0;64];
319        
320        for (idx, px) in dct.m.iter().enumerate() {
321            result[idx] = ((*px >> FP_BITS) + 128).clamp(0, 255) as u8;
322        }
323
324        result
325    }
326
327    pub fn get_block(self: &VideoPlane, sx: usize, sy: usize) -> MacroBlock {
328        let mut dest: MacroBlock = MacroBlock { pixels: [0;256] };
329
330        for row in 0..16 {
331            let src_row = row + sy;
332            let src_offset = (src_row * self.width) + sx;
333            let dst_offset = row * 16;
334
335            dest.pixels[dst_offset..(dst_offset + 16)].copy_from_slice(&self.pixels[src_offset..(src_offset + 16)]);
336        }
337
338        dest
339    }
340
341    pub fn blit_block(self: &mut VideoPlane, block: &MacroBlock, dx: usize, dy: usize) {
342        for row in 0..16 {
343            let dest_row = row + dy;
344            let src_offset = row * 16;
345            let dst_offset = (dest_row * self.width) + dx;
346
347            self.pixels[dst_offset..(dst_offset + 16)].copy_from_slice(&block.pixels[src_offset..(src_offset + 16)]);
348        }
349    }
350
351    pub fn encode_plane(self: &VideoPlane, q_table: &[i32;64], clear_color: u8, #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> EncodedIPlane {
352        let pad_width: usize = self.width + (16 - (self.width % 16)) % 16;
353        let pad_height = self.height + (16 - (self.height % 16)) % 16;
354        let mut img_copy = VideoPlane::new(pad_width, pad_height);
355        img_copy.pixels.fill(clear_color);
356        img_copy.blit(self, 0, 0, 0, 0, self.width, self.height);
357
358        let blocks_wide = pad_width / 16;
359        let blocks_high = pad_height / 16;
360
361        let mut blocks: Vec<VideoPlane> = Vec::with_capacity(blocks_wide * blocks_high);
362
363        // split image plane into 16x16 macroblocks
364        for block_y in 0..blocks_high {
365            for block_x in 0..blocks_wide {
366                let mut block = VideoPlane::new(16, 16);
367                block.blit(&img_copy, 0, 0, block_x * 16, block_y * 16, 16, 16);
368                blocks.push(block);
369            }
370        }
371
372        // encode each macroblock in parallel
373        #[cfg(feature = "multithreading")]
374        let enc_result: Vec<_> = tp.install(|| {
375            blocks.par_iter().map(|x| {
376                VideoPlane::encode_block(x, q_table)
377            }).collect()
378        });
379
380        #[cfg(not(feature = "multithreading"))]
381        let enc_result: Vec<_> = blocks.iter().map(|x| {
382            VideoPlane::encode_block(x, q_table)
383        }).collect();
384
385        EncodedIPlane { width: pad_width, height: pad_height, blocks_wide: blocks_wide, blocks_high: blocks_high, blocks: enc_result }
386    }
387
388    pub fn encode_plane_delta(self: &VideoPlane, refplane: &VideoPlane, q_table: &[i32;64], px_err: f32, clear_color: u8, #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> EncodedPPlane {
389        let pad_width: usize = self.width + (16 - (self.width % 16)) % 16;
390        let pad_height = self.height + (16 - (self.height % 16)) % 16;
391        let mut img_copy = VideoPlane::new(pad_width, pad_height);
392        img_copy.pixels.fill(clear_color);
393        img_copy.blit(self, 0, 0, 0, 0, self.width, self.height);
394
395        let blocks_wide = pad_width / 16;
396        let blocks_high = pad_height / 16;
397
398        let mut blocks: Vec<_> = Vec::with_capacity(blocks_wide * blocks_high);
399
400        // split image plane into 16x16 macroblocks
401        for block_y in 0..blocks_high {
402            for block_x in 0..blocks_wide {
403                let mut block = VideoPlane::new(16, 16);
404                block.blit(&img_copy, 0, 0, block_x * 16, block_y * 16, 16, 16);
405                blocks.push((block, block_x * 16, block_y * 16));
406            }
407        }
408
409        // encode each macroblock in parallel
410        #[cfg(feature = "multithreading")]
411        let enc_result: Vec<_> = tp.install(|| {blocks.par_iter().map(|(block, bx, by)| {
412            VideoPlane::encode_block_delta(block, refplane, *bx, *by, q_table, px_err)
413        }).collect()});
414
415        #[cfg(not(feature = "multithreading"))]
416        let enc_result: Vec<_> = blocks.iter().map(|(block, bx, by)| {
417            VideoPlane::encode_block_delta(block, refplane, *bx, *by, q_table, px_err)
418        }).collect();
419
420        EncodedPPlane { width: pad_width, height: pad_height, blocks_wide: blocks_wide, blocks_high: blocks_high, blocks: enc_result }
421    }
422
423    pub fn decode_plane(src: &EncodedIPlane, q_table: &[i32;64], #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> VideoPlane {
424        let mut plane = VideoPlane::new(src.blocks_wide * 16, src.blocks_high * 16);
425
426        let total_blocks = src.blocks_wide * src.blocks_high;
427
428        #[cfg(feature = "multithreading")]
429        let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
430            VideoPlane::decode_block(&src.blocks[x], q_table)
431        }).collect()});
432
433        #[cfg(not(feature = "multithreading"))]
434        let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
435            VideoPlane::decode_block(&src.blocks[x], q_table)
436        }).collect();
437
438        for block_y in 0..src.blocks_high {
439            for block_x in 0..src.blocks_wide {
440                let block = &results[block_x + (block_y * src.blocks_wide)];
441                plane.blit_block(block, block_x * 16, block_y * 16);
442            }
443        }
444
445        plane
446    }
447
448    pub fn decode_plane_delta(src: &EncodedPPlane, refplane: &VideoPlane, q_table: &[i32;64], #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) -> VideoPlane {
449        let mut plane = VideoPlane::new(src.blocks_wide * 16, src.blocks_high * 16);
450
451        let total_blocks = src.blocks_wide * src.blocks_high;
452
453        #[cfg(feature = "multithreading")]
454        let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
455            let bx = x % src.blocks_wide;
456            let by = x / src.blocks_wide;
457            VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
458        }).collect()});
459
460        #[cfg(not(feature = "multithreading"))]
461        let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
462            let bx = x % src.blocks_wide;
463            let by = x / src.blocks_wide;
464            VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
465        }).collect();
466
467        for block_y in 0..src.blocks_high {
468            for block_x in 0..src.blocks_wide {
469                let block = &results[block_x + (block_y * src.blocks_wide)];
470                plane.blit_block(block, block_x * 16, block_y * 16);
471            }
472        }
473
474        plane
475    }
476
477    pub fn decode_plane_into(src: &EncodedIPlane, q_table: &[i32;64], target: &mut VideoPlane, #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) {
478        let total_blocks = src.blocks_wide * src.blocks_high;
479
480        #[cfg(feature = "multithreading")]
481        let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
482            VideoPlane::decode_block(&src.blocks[x], q_table)
483        }).collect()});
484
485        #[cfg(not(feature = "multithreading"))]
486        let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
487            VideoPlane::decode_block(&src.blocks[x], q_table)
488        }).collect();
489
490        for block_y in 0..src.blocks_high {
491            for block_x in 0..src.blocks_wide {
492                let block = &results[block_x + (block_y * src.blocks_wide)];
493                target.blit_block(block, block_x * 16, block_y * 16);
494            }
495        }
496    }
497
498    pub fn decode_plane_delta_into(src: &EncodedPPlane, refplane: &mut VideoPlane, q_table: &[i32;64], #[cfg(feature = "multithreading")] tp: &rayon::ThreadPool) {
499        let total_blocks = src.blocks_wide * src.blocks_high;
500
501        #[cfg(feature = "multithreading")]
502        let results: Vec<_> = tp.install(|| {(0..total_blocks).into_par_iter().map(|x| {
503            let bx = x % src.blocks_wide;
504            let by = x / src.blocks_wide;
505            VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
506        }).collect()});
507
508        #[cfg(not(feature = "multithreading"))]
509        let results: Vec<_> = (0..total_blocks).into_iter().map(|x| {
510            let bx = x % src.blocks_wide;
511            let by = x / src.blocks_wide;
512            VideoPlane::decode_block_delta(&src.blocks[x], refplane, bx * 16, by * 16, q_table)
513        }).collect();
514
515        for block_y in 0..src.blocks_high {
516            for block_x in 0..src.blocks_wide {
517                let block = &results[block_x + (block_y * src.blocks_wide)];
518                refplane.blit_block(block, block_x * 16, block_y * 16);
519            }
520        }
521    }
522
523    pub fn reduce(self: &VideoPlane) -> VideoPlane {
524        let mut new_slice = VideoPlane::new(self.width / 2, self.height / 2);
525
526        for iy in 0..new_slice.height {
527            for ix in 0..new_slice.width {
528                let sx = ix * 2;
529                let sy = iy * 2;
530
531                new_slice.pixels[ix + (iy * new_slice.width)] = self.pixels[sx + (sy * self.width)];
532            }
533        }
534
535        new_slice
536    }
537
538    pub fn double(self: &VideoPlane) -> VideoPlane {
539        let mut new_slice = VideoPlane::new(self.width * 2, self.height * 2);
540
541        for iy in 0..self.height {
542            for ix in 0..self.width {
543                let dx = ix * 2;
544                let dy = iy * 2;
545                let d_idx = dx + (dy * new_slice.width);
546                let px = self.pixels[ix + (iy * self.width)];
547
548                new_slice.pixels[d_idx] = px;
549                new_slice.pixels[d_idx + 1] = px;
550                new_slice.pixels[d_idx + new_slice.width] = px;
551                new_slice.pixels[d_idx + new_slice.width + 1] = px;
552            }
553        }
554
555        new_slice
556    }
557}