oximedia_codec/
psycho_visual.rs

1//! Psycho-visual masking model for perceptual rate control.
2//!
3//! This module implements a visibility threshold model for per-block
4//! quantization decisions. The human visual system (HVS) is less sensitive
5//! to distortion in:
6//!
7//! - **High-texture regions** (spatial masking)
8//! - **Bright regions** (luminance masking — Weber's law)
9//! - **High temporal activity** (temporal masking)
10//! - **Near strong edges** (edge masking)
11//!
12//! The combined Just-Noticeable Difference (JND) threshold drives a QP delta
13//! that can be added to the base quantisation parameter. A positive delta means
14//! the block can tolerate more distortion (higher QP); negative means it needs
15//! finer coding (lower QP).
16//!
17//! # Reference
18//!
19//! Watson, A.B. (1993). *DCTune: A technique for visual optimization of DCT
20//! quantization matrices.* Society for Information Display Digest of Technical
21//! Papers, 24, 946–949.
22
23#![allow(dead_code)]
24#![allow(clippy::cast_precision_loss)]
25#![allow(clippy::cast_possible_truncation)]
26
27// ---------------------------------------------------------------------------
28// Configuration
29// ---------------------------------------------------------------------------
30
31/// Configuration for the psycho-visual masking model.
32#[derive(Debug, Clone)]
33pub struct PsychoVisualConfig {
34    /// Enable spatial (texture) masking.  Default: `true`.
35    pub spatial_masking: bool,
36    /// Enable luminance masking.  Default: `true`.
37    pub luminance_masking: bool,
38    /// Enable temporal masking (requires SAD from motion estimation).  Default: `true`.
39    pub temporal_masking: bool,
40    /// Enable edge masking.  Default: `true`.
41    pub edge_masking: bool,
42    /// Overall strength multiplier applied to the JND-derived QP delta.
43    /// Range [0.0, 2.0]; default `1.0`.
44    pub strength: f32,
45    /// Maximum allowed positive QP delta (soften a block at most this much).
46    pub max_qp_delta_pos: i32,
47    /// Maximum allowed negative QP delta (sharpen a block at most this much).
48    pub max_qp_delta_neg: i32,
49}
50
51impl Default for PsychoVisualConfig {
52    fn default() -> Self {
53        Self {
54            spatial_masking: true,
55            luminance_masking: true,
56            temporal_masking: true,
57            edge_masking: true,
58            strength: 1.0,
59            max_qp_delta_pos: 6,
60            max_qp_delta_neg: 3,
61        }
62    }
63}
64
65// ---------------------------------------------------------------------------
66// Block statistics fed into the masking model
67// ---------------------------------------------------------------------------
68
69/// Per-block statistics gathered during analysis (before encoding).
70#[derive(Debug, Clone, Default)]
71pub struct BlockMaskStats {
72    /// Mean luma value of the block (0–255).
73    pub mean_luma: f32,
74    /// Spatial variance of luma values (proxy for texture energy).
75    pub luma_variance: f32,
76    /// Sum of absolute differences from motion estimation (temporal activity).
77    pub motion_sad: f32,
78    /// Edge energy (e.g. Sobel gradient magnitude sum over the block).
79    pub edge_energy: f32,
80    /// Number of pixels in the block.
81    pub block_area: u32,
82}
83
84impl BlockMaskStats {
85    /// Create stats from a raw luma block slice.
86    ///
87    /// `pixels` should be a row-major luma block (e.g. 8×8 = 64 elements, 0–255).
88    #[must_use]
89    pub fn from_luma_block(pixels: &[u8]) -> Self {
90        if pixels.is_empty() {
91            return Self::default();
92        }
93        let n = pixels.len() as f32;
94        let mean = pixels.iter().map(|&p| p as f32).sum::<f32>() / n;
95        let variance = pixels
96            .iter()
97            .map(|&p| {
98                let d = p as f32 - mean;
99                d * d
100            })
101            .sum::<f32>()
102            / n;
103
104        // Approximate edge energy: sum of horizontal and vertical absolute differences
105        let side = (n.sqrt().round() as usize).max(1);
106        let mut edge_energy = 0.0f32;
107        for row in 0..side {
108            for col in 0..side {
109                let idx = row * side + col;
110                let right = if col + 1 < side {
111                    pixels[idx + 1] as f32
112                } else {
113                    pixels[idx] as f32
114                };
115                let down = if row + 1 < side {
116                    pixels[idx + side] as f32
117                } else {
118                    pixels[idx] as f32
119                };
120                edge_energy +=
121                    (pixels[idx] as f32 - right).abs() + (pixels[idx] as f32 - down).abs();
122            }
123        }
124
125        Self {
126            mean_luma: mean,
127            luma_variance: variance,
128            motion_sad: 0.0,
129            edge_energy,
130            block_area: pixels.len() as u32,
131        }
132    }
133}
134
135// ---------------------------------------------------------------------------
136// Masking model
137// ---------------------------------------------------------------------------
138
139/// Psycho-visual masking model that computes per-block QP deltas.
140#[derive(Debug, Clone)]
141pub struct PsychoVisualModel {
142    cfg: PsychoVisualConfig,
143}
144
145impl PsychoVisualModel {
146    /// Create a new masking model with the given configuration.
147    #[must_use]
148    pub fn new(cfg: PsychoVisualConfig) -> Self {
149        Self { cfg }
150    }
151
152    /// Create a masking model with default configuration.
153    #[must_use]
154    pub fn default_model() -> Self {
155        Self::new(PsychoVisualConfig::default())
156    }
157
158    /// Compute a QP delta for a single block.
159    ///
160    /// Positive values allow higher QP (more compression); negative values
161    /// force lower QP (higher quality).
162    ///
163    /// The block statistics should be gathered before calling this function.
164    #[must_use]
165    pub fn compute_qp_delta(&self, stats: &BlockMaskStats) -> i32 {
166        if self.cfg.strength < 1e-6 {
167            return 0;
168        }
169
170        let mut delta = 0.0f32;
171
172        // --- Spatial / texture masking ---
173        if self.cfg.spatial_masking {
174            // Blocks with high variance can hide more distortion.
175            // Log-scaling keeps the range reasonable.
176            let texture_delta = (stats.luma_variance.max(0.0).ln_1p() / 5.0).min(3.0);
177            delta += texture_delta;
178        }
179
180        // --- Luminance masking (Weber's law) ---
181        if self.cfg.luminance_masking {
182            // Very bright (>200) and very dark (<32) regions have reduced sensitivity.
183            let luma = stats.mean_luma;
184            let luma_delta = if luma > 200.0 {
185                (luma - 200.0) / 55.0 * 2.0 // bright: up to +2
186            } else if luma < 32.0 {
187                (32.0 - luma) / 32.0 * 1.5 // dark: up to +1.5
188            } else {
189                0.0
190            };
191            delta += luma_delta;
192        }
193
194        // --- Temporal masking ---
195        if self.cfg.temporal_masking {
196            // High SAD areas are in motion; viewers track moving objects, so
197            // perceptual sensitivity is *lower* → allow higher QP.
198            let area = stats.block_area.max(1) as f32;
199            let sad_per_pixel = stats.motion_sad / area;
200            let temporal_delta = (sad_per_pixel / 16.0).min(2.0);
201            delta += temporal_delta;
202        }
203
204        // --- Edge masking ---
205        if self.cfg.edge_masking {
206            // Near strong edges, texture masking is already captured;
207            // but very flat blocks near edges need protection.
208            let area = stats.block_area.max(1) as f32;
209            let edge_density = stats.edge_energy / area;
210            let edge_delta = if edge_density < 2.0 && stats.luma_variance < 10.0 {
211                // Flat block near background: protect detail
212                -1.5
213            } else if edge_density > 20.0 {
214                // Dense edge: masking applies
215                1.0
216            } else {
217                0.0
218            };
219            delta += edge_delta;
220        }
221
222        // Apply strength multiplier and clamp
223        let scaled = (delta * self.cfg.strength).round() as i32;
224        scaled
225            .max(-self.cfg.max_qp_delta_neg)
226            .min(self.cfg.max_qp_delta_pos)
227    }
228
229    /// Build a QP delta map for an entire frame.
230    ///
231    /// `luma_plane` is the raw luma data (row-major, 8-bit), `width` and `height`
232    /// are the frame dimensions.  Blocks are 8×8 pixels.  The returned vector
233    /// has one entry per block in raster-scan order.
234    #[must_use]
235    pub fn compute_frame_qp_map(
236        &self,
237        luma_plane: &[u8],
238        width: usize,
239        height: usize,
240        motion_sad_map: Option<&[f32]>,
241    ) -> Vec<i32> {
242        const BLOCK: usize = 8;
243        let blocks_x = (width + BLOCK - 1) / BLOCK;
244        let blocks_y = (height + BLOCK - 1) / BLOCK;
245        let mut map = Vec::with_capacity(blocks_x * blocks_y);
246
247        for by in 0..blocks_y {
248            for bx in 0..blocks_x {
249                // Gather block pixels
250                let mut block_pixels: Vec<u8> = Vec::with_capacity(BLOCK * BLOCK);
251                for row in 0..BLOCK {
252                    let y = by * BLOCK + row;
253                    if y >= height {
254                        break;
255                    }
256                    for col in 0..BLOCK {
257                        let x = bx * BLOCK + col;
258                        if x < width {
259                            block_pixels.push(luma_plane[y * width + x]);
260                        }
261                    }
262                }
263
264                let mut stats = BlockMaskStats::from_luma_block(&block_pixels);
265
266                // Optionally fill motion SAD
267                if let Some(sad_map) = motion_sad_map {
268                    let block_idx = by * blocks_x + bx;
269                    if block_idx < sad_map.len() {
270                        stats.motion_sad = sad_map[block_idx];
271                    }
272                }
273
274                map.push(self.compute_qp_delta(&stats));
275            }
276        }
277
278        map
279    }
280
281    /// Return the current configuration.
282    #[must_use]
283    pub fn config(&self) -> &PsychoVisualConfig {
284        &self.cfg
285    }
286}
287
288// ---------------------------------------------------------------------------
289// Visibility threshold lookup table (DCT-frequency domain)
290// ---------------------------------------------------------------------------
291
292/// Luminance visibility thresholds per DCT frequency (8×8 block, row-major).
293///
294/// Based on Watson (1993) Table 1 — luma quantization matrix scaled to
295/// produce a minimum JND threshold.  Values are in the same scale as standard
296/// JPEG quantization matrices; lower value = more visible frequency.
297pub const LUMA_VISIBILITY_THRESHOLDS: [f32; 64] = [
298    16.0, 11.0, 10.0, 16.0, 24.0, 40.0, 51.0, 61.0, 12.0, 12.0, 14.0, 19.0, 26.0, 58.0, 60.0, 55.0,
299    14.0, 13.0, 16.0, 24.0, 40.0, 57.0, 69.0, 56.0, 14.0, 17.0, 22.0, 29.0, 51.0, 87.0, 80.0, 62.0,
300    18.0, 22.0, 37.0, 56.0, 68.0, 109.0, 103.0, 77.0, 24.0, 35.0, 55.0, 64.0, 81.0, 104.0, 113.0,
301    92.0, 49.0, 64.0, 78.0, 87.0, 103.0, 121.0, 120.0, 101.0, 72.0, 92.0, 95.0, 98.0, 112.0, 100.0,
302    103.0, 99.0,
303];
304
305/// Chrominance visibility thresholds per DCT frequency (8×8 block).
306pub const CHROMA_VISIBILITY_THRESHOLDS: [f32; 64] = [
307    17.0, 18.0, 24.0, 47.0, 99.0, 99.0, 99.0, 99.0, 18.0, 21.0, 26.0, 66.0, 99.0, 99.0, 99.0, 99.0,
308    24.0, 26.0, 56.0, 99.0, 99.0, 99.0, 99.0, 99.0, 47.0, 66.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0,
309    99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0,
310    99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0,
311];
312
313/// Scale the standard visibility threshold table by a quality factor.
314///
315/// `quality` is in [1, 100] as in JPEG; higher quality → smaller threshold
316/// divisors → more bits used.
317#[must_use]
318pub fn scale_thresholds(base: &[f32; 64], quality: u8) -> [f32; 64] {
319    let q = quality.clamp(1, 100) as f32;
320    let scale = if q < 50.0 {
321        5000.0 / q
322    } else {
323        200.0 - 2.0 * q
324    };
325    let mut out = [0.0f32; 64];
326    for (i, (&b, o)) in base.iter().zip(out.iter_mut()).enumerate() {
327        let _ = i; // suppress unused warning
328        *o = ((b * scale / 100.0 + 0.5).floor()).clamp(1.0, 255.0);
329    }
330    out
331}
332
333// ---------------------------------------------------------------------------
334// PvsModel — simplified psycho-visual model with visibility threshold
335// ---------------------------------------------------------------------------
336
337/// Simplified psycho-visual sensitivity (PVS) model.
338///
339/// Provides a single `visibility_threshold` method that returns the minimum
340/// distortion energy that becomes just-noticeable to the human visual system
341/// (HVS) for a given luma level and spatial frequency.
342///
343/// The model is derived from the Watson (1993) DCTune formulation:
344/// - Base threshold from the luma quantization table (frequency-dependent).
345/// - Luminance masking multiplier: brighter or darker regions tolerate more
346///   distortion (Weber-Fechner / power-law masking).
347///
348/// # Reference
349/// Watson, A.B. (1993). *DCTune*.
350#[derive(Debug, Clone, Default)]
351pub struct PvsModel;
352
353impl PvsModel {
354    /// Create a new `PvsModel`.
355    #[must_use]
356    pub fn new() -> Self {
357        Self
358    }
359
360    /// Compute the visibility threshold (JND) for the given luma and frequency.
361    ///
362    /// # Parameters
363    /// - `luma`  – mean luminance of the block, normalised [0.0, 1.0] (0 = black,
364    ///             1 = white).
365    /// - `freq`  – spatial frequency index in [0.0, 1.0] representing low (0) to
366    ///             high (1) frequency DCT coefficients.  Linearly interpolated
367    ///             between the DC threshold and the highest AC threshold in the
368    ///             JPEG luma quantization matrix.
369    ///
370    /// # Returns
371    /// A positive threshold value.  Distortions below this level are perceptually
372    /// invisible; distortions above may be visible.
373    #[must_use]
374    pub fn visibility_threshold(luma: f32, freq: f32) -> f32 {
375        // --- Frequency-dependent base threshold ---
376        // Clamp freq to [0, 1] and linearly interpolate across the JPEG luma QT
377        // (DC coefficient threshold = 16, highest AC = 121)
378        let freq_t = freq.clamp(0.0, 1.0);
379        let dc_thresh = LUMA_VISIBILITY_THRESHOLDS[0]; // 16.0
380        let ac_max_thresh = 121.0_f32; // max in LUMA_VISIBILITY_THRESHOLDS
381        let base = dc_thresh + (ac_max_thresh - dc_thresh) * freq_t;
382
383        // --- Luminance masking multiplier ---
384        // Power-law: threshold rises for very dark (<0.1) and very bright (>0.9)
385        let luma_c = luma.clamp(0.0, 1.0);
386        let masking_mult = if luma_c < 0.1 {
387            // Dark region: HVS less sensitive to distortion
388            1.0 + (0.1 - luma_c) * 5.0
389        } else if luma_c > 0.9 {
390            // Bright region: also less sensitive
391            1.0 + (luma_c - 0.9) * 5.0
392        } else {
393            // Mid-range: most sensitive (threshold close to base)
394            1.0
395        };
396
397        (base * masking_mult).max(1.0)
398    }
399}
400
401// ---------------------------------------------------------------------------
402// Tests
403// ---------------------------------------------------------------------------
404
405#[cfg(test)]
406mod tests {
407    use super::*;
408
409    #[test]
410    fn test_default_config_strength() {
411        let model = PsychoVisualModel::default_model();
412        assert!((model.cfg.strength - 1.0).abs() < 1e-6);
413    }
414
415    #[test]
416    fn test_qp_delta_zero_strength() {
417        let mut cfg = PsychoVisualConfig::default();
418        cfg.strength = 0.0;
419        let model = PsychoVisualModel::new(cfg);
420        let stats = BlockMaskStats {
421            mean_luma: 200.0,
422            luma_variance: 500.0,
423            motion_sad: 1000.0,
424            edge_energy: 0.0,
425            block_area: 64,
426        };
427        assert_eq!(model.compute_qp_delta(&stats), 0);
428    }
429
430    #[test]
431    fn test_qp_delta_clamped_positive() {
432        let model = PsychoVisualModel::default_model();
433        // Very high variance + bright + high motion → positive delta, capped at max
434        let stats = BlockMaskStats {
435            mean_luma: 255.0,
436            luma_variance: 50000.0,
437            motion_sad: 10000.0,
438            edge_energy: 5000.0,
439            block_area: 64,
440        };
441        let delta = model.compute_qp_delta(&stats);
442        assert!(delta <= model.cfg.max_qp_delta_pos);
443    }
444
445    #[test]
446    fn test_qp_delta_clamped_negative() {
447        let model = PsychoVisualModel::default_model();
448        // Flat dark block near edge → negative delta (protect quality), capped at -max_neg
449        let stats = BlockMaskStats {
450            mean_luma: 30.0,
451            luma_variance: 0.5,
452            motion_sad: 0.0,
453            edge_energy: 0.5,
454            block_area: 64,
455        };
456        let delta = model.compute_qp_delta(&stats);
457        assert!(delta >= -model.cfg.max_qp_delta_neg);
458    }
459
460    #[test]
461    fn test_block_stats_from_luma_uniform() {
462        let pixels = vec![128u8; 64];
463        let stats = BlockMaskStats::from_luma_block(&pixels);
464        assert!((stats.mean_luma - 128.0).abs() < 1e-3);
465        assert!(stats.luma_variance < 1e-3);
466    }
467
468    #[test]
469    fn test_block_stats_from_luma_gradient() {
470        let pixels: Vec<u8> = (0u8..64).collect();
471        let stats = BlockMaskStats::from_luma_block(&pixels);
472        // Mean of 0..64 = 31.5
473        assert!((stats.mean_luma - 31.5).abs() < 0.5);
474        assert!(stats.luma_variance > 100.0);
475    }
476
477    #[test]
478    fn test_compute_frame_qp_map_dimensions() {
479        let model = PsychoVisualModel::default_model();
480        let luma = vec![128u8; 64 * 48];
481        let map = model.compute_frame_qp_map(&luma, 64, 48, None);
482        // 64/8 = 8 blocks wide, 48/8 = 6 blocks tall
483        assert_eq!(map.len(), 8 * 6);
484    }
485
486    #[test]
487    fn test_scale_thresholds_high_quality() {
488        // At quality 100 the scale should reduce the thresholds significantly
489        let hi = scale_thresholds(&LUMA_VISIBILITY_THRESHOLDS, 100);
490        let lo = scale_thresholds(&LUMA_VISIBILITY_THRESHOLDS, 10);
491        // All high-quality values should be <= low-quality values
492        for (h, l) in hi.iter().zip(lo.iter()) {
493            assert!(h <= l, "hi={h} lo={l}");
494        }
495    }
496
497    #[test]
498    fn test_scale_thresholds_min_clamp() {
499        // Even at quality 1 the minimum threshold must be 1
500        let out = scale_thresholds(&LUMA_VISIBILITY_THRESHOLDS, 1);
501        for &v in &out {
502            assert!(v >= 1.0);
503        }
504    }
505
506    #[test]
507    fn test_visibility_threshold_tables_length() {
508        assert_eq!(LUMA_VISIBILITY_THRESHOLDS.len(), 64);
509        assert_eq!(CHROMA_VISIBILITY_THRESHOLDS.len(), 64);
510    }
511
512    #[test]
513    fn test_qp_map_with_sad_map() {
514        let model = PsychoVisualModel::default_model();
515        let luma = vec![100u8; 16 * 16];
516        let sad_map = vec![200.0f32; 4]; // 2×2 blocks
517        let map = model.compute_frame_qp_map(&luma, 16, 16, Some(&sad_map));
518        assert_eq!(map.len(), 4);
519    }
520
521    // PvsModel tests
522    #[test]
523    fn pvs_model_threshold_positive() {
524        let t = PvsModel::visibility_threshold(0.5, 0.0);
525        assert!(t > 0.0, "threshold must be positive, got {t}");
526    }
527
528    #[test]
529    fn pvs_model_low_freq_below_high_freq() {
530        let t_low = PvsModel::visibility_threshold(0.5, 0.0);
531        let t_high = PvsModel::visibility_threshold(0.5, 1.0);
532        assert!(
533            t_low <= t_high,
534            "low-freq threshold {t_low} should be <= high-freq {t_high}"
535        );
536    }
537
538    #[test]
539    fn pvs_model_dark_region_higher_threshold() {
540        let t_mid = PvsModel::visibility_threshold(0.5, 0.5);
541        let t_dark = PvsModel::visibility_threshold(0.0, 0.5);
542        assert!(
543            t_dark >= t_mid,
544            "dark region threshold {t_dark} should be >= mid-tone {t_mid}"
545        );
546    }
547
548    #[test]
549    fn pvs_model_bright_region_higher_threshold() {
550        let t_mid = PvsModel::visibility_threshold(0.5, 0.5);
551        let t_bright = PvsModel::visibility_threshold(1.0, 0.5);
552        assert!(
553            t_bright >= t_mid,
554            "bright region threshold {t_bright} should be >= mid-tone {t_mid}"
555        );
556    }
557
558    #[test]
559    fn pvs_model_clamped_inputs_safe() {
560        // Should not panic with out-of-range inputs
561        let t1 = PvsModel::visibility_threshold(-1.0, -0.5);
562        let t2 = PvsModel::visibility_threshold(2.0, 5.0);
563        assert!(t1 > 0.0);
564        assert!(t2 > 0.0);
565    }
566}
oximedia_codec/psycho_visual.rs

oximedia_codec/
psycho_visual.rs