oximedia_codec/psycho_visual.rs
1//! Psycho-visual masking model for perceptual rate control.
2//!
3//! This module implements a visibility threshold model for per-block
4//! quantization decisions. The human visual system (HVS) is less sensitive
5//! to distortion in:
6//!
7//! - **High-texture regions** (spatial masking)
8//! - **Bright regions** (luminance masking — Weber's law)
9//! - **High temporal activity** (temporal masking)
10//! - **Near strong edges** (edge masking)
11//!
12//! The combined Just-Noticeable Difference (JND) threshold drives a QP delta
13//! that can be added to the base quantisation parameter. A positive delta means
14//! the block can tolerate more distortion (higher QP); negative means it needs
15//! finer coding (lower QP).
16//!
17//! # Reference
18//!
19//! Watson, A.B. (1993). *DCTune: A technique for visual optimization of DCT
20//! quantization matrices.* Society for Information Display Digest of Technical
21//! Papers, 24, 946–949.
22
23#![allow(dead_code)]
24#![allow(clippy::cast_precision_loss)]
25#![allow(clippy::cast_possible_truncation)]
26
27// ---------------------------------------------------------------------------
28// Configuration
29// ---------------------------------------------------------------------------
30
31/// Configuration for the psycho-visual masking model.
32#[derive(Debug, Clone)]
33pub struct PsychoVisualConfig {
34 /// Enable spatial (texture) masking. Default: `true`.
35 pub spatial_masking: bool,
36 /// Enable luminance masking. Default: `true`.
37 pub luminance_masking: bool,
38 /// Enable temporal masking (requires SAD from motion estimation). Default: `true`.
39 pub temporal_masking: bool,
40 /// Enable edge masking. Default: `true`.
41 pub edge_masking: bool,
42 /// Overall strength multiplier applied to the JND-derived QP delta.
43 /// Range [0.0, 2.0]; default `1.0`.
44 pub strength: f32,
45 /// Maximum allowed positive QP delta (soften a block at most this much).
46 pub max_qp_delta_pos: i32,
47 /// Maximum allowed negative QP delta (sharpen a block at most this much).
48 pub max_qp_delta_neg: i32,
49}
50
51impl Default for PsychoVisualConfig {
52 fn default() -> Self {
53 Self {
54 spatial_masking: true,
55 luminance_masking: true,
56 temporal_masking: true,
57 edge_masking: true,
58 strength: 1.0,
59 max_qp_delta_pos: 6,
60 max_qp_delta_neg: 3,
61 }
62 }
63}
64
65// ---------------------------------------------------------------------------
66// Block statistics fed into the masking model
67// ---------------------------------------------------------------------------
68
69/// Per-block statistics gathered during analysis (before encoding).
70#[derive(Debug, Clone, Default)]
71pub struct BlockMaskStats {
72 /// Mean luma value of the block (0–255).
73 pub mean_luma: f32,
74 /// Spatial variance of luma values (proxy for texture energy).
75 pub luma_variance: f32,
76 /// Sum of absolute differences from motion estimation (temporal activity).
77 pub motion_sad: f32,
78 /// Edge energy (e.g. Sobel gradient magnitude sum over the block).
79 pub edge_energy: f32,
80 /// Number of pixels in the block.
81 pub block_area: u32,
82}
83
84impl BlockMaskStats {
85 /// Create stats from a raw luma block slice.
86 ///
87 /// `pixels` should be a row-major luma block (e.g. 8×8 = 64 elements, 0–255).
88 #[must_use]
89 pub fn from_luma_block(pixels: &[u8]) -> Self {
90 if pixels.is_empty() {
91 return Self::default();
92 }
93 let n = pixels.len() as f32;
94 let mean = pixels.iter().map(|&p| p as f32).sum::<f32>() / n;
95 let variance = pixels
96 .iter()
97 .map(|&p| {
98 let d = p as f32 - mean;
99 d * d
100 })
101 .sum::<f32>()
102 / n;
103
104 // Approximate edge energy: sum of horizontal and vertical absolute differences
105 let side = (n.sqrt().round() as usize).max(1);
106 let mut edge_energy = 0.0f32;
107 for row in 0..side {
108 for col in 0..side {
109 let idx = row * side + col;
110 let right = if col + 1 < side {
111 pixels[idx + 1] as f32
112 } else {
113 pixels[idx] as f32
114 };
115 let down = if row + 1 < side {
116 pixels[idx + side] as f32
117 } else {
118 pixels[idx] as f32
119 };
120 edge_energy +=
121 (pixels[idx] as f32 - right).abs() + (pixels[idx] as f32 - down).abs();
122 }
123 }
124
125 Self {
126 mean_luma: mean,
127 luma_variance: variance,
128 motion_sad: 0.0,
129 edge_energy,
130 block_area: pixels.len() as u32,
131 }
132 }
133}
134
135// ---------------------------------------------------------------------------
136// Masking model
137// ---------------------------------------------------------------------------
138
139/// Psycho-visual masking model that computes per-block QP deltas.
140#[derive(Debug, Clone)]
141pub struct PsychoVisualModel {
142 cfg: PsychoVisualConfig,
143}
144
145impl PsychoVisualModel {
146 /// Create a new masking model with the given configuration.
147 #[must_use]
148 pub fn new(cfg: PsychoVisualConfig) -> Self {
149 Self { cfg }
150 }
151
152 /// Create a masking model with default configuration.
153 #[must_use]
154 pub fn default_model() -> Self {
155 Self::new(PsychoVisualConfig::default())
156 }
157
158 /// Compute a QP delta for a single block.
159 ///
160 /// Positive values allow higher QP (more compression); negative values
161 /// force lower QP (higher quality).
162 ///
163 /// The block statistics should be gathered before calling this function.
164 #[must_use]
165 pub fn compute_qp_delta(&self, stats: &BlockMaskStats) -> i32 {
166 if self.cfg.strength < 1e-6 {
167 return 0;
168 }
169
170 let mut delta = 0.0f32;
171
172 // --- Spatial / texture masking ---
173 if self.cfg.spatial_masking {
174 // Blocks with high variance can hide more distortion.
175 // Log-scaling keeps the range reasonable.
176 let texture_delta = (stats.luma_variance.max(0.0).ln_1p() / 5.0).min(3.0);
177 delta += texture_delta;
178 }
179
180 // --- Luminance masking (Weber's law) ---
181 if self.cfg.luminance_masking {
182 // Very bright (>200) and very dark (<32) regions have reduced sensitivity.
183 let luma = stats.mean_luma;
184 let luma_delta = if luma > 200.0 {
185 (luma - 200.0) / 55.0 * 2.0 // bright: up to +2
186 } else if luma < 32.0 {
187 (32.0 - luma) / 32.0 * 1.5 // dark: up to +1.5
188 } else {
189 0.0
190 };
191 delta += luma_delta;
192 }
193
194 // --- Temporal masking ---
195 if self.cfg.temporal_masking {
196 // High SAD areas are in motion; viewers track moving objects, so
197 // perceptual sensitivity is *lower* → allow higher QP.
198 let area = stats.block_area.max(1) as f32;
199 let sad_per_pixel = stats.motion_sad / area;
200 let temporal_delta = (sad_per_pixel / 16.0).min(2.0);
201 delta += temporal_delta;
202 }
203
204 // --- Edge masking ---
205 if self.cfg.edge_masking {
206 // Near strong edges, texture masking is already captured;
207 // but very flat blocks near edges need protection.
208 let area = stats.block_area.max(1) as f32;
209 let edge_density = stats.edge_energy / area;
210 let edge_delta = if edge_density < 2.0 && stats.luma_variance < 10.0 {
211 // Flat block near background: protect detail
212 -1.5
213 } else if edge_density > 20.0 {
214 // Dense edge: masking applies
215 1.0
216 } else {
217 0.0
218 };
219 delta += edge_delta;
220 }
221
222 // Apply strength multiplier and clamp
223 let scaled = (delta * self.cfg.strength).round() as i32;
224 scaled
225 .max(-self.cfg.max_qp_delta_neg)
226 .min(self.cfg.max_qp_delta_pos)
227 }
228
229 /// Build a QP delta map for an entire frame.
230 ///
231 /// `luma_plane` is the raw luma data (row-major, 8-bit), `width` and `height`
232 /// are the frame dimensions. Blocks are 8×8 pixels. The returned vector
233 /// has one entry per block in raster-scan order.
234 #[must_use]
235 pub fn compute_frame_qp_map(
236 &self,
237 luma_plane: &[u8],
238 width: usize,
239 height: usize,
240 motion_sad_map: Option<&[f32]>,
241 ) -> Vec<i32> {
242 const BLOCK: usize = 8;
243 let blocks_x = (width + BLOCK - 1) / BLOCK;
244 let blocks_y = (height + BLOCK - 1) / BLOCK;
245 let mut map = Vec::with_capacity(blocks_x * blocks_y);
246
247 for by in 0..blocks_y {
248 for bx in 0..blocks_x {
249 // Gather block pixels
250 let mut block_pixels: Vec<u8> = Vec::with_capacity(BLOCK * BLOCK);
251 for row in 0..BLOCK {
252 let y = by * BLOCK + row;
253 if y >= height {
254 break;
255 }
256 for col in 0..BLOCK {
257 let x = bx * BLOCK + col;
258 if x < width {
259 block_pixels.push(luma_plane[y * width + x]);
260 }
261 }
262 }
263
264 let mut stats = BlockMaskStats::from_luma_block(&block_pixels);
265
266 // Optionally fill motion SAD
267 if let Some(sad_map) = motion_sad_map {
268 let block_idx = by * blocks_x + bx;
269 if block_idx < sad_map.len() {
270 stats.motion_sad = sad_map[block_idx];
271 }
272 }
273
274 map.push(self.compute_qp_delta(&stats));
275 }
276 }
277
278 map
279 }
280
281 /// Return the current configuration.
282 #[must_use]
283 pub fn config(&self) -> &PsychoVisualConfig {
284 &self.cfg
285 }
286}
287
288// ---------------------------------------------------------------------------
289// Visibility threshold lookup table (DCT-frequency domain)
290// ---------------------------------------------------------------------------
291
292/// Luminance visibility thresholds per DCT frequency (8×8 block, row-major).
293///
294/// Based on Watson (1993) Table 1 — luma quantization matrix scaled to
295/// produce a minimum JND threshold. Values are in the same scale as standard
296/// JPEG quantization matrices; lower value = more visible frequency.
297pub const LUMA_VISIBILITY_THRESHOLDS: [f32; 64] = [
298 16.0, 11.0, 10.0, 16.0, 24.0, 40.0, 51.0, 61.0, 12.0, 12.0, 14.0, 19.0, 26.0, 58.0, 60.0, 55.0,
299 14.0, 13.0, 16.0, 24.0, 40.0, 57.0, 69.0, 56.0, 14.0, 17.0, 22.0, 29.0, 51.0, 87.0, 80.0, 62.0,
300 18.0, 22.0, 37.0, 56.0, 68.0, 109.0, 103.0, 77.0, 24.0, 35.0, 55.0, 64.0, 81.0, 104.0, 113.0,
301 92.0, 49.0, 64.0, 78.0, 87.0, 103.0, 121.0, 120.0, 101.0, 72.0, 92.0, 95.0, 98.0, 112.0, 100.0,
302 103.0, 99.0,
303];
304
305/// Chrominance visibility thresholds per DCT frequency (8×8 block).
306pub const CHROMA_VISIBILITY_THRESHOLDS: [f32; 64] = [
307 17.0, 18.0, 24.0, 47.0, 99.0, 99.0, 99.0, 99.0, 18.0, 21.0, 26.0, 66.0, 99.0, 99.0, 99.0, 99.0,
308 24.0, 26.0, 56.0, 99.0, 99.0, 99.0, 99.0, 99.0, 47.0, 66.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0,
309 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0,
310 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0, 99.0,
311];
312
313/// Scale the standard visibility threshold table by a quality factor.
314///
315/// `quality` is in [1, 100] as in JPEG; higher quality → smaller threshold
316/// divisors → more bits used.
317#[must_use]
318pub fn scale_thresholds(base: &[f32; 64], quality: u8) -> [f32; 64] {
319 let q = quality.clamp(1, 100) as f32;
320 let scale = if q < 50.0 {
321 5000.0 / q
322 } else {
323 200.0 - 2.0 * q
324 };
325 let mut out = [0.0f32; 64];
326 for (i, (&b, o)) in base.iter().zip(out.iter_mut()).enumerate() {
327 let _ = i; // suppress unused warning
328 *o = ((b * scale / 100.0 + 0.5).floor()).clamp(1.0, 255.0);
329 }
330 out
331}
332
333// ---------------------------------------------------------------------------
334// PvsModel — simplified psycho-visual model with visibility threshold
335// ---------------------------------------------------------------------------
336
337/// Simplified psycho-visual sensitivity (PVS) model.
338///
339/// Provides a single `visibility_threshold` method that returns the minimum
340/// distortion energy that becomes just-noticeable to the human visual system
341/// (HVS) for a given luma level and spatial frequency.
342///
343/// The model is derived from the Watson (1993) DCTune formulation:
344/// - Base threshold from the luma quantization table (frequency-dependent).
345/// - Luminance masking multiplier: brighter or darker regions tolerate more
346/// distortion (Weber-Fechner / power-law masking).
347///
348/// # Reference
349/// Watson, A.B. (1993). *DCTune*.
350#[derive(Debug, Clone, Default)]
351pub struct PvsModel;
352
353impl PvsModel {
354 /// Create a new `PvsModel`.
355 #[must_use]
356 pub fn new() -> Self {
357 Self
358 }
359
360 /// Compute the visibility threshold (JND) for the given luma and frequency.
361 ///
362 /// # Parameters
363 /// - `luma` – mean luminance of the block, normalised [0.0, 1.0] (0 = black,
364 /// 1 = white).
365 /// - `freq` – spatial frequency index in [0.0, 1.0] representing low (0) to
366 /// high (1) frequency DCT coefficients. Linearly interpolated
367 /// between the DC threshold and the highest AC threshold in the
368 /// JPEG luma quantization matrix.
369 ///
370 /// # Returns
371 /// A positive threshold value. Distortions below this level are perceptually
372 /// invisible; distortions above may be visible.
373 #[must_use]
374 pub fn visibility_threshold(luma: f32, freq: f32) -> f32 {
375 // --- Frequency-dependent base threshold ---
376 // Clamp freq to [0, 1] and linearly interpolate across the JPEG luma QT
377 // (DC coefficient threshold = 16, highest AC = 121)
378 let freq_t = freq.clamp(0.0, 1.0);
379 let dc_thresh = LUMA_VISIBILITY_THRESHOLDS[0]; // 16.0
380 let ac_max_thresh = 121.0_f32; // max in LUMA_VISIBILITY_THRESHOLDS
381 let base = dc_thresh + (ac_max_thresh - dc_thresh) * freq_t;
382
383 // --- Luminance masking multiplier ---
384 // Power-law: threshold rises for very dark (<0.1) and very bright (>0.9)
385 let luma_c = luma.clamp(0.0, 1.0);
386 let masking_mult = if luma_c < 0.1 {
387 // Dark region: HVS less sensitive to distortion
388 1.0 + (0.1 - luma_c) * 5.0
389 } else if luma_c > 0.9 {
390 // Bright region: also less sensitive
391 1.0 + (luma_c - 0.9) * 5.0
392 } else {
393 // Mid-range: most sensitive (threshold close to base)
394 1.0
395 };
396
397 (base * masking_mult).max(1.0)
398 }
399}
400
401// ---------------------------------------------------------------------------
402// Tests
403// ---------------------------------------------------------------------------
404
405#[cfg(test)]
406mod tests {
407 use super::*;
408
409 #[test]
410 fn test_default_config_strength() {
411 let model = PsychoVisualModel::default_model();
412 assert!((model.cfg.strength - 1.0).abs() < 1e-6);
413 }
414
415 #[test]
416 fn test_qp_delta_zero_strength() {
417 let mut cfg = PsychoVisualConfig::default();
418 cfg.strength = 0.0;
419 let model = PsychoVisualModel::new(cfg);
420 let stats = BlockMaskStats {
421 mean_luma: 200.0,
422 luma_variance: 500.0,
423 motion_sad: 1000.0,
424 edge_energy: 0.0,
425 block_area: 64,
426 };
427 assert_eq!(model.compute_qp_delta(&stats), 0);
428 }
429
430 #[test]
431 fn test_qp_delta_clamped_positive() {
432 let model = PsychoVisualModel::default_model();
433 // Very high variance + bright + high motion → positive delta, capped at max
434 let stats = BlockMaskStats {
435 mean_luma: 255.0,
436 luma_variance: 50000.0,
437 motion_sad: 10000.0,
438 edge_energy: 5000.0,
439 block_area: 64,
440 };
441 let delta = model.compute_qp_delta(&stats);
442 assert!(delta <= model.cfg.max_qp_delta_pos);
443 }
444
445 #[test]
446 fn test_qp_delta_clamped_negative() {
447 let model = PsychoVisualModel::default_model();
448 // Flat dark block near edge → negative delta (protect quality), capped at -max_neg
449 let stats = BlockMaskStats {
450 mean_luma: 30.0,
451 luma_variance: 0.5,
452 motion_sad: 0.0,
453 edge_energy: 0.5,
454 block_area: 64,
455 };
456 let delta = model.compute_qp_delta(&stats);
457 assert!(delta >= -model.cfg.max_qp_delta_neg);
458 }
459
460 #[test]
461 fn test_block_stats_from_luma_uniform() {
462 let pixels = vec![128u8; 64];
463 let stats = BlockMaskStats::from_luma_block(&pixels);
464 assert!((stats.mean_luma - 128.0).abs() < 1e-3);
465 assert!(stats.luma_variance < 1e-3);
466 }
467
468 #[test]
469 fn test_block_stats_from_luma_gradient() {
470 let pixels: Vec<u8> = (0u8..64).collect();
471 let stats = BlockMaskStats::from_luma_block(&pixels);
472 // Mean of 0..64 = 31.5
473 assert!((stats.mean_luma - 31.5).abs() < 0.5);
474 assert!(stats.luma_variance > 100.0);
475 }
476
477 #[test]
478 fn test_compute_frame_qp_map_dimensions() {
479 let model = PsychoVisualModel::default_model();
480 let luma = vec![128u8; 64 * 48];
481 let map = model.compute_frame_qp_map(&luma, 64, 48, None);
482 // 64/8 = 8 blocks wide, 48/8 = 6 blocks tall
483 assert_eq!(map.len(), 8 * 6);
484 }
485
486 #[test]
487 fn test_scale_thresholds_high_quality() {
488 // At quality 100 the scale should reduce the thresholds significantly
489 let hi = scale_thresholds(&LUMA_VISIBILITY_THRESHOLDS, 100);
490 let lo = scale_thresholds(&LUMA_VISIBILITY_THRESHOLDS, 10);
491 // All high-quality values should be <= low-quality values
492 for (h, l) in hi.iter().zip(lo.iter()) {
493 assert!(h <= l, "hi={h} lo={l}");
494 }
495 }
496
497 #[test]
498 fn test_scale_thresholds_min_clamp() {
499 // Even at quality 1 the minimum threshold must be 1
500 let out = scale_thresholds(&LUMA_VISIBILITY_THRESHOLDS, 1);
501 for &v in &out {
502 assert!(v >= 1.0);
503 }
504 }
505
506 #[test]
507 fn test_visibility_threshold_tables_length() {
508 assert_eq!(LUMA_VISIBILITY_THRESHOLDS.len(), 64);
509 assert_eq!(CHROMA_VISIBILITY_THRESHOLDS.len(), 64);
510 }
511
512 #[test]
513 fn test_qp_map_with_sad_map() {
514 let model = PsychoVisualModel::default_model();
515 let luma = vec![100u8; 16 * 16];
516 let sad_map = vec![200.0f32; 4]; // 2×2 blocks
517 let map = model.compute_frame_qp_map(&luma, 16, 16, Some(&sad_map));
518 assert_eq!(map.len(), 4);
519 }
520
521 // PvsModel tests
522 #[test]
523 fn pvs_model_threshold_positive() {
524 let t = PvsModel::visibility_threshold(0.5, 0.0);
525 assert!(t > 0.0, "threshold must be positive, got {t}");
526 }
527
528 #[test]
529 fn pvs_model_low_freq_below_high_freq() {
530 let t_low = PvsModel::visibility_threshold(0.5, 0.0);
531 let t_high = PvsModel::visibility_threshold(0.5, 1.0);
532 assert!(
533 t_low <= t_high,
534 "low-freq threshold {t_low} should be <= high-freq {t_high}"
535 );
536 }
537
538 #[test]
539 fn pvs_model_dark_region_higher_threshold() {
540 let t_mid = PvsModel::visibility_threshold(0.5, 0.5);
541 let t_dark = PvsModel::visibility_threshold(0.0, 0.5);
542 assert!(
543 t_dark >= t_mid,
544 "dark region threshold {t_dark} should be >= mid-tone {t_mid}"
545 );
546 }
547
548 #[test]
549 fn pvs_model_bright_region_higher_threshold() {
550 let t_mid = PvsModel::visibility_threshold(0.5, 0.5);
551 let t_bright = PvsModel::visibility_threshold(1.0, 0.5);
552 assert!(
553 t_bright >= t_mid,
554 "bright region threshold {t_bright} should be >= mid-tone {t_mid}"
555 );
556 }
557
558 #[test]
559 fn pvs_model_clamped_inputs_safe() {
560 // Should not panic with out-of-range inputs
561 let t1 = PvsModel::visibility_threshold(-1.0, -0.5);
562 let t2 = PvsModel::visibility_threshold(2.0, 5.0);
563 assert!(t1 > 0.0);
564 assert!(t2 > 0.0);
565 }
566}