zenwebp 0.4.5

High-performance WebP encoding and decoding in pure Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
//! Segment-based quantization analysis for VP8 encoding.
//!
//! This module provides macroblock-level analysis for adaptive quantization,
//! computing complexity metrics and assigning segments using k-means clustering.
//!
//! ## Module organization
//!
//! - `classifier`: Content type detection for auto-preset selection
//! - `histogram`: DCT coefficient histogram collection (SIMD candidate)
//! - `iterator`: Macroblock iteration for analysis pass
//! - `prediction`: Intra prediction for analysis (SIMD candidate)
//! - `segment`: K-means segment assignment and quantization
//!
//! ## Analysis pipeline
//!
//! 1. For each macroblock, compute DCT histograms using best I16/UV prediction
//! 2. Derive alpha (compressibility) from histogram shape
//! 3. Cluster macroblocks into 1-4 segments using k-means on alpha
//! 4. Compute per-segment quantization based on alpha centers
//!
//! Ported from libwebp src/enc/analysis_enc.c

#![allow(dead_code)]
#![allow(clippy::needless_range_loop)]

// Submodules
pub mod classifier;
pub mod histogram;
pub mod iterator;
pub mod prediction;
pub mod segment;

use alloc::vec::Vec;

// Re-exports
pub use classifier::{
    ClassifierDiag, ImageContentType, classify_image_type, classify_image_type_diag,
    content_type_to_tuning,
};
#[cfg(feature = "analyzer")]
pub use classifier::{
    ZenanalyzeDiag, classify_image_type_rgb8, classify_image_type_rgb8_diag,
    decide_bucket_from_diag, decide_bucket_stable, rgba8_to_rgb8,
};
pub use histogram::{collect_histogram_bps, forward_dct_4x4};
pub use iterator::AnalysisIterator;
pub use segment::{assign_segments_kmeans, compute_segment_quant, smooth_segment_map};

//------------------------------------------------------------------------------
// Constants

/// Maximum alpha value for macroblock complexity
pub const MAX_ALPHA: u8 = 255;

/// Alpha scale factor (from libwebp: ALPHA_SCALE = 2 * MAX_ALPHA = 510)
const ALPHA_SCALE: u32 = 2 * MAX_ALPHA as u32;

/// Maximum coefficient threshold for histogram binning (from libwebp)
const MAX_COEFF_THRESH: usize = 31;

/// Number of segments
pub const NUM_SEGMENTS: usize = 4;

/// Number of intra16 modes to test during analysis (DC=0, TM=1)
const MAX_INTRA16_MODE: usize = 2;

/// Number of UV modes to test during analysis (DC=0, TM=1)
const MAX_UV_MODE: usize = 2;

/// Bytes per stride in libwebp's work buffers
const BPS: usize = 32;

/// Offset to Y plane in work buffer
const Y_OFF_ENC: usize = 0;

/// Offset to U plane in work buffer
const U_OFF_ENC: usize = 16;

/// Offset to V plane in work buffer
const V_OFF_ENC: usize = 24;

/// Offsets to each 4x4 block within a BPS-strided buffer
/// Ported from libwebp's VP8DspScan
/// The 0 * BPS expressions are intentional to match libwebp's pattern
#[allow(clippy::erasing_op, clippy::identity_op)]
const VP8_DSP_SCAN: [usize; 24] = [
    // Luma (16 blocks)
    0 + 0 * BPS,
    4 + 0 * BPS,
    8 + 0 * BPS,
    12 + 0 * BPS,
    0 + 4 * BPS,
    4 + 4 * BPS,
    8 + 4 * BPS,
    12 + 4 * BPS,
    0 + 8 * BPS,
    4 + 8 * BPS,
    8 + 8 * BPS,
    12 + 8 * BPS,
    0 + 12 * BPS,
    4 + 12 * BPS,
    8 + 12 * BPS,
    12 + 12 * BPS,
    // U (4 blocks)
    0 + 0 * BPS,
    4 + 0 * BPS,
    0 + 4 * BPS,
    4 + 4 * BPS,
    // V (4 blocks, offset by 8 from U)
    8 + 0 * BPS,
    12 + 0 * BPS,
    8 + 4 * BPS,
    12 + 4 * BPS,
];

/// Mode offsets for I16 predictions in yuv_p buffer
/// Ported from libwebp's VP8I16ModeOffsets
const I16DC16: usize = 0;
const I16TM16: usize = I16DC16 + 16;
#[allow(clippy::identity_op)] // 1 * 16 matches libwebp's pattern (row * block_size * stride)
const I16VE16: usize = 1 * 16 * BPS;
const I16HE16: usize = I16VE16 + 16;

const VP8_I16_MODE_OFFSETS: [usize; 4] = [I16DC16, I16TM16, I16VE16, I16HE16];

/// Mode offsets for chroma predictions in yuv_p buffer
/// Ported from libwebp's VP8UVModeOffsets
const C8DC8: usize = 2 * 16 * BPS;
const C8TM8: usize = C8DC8 + 16;
const C8VE8: usize = 2 * 16 * BPS + 8 * BPS;
const C8HE8: usize = C8VE8 + 16;

const VP8_UV_MODE_OFFSETS: [usize; 4] = [C8DC8, C8TM8, C8VE8, C8HE8];

/// Size of prediction buffer (I16 + Chroma + I4 preds)
const PRED_SIZE_ENC: usize = 32 * BPS + 16 * BPS + 8 * BPS;

/// Size of YUV work buffer
const YUV_SIZE_ENC: usize = BPS * 16;

//------------------------------------------------------------------------------
// DCT histogram

/// DCT histogram result for alpha calculation
/// Ported from libwebp's VP8Histogram struct
#[derive(Default, Clone, Copy)]
pub struct DctHistogram {
    /// Maximum count in any histogram bin
    pub max_value: u32,
    /// Highest bin index with non-zero count
    pub last_non_zero: usize,
}

impl DctHistogram {
    /// Initialize histogram with default values (from libwebp's InitHistogram)
    pub fn new() -> Self {
        Self {
            max_value: 0,
            last_non_zero: 1,
        }
    }

    /// Compute histogram data from a distribution array
    /// Ported from libwebp's VP8SetHistogramData
    pub fn from_distribution(distribution: &[u32; MAX_COEFF_THRESH + 1]) -> Self {
        let mut max_value: u32 = 0;
        let mut last_non_zero: usize = 1;

        for (k, &count) in distribution.iter().enumerate() {
            if count > 0 {
                if count > max_value {
                    max_value = count;
                }
                last_non_zero = k;
            }
        }

        Self {
            max_value,
            last_non_zero,
        }
    }

    /// Get alpha value from histogram.
    /// Ported from libwebp's GetAlpha (analysis_enc.c).
    ///
    /// Alpha measures the "compressibility" of the block:
    /// - High alpha = many non-zero coefficients relative to max (harder)
    /// - Low alpha (0) = few non-zero coefficients (easier to compress)
    ///
    /// Formula: `ALPHA_SCALE * last_non_zero / max_value`
    pub fn get_alpha(&self) -> i32 {
        if self.max_value <= 1 {
            return 0;
        }
        let value = ALPHA_SCALE * self.last_non_zero as u32 / self.max_value;
        value as i32
    }
}

//------------------------------------------------------------------------------
// Alpha finalization

/// Finalize alpha value by inverting and clipping
/// Ported from libwebp's FinalAlphaValue
#[inline]
fn final_alpha_value(alpha: i32) -> u8 {
    let alpha = MAX_ALPHA as i32 - alpha;
    alpha.clamp(0, MAX_ALPHA as i32) as u8
}

//------------------------------------------------------------------------------
// Main analysis functions

/// Analyze a single macroblock and return (mixed alpha, raw uv_alpha)
/// Ported from libwebp's MBAnalyze
///
/// Returns:
/// - mixed alpha: finalized alpha value combining luma and chroma (for segment assignment)
/// - raw uv_alpha: unfinalized chroma alpha (for UV quant delta computation)
///
/// When `method >= 4`, `sns_strength > 0`, and `cost_model = ZenwebpDefault`,
/// blends in a masking-based alpha from local variance to improve adaptive
/// quantization for textured regions. Disabled under `StrictLibwebpParity`.
pub fn analyze_macroblock(
    it: &mut AnalysisIterator,
    method: u8,
    sns_strength: u8,
    cost_model: crate::encoder::api::CostModel,
) -> (u8, i32) {
    let (best_alpha, _best_mode) = it.analyze_best_intra16_mode();
    let (best_uv_alpha, _uv_mode) = it.analyze_best_uv_mode();

    // Final susceptibility mix
    let alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;

    // Blend with masking alpha for perceptual adaptive quantization.
    // Only when method >= 4 AND sns_strength > 0 (user hasn't disabled SNS)
    // AND cost_model is ZenwebpDefault (libwebp parity disables this blend).
    let alpha = if method >= 4
        && sns_strength > 0
        && cost_model == crate::encoder::api::CostModel::ZenwebpDefault
    {
        let masking = crate::encoder::psy::compute_masking_alpha(&it.yuv_in[Y_OFF_ENC..], BPS);
        crate::encoder::psy::blend_masking_alpha(alpha, masking, method)
    } else {
        alpha
    };

    // Finalize: invert and clip
    (final_alpha_value(alpha), best_uv_alpha)
}

/// Per-macroblock mode hint from the analysis pass.
///
/// Populated only when `method <= 1`, mirroring libwebp's `FastMBAnalyze`
/// which writes per-MB intra16/intra4 decisions during analysis that the
/// encode pass then consumes via `RefineUsingDistortion(refine_uv_mode=0)`.
///
/// At higher methods (m>=2) the encode pass re-evaluates everything and
/// these hints are irrelevant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MbModeHint {
    /// Use Intra16 with DC prediction (libwebp `VP8SetIntra16Mode(it, 0)`).
    I16Dc,
    /// Use Intra4 with all sub-blocks DC-predicted
    /// (libwebp `VP8SetIntra4Mode(it, all_zeros)`).
    I4AllDc,
}

/// Analysis result containing per-MB alphas and global statistics.
pub struct AnalysisResult {
    /// Per-macroblock alpha values (finalized, for segment assignment)
    pub mb_alphas: Vec<u8>,
    /// Histogram of alpha values
    pub alpha_histogram: [u32; 256],
    /// Average UV alpha across all macroblocks (raw, for UV quant delta)
    /// Range is typically ~30 (bad) to ~100 (ok to decimate UV more).
    pub uv_alpha_avg: i32,
    /// Per-macroblock mode hints (only populated when `method <= 1`).
    ///
    /// `None` means the encoder should run its normal mode-selection path.
    /// `Some(hints)` means the encoder should consume the hints directly,
    /// matching libwebp's `RefineUsingDistortion(try_both_modes=0)` flow.
    pub mb_mode_hints: Option<Vec<MbModeHint>>,
}

/// Compute the FastMBAnalyze decision for a single macroblock.
///
/// Ported from libwebp's `FastMBAnalyze` (analysis_enc.c:260). Splits the
/// 16×16 luma block into 16 4×4 sub-blocks, computes per-sub-block DC mean,
/// and uses the variance ratio to pick between Intra16-DC (low variance,
/// flat block) and Intra4-all-DC (high variance, more sub-block detail).
///
/// `quality` is the user-facing 0-100 quality (libwebp `config->quality`).
/// `y_in` is the 16×16 luma source plane in BPS-strided layout
/// (`yuv_in + Y_OFF_ENC` in libwebp).
fn fast_mb_analyze(y_in: &[u8], stride: usize, quality: i32) -> MbModeHint {
    // libwebp threshold: 8 + (17-8)*q/100, ranging from 8 (lowest q) to 17 (highest q).
    // Higher q => higher threshold => more likely to pick I4 (more local detail preserved).
    let q = quality.clamp(0, 100);
    let k_threshold: u64 = 8 + (17 - 8) * q as u64 / 100;

    // Compute 16 per-4×4-block DC sums (each is the sum of all 16 pixels in the 4×4 block).
    // libwebp's `VP8Mean16x4` writes 4 DC values per call (one per 4×4 column in a 4-row strip),
    // and is called 4 times to fill all 16. We compute the same 16 sums directly.
    let mut dc = [0u32; 16];
    for row_blk in 0..4 {
        for col_blk in 0..4 {
            let mut sum: u32 = 0;
            for r in 0..4 {
                let row_base = (row_blk * 4 + r) * stride + col_blk * 4;
                for c in 0..4 {
                    sum += y_in[row_base + c] as u32;
                }
            }
            dc[row_blk * 4 + col_blk] = sum;
        }
    }

    let (mut m, mut m2): (u64, u64) = (0, 0);
    for &d in &dc {
        m += d as u64;
        m2 += (d as u64) * (d as u64);
    }

    // libwebp: `if (kThreshold * m2 < m * m) -> I16-DC, else -> I4-all-DC`
    if k_threshold.saturating_mul(m2) < m.saturating_mul(m) {
        MbModeHint::I16Dc
    } else {
        MbModeHint::I4AllDc
    }
}

/// Run full analysis pass on image and return alpha histogram + per-MB alphas
/// Ported from libwebp's VP8EncAnalyze / DoSegmentsJob
///
/// `method` controls whether perceptual masking is blended into alpha values.
/// `cost_model` selects between zenwebp's perceptual extensions
/// (`ZenwebpDefault`) and strict libwebp parity (`StrictLibwebpParity`,
/// which disables the SATD masking-alpha blend).
///
/// `quality` is the user-facing 0-100 quality, used by `FastMBAnalyze`
/// when `method <= 1` to populate per-MB mode hints.
#[allow(clippy::too_many_arguments)]
pub fn analyze_image(
    y_src: &[u8],
    u_src: &[u8],
    v_src: &[u8],
    width: usize,
    height: usize,
    y_stride: usize,
    uv_stride: usize,
    method: u8,
    sns_strength: u8,
    cost_model: crate::encoder::api::CostModel,
    quality: i32,
) -> AnalysisResult {
    // Default behavior: collect FastMBAnalyze hints whenever libwebp would
    // (method <= 1). Internal callers that already know the hints will be
    // discarded (e.g. partition_limit will force I16-only) should call
    // `analyze_image_with_hint_gate` directly to skip the per-MB DC sums.
    analyze_image_with_hint_gate(
        y_src,
        u_src,
        v_src,
        width,
        height,
        y_stride,
        uv_stride,
        method,
        sns_strength,
        cost_model,
        quality,
        method <= 1,
    )
}

/// Internal variant of [`analyze_image`] that lets the caller force-disable
/// FastMBAnalyze hint collection.
///
/// `collect_mode_hints` overrides the default `method <= 1` decision. Setting
/// it to `false` skips the per-MB 4×4-block DC sums that feed `fast_mb_analyze`.
/// Useful when `partition_limit >= 100` will force I16-only mode selection
/// regardless of the hint, so collecting it would be wasted work.
#[allow(clippy::too_many_arguments)]
pub(crate) fn analyze_image_with_hint_gate(
    y_src: &[u8],
    u_src: &[u8],
    v_src: &[u8],
    width: usize,
    height: usize,
    y_stride: usize,
    uv_stride: usize,
    method: u8,
    sns_strength: u8,
    cost_model: crate::encoder::api::CostModel,
    quality: i32,
    collect_mode_hints: bool,
) -> AnalysisResult {
    let mut it = AnalysisIterator::new(width, height);
    it.reset();

    let total_mbs = it.mb_w * it.mb_h;
    let mut mb_alphas = Vec::with_capacity(total_mbs);
    let mut alpha_histogram = [0u32; 256];
    let mut uv_alpha_sum: i64 = 0;

    let mut mb_mode_hints: Vec<MbModeHint> = if collect_mode_hints {
        Vec::with_capacity(total_mbs)
    } else {
        Vec::new()
    };

    loop {
        it.import(y_src, u_src, v_src, y_stride, uv_stride);

        if collect_mode_hints {
            let hint = fast_mb_analyze(&it.yuv_in[Y_OFF_ENC..], BPS, quality);
            mb_mode_hints.push(hint);
        }

        let (alpha, uv_alpha) = analyze_macroblock(&mut it, method, sns_strength, cost_model);
        mb_alphas.push(alpha);
        alpha_histogram[alpha as usize] += 1;
        uv_alpha_sum += uv_alpha as i64;

        if !it.next() {
            break;
        }
    }

    // Compute average UV alpha (matching libwebp's enc->uv_alpha = sum / total_mb)
    let uv_alpha_avg = if total_mbs > 0 {
        (uv_alpha_sum / total_mbs as i64) as i32
    } else {
        64 // MID_ALPHA default
    };

    AnalysisResult {
        mb_alphas,
        alpha_histogram,
        uv_alpha_avg,
        mb_mode_hints: if collect_mode_hints {
            Some(mb_mode_hints)
        } else {
            None
        },
    }
}