zenjpeg 0.7.0

Pure Rust JPEG encoder/decoder with perceptual optimizations
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
//! Resource estimation heuristics for JPEG encoding and decoding operations.
//!
//! These heuristics provide approximate estimates for memory consumption and
//! time costs of encoding/decoding operations. Use them for:
//!
//! - Pre-allocating buffers
//! - Sizing thread pools
//! - Memory budgeting
//! - Progress estimation
//!
//! # Accuracy
//!
//! Estimates are based on the encoder's internal memory estimation logic.
//! Memory estimates include all internal buffers (strip buffers, DCT blocks,
//! token buffers, output buffers).
//!
//! # Content Type Impact
//!
//! Image content significantly affects both memory and time:
//!
//! | Content | Encode Memory | Encode Time |
//! |---------|---------------|-------------|
//! | Solid   | Min           | **Fastest** |
//! | Gradient| Typical       | Fast        |
//! | Photo   | Typical       | **Typical** |
//! | Noise   | Max           | Slow        |
//!
//! For photos (typical web content), adaptive quantization is the bottleneck.
//! For noise/high-entropy content, Huffman encoding is slower.
//!
//! # Example
//!
//! ```rust,ignore
//! use zenjpeg::heuristics::{estimate_encode, estimate_decode};
//! use zenjpeg::encoder::{EncoderConfig, ChromaSubsampling};
//! use zenjpeg::decoder::PixelFormat;
//!
//! // Estimate encode resources for a 1920x1080 image
//! let config = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter);
//! let encode_est = estimate_encode(1920, 1080, &config);
//! println!("Encode peak memory: {:.1} MB", encode_est.peak_memory_bytes as f64 / 1_000_000.0);
//! println!("Encode time: {:.0}ms (typical)", encode_est.time_ms);
//!
//! // Estimate decode resources
//! let decode_est = estimate_decode(1920, 1080, PixelFormat::Rgb);
//! println!("Decode peak memory: {:.1} MB", decode_est.peak_memory_bytes as f64 / 1_000_000.0);
//! println!("Decode time: {:.0}ms (typical)", decode_est.time_ms);
//! ```

use crate::encoder::EncoderConfig;
use crate::types::PixelFormat;

// =============================================================================
// Encode throughput constants (estimated from typical jpegli performance)
// =============================================================================
// Throughput figures are in Mpix/s (millions of pixels per second) on a
// single core; they drive the min/typ/max time estimates below.

/// Encode throughput in Mpix/s for simple content (solid colors).
/// JPEG encoding is CPU-bound; simple content has less entropy to process.
const ENCODE_THROUGHPUT_MAX_MPIXELS: f64 = 40.0;

/// Encode throughput in Mpix/s for typical content (photos).
/// Most real-world images fall into this category.
const ENCODE_THROUGHPUT_TYP_MPIXELS: f64 = 15.0;

/// Encode throughput in Mpix/s for complex content (noise, high-entropy).
/// High-frequency content requires more AQ computation and Huffman encoding.
const ENCODE_THROUGHPUT_MIN_MPIXELS: f64 = 8.0;

// =============================================================================
// Decode throughput constants
// =============================================================================

/// Decode throughput in Mpix/s for simple content (baseline, simple).
const DECODE_THROUGHPUT_MAX_MPIXELS: f64 = 120.0;

/// Decode throughput in Mpix/s for typical content (photos).
const DECODE_THROUGHPUT_TYP_MPIXELS: f64 = 80.0;

/// Decode throughput in Mpix/s for complex content (progressive, high-entropy).
const DECODE_THROUGHPUT_MIN_MPIXELS: f64 = 40.0;

// =============================================================================
// Memory multipliers for content variation
// =============================================================================
// Applied to the base memory estimate to produce the min/typ/max range.

/// Memory multiplier for simple content (min).
const ENCODE_MEMORY_MIN_MULT: f64 = 0.9;

/// Memory multiplier for typical content.
const ENCODE_MEMORY_TYP_MULT: f64 = 1.0;

/// Memory multiplier for complex content (max).
/// Token buffers grow with entropy.
const ENCODE_MEMORY_MAX_MULT: f64 = 1.3;

/// Decode memory varies little with content type; only the worst case
/// gets a small headroom factor.
const DECODE_MEMORY_MIN_MULT: f64 = 1.0;
const DECODE_MEMORY_TYP_MULT: f64 = 1.0;
const DECODE_MEMORY_MAX_MULT: f64 = 1.1;

// =============================================================================
// Public types
// =============================================================================

/// Resource estimation for encode operations.
///
/// Based on jpegli's internal memory estimation and throughput measurements.
/// Produced by [`estimate_encode`] and [`estimate_encode_ceiling`].
/// Marked `#[non_exhaustive]` so fields may be added without a breaking change.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub struct EncodeEstimate {
    /// Minimum expected peak memory (best case: solid color, simple gradient).
    pub peak_memory_bytes_min: u64,

    /// Typical peak memory in bytes during encoding (natural photos).
    pub peak_memory_bytes: u64,

    /// Maximum expected peak memory (worst case: noise, high-entropy).
    pub peak_memory_bytes_max: u64,

    /// Estimated heap allocations. Fewer allocations = better latency.
    pub allocations: u32,

    /// Encode time in milliseconds (best case: simple content).
    pub time_ms_min: f32,

    /// Encode time in milliseconds (typical: real photographs).
    pub time_ms: f32,

    /// Encode time in milliseconds (worst case: noise/high-entropy).
    pub time_ms_max: f32,

    /// Estimated output size in bytes.
    /// JPEG compression varies widely; this is a rough estimate.
    pub output_bytes: u64,

    /// Input size in bytes (width × height × bytes_per_pixel).
    pub input_bytes: u64,
}

/// Resource estimation for decode operations.
///
/// Based on jpegli's decoder memory estimation and throughput measurements.
/// Produced by [`estimate_decode`] and [`estimate_decode_streaming`].
/// Marked `#[non_exhaustive]` so fields may be added without a breaking change.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub struct DecodeEstimate {
    /// Minimum expected peak memory (best case: baseline, simple content).
    pub peak_memory_bytes_min: u64,

    /// Typical peak memory in bytes during decoding.
    pub peak_memory_bytes: u64,

    /// Maximum expected peak memory (worst case: progressive, complex).
    pub peak_memory_bytes_max: u64,

    /// Estimated heap allocations during decoding.
    pub allocations: u32,

    /// Decode time in milliseconds (best case: baseline, simple content).
    pub time_ms_min: f32,

    /// Decode time in milliseconds (typical: real photos).
    pub time_ms: f32,

    /// Decode time in milliseconds (worst case: progressive, complex).
    pub time_ms_max: f32,

    /// Output buffer size in bytes (width × height × output_bpp).
    pub output_bytes: u64,
}

// =============================================================================
// Estimation functions
// =============================================================================

/// Estimate resources for encoding a JPEG image.
///
/// Combines the encoder's own memory estimate with content-dependent
/// multipliers, and derives min/typ/max timings from throughput constants.
///
/// # Arguments
///
/// * `width` - Image width in pixels
/// * `height` - Image height in pixels
/// * `config` - Encoder configuration
///
/// # Example
///
/// ```rust,ignore
/// use zenjpeg::heuristics::estimate_encode;
/// use zenjpeg::encoder::{EncoderConfig, ChromaSubsampling};
///
/// let config = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter);
/// let est = estimate_encode(1920, 1080, &config);
/// println!("Peak memory: {:.1} MB", est.peak_memory_bytes as f64 / 1_000_000.0);
/// println!("Time: {:.0}ms (typical)", est.time_ms);
/// ```
#[must_use]
pub fn estimate_encode(width: u32, height: u32, config: &EncoderConfig) -> EncodeEstimate {
    let pixel_count = u64::from(width) * u64::from(height);
    let pixel_count_f = pixel_count as f64;

    // Start from the encoder's own memory estimate and scale it for
    // content variation (token buffers grow with entropy).
    let base_memory = config.estimate_memory(width, height) as u64;
    let scaled = |mult: f64| (base_memory as f64 * mult) as u64;

    // Progressive mode performs extra scans, modeled as a 30% throughput hit.
    let prog_factor = if config.is_progressive() { 0.7 } else { 1.0 };

    // time_ms = pixels / (Mpix/s * 1_000_000) * 1000 = pixels / (Mpix * 1000)
    let time_at =
        |throughput_mpix: f64| (pixel_count_f / (throughput_mpix * prog_factor * 1000.0)) as f32;

    // Size figures: RGB input assumed; JPEG output is typically 5-20% of raw
    // for photos, so ~10% is used as the quality-85 ballpark.
    let input_bytes = pixel_count * 3;

    EncodeEstimate {
        peak_memory_bytes_min: scaled(ENCODE_MEMORY_MIN_MULT),
        peak_memory_bytes: scaled(ENCODE_MEMORY_TYP_MULT),
        peak_memory_bytes_max: scaled(ENCODE_MEMORY_MAX_MULT),
        // The encoder performs roughly 20-30 major heap allocations.
        allocations: 25,
        time_ms_min: time_at(ENCODE_THROUGHPUT_MAX_MPIXELS),
        time_ms: time_at(ENCODE_THROUGHPUT_TYP_MPIXELS),
        time_ms_max: time_at(ENCODE_THROUGHPUT_MIN_MPIXELS),
        output_bytes: input_bytes / 10,
        input_bytes,
    }
}

/// Estimate resources for encoding with a guaranteed memory ceiling.
///
/// This uses the encoder's `estimate_memory_ceiling()` which returns an
/// absolute upper bound that actual peak memory will never exceed.
///
/// # Arguments
///
/// * `width` - Image width in pixels
/// * `height` - Image height in pixels
/// * `config` - Encoder configuration
///
/// # Example
///
/// ```rust,ignore
/// use zenjpeg::heuristics::estimate_encode_ceiling;
/// use zenjpeg::encoder::{EncoderConfig, ChromaSubsampling};
///
/// let config = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter);
/// let est = estimate_encode_ceiling(1920, 1080, &config);
/// // Reserve this much memory - actual usage guaranteed to be less
/// let buffer = Vec::with_capacity(est.peak_memory_bytes as usize);
/// ```
#[must_use]
pub fn estimate_encode_ceiling(width: u32, height: u32, config: &EncoderConfig) -> EncodeEstimate {
    let mut est = estimate_encode(width, height, config);

    // Override with the guaranteed ceiling from the encoder
    let ceiling = config.estimate_memory_ceiling(width, height) as u64;
    est.peak_memory_bytes_min = ceiling;
    est.peak_memory_bytes = ceiling;
    est.peak_memory_bytes_max = ceiling;

    est
}

/// Estimate resources for decoding a JPEG image.
///
/// Replicates the decoder's memory model: a streaming path (strip buffers
/// plus an RGB frame) versus a non-streaming path (full coefficient storage
/// for progressive/subsampled images), taking the worse of the two.
///
/// # Arguments
///
/// * `width` - Image width in pixels
/// * `height` - Image height in pixels
/// * `format` - Output pixel format (determines bytes per pixel)
///
/// # Example
///
/// ```rust,ignore
/// use zenjpeg::heuristics::estimate_decode;
/// use zenjpeg::decoder::PixelFormat;
///
/// let est = estimate_decode(1920, 1080, PixelFormat::Rgb);
/// println!("Output buffer: {:.1} MB", est.output_bytes as f64 / 1_000_000.0);
/// println!("Peak memory: {:.1} MB", est.peak_memory_bytes as f64 / 1_000_000.0);
/// println!("Time: {:.0}ms (typical)", est.time_ms);
/// ```
#[must_use]
pub fn estimate_decode(width: u32, height: u32, format: PixelFormat) -> DecodeEstimate {
    let w = width as usize;
    let h = height as usize;
    let pixels = (width as u64) * (height as u64);

    // Output buffer size. Widen bytes-per-pixel straight to u64: the previous
    // intermediate `as u8` cast was redundant and would silently truncate if a
    // format ever reported more than 255 bytes per pixel.
    let output_bytes = pixels * (format.bytes_per_pixel() as u64);

    // Replicate decoder's memory estimation logic.
    // MCU width for strip buffers (padded up to a multiple of 8).
    let mcu_cols = (w + 7) / 8;
    let strip_width = mcu_cols * 8;
    let strip_height = 8;

    // Strip buffers: Y, Cb, Cr each stored as i16 (2 bytes per sample).
    let strip_size = strip_width * strip_height;
    let strip_total = strip_size * 2 * 3;

    // Full-frame RGB working buffer.
    let rgb_size = w * h * 3;

    // Streaming total (baseline 4:4:4): strips + RGB only.
    let streaming_total = strip_total + rgb_size;

    // Non-streaming (progressive, subsampled) coefficient storage.
    // 130 bytes/block presumably = 64 i16 coefficients (128 B) plus
    // per-block bookkeeping — TODO confirm against the decoder.
    let blocks_per_component = mcu_cols * ((h + 7) / 8);
    let coeff_storage = blocks_per_component * 130 * 3;

    // Assume the worse of the two paths (non-streaming dominates for
    // progressive/subsampled inputs).
    let base_memory = streaming_total.max(coeff_storage + rgb_size) as u64;

    // Apply content-dependent multipliers.
    let peak_memory_bytes_min = (base_memory as f64 * DECODE_MEMORY_MIN_MULT) as u64;
    let peak_memory_bytes = (base_memory as f64 * DECODE_MEMORY_TYP_MULT) as u64;
    let peak_memory_bytes_max = (base_memory as f64 * DECODE_MEMORY_MAX_MULT) as u64;

    // Time from throughput: ms = pixels / (Mpix/s * 1000).
    let pixels_f = pixels as f64;
    let time_ms_min = (pixels_f / (DECODE_THROUGHPUT_MAX_MPIXELS * 1000.0)) as f32;
    let time_ms = (pixels_f / (DECODE_THROUGHPUT_TYP_MPIXELS * 1000.0)) as f32;
    let time_ms_max = (pixels_f / (DECODE_THROUGHPUT_MIN_MPIXELS * 1000.0)) as f32;

    // Decoding requires fewer major heap allocations than encoding.
    let allocations = 15;

    DecodeEstimate {
        peak_memory_bytes_min,
        peak_memory_bytes,
        peak_memory_bytes_max,
        allocations,
        time_ms_min,
        time_ms,
        time_ms_max,
        output_bytes,
    }
}

/// Estimate resources for streaming decode (one scanline at a time).
///
/// This is more memory-efficient than full decode as it doesn't buffer
/// the entire coefficient array — only one MCU row of strip buffers plus
/// the full-frame RGB output is accounted for.
///
/// # Arguments
///
/// * `width` - Image width in pixels
/// * `height` - Image height in pixels
///
/// # Example
///
/// ```rust,ignore
/// use zenjpeg::heuristics::estimate_decode_streaming;
///
/// let est = estimate_decode_streaming(1920, 1080);
/// println!("Streaming decode memory: {:.1} MB",
///     est.peak_memory_bytes as f64 / 1_000_000.0);
/// ```
#[must_use]
pub fn estimate_decode_streaming(width: u32, height: u32) -> DecodeEstimate {
    let pixel_count = u64::from(width) * u64::from(height);

    // One MCU row of strip buffers: width padded to a multiple of 8,
    // 8 rows tall, three i16 planes (Y, Cb, Cr) at 2 bytes per sample.
    let padded_width = ((width as usize + 7) / 8) * 8;
    let strip_bytes = padded_width * 8 * 2 * 3;

    // The full RGB frame still has to be held for the caller.
    let rgb_bytes = (width as usize) * (height as usize) * 3;

    let base_memory = (strip_bytes + rgb_bytes) as u64;

    // Streaming decode has minimal memory variation; only the worst case
    // gets a small 5% headroom factor.
    let worst_case = (base_memory as f64 * 1.05) as u64;

    // Timing is identical to the regular decode path.
    let pixel_count_f = pixel_count as f64;
    let ms_at = |throughput_mpix: f64| (pixel_count_f / (throughput_mpix * 1000.0)) as f32;

    DecodeEstimate {
        peak_memory_bytes_min: base_memory,
        peak_memory_bytes: base_memory,
        peak_memory_bytes_max: worst_case,
        // Fewer allocations than full decode (no coefficient storage).
        allocations: 10,
        time_ms_min: ms_at(DECODE_THROUGHPUT_MAX_MPIXELS),
        time_ms: ms_at(DECODE_THROUGHPUT_TYP_MPIXELS),
        time_ms_max: ms_at(DECODE_THROUGHPUT_MIN_MPIXELS),
        output_bytes: pixel_count * 3,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::encoder::ChromaSubsampling;
    use crate::types::PixelFormat;

    /// Quadrupling the pixel count should roughly quadruple encode memory.
    #[test]
    fn encode_estimate_scales_with_size() {
        let cfg = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter);
        let lo = estimate_encode(256, 256, &cfg);
        let hi = estimate_encode(512, 512, &cfg);

        let growth = hi.peak_memory_bytes as f64 / lo.peak_memory_bytes as f64;
        assert!(growth > 2.5 && growth < 6.0, "Ratio was {}", growth);
    }

    /// Quadrupling the pixel count should roughly quadruple decode memory.
    #[test]
    fn decode_estimate_scales_with_size() {
        let lo = estimate_decode(256, 256, PixelFormat::Rgb);
        let hi = estimate_decode(512, 512, PixelFormat::Rgb);

        let growth = hi.peak_memory_bytes as f64 / lo.peak_memory_bytes as f64;
        assert!(growth > 2.5 && growth < 6.0, "Ratio was {}", growth);
    }

    /// min < typical < max must hold for both encode and decode timings.
    #[test]
    fn time_ranges_are_ordered() {
        let cfg = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter);
        let enc = estimate_encode(1024, 1024, &cfg);
        assert!(enc.time_ms_min < enc.time_ms);
        assert!(enc.time_ms < enc.time_ms_max);

        let dec = estimate_decode(1024, 1024, PixelFormat::Rgb);
        assert!(dec.time_ms_min < dec.time_ms);
        assert!(dec.time_ms < dec.time_ms_max);
    }

    /// min <= typical <= max must hold for both encode and decode memory.
    #[test]
    fn memory_ranges_are_ordered() {
        let cfg = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter);
        let enc = estimate_encode(1024, 1024, &cfg);
        assert!(enc.peak_memory_bytes_min <= enc.peak_memory_bytes);
        assert!(enc.peak_memory_bytes <= enc.peak_memory_bytes_max);

        let dec = estimate_decode(1024, 1024, PixelFormat::Rgb);
        assert!(dec.peak_memory_bytes_min <= dec.peak_memory_bytes);
        assert!(dec.peak_memory_bytes <= dec.peak_memory_bytes_max);
    }

    /// The guaranteed ceiling must dominate the typical estimate.
    #[test]
    fn ceiling_is_at_least_typical() {
        let cfg = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter);
        let typical = estimate_encode(1024, 1024, &cfg);
        let bounded = estimate_encode_ceiling(1024, 1024, &cfg);

        assert!(bounded.peak_memory_bytes >= typical.peak_memory_bytes);
    }

    /// Streaming decode skips coefficient storage, so it can't need more
    /// memory than the full decode path.
    #[test]
    fn streaming_decode_uses_less_memory() {
        let full = estimate_decode(1024, 1024, PixelFormat::Rgb);
        let streaming = estimate_decode_streaming(1024, 1024);

        assert!(streaming.peak_memory_bytes <= full.peak_memory_bytes);
    }

    /// Progressive mode (the default) should yield a slower estimate than
    /// an explicitly-baseline config.
    #[test]
    fn progressive_is_slower() {
        let baseline = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter).progressive(false);
        let progressive = EncoderConfig::ycbcr(85, ChromaSubsampling::Quarter); // Progressive is default

        let base_est = estimate_encode(1024, 1024, &baseline);
        let prog_est = estimate_encode(1024, 1024, &progressive);

        assert!(prog_est.time_ms > base_est.time_ms);
    }
}
}