Skip to main content

mozjpeg_rs/
encode.rs

1//! JPEG encoder pipeline.
2//!
3//! This module provides two encoder types:
4//!
5//! - [`Encoder`]: Full-featured encoder with trellis quantization, progressive mode,
6//!   and Huffman optimization. Batch encoding only.
7//! - [`StreamingEncoder`]: Streaming-capable encoder without optimizations.
8//!   Supports both batch and scanline-by-scanline encoding.
9//!
10//! Both implement the [`Encode`] trait for batch encoding.
11//!
12//! # Examples
13//!
14//! ```ignore
15//! use mozjpeg_rs::{Encoder, Preset};
16//!
17//! // Full-featured batch encoding
18//! let jpeg = Encoder::new(Preset::default())
19//!     .quality(85)
20//!     .encode_rgb(&pixels, width, height)?;
21//!
22//! // Streaming encoding (memory-efficient for large images)
23//! let mut stream = Encoder::streaming()
24//!     .quality(85)
25//!     .start(width, height, file)?;
26//! for row in scanlines.chunks(16) {
27//!     stream.write_scanlines(row)?;
28//! }
29//! stream.finish()?;
30//! ```
31
32use std::io::Write;
33use std::sync::atomic::{AtomicBool, Ordering};
34use std::time::{Duration, Instant};
35
36use crate::bitstream::BitWriter;
37use crate::color::convert_rgb_to_ycbcr_c_compat;
38use crate::consts::{DCTSIZE, DCTSIZE2, QuantTableIdx};
39use crate::deringing::preprocess_deringing;
40use crate::entropy::{EntropyEncoder, ProgressiveEncoder, ProgressiveSymbolCounter, SymbolCounter};
41use crate::error::{Error, Result};
42use crate::huffman::DerivedTable;
43use crate::huffman::FrequencyCounter;
44use crate::marker::MarkerWriter;
45use crate::progressive::{generate_baseline_scan, generate_mozjpeg_max_compression_scans};
46use crate::quant::{create_quant_tables, quantize_block_raw};
47use crate::sample;
48use crate::scan_optimize::{ScanSearchConfig, ScanSelector, generate_search_scans};
49use crate::scan_trial::ScanTrialEncoder;
50use crate::simd::SimdOps;
51#[cfg(target_arch = "x86_64")]
52use crate::simd::x86_64::entropy::SimdEntropyEncoder;
53use crate::trellis::trellis_quantize_block;
54use crate::types::{Limits, PixelDensity, Preset, Subsampling, TrellisConfig};
55
56mod helpers;
57mod streaming;
58
59pub(crate) use helpers::{
60    create_components, create_std_ac_chroma_table, create_std_ac_luma_table,
61    create_std_dc_chroma_table, create_std_dc_luma_table, create_ycbcr_components,
62    natural_to_zigzag, run_dc_trellis_by_row, try_alloc_vec, try_alloc_vec_array, write_dht_marker,
63    write_sos_marker,
64};
65pub use streaming::{EncodingStream, StreamingEncoder};
66
67// ============================================================================
68// Cancellation Support
69// ============================================================================
70
71/// Internal context for cancellation checking during encoding.
72///
73/// This is passed through the encoding pipeline to allow periodic
74/// cancellation checks without function signature changes everywhere.
75#[derive(Clone, Copy)]
76pub(crate) struct CancellationContext<'a> {
77    /// Optional cancellation flag - if set to true, encoding should abort.
78    pub cancel: Option<&'a AtomicBool>,
79    /// Optional deadline - if current time exceeds this, encoding should abort.
80    pub deadline: Option<Instant>,
81}
82
83impl<'a> CancellationContext<'a> {
84    /// Create a context with no cancellation (always succeeds).
85    #[allow(dead_code)]
86    pub const fn none() -> Self {
87        Self {
88            cancel: None,
89            deadline: None,
90        }
91    }
92
93    /// Create a context from optional cancel flag and timeout.
94    #[allow(dead_code)]
95    pub fn new(cancel: Option<&'a AtomicBool>, timeout: Option<Duration>) -> Self {
96        Self {
97            cancel,
98            deadline: timeout.map(|d| Instant::now() + d),
99        }
100    }
101
102    /// Check if cancellation has been requested.
103    ///
104    /// Returns `Ok(())` if encoding should continue, or `Err` if cancelled/timed out.
105    #[inline]
106    pub fn check(&self) -> Result<()> {
107        if let Some(c) = self.cancel
108            && c.load(Ordering::Relaxed)
109        {
110            return Err(Error::Cancelled);
111        }
112        if let Some(d) = self.deadline
113            && Instant::now() > d
114        {
115            return Err(Error::TimedOut);
116        }
117        Ok(())
118    }
119
120    /// Check cancellation every N iterations (to reduce overhead).
121    ///
122    /// Only performs the check when `iteration % interval == 0`.
123    #[inline]
124    #[allow(dead_code)]
125    pub fn check_periodic(&self, iteration: usize, interval: usize) -> Result<()> {
126        if iteration.is_multiple_of(interval) {
127            self.check()
128        } else {
129            Ok(())
130        }
131    }
132}
133
134// ============================================================================
135// Encode Trait (internal, for potential future streaming API)
136// ============================================================================
137
138/// Trait for JPEG encoding (batch mode).
139///
140/// Implemented by both [`Encoder`] and [`StreamingEncoder`].
141#[allow(dead_code)]
142pub trait Encode {
143    /// Encode RGB image data to JPEG.
144    ///
145    /// # Arguments
146    /// * `rgb_data` - RGB pixel data (3 bytes per pixel, row-major order)
147    /// * `width` - Image width in pixels
148    /// * `height` - Image height in pixels
149    fn encode_rgb(&self, rgb_data: &[u8], width: u32, height: u32) -> Result<Vec<u8>>;
150
151    /// Encode grayscale image data to JPEG.
152    ///
153    /// # Arguments
154    /// * `gray_data` - Grayscale pixel data (1 byte per pixel, row-major order)
155    /// * `width` - Image width in pixels
156    /// * `height` - Image height in pixels
157    fn encode_gray(&self, gray_data: &[u8], width: u32, height: u32) -> Result<Vec<u8>>;
158}
159
160/// JPEG encoder with configurable quality and features.
161#[derive(Debug, Clone)]
162pub struct Encoder {
163    /// Quality level (1-100)
164    quality: u8,
165    /// Enable progressive mode
166    progressive: bool,
167    /// Chroma subsampling mode
168    subsampling: Subsampling,
169    /// Quantization table variant
170    quant_table_idx: QuantTableIdx,
171    /// Custom luminance quantization table (overrides quant_table_idx if set)
172    custom_luma_qtable: Option<[u16; DCTSIZE2]>,
173    /// Custom chrominance quantization table (overrides quant_table_idx if set)
174    custom_chroma_qtable: Option<[u16; DCTSIZE2]>,
175    /// Trellis quantization configuration
176    trellis: TrellisConfig,
177    /// Force baseline-compatible output
178    force_baseline: bool,
179    /// Optimize Huffman tables (requires 2-pass)
180    optimize_huffman: bool,
181    /// Enable overshoot deringing (reduces ringing on white backgrounds)
182    overshoot_deringing: bool,
183    /// Use C mozjpeg-compatible color conversion for exact parity.
184    /// Produces bytewise-identical YCbCr values to C mozjpeg.
185    /// Enabled by default; use `.fast_color()` for faster yuv crate (~40% faster, ±1 rounding).
186    c_compat_color: bool,
187    /// Optimize progressive scan configuration (tries multiple configs, picks smallest)
188    optimize_scans: bool,
189    /// Restart interval in MCUs (0 = disabled)
190    restart_interval: u16,
191    /// Pixel density for JFIF APP0 marker
192    pixel_density: PixelDensity,
193    /// EXIF data to embed (raw TIFF structure, without "Exif\0\0" header)
194    exif_data: Option<Vec<u8>>,
195    /// ICC color profile to embed (will be chunked into APP2 markers)
196    icc_profile: Option<Vec<u8>>,
197    /// Custom APP markers to embed (marker number 0-15, data)
198    custom_markers: Vec<(u8, Vec<u8>)>,
199    /// SIMD operations dispatch (detected once at construction)
200    simd: SimdOps,
201    /// Smoothing factor (0-100, 0 = disabled)
202    /// Applies a weighted average filter to reduce fine-scale noise.
203    /// Useful for converting dithered images (like GIFs) to JPEG.
204    smoothing: u8,
205    /// Resource limits (dimensions, memory, ICC size)
206    limits: Limits,
207}
208
209impl Default for Encoder {
210    fn default() -> Self {
211        Self::new(Preset::default())
212    }
213}
214
215impl Encoder {
216    /// Create an encoder with the specified preset.
217    ///
218    /// # Arguments
219    ///
220    /// * `preset` - Encoding preset (see [`Preset`] for details):
221    ///   - [`BaselineFastest`](Preset::BaselineFastest): No optimizations, fastest encoding
222    ///   - [`BaselineBalanced`](Preset::BaselineBalanced): Baseline with all optimizations
223    ///   - [`ProgressiveBalanced`](Preset::ProgressiveBalanced): Progressive with optimizations (default)
224    ///   - [`ProgressiveSmallest`](Preset::ProgressiveSmallest): Maximum compression
225    ///
226    /// # Preset Comparison
227    ///
228    /// | Preset | Time | Size | Best For |
229    /// |--------|------|------|----------|
230    /// | `BaselineFastest` | ~2ms | baseline | Real-time, thumbnails |
231    /// | `BaselineBalanced` | ~7ms | -13% | Sequential playback |
232    /// | `ProgressiveBalanced` | ~9ms | -13% | Web images (default) |
233    /// | `ProgressiveSmallest` | ~21ms | -14% | Storage, archival |
234    ///
235    /// *Benchmarks: 512×512 Q75 image*
236    ///
237    /// # Example
238    ///
239    /// ```no_run
240    /// use mozjpeg_rs::{Encoder, Preset};
241    ///
242    /// let pixels: Vec<u8> = vec![128; 256 * 256 * 3];
243    ///
244    /// // Default: progressive with good balance
245    /// let jpeg = Encoder::new(Preset::default())
246    ///     .quality(85)
247    ///     .encode_rgb(&pixels, 256, 256)
248    ///     .unwrap();
249    ///
250    /// // Fastest for real-time applications
251    /// let jpeg = Encoder::new(Preset::BaselineFastest)
252    ///     .quality(80)
253    ///     .encode_rgb(&pixels, 256, 256)
254    ///     .unwrap();
255    ///
256    /// // Maximum compression (matches C mozjpeg)
257    /// let jpeg = Encoder::new(Preset::ProgressiveSmallest)
258    ///     .quality(85)
259    ///     .encode_rgb(&pixels, 256, 256)
260    ///     .unwrap();
261    /// ```
262    pub fn new(preset: Preset) -> Self {
263        match preset {
264            Preset::BaselineFastest => Self::fastest(),
265            Preset::BaselineBalanced => Self::baseline_optimized(),
266            Preset::ProgressiveBalanced => Self::progressive_balanced(),
267            Preset::ProgressiveSmallest => Self::max_compression(),
268        }
269    }
270
271    /// Create an encoder with the most optimized baseline (non-progressive) settings.
272    ///
273    /// This is the recommended starting point for most use cases. It produces
274    /// sequential (non-progressive) JPEGs with all mozjpeg optimizations enabled:
275    /// trellis quantization, Huffman optimization, and overshoot deringing.
276    ///
277    /// # Default Settings
278    ///
279    /// | Setting | Value | Notes |
280    /// |---------|-------|-------|
281    /// | quality | 75 | Good balance of size/quality |
282    /// | progressive | **false** | Sequential baseline JPEG |
283    /// | optimize_scans | **false** | N/A for baseline mode |
284    /// | subsampling | 4:2:0 | Standard chroma subsampling |
285    /// | trellis | **enabled** | AC + DC trellis quantization |
286    /// | optimize_huffman | **true** | 2-pass for optimal Huffman tables |
287    /// | overshoot_deringing | **true** | Reduces ringing on hard edges |
288    /// | quant_tables | ImageMagick | Same as C mozjpeg default |
289    /// | force_baseline | false | Allows 16-bit DQT at very low Q |
290    ///
291    /// # Comparison with C mozjpeg
292    ///
293    /// **Important:** This differs from C mozjpeg's `jpeg_set_defaults()`!
294    ///
295    /// C mozjpeg uses `JCP_MAX_COMPRESSION` profile by default, which enables
296    /// progressive mode and optimize_scans. This produces ~20% smaller files
297    /// but with slower encoding and progressive rendering.
298    ///
299    /// | Setting | `baseline_optimized()` | C mozjpeg default |
300    /// |---------|------------------------|-------------------|
301    /// | progressive | **false** | true |
302    /// | optimize_scans | **false** | true |
303    /// | trellis | true | true |
304    /// | deringing | true | true |
305    ///
306    /// To match C mozjpeg's default behavior, use [`max_compression()`](Self::max_compression).
307    ///
308    /// # Example
309    ///
310    /// ```no_run
311    /// use mozjpeg_rs::Encoder;
312    ///
313    /// let pixels: Vec<u8> = vec![128; 256 * 256 * 3];
314    /// let jpeg = Encoder::baseline_optimized()
315    ///     .quality(85)
316    ///     .encode_rgb(&pixels, 256, 256)
317    ///     .unwrap();
318    /// ```
319    pub fn baseline_optimized() -> Self {
320        Self {
321            quality: 75,
322            progressive: false,
323            subsampling: Subsampling::S420,
324            quant_table_idx: QuantTableIdx::ImageMagick,
325            custom_luma_qtable: None,
326            custom_chroma_qtable: None,
327            trellis: TrellisConfig::default(),
328            force_baseline: false,
329            optimize_huffman: true,
330            overshoot_deringing: true,
331            c_compat_color: true,
332            optimize_scans: false,
333            restart_interval: 0,
334            pixel_density: PixelDensity::default(),
335            exif_data: None,
336            icc_profile: None,
337            custom_markers: Vec::new(),
338            simd: SimdOps::detect(),
339            smoothing: 0,
340            limits: Limits::none(),
341        }
342    }
343
344    /// Create encoder with maximum compression (matches C mozjpeg defaults).
345    ///
346    /// This matches the `JCP_MAX_COMPRESSION` profile used by C mozjpeg's
347    /// `jpeg_set_defaults()` and the `mozjpeg` crate.
348    ///
349    /// # Settings (differences from `new()` in **bold**)
350    ///
351    /// | Setting | Value | Notes |
352    /// |---------|-------|-------|
353    /// | quality | 75 | Same as `new()` |
354    /// | progressive | **true** | Multi-scan progressive JPEG |
355    /// | optimize_scans | **true** | Tries multiple scan configs |
356    /// | subsampling | 4:2:0 | Same as `new()` |
357    /// | trellis | enabled | Same as `new()` |
358    /// | optimize_huffman | true | Same as `new()` |
359    /// | overshoot_deringing | true | Same as `new()` |
360    ///
361    /// # File Size Comparison
362    ///
363    /// Typical results at Q75 (256×256 image):
364    /// - `Encoder::baseline_optimized()`: ~650 bytes (baseline)
365    /// - `Encoder::max_compression()`: ~520 bytes (**~20% smaller**)
366    ///
367    /// # Example
368    ///
369    /// ```no_run
370    /// use mozjpeg_rs::Encoder;
371    ///
372    /// // Match C mozjpeg's default compression
373    /// let pixels: Vec<u8> = vec![128; 256 * 256 * 3];
374    /// let jpeg = Encoder::max_compression()
375    ///     .quality(85)
376    ///     .encode_rgb(&pixels, 256, 256)
377    ///     .unwrap();
378    /// ```
379    pub fn max_compression() -> Self {
380        Self {
381            quality: 75,
382            progressive: true,
383            subsampling: Subsampling::S420,
384            quant_table_idx: QuantTableIdx::ImageMagick,
385            custom_luma_qtable: None,
386            custom_chroma_qtable: None,
387            trellis: TrellisConfig::default(),
388            force_baseline: false,
389            optimize_huffman: true,
390            overshoot_deringing: true,
391            c_compat_color: true,
392            optimize_scans: true,
393            restart_interval: 0,
394            pixel_density: PixelDensity::default(),
395            exif_data: None,
396            icc_profile: None,
397            custom_markers: Vec::new(),
398            simd: SimdOps::detect(),
399            smoothing: 0,
400            limits: Limits::none(),
401        }
402    }
403
404    /// Create encoder with progressive mode and all optimizations except optimize_scans.
405    ///
406    /// This is the **recommended default** for most use cases. It provides:
407    /// - Progressive rendering (blurry-to-sharp loading)
408    /// - All mozjpeg optimizations (trellis, Huffman, deringing)
409    /// - Good balance between file size and encoding speed
410    ///
411    /// # Settings
412    ///
413    /// | Setting | Value | Notes |
414    /// |---------|-------|-------|
415    /// | progressive | **true** | Multi-scan progressive JPEG |
416    /// | optimize_scans | **false** | Uses fixed 9-scan config |
417    /// | trellis | enabled | AC + DC trellis quantization |
418    /// | optimize_huffman | true | 2-pass for optimal tables |
419    /// | overshoot_deringing | true | Reduces ringing on hard edges |
420    ///
421    /// # vs `max_compression()`
422    ///
423    /// This preset omits `optimize_scans` which:
424    /// - Saves ~100% encoding time (9ms vs 21ms at 512×512)
425    /// - Loses only ~1% file size reduction
426    ///
427    /// Use `max_compression()` only when file size is critical.
428    ///
429    /// # Example
430    ///
431    /// ```no_run
432    /// use mozjpeg_rs::Encoder;
433    ///
434    /// let pixels: Vec<u8> = vec![128; 256 * 256 * 3];
435    /// let jpeg = Encoder::progressive_balanced()
436    ///     .quality(85)
437    ///     .encode_rgb(&pixels, 256, 256)
438    ///     .unwrap();
439    /// ```
440    pub fn progressive_balanced() -> Self {
441        Self {
442            quality: 75,
443            progressive: true,
444            subsampling: Subsampling::S420,
445            quant_table_idx: QuantTableIdx::ImageMagick,
446            custom_luma_qtable: None,
447            custom_chroma_qtable: None,
448            trellis: TrellisConfig::default(),
449            force_baseline: false,
450            optimize_huffman: true,
451            overshoot_deringing: true,
452            c_compat_color: true,
453            optimize_scans: false, // Key difference from max_compression()
454            restart_interval: 0,
455            pixel_density: PixelDensity::default(),
456            exif_data: None,
457            icc_profile: None,
458            custom_markers: Vec::new(),
459            simd: SimdOps::detect(),
460            smoothing: 0,
461            limits: Limits::none(),
462        }
463    }
464
465    /// Create encoder with fastest settings (libjpeg-turbo compatible).
466    ///
467    /// Disables all mozjpeg-specific optimizations for maximum encoding speed.
468    /// Output is compatible with standard libjpeg/libjpeg-turbo.
469    ///
470    /// # Settings (differences from `new()` in **bold**)
471    ///
472    /// | Setting | Value | Notes |
473    /// |---------|-------|-------|
474    /// | quality | 75 | Same as `new()` |
475    /// | progressive | false | Same as `new()` |
476    /// | trellis | **disabled** | No trellis quantization |
477    /// | optimize_huffman | **false** | Uses default Huffman tables |
478    /// | overshoot_deringing | **false** | No deringing filter |
479    /// | force_baseline | **true** | 8-bit DQT only |
480    ///
481    /// # Performance
482    ///
483    /// Encoding is ~4-10x faster than `new()`, but files are ~10-20% larger.
484    ///
485    /// # Example
486    ///
487    /// ```no_run
488    /// use mozjpeg_rs::Encoder;
489    ///
490    /// // Fast encoding for real-time applications
491    /// let pixels: Vec<u8> = vec![128; 256 * 256 * 3];
492    /// let jpeg = Encoder::fastest()
493    ///     .quality(80)
494    ///     .encode_rgb(&pixels, 256, 256)
495    ///     .unwrap();
496    /// ```
497    pub fn fastest() -> Self {
498        Self {
499            quality: 75,
500            progressive: false,
501            subsampling: Subsampling::S420,
502            quant_table_idx: QuantTableIdx::ImageMagick,
503            custom_luma_qtable: None,
504            custom_chroma_qtable: None,
505            trellis: TrellisConfig::disabled(),
506            force_baseline: true,
507            optimize_huffman: false,
508            overshoot_deringing: false,
509            c_compat_color: true,
510            optimize_scans: false,
511            restart_interval: 0,
512            pixel_density: PixelDensity::default(),
513            exif_data: None,
514            icc_profile: None,
515            custom_markers: Vec::new(),
516            simd: SimdOps::detect(),
517            smoothing: 0,
518            limits: Limits::none(),
519        }
520    }
521
522    /// Set quality level (1-100).
523    ///
524    /// Higher values produce larger, higher-quality images.
525    pub fn quality(mut self, quality: u8) -> Self {
526        self.quality = quality.clamp(1, 100);
527        self
528    }
529
530    /// Enable or disable progressive mode.
531    pub fn progressive(mut self, enable: bool) -> Self {
532        self.progressive = enable;
533        self
534    }
535
536    /// Set chroma subsampling mode.
537    pub fn subsampling(mut self, mode: Subsampling) -> Self {
538        self.subsampling = mode;
539        self
540    }
541
542    /// Set quantization table variant.
543    pub fn quant_tables(mut self, idx: QuantTableIdx) -> Self {
544        self.quant_table_idx = idx;
545        self
546    }
547
548    /// Configure trellis quantization.
549    pub fn trellis(mut self, config: TrellisConfig) -> Self {
550        self.trellis = config;
551        self
552    }
553
554    /// Force baseline-compatible output.
555    pub fn force_baseline(mut self, enable: bool) -> Self {
556        self.force_baseline = enable;
557        self
558    }
559
560    /// Enable Huffman table optimization.
561    pub fn optimize_huffman(mut self, enable: bool) -> Self {
562        self.optimize_huffman = enable;
563        self
564    }
565
566    /// Enable overshoot deringing.
567    ///
568    /// Reduces visible ringing artifacts near hard edges, especially on white
569    /// backgrounds. Works by allowing encoded values to "overshoot" above 255
570    /// (which will clamp back to 255 when decoded) to create smoother waveforms.
571    ///
572    /// This is a mozjpeg-specific feature that can improve visual quality at
573    /// minimal file size cost. Enabled by default.
574    pub fn overshoot_deringing(mut self, enable: bool) -> Self {
575        self.overshoot_deringing = enable;
576        self
577    }
578
/// Select the faster `yuv`-crate color conversion.
///
/// - `fast_color(true)` — ~40% faster RGB→YCbCr using the `yuv` crate (±1 rounding vs C mozjpeg)
/// - `fast_color(false)` — exact C mozjpeg parity, bytewise identical output (default)
///
/// The ±1 rounding differences are invisible in decoded images but may cause
/// slightly different file sizes (typically <1% for baseline mode).
///
/// Internally this stores the inverse of `enable` in the C-compatibility flag.
///
/// # Example
///
/// ```
/// use mozjpeg_rs::{Encoder, Preset};
///
/// // Faster color conversion
/// let encoder = Encoder::new(Preset::default()).quality(85).fast_color(true);
///
/// // Exact C mozjpeg parity (default, explicit)
/// let encoder = Encoder::new(Preset::default()).quality(85).fast_color(false);
/// ```
#[cfg(feature = "fast-yuv")]
pub fn fast_color(self, enable: bool) -> Self {
    Self {
        c_compat_color: !enable,
        ..self
    }
}
603
604    /// Legacy API for color conversion mode.
605    ///
606    /// **Deprecated:** Use [`fast_color()`](Self::fast_color) instead.
607    /// - `c_compat_color(true)` = `fast_color(false)` (exact C parity)
608    /// - `c_compat_color(false)` = `fast_color(true)` (faster yuv crate)
609    #[deprecated(since = "0.7.0", note = "Use fast_color() instead")]
610    pub fn c_compat_color(mut self, enable: bool) -> Self {
611        self.c_compat_color = enable;
612        self
613    }
614
615    /// Override SIMD operations dispatch for testing alternative DCT implementations.
616    ///
617    /// The default dispatch selects the best available i32-based DCT. Use this to
618    /// test experimental paths like [`SimdOps::avx2_i16()`], which uses 16-bit packed
619    /// SIMD and is vulnerable to overflow with [`overshoot_deringing`](Self::overshoot_deringing)
620    /// enabled (see [mozilla/mozjpeg#453](https://github.com/mozilla/mozjpeg/pull/453)).
621    ///
622    /// Test patterns for this bug are in the codec-corpus at
623    /// `imageflow/test_inputs/dct_overflow_patterns/`.
624    pub fn simd_ops(mut self, ops: SimdOps) -> Self {
625        self.simd = ops;
626        self
627    }
628
629    /// Enable or disable scan optimization for progressive mode.
630    ///
631    /// When enabled, the encoder tries multiple scan configurations and
632    /// picks the one that produces the smallest output. This can improve
633    /// compression by 1-3% but increases encoding time.
634    ///
635    /// Only has effect when progressive mode is enabled.
636    pub fn optimize_scans(mut self, enable: bool) -> Self {
637        self.optimize_scans = enable;
638        self
639    }
640
641    /// Set input smoothing factor (0-100).
642    ///
643    /// Applies a weighted average filter to reduce fine-scale noise in the
644    /// input image before encoding. This is particularly useful for converting
645    /// dithered images (like GIFs) to JPEG.
646    ///
647    /// - 0 = disabled (default)
648    /// - 10-50 = recommended for dithered images
649    /// - Higher values = more smoothing (may blur the image)
650    ///
651    /// # Example
652    /// ```
653    /// use mozjpeg_rs::Encoder;
654    ///
655    /// // Convert a dithered GIF to JPEG with smoothing
656    /// let encoder = Encoder::baseline_optimized()
657    ///     .quality(85)
658    ///     .smoothing(30);
659    /// ```
660    pub fn smoothing(mut self, factor: u8) -> Self {
661        self.smoothing = factor.min(100);
662        self
663    }
664
665    /// Set restart interval in MCUs.
666    ///
667    /// Restart markers are inserted every N MCUs, which can help with
668    /// error recovery and parallel decoding. Set to 0 to disable (default).
669    ///
670    /// Common values: 0 (disabled), or image width in MCUs for row-by-row restarts.
671    pub fn restart_interval(mut self, interval: u16) -> Self {
672        self.restart_interval = interval;
673        self
674    }
675
676    /// Set EXIF data to embed in the JPEG.
677    ///
678    /// # Arguments
679    /// * `data` - Raw EXIF data (TIFF structure). The "Exif\0\0" header
680    ///   will be added automatically.
681    ///
682    /// Pass empty or call without this method to omit EXIF data.
683    pub fn exif_data(mut self, data: Vec<u8>) -> Self {
684        self.exif_data = if data.is_empty() { None } else { Some(data) };
685        self
686    }
687
688    /// Set pixel density for the JFIF APP0 marker.
689    ///
690    /// This specifies the physical pixel density (DPI/DPC) or aspect ratio.
691    /// Note that most software ignores JFIF density in favor of EXIF metadata.
692    ///
693    /// # Example
694    /// ```
695    /// use mozjpeg_rs::{Encoder, PixelDensity};
696    ///
697    /// let encoder = Encoder::baseline_optimized()
698    ///     .pixel_density(PixelDensity::dpi(300, 300)); // 300 DPI
699    /// ```
700    pub fn pixel_density(mut self, density: PixelDensity) -> Self {
701        self.pixel_density = density;
702        self
703    }
704
705    /// Set ICC color profile to embed.
706    ///
707    /// The profile will be embedded in APP2 markers with the standard
708    /// "ICC_PROFILE" identifier. Large profiles are automatically chunked.
709    ///
710    /// # Arguments
711    /// * `profile` - Raw ICC profile data
712    pub fn icc_profile(mut self, profile: Vec<u8>) -> Self {
713        self.icc_profile = if profile.is_empty() {
714            None
715        } else {
716            Some(profile)
717        };
718        self
719    }
720
721    /// Add a custom APP marker.
722    ///
723    /// # Arguments
724    /// * `app_num` - APP marker number (0-15, e.g., 1 for EXIF, 2 for ICC)
725    /// * `data` - Raw marker data (including any identifier prefix)
726    ///
727    /// Multiple markers with the same number are allowed.
728    /// Markers are written in the order they are added.
729    pub fn add_marker(mut self, app_num: u8, data: Vec<u8>) -> Self {
730        if app_num <= 15 && !data.is_empty() {
731            self.custom_markers.push((app_num, data));
732        }
733        self
734    }
735
736    /// Set custom luminance quantization table.
737    ///
738    /// This overrides the table selected by `quant_tables()`.
739    /// Values should be in natural (row-major) order, not zigzag.
740    ///
741    /// # Arguments
742    /// * `table` - 64 quantization values (quality scaling still applies)
743    pub fn custom_luma_qtable(mut self, table: [u16; DCTSIZE2]) -> Self {
744        self.custom_luma_qtable = Some(table);
745        self
746    }
747
748    /// Set custom chrominance quantization table.
749    ///
750    /// This overrides the table selected by `quant_tables()`.
751    /// Values should be in natural (row-major) order, not zigzag.
752    ///
753    /// # Arguments
754    /// * `table` - 64 quantization values (quality scaling still applies)
755    pub fn custom_chroma_qtable(mut self, table: [u16; DCTSIZE2]) -> Self {
756        self.custom_chroma_qtable = Some(table);
757        self
758    }
759
760    // =========================================================================
761    // Resource Limits
762    // =========================================================================
763
764    /// Set resource limits for the encoder.
765    ///
766    /// Limits can restrict:
767    /// - Maximum image width and height
768    /// - Maximum pixel count (width × height)
769    /// - Maximum estimated memory allocation
770    /// - Maximum ICC profile size
771    ///
772    /// # Example
773    /// ```
774    /// use mozjpeg_rs::{Encoder, Preset, Limits};
775    ///
776    /// let limits = Limits::default()
777    ///     .max_width(4096)
778    ///     .max_height(4096)
779    ///     .max_pixel_count(16_000_000)
780    ///     .max_alloc_bytes(100 * 1024 * 1024);
781    ///
782    /// let encoder = Encoder::new(Preset::default())
783    ///     .limits(limits);
784    /// ```
785    pub fn limits(mut self, limits: Limits) -> Self {
786        self.limits = limits;
787        self
788    }
789
790    /// Check all resource limits before encoding.
791    ///
792    /// # Arguments
793    /// * `width` - Image width
794    /// * `height` - Image height
795    /// * `is_gray` - True for grayscale images (affects memory estimate)
796    fn check_limits(&self, width: u32, height: u32, is_gray: bool) -> Result<()> {
797        let limits = &self.limits;
798
799        // Check dimension limits
800        if (limits.max_width > 0 && width > limits.max_width)
801            || (limits.max_height > 0 && height > limits.max_height)
802        {
803            return Err(Error::DimensionLimitExceeded {
804                width,
805                height,
806                max_width: limits.max_width,
807                max_height: limits.max_height,
808            });
809        }
810
811        // Check pixel count limit
812        if limits.max_pixel_count > 0 {
813            let pixel_count = width as u64 * height as u64;
814            if pixel_count > limits.max_pixel_count {
815                return Err(Error::PixelCountExceeded {
816                    pixel_count,
817                    limit: limits.max_pixel_count,
818                });
819            }
820        }
821
822        // Check allocation limit
823        if limits.max_alloc_bytes > 0 {
824            let estimate = if is_gray {
825                self.estimate_resources_gray(width, height)
826            } else {
827                self.estimate_resources(width, height)
828            };
829            if estimate.peak_memory_bytes > limits.max_alloc_bytes {
830                return Err(Error::AllocationLimitExceeded {
831                    estimated: estimate.peak_memory_bytes,
832                    limit: limits.max_alloc_bytes,
833                });
834            }
835        }
836
837        // Check ICC profile size limit
838        if limits.max_icc_profile_bytes > 0
839            && let Some(ref icc) = self.icc_profile
840            && icc.len() > limits.max_icc_profile_bytes
841        {
842            return Err(Error::IccProfileTooLarge {
843                size: icc.len(),
844                limit: limits.max_icc_profile_bytes,
845            });
846        }
847
848        Ok(())
849    }
850
851    // =========================================================================
852    // Aliases for rimage/CLI-style naming
853    // =========================================================================
854
855    /// Set baseline mode (opposite of progressive).
856    ///
857    /// When `true`, produces a sequential JPEG (non-progressive).
858    /// This is equivalent to `progressive(false)`.
859    ///
860    /// # Example
861    /// ```
862    /// use mozjpeg_rs::Encoder;
863    ///
864    /// // These are equivalent:
865    /// let enc1 = Encoder::baseline_optimized().baseline(true);
866    /// let enc2 = Encoder::baseline_optimized().progressive(false);
867    /// ```
868    #[inline]
869    pub fn baseline(self, enable: bool) -> Self {
870        self.progressive(!enable)
871    }
872
873    /// Enable or disable Huffman coding optimization.
874    ///
875    /// Alias for [`optimize_huffman()`](Self::optimize_huffman).
876    /// This name matches mozjpeg's CLI flag naming.
877    #[inline]
878    pub fn optimize_coding(self, enable: bool) -> Self {
879        self.optimize_huffman(enable)
880    }
881
882    /// Set chroma subsampling mode.
883    ///
884    /// Alias for [`subsampling()`](Self::subsampling).
885    #[inline]
886    pub fn chroma_subsampling(self, mode: Subsampling) -> Self {
887        self.subsampling(mode)
888    }
889
890    /// Set quantization table variant.
891    ///
892    /// Alias for [`quant_tables()`](Self::quant_tables).
893    #[inline]
894    pub fn qtable(self, idx: QuantTableIdx) -> Self {
895        self.quant_tables(idx)
896    }
897
898    // =========================================================================
899    // Resource Estimation
900    // =========================================================================
901
902    /// Estimate resource usage for encoding an RGB image of the given dimensions.
903    ///
904    /// Returns peak memory usage (in bytes) and a relative CPU cost multiplier.
905    /// Useful for scheduling, enforcing resource limits, or providing feedback.
906    ///
907    /// # Arguments
908    /// * `width` - Image width in pixels
909    /// * `height` - Image height in pixels
910    ///
911    /// # Example
912    ///
913    /// ```
914    /// use mozjpeg_rs::{Encoder, Preset};
915    ///
916    /// let encoder = Encoder::new(Preset::ProgressiveBalanced).quality(85);
917    /// let estimate = encoder.estimate_resources(1920, 1080);
918    ///
919    /// println!("Peak memory: {} MB", estimate.peak_memory_bytes / 1_000_000);
920    /// println!("Relative CPU cost: {:.1}x", estimate.cpu_cost_multiplier);
921    /// ```
922    pub fn estimate_resources(&self, width: u32, height: u32) -> crate::types::ResourceEstimate {
923        let width = width as usize;
924        let height = height as usize;
925        let pixels = width * height;
926
927        // Calculate chroma dimensions based on subsampling
928        let (h_samp, v_samp) = self.subsampling.luma_factors();
929        let chroma_width = (width + h_samp as usize - 1) / h_samp as usize;
930        let chroma_height = (height + v_samp as usize - 1) / v_samp as usize;
931        let chroma_pixels = chroma_width * chroma_height;
932
933        // MCU-aligned dimensions
934        let mcu_h = 8 * h_samp as usize;
935        let mcu_v = 8 * v_samp as usize;
936        let mcu_width = (width + mcu_h - 1) / mcu_h * mcu_h;
937        let mcu_height = (height + mcu_v - 1) / mcu_v * mcu_v;
938
939        // Block counts
940        let y_blocks = (mcu_width / 8) * (mcu_height / 8);
941        let chroma_block_w = (chroma_width + 7) / 8;
942        let chroma_block_h = (chroma_height + 7) / 8;
943        let chroma_blocks = chroma_block_w * chroma_block_h;
944        let total_blocks = y_blocks + 2 * chroma_blocks;
945
946        // --- Memory estimation ---
947        let mut memory: usize = 0;
948
949        // Color conversion buffers (Y, Cb, Cr planes)
950        memory += 3 * pixels;
951
952        // Chroma subsampled buffers
953        memory += 2 * chroma_pixels;
954
955        // MCU-padded buffers
956        memory += mcu_width * mcu_height; // Y
957        let mcu_chroma_w = (chroma_width + 7) / 8 * 8;
958        let mcu_chroma_h = (chroma_height + 7) / 8 * 8;
959        memory += 2 * mcu_chroma_w * mcu_chroma_h; // Cb, Cr
960
961        // Block storage (needed for progressive or optimize_huffman)
962        let needs_block_storage = self.progressive || self.optimize_huffman;
963        if needs_block_storage {
964            // i16[64] per block = 128 bytes
965            memory += total_blocks * 128;
966        }
967
968        // Raw DCT storage (needed for DC trellis)
969        if self.trellis.dc_enabled {
970            // i32[64] per block = 256 bytes
971            memory += total_blocks * 256;
972        }
973
974        // Output buffer estimate (varies by quality, ~0.3-1.0x input for typical images)
975        // Use a conservative estimate based on quality
976        let output_ratio = if self.quality >= 95 {
977            0.8
978        } else if self.quality >= 85 {
979            0.5
980        } else if self.quality >= 75 {
981            0.3
982        } else {
983            0.2
984        };
985        memory += (pixels as f64 * 3.0 * output_ratio) as usize;
986
987        // --- CPU cost estimation ---
988        // Reference: BaselineFastest Q75 = 1.0
989        let mut cpu_cost = 1.0;
990
991        // Trellis AC quantization is the biggest CPU factor
992        if self.trellis.enabled {
993            cpu_cost += 3.5;
994        }
995
996        // DC trellis adds extra work
997        if self.trellis.dc_enabled {
998            cpu_cost += 0.5;
999        }
1000
1001        // Huffman optimization (frequency counting pass)
1002        if self.optimize_huffman {
1003            cpu_cost += 0.3;
1004        }
1005
1006        // Progressive mode (multiple scan encoding)
1007        if self.progressive {
1008            cpu_cost += 1.5;
1009        }
1010
1011        // optimize_scans (trial encoding many scan configurations)
1012        if self.optimize_scans {
1013            cpu_cost += 3.0;
1014        }
1015
1016        // High quality increases trellis work (more candidates to evaluate)
1017        // This matters most when trellis is enabled
1018        if self.trellis.enabled && self.quality >= 85 {
1019            let quality_factor = 1.0 + (self.quality as f64 - 85.0) / 30.0;
1020            cpu_cost *= quality_factor;
1021        }
1022
1023        crate::types::ResourceEstimate {
1024            peak_memory_bytes: memory,
1025            cpu_cost_multiplier: cpu_cost,
1026            block_count: total_blocks,
1027        }
1028    }
1029
1030    /// Estimate resource usage for encoding a grayscale image.
1031    ///
1032    /// Similar to [`estimate_resources`](Self::estimate_resources) but for single-channel images.
1033    pub fn estimate_resources_gray(
1034        &self,
1035        width: u32,
1036        height: u32,
1037    ) -> crate::types::ResourceEstimate {
1038        let width = width as usize;
1039        let height = height as usize;
1040        let pixels = width * height;
1041
1042        // MCU-aligned dimensions (always 8x8 for grayscale)
1043        let mcu_width = (width + 7) / 8 * 8;
1044        let mcu_height = (height + 7) / 8 * 8;
1045
1046        // Block count
1047        let blocks = (mcu_width / 8) * (mcu_height / 8);
1048
1049        // --- Memory estimation ---
1050        let mut memory: usize = 0;
1051
1052        // MCU-padded buffer
1053        memory += mcu_width * mcu_height;
1054
1055        // Block storage (needed for progressive or optimize_huffman)
1056        let needs_block_storage = self.progressive || self.optimize_huffman;
1057        if needs_block_storage {
1058            memory += blocks * 128;
1059        }
1060
1061        // Raw DCT storage (needed for DC trellis)
1062        if self.trellis.dc_enabled {
1063            memory += blocks * 256;
1064        }
1065
1066        // Output buffer estimate
1067        let output_ratio = if self.quality >= 95 {
1068            0.8
1069        } else if self.quality >= 85 {
1070            0.5
1071        } else if self.quality >= 75 {
1072            0.3
1073        } else {
1074            0.2
1075        };
1076        memory += (pixels as f64 * output_ratio) as usize;
1077
1078        // --- CPU cost (same formula, but less work due to single channel) ---
1079        let mut cpu_cost = 1.0;
1080
1081        if self.trellis.enabled {
1082            cpu_cost += 3.5;
1083        }
1084        if self.trellis.dc_enabled {
1085            cpu_cost += 0.5;
1086        }
1087        if self.optimize_huffman {
1088            cpu_cost += 0.3;
1089        }
1090        if self.progressive {
1091            cpu_cost += 1.0; // Less for grayscale (fewer scans)
1092        }
1093        if self.optimize_scans {
1094            cpu_cost += 2.0; // Less for grayscale
1095        }
1096        if self.trellis.enabled && self.quality >= 85 {
1097            let quality_factor = 1.0 + (self.quality as f64 - 85.0) / 30.0;
1098            cpu_cost *= quality_factor;
1099        }
1100
1101        // Grayscale is ~1/3 the work of RGB (single channel)
1102        cpu_cost /= 3.0;
1103
1104        crate::types::ResourceEstimate {
1105            peak_memory_bytes: memory,
1106            cpu_cost_multiplier: cpu_cost,
1107            block_count: blocks,
1108        }
1109    }
1110
1111    // =========================================================================
1112    // Encoding
1113    // =========================================================================
1114
1115    /// Encode RGB image data to JPEG.
1116    ///
1117    /// # Arguments
1118    /// * `rgb_data` - RGB pixel data (3 bytes per pixel, row-major)
1119    /// * `width` - Image width in pixels
1120    /// * `height` - Image height in pixels
1121    ///
1122    /// # Returns
1123    /// JPEG-encoded data as a `Vec<u8>`.
1124    pub fn encode_rgb(&self, rgb_data: &[u8], width: u32, height: u32) -> Result<Vec<u8>> {
1125        // Validate dimensions: must be non-zero
1126        if width == 0 || height == 0 {
1127            return Err(Error::InvalidDimensions { width, height });
1128        }
1129
1130        // Check all resource limits
1131        self.check_limits(width, height, false)?;
1132
1133        // Use checked arithmetic to prevent overflow
1134        let expected_len = (width as usize)
1135            .checked_mul(height as usize)
1136            .and_then(|n| n.checked_mul(3))
1137            .ok_or(Error::InvalidDimensions { width, height })?;
1138
1139        if rgb_data.len() != expected_len {
1140            return Err(Error::BufferSizeMismatch {
1141                expected: expected_len,
1142                actual: rgb_data.len(),
1143            });
1144        }
1145
1146        // Apply smoothing if enabled
1147        let rgb_data = if self.smoothing > 0 {
1148            std::borrow::Cow::Owned(crate::smooth::smooth_rgb(
1149                rgb_data,
1150                width,
1151                height,
1152                self.smoothing,
1153            ))
1154        } else {
1155            std::borrow::Cow::Borrowed(rgb_data)
1156        };
1157
1158        let mut output = Vec::new();
1159        self.encode_rgb_to_writer(&rgb_data, width, height, &mut output)?;
1160        Ok(output)
1161    }
1162
1163    /// Encode grayscale image data to JPEG.
1164    ///
1165    /// # Arguments
1166    /// * `gray_data` - Grayscale pixel data (1 byte per pixel, row-major)
1167    /// * `width` - Image width in pixels
1168    /// * `height` - Image height in pixels
1169    ///
1170    /// # Returns
1171    /// JPEG-encoded data as a `Vec<u8>`.
1172    pub fn encode_gray(&self, gray_data: &[u8], width: u32, height: u32) -> Result<Vec<u8>> {
1173        // Validate dimensions: must be non-zero
1174        if width == 0 || height == 0 {
1175            return Err(Error::InvalidDimensions { width, height });
1176        }
1177
1178        // Check all resource limits
1179        self.check_limits(width, height, true)?;
1180
1181        // Use checked arithmetic to prevent overflow
1182        let expected_len = (width as usize)
1183            .checked_mul(height as usize)
1184            .ok_or(Error::InvalidDimensions { width, height })?;
1185
1186        if gray_data.len() != expected_len {
1187            return Err(Error::BufferSizeMismatch {
1188                expected: expected_len,
1189                actual: gray_data.len(),
1190            });
1191        }
1192
1193        // Apply smoothing if enabled
1194        let gray_data = if self.smoothing > 0 {
1195            std::borrow::Cow::Owned(crate::smooth::smooth_grayscale(
1196                gray_data,
1197                width,
1198                height,
1199                self.smoothing,
1200            ))
1201        } else {
1202            std::borrow::Cow::Borrowed(gray_data)
1203        };
1204
1205        let mut output = Vec::new();
1206        self.encode_gray_to_writer(&gray_data, width, height, &mut output)?;
1207        Ok(output)
1208    }
1209
1210    /// Encode RGB image data with row stride to JPEG.
1211    ///
1212    /// Use this when your pixel buffer has padding between rows (e.g., memory-aligned
1213    /// buffers, cropped regions without copying, GPU textures).
1214    ///
1215    /// # Arguments
1216    /// * `rgb_data` - RGB pixel data with optional row padding
1217    /// * `width` - Image width in pixels
1218    /// * `height` - Image height in pixels
1219    /// * `stride` - Number of bytes between the start of consecutive rows.
1220    ///   Must be >= `width * 3`. A stride of `width * 3` means tightly packed rows.
1221    ///
1222    /// # Returns
1223    /// JPEG-encoded data as a `Vec<u8>`.
1224    ///
1225    /// # Example
1226    /// ```no_run
1227    /// use mozjpeg_rs::{Encoder, Preset};
1228    ///
1229    /// // 100x100 image with rows padded to 320 bytes (for 64-byte alignment)
1230    /// let stride = 320;
1231    /// let buffer: Vec<u8> = vec![128; stride * 100];
1232    ///
1233    /// let jpeg = Encoder::new(Preset::default())
1234    ///     .quality(85)
1235    ///     .encode_rgb_strided(&buffer, 100, 100, stride)?;
1236    /// # Ok::<(), mozjpeg_rs::Error>(())
1237    /// ```
1238    pub fn encode_rgb_strided(
1239        &self,
1240        rgb_data: &[u8],
1241        width: u32,
1242        height: u32,
1243        stride: usize,
1244    ) -> Result<Vec<u8>> {
1245        let width_usize = width as usize;
1246        let height_usize = height as usize;
1247        let row_bytes = width_usize
1248            .checked_mul(3)
1249            .ok_or(Error::InvalidDimensions { width, height })?;
1250
1251        // Validate stride
1252        if stride < row_bytes {
1253            return Err(Error::InvalidStride {
1254                stride,
1255                minimum: row_bytes,
1256            });
1257        }
1258
1259        // If stride equals row_bytes, use the fast path
1260        if stride == row_bytes {
1261            return self.encode_rgb(rgb_data, width, height);
1262        }
1263
1264        // Validate buffer size
1265        let expected_len = stride
1266            .checked_mul(height_usize.saturating_sub(1))
1267            .and_then(|n| n.checked_add(row_bytes))
1268            .ok_or(Error::InvalidDimensions { width, height })?;
1269
1270        if rgb_data.len() < expected_len {
1271            return Err(Error::BufferSizeMismatch {
1272                expected: expected_len,
1273                actual: rgb_data.len(),
1274            });
1275        }
1276
1277        // Copy to contiguous buffer
1278        let mut contiguous = try_alloc_vec(0u8, row_bytes * height_usize)?;
1279        for y in 0..height_usize {
1280            let src_start = y * stride;
1281            let dst_start = y * row_bytes;
1282            contiguous[dst_start..dst_start + row_bytes]
1283                .copy_from_slice(&rgb_data[src_start..src_start + row_bytes]);
1284        }
1285
1286        self.encode_rgb(&contiguous, width, height)
1287    }
1288
1289    /// Encode grayscale image data with row stride to JPEG.
1290    ///
1291    /// Use this when your pixel buffer has padding between rows (e.g., memory-aligned
1292    /// buffers, cropped regions without copying).
1293    ///
1294    /// # Arguments
1295    /// * `gray_data` - Grayscale pixel data with optional row padding
1296    /// * `width` - Image width in pixels
1297    /// * `height` - Image height in pixels
1298    /// * `stride` - Number of bytes between the start of consecutive rows.
1299    ///   Must be >= `width`. A stride of `width` means tightly packed rows.
1300    ///
1301    /// # Returns
1302    /// JPEG-encoded data as a `Vec<u8>`.
1303    pub fn encode_gray_strided(
1304        &self,
1305        gray_data: &[u8],
1306        width: u32,
1307        height: u32,
1308        stride: usize,
1309    ) -> Result<Vec<u8>> {
1310        let width_usize = width as usize;
1311        let height_usize = height as usize;
1312
1313        // Validate stride
1314        if stride < width_usize {
1315            return Err(Error::InvalidStride {
1316                stride,
1317                minimum: width_usize,
1318            });
1319        }
1320
1321        // If stride equals width, use the fast path
1322        if stride == width_usize {
1323            return self.encode_gray(gray_data, width, height);
1324        }
1325
1326        // Validate buffer size
1327        let expected_len = stride
1328            .checked_mul(height_usize.saturating_sub(1))
1329            .and_then(|n| n.checked_add(width_usize))
1330            .ok_or(Error::InvalidDimensions { width, height })?;
1331
1332        if gray_data.len() < expected_len {
1333            return Err(Error::BufferSizeMismatch {
1334                expected: expected_len,
1335                actual: gray_data.len(),
1336            });
1337        }
1338
1339        // Copy to contiguous buffer
1340        let mut contiguous = try_alloc_vec(0u8, width_usize * height_usize)?;
1341        for y in 0..height_usize {
1342            let src_start = y * stride;
1343            let dst_start = y * width_usize;
1344            contiguous[dst_start..dst_start + width_usize]
1345                .copy_from_slice(&gray_data[src_start..src_start + width_usize]);
1346        }
1347
1348        self.encode_gray(&contiguous, width, height)
1349    }
1350
1351    /// Encode RGB image data to JPEG with cancellation and timeout support.
1352    ///
1353    /// This method allows encoding to be cancelled mid-operation via an atomic flag,
1354    /// or to automatically abort if a timeout is exceeded.
1355    ///
1356    /// # Arguments
1357    /// * `rgb_data` - RGB pixel data (3 bytes per pixel, row-major)
1358    /// * `width` - Image width in pixels
1359    /// * `height` - Image height in pixels
1360    /// * `cancel` - Optional cancellation flag. Set to `true` to abort encoding.
1361    /// * `timeout` - Optional maximum encoding duration.
1362    ///
1363    /// # Returns
1364    /// * `Ok(Vec<u8>)` - JPEG-encoded data
1365    /// * `Err(Error::Cancelled)` - If cancelled via the flag
1366    /// * `Err(Error::TimedOut)` - If the timeout was exceeded
1367    ///
1368    /// # Example
1369    /// ```no_run
1370    /// use mozjpeg_rs::{Encoder, Preset};
1371    /// use std::sync::atomic::AtomicBool;
1372    /// use std::time::Duration;
1373    ///
1374    /// let encoder = Encoder::new(Preset::ProgressiveBalanced);
1375    /// let pixels: Vec<u8> = vec![128; 1920 * 1080 * 3];
1376    /// let cancel = AtomicBool::new(false);
1377    ///
1378    /// // Encode with 5 second timeout
1379    /// let result = encoder.encode_rgb_cancellable(
1380    ///     &pixels, 1920, 1080,
1381    ///     Some(&cancel),
1382    ///     Some(Duration::from_secs(5)),
1383    /// );
1384    /// ```
1385    pub fn encode_rgb_cancellable(
1386        &self,
1387        rgb_data: &[u8],
1388        width: u32,
1389        height: u32,
1390        cancel: Option<&AtomicBool>,
1391        timeout: Option<Duration>,
1392    ) -> Result<Vec<u8>> {
1393        // Validate dimensions
1394        if width == 0 || height == 0 {
1395            return Err(Error::InvalidDimensions { width, height });
1396        }
1397
1398        // Check all resource limits
1399        self.check_limits(width, height, false)?;
1400
1401        // Check buffer size
1402        let expected_len = (width as usize)
1403            .checked_mul(height as usize)
1404            .and_then(|n| n.checked_mul(3))
1405            .ok_or(Error::InvalidDimensions { width, height })?;
1406
1407        if rgb_data.len() != expected_len {
1408            return Err(Error::BufferSizeMismatch {
1409                expected: expected_len,
1410                actual: rgb_data.len(),
1411            });
1412        }
1413
1414        // Create cancellation context
1415        let ctx = CancellationContext::new(cancel, timeout);
1416
1417        // Check for immediate cancellation
1418        ctx.check()?;
1419
1420        // Apply smoothing if enabled
1421        let rgb_data = if self.smoothing > 0 {
1422            std::borrow::Cow::Owned(crate::smooth::smooth_rgb(
1423                rgb_data,
1424                width,
1425                height,
1426                self.smoothing,
1427            ))
1428        } else {
1429            std::borrow::Cow::Borrowed(rgb_data)
1430        };
1431
1432        let mut output = Vec::new();
1433        // For now, use the regular encoder (cancellation hooks can be added to
1434        // internal functions in a follow-up). Check cancellation before and after.
1435        ctx.check()?;
1436        self.encode_rgb_to_writer(&rgb_data, width, height, &mut output)?;
1437        ctx.check()?;
1438
1439        Ok(output)
1440    }
1441
1442    /// Encode grayscale image data to JPEG with cancellation and timeout support.
1443    ///
1444    /// This method allows encoding to be cancelled mid-operation via an atomic flag,
1445    /// or to automatically abort if a timeout is exceeded.
1446    ///
1447    /// # Arguments
1448    /// * `gray_data` - Grayscale pixel data (1 byte per pixel, row-major)
1449    /// * `width` - Image width in pixels
1450    /// * `height` - Image height in pixels
1451    /// * `cancel` - Optional cancellation flag. Set to `true` to abort encoding.
1452    /// * `timeout` - Optional maximum encoding duration.
1453    ///
1454    /// # Returns
1455    /// * `Ok(Vec<u8>)` - JPEG-encoded data
1456    /// * `Err(Error::Cancelled)` - If cancelled via the flag
1457    /// * `Err(Error::TimedOut)` - If the timeout was exceeded
1458    pub fn encode_gray_cancellable(
1459        &self,
1460        gray_data: &[u8],
1461        width: u32,
1462        height: u32,
1463        cancel: Option<&AtomicBool>,
1464        timeout: Option<Duration>,
1465    ) -> Result<Vec<u8>> {
1466        // Validate dimensions
1467        if width == 0 || height == 0 {
1468            return Err(Error::InvalidDimensions { width, height });
1469        }
1470
1471        // Check all resource limits
1472        self.check_limits(width, height, true)?;
1473
1474        // Check buffer size
1475        let expected_len = (width as usize)
1476            .checked_mul(height as usize)
1477            .ok_or(Error::InvalidDimensions { width, height })?;
1478
1479        if gray_data.len() != expected_len {
1480            return Err(Error::BufferSizeMismatch {
1481                expected: expected_len,
1482                actual: gray_data.len(),
1483            });
1484        }
1485
1486        // Create cancellation context
1487        let ctx = CancellationContext::new(cancel, timeout);
1488
1489        // Check for immediate cancellation
1490        ctx.check()?;
1491
1492        // Apply smoothing if enabled
1493        let gray_data = if self.smoothing > 0 {
1494            std::borrow::Cow::Owned(crate::smooth::smooth_grayscale(
1495                gray_data,
1496                width,
1497                height,
1498                self.smoothing,
1499            ))
1500        } else {
1501            std::borrow::Cow::Borrowed(gray_data)
1502        };
1503
1504        let mut output = Vec::new();
1505        // For now, use the regular encoder (cancellation hooks can be added to
1506        // internal functions in a follow-up). Check cancellation before and after.
1507        ctx.check()?;
1508        self.encode_gray_to_writer(&gray_data, width, height, &mut output)?;
1509        ctx.check()?;
1510
1511        Ok(output)
1512    }
1513
1514    /// Encode grayscale image data to a writer.
1515    pub fn encode_gray_to_writer<W: Write>(
1516        &self,
1517        gray_data: &[u8],
1518        width: u32,
1519        height: u32,
1520        output: W,
1521    ) -> Result<()> {
1522        let width = width as usize;
1523        let height = height as usize;
1524
1525        // For grayscale, Y plane is the input directly (no conversion needed)
1526        let y_plane = gray_data;
1527
1528        // Grayscale uses 1x1 sampling
1529        let (mcu_width, mcu_height) = sample::mcu_aligned_dimensions(width, height, 1, 1);
1530
1531        let mcu_y_size = mcu_width
1532            .checked_mul(mcu_height)
1533            .ok_or(Error::AllocationFailed)?;
1534        let mut y_mcu = try_alloc_vec(0u8, mcu_y_size)?;
1535        sample::expand_to_mcu(y_plane, width, height, &mut y_mcu, mcu_width, mcu_height);
1536
1537        // Create quantization table (only luma needed)
1538        let luma_qtable = if let Some(ref custom) = self.custom_luma_qtable {
1539            crate::quant::create_quant_table(custom, self.quality, self.force_baseline)
1540        } else {
1541            let (luma, _) =
1542                create_quant_tables(self.quality, self.quant_table_idx, self.force_baseline);
1543            luma
1544        };
1545
1546        // Create Huffman tables (only luma needed)
1547        let dc_luma_huff = create_std_dc_luma_table();
1548        let ac_luma_huff = create_std_ac_luma_table();
1549        let dc_luma_derived = DerivedTable::from_huff_table(&dc_luma_huff, true)?;
1550        let ac_luma_derived = DerivedTable::from_huff_table(&ac_luma_huff, false)?;
1551
1552        // Single component for grayscale
1553        let components = create_components(Subsampling::Gray);
1554
1555        // Write JPEG file
1556        let mut marker_writer = MarkerWriter::new(output);
1557
1558        // SOI
1559        marker_writer.write_soi()?;
1560
1561        // APP0 (JFIF) with pixel density
1562        marker_writer.write_jfif_app0(
1563            self.pixel_density.unit as u8,
1564            self.pixel_density.x,
1565            self.pixel_density.y,
1566        )?;
1567
1568        // EXIF (if present)
1569        if let Some(ref exif) = self.exif_data {
1570            marker_writer.write_app1_exif(exif)?;
1571        }
1572
1573        // ICC profile (if present)
1574        if let Some(ref icc) = self.icc_profile {
1575            marker_writer.write_icc_profile(icc)?;
1576        }
1577
1578        // Custom APP markers
1579        for (app_num, data) in &self.custom_markers {
1580            marker_writer.write_app(*app_num, data)?;
1581        }
1582
1583        // DQT (only luma table for grayscale)
1584        let luma_qtable_zz = natural_to_zigzag(&luma_qtable.values);
1585        marker_writer.write_dqt(0, &luma_qtable_zz, false)?;
1586
1587        // SOF (baseline or progressive)
1588        marker_writer.write_sof(
1589            self.progressive,
1590            8,
1591            height as u16,
1592            width as u16,
1593            &components,
1594        )?;
1595
1596        // DRI (restart interval)
1597        if self.restart_interval > 0 {
1598            marker_writer.write_dri(self.restart_interval)?;
1599        }
1600
1601        // DHT (only luma tables for grayscale) - written later for progressive
1602        if !self.progressive && !self.optimize_huffman {
1603            marker_writer
1604                .write_dht_multiple(&[(0, false, &dc_luma_huff), (0, true, &ac_luma_huff)])?;
1605        }
1606
1607        let mcu_rows = mcu_height / DCTSIZE;
1608        let mcu_cols = mcu_width / DCTSIZE;
1609        let num_blocks = mcu_rows
1610            .checked_mul(mcu_cols)
1611            .ok_or(Error::AllocationFailed)?;
1612
1613        if self.progressive {
1614            // Progressive mode: collect all blocks, then encode multiple scans
1615            let mut y_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_blocks)?;
1616            let mut dct_block = [0i16; DCTSIZE2];
1617
1618            // Optionally collect raw DCT for DC trellis
1619            let dc_trellis_enabled = self.trellis.enabled && self.trellis.dc_enabled;
1620            let mut y_raw_dct = if dc_trellis_enabled {
1621                Some(try_alloc_vec_array::<i32, DCTSIZE2>(num_blocks)?)
1622            } else {
1623                None
1624            };
1625
1626            // Collect all blocks
1627            for mcu_row in 0..mcu_rows {
1628                for mcu_col in 0..mcu_cols {
1629                    let block_idx = mcu_row * mcu_cols + mcu_col;
1630                    self.process_block_to_storage_with_raw(
1631                        &y_mcu,
1632                        mcu_width,
1633                        mcu_row,
1634                        mcu_col,
1635                        &luma_qtable.values,
1636                        &ac_luma_derived,
1637                        &mut y_blocks[block_idx],
1638                        &mut dct_block,
1639                        y_raw_dct.as_mut().map(|v| v[block_idx].as_mut_slice()),
1640                    )?;
1641                }
1642            }
1643
1644            // Run DC trellis optimization if enabled
1645            if dc_trellis_enabled && let Some(ref y_raw) = y_raw_dct {
1646                run_dc_trellis_by_row(
1647                    y_raw,
1648                    &mut y_blocks,
1649                    luma_qtable.values[0],
1650                    &dc_luma_derived,
1651                    self.trellis.lambda_log_scale1,
1652                    self.trellis.lambda_log_scale2,
1653                    mcu_rows,
1654                    mcu_cols,
1655                    mcu_cols,
1656                    1,
1657                    1,
1658                    self.trellis.delta_dc_weight,
1659                );
1660            }
1661
1662            // Run EOB optimization if enabled (cross-block EOBRUN optimization)
1663            if self.trellis.enabled && self.trellis.eob_opt {
1664                use crate::trellis::{estimate_block_eob_info, optimize_eob_runs};
1665
1666                // Estimate EOB info for each block
1667                let eob_info: Vec<_> = y_blocks
1668                    .iter()
1669                    .map(|block| estimate_block_eob_info(block, &ac_luma_derived, 1, 63))
1670                    .collect();
1671
1672                // Optimize EOB runs across all blocks
1673                optimize_eob_runs(&mut y_blocks, &eob_info, &ac_luma_derived, 1, 63);
1674            }
1675
1676            // Generate progressive scan script for grayscale (1 component)
1677            let scans = generate_mozjpeg_max_compression_scans(1);
1678
1679            // Build optimized Huffman tables
1680            let mut dc_freq = FrequencyCounter::new();
1681            let mut dc_counter = ProgressiveSymbolCounter::new();
1682            for scan in &scans {
1683                let is_dc_first_scan = scan.ss == 0 && scan.se == 0 && scan.ah == 0;
1684                if is_dc_first_scan {
1685                    // Count DC symbols using progressive counter
1686                    for block in &y_blocks {
1687                        dc_counter.count_dc_first(block, 0, scan.al, &mut dc_freq);
1688                    }
1689                }
1690            }
1691
1692            let opt_dc_huff = dc_freq.generate_table()?;
1693            let opt_dc_derived = DerivedTable::from_huff_table(&opt_dc_huff, true)?;
1694
1695            // Write DC Huffman table upfront
1696            marker_writer.write_dht_multiple(&[(0, false, &opt_dc_huff)])?;
1697
1698            // Encode each scan
1699            let output = marker_writer.into_inner();
1700            let mut bit_writer = BitWriter::new(output);
1701
1702            for scan in &scans {
1703                let is_dc_scan = scan.ss == 0 && scan.se == 0;
1704
1705                if is_dc_scan {
1706                    // DC scan
1707                    marker_writer = MarkerWriter::new(bit_writer.into_inner());
1708                    marker_writer.write_sos(scan, &components)?;
1709                    bit_writer = BitWriter::new(marker_writer.into_inner());
1710
1711                    let mut prog_encoder = ProgressiveEncoder::new(&mut bit_writer);
1712
1713                    if scan.ah == 0 {
1714                        // DC first scan
1715                        for block in &y_blocks {
1716                            prog_encoder.encode_dc_first(block, 0, &opt_dc_derived, scan.al)?;
1717                        }
1718                    } else {
1719                        // DC refinement scan
1720                        for block in &y_blocks {
1721                            prog_encoder.encode_dc_refine(block, scan.al)?;
1722                        }
1723                    }
1724
1725                    prog_encoder.finish_scan(None)?;
1726                } else {
1727                    // AC scan - generate per-scan Huffman table
1728                    let mut ac_freq = FrequencyCounter::new();
1729                    let mut ac_counter = ProgressiveSymbolCounter::new();
1730
1731                    for block in &y_blocks {
1732                        if scan.ah == 0 {
1733                            ac_counter.count_ac_first(
1734                                block,
1735                                scan.ss,
1736                                scan.se,
1737                                scan.al,
1738                                &mut ac_freq,
1739                            );
1740                        } else {
1741                            ac_counter.count_ac_refine(
1742                                block,
1743                                scan.ss,
1744                                scan.se,
1745                                scan.ah,
1746                                scan.al,
1747                                &mut ac_freq,
1748                            );
1749                        }
1750                    }
1751                    ac_counter.finish_scan(Some(&mut ac_freq));
1752
1753                    let opt_ac_huff = ac_freq.generate_table()?;
1754                    let opt_ac_derived = DerivedTable::from_huff_table(&opt_ac_huff, false)?;
1755
1756                    // Write AC Huffman table and SOS
1757                    marker_writer = MarkerWriter::new(bit_writer.into_inner());
1758                    marker_writer.write_dht_multiple(&[(0, true, &opt_ac_huff)])?;
1759                    marker_writer.write_sos(scan, &components)?;
1760                    bit_writer = BitWriter::new(marker_writer.into_inner());
1761
1762                    let mut prog_encoder = ProgressiveEncoder::new(&mut bit_writer);
1763
1764                    for block in &y_blocks {
1765                        if scan.ah == 0 {
1766                            prog_encoder.encode_ac_first(
1767                                block,
1768                                scan.ss,
1769                                scan.se,
1770                                scan.al,
1771                                &opt_ac_derived,
1772                            )?;
1773                        } else {
1774                            prog_encoder.encode_ac_refine(
1775                                block,
1776                                scan.ss,
1777                                scan.se,
1778                                scan.ah,
1779                                scan.al,
1780                                &opt_ac_derived,
1781                            )?;
1782                        }
1783                    }
1784
1785                    prog_encoder.finish_scan(Some(&opt_ac_derived))?;
1786                }
1787            }
1788
1789            let mut output = bit_writer.into_inner();
1790            output.write_all(&[0xFF, 0xD9])?; // EOI
1791        } else if self.optimize_huffman {
1792            // 2-pass: collect blocks, count frequencies, then encode
1793            let mut y_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_blocks)?;
1794            let mut dct_block = [0i16; DCTSIZE2];
1795
1796            // Collect all blocks using the same process as RGB encoding
1797            for mcu_row in 0..mcu_rows {
1798                for mcu_col in 0..mcu_cols {
1799                    let block_idx = mcu_row * mcu_cols + mcu_col;
1800                    self.process_block_to_storage_with_raw(
1801                        &y_mcu,
1802                        mcu_width,
1803                        mcu_row,
1804                        mcu_col,
1805                        &luma_qtable.values,
1806                        &ac_luma_derived,
1807                        &mut y_blocks[block_idx],
1808                        &mut dct_block,
1809                        None, // No raw DCT storage needed for grayscale
1810                    )?;
1811                }
1812            }
1813
1814            // Count frequencies using SymbolCounter
1815            let mut dc_freq = FrequencyCounter::new();
1816            let mut ac_freq = FrequencyCounter::new();
1817            let mut counter = SymbolCounter::new();
1818            for block in &y_blocks {
1819                counter.count_block(block, 0, &mut dc_freq, &mut ac_freq);
1820            }
1821
1822            // Generate optimized tables
1823            let opt_dc_huff = dc_freq.generate_table()?;
1824            let opt_ac_huff = ac_freq.generate_table()?;
1825            let opt_dc_derived = DerivedTable::from_huff_table(&opt_dc_huff, true)?;
1826            let opt_ac_derived = DerivedTable::from_huff_table(&opt_ac_huff, false)?;
1827
1828            // Write optimized Huffman tables
1829            marker_writer
1830                .write_dht_multiple(&[(0, false, &opt_dc_huff), (0, true, &opt_ac_huff)])?;
1831
1832            // Write SOS and encode
1833            let scans = generate_baseline_scan(1);
1834            marker_writer.write_sos(&scans[0], &components)?;
1835
1836            let output = marker_writer.into_inner();
1837            let mut bit_writer = BitWriter::new(output);
1838            let mut encoder = EntropyEncoder::new(&mut bit_writer);
1839
1840            // Restart marker support for grayscale (each block = 1 MCU)
1841            let restart_interval = self.restart_interval as usize;
1842            let mut restart_num = 0u8;
1843
1844            for (mcu_count, block) in y_blocks.iter().enumerate() {
1845                // Emit restart marker if needed
1846                if restart_interval > 0
1847                    && mcu_count > 0
1848                    && mcu_count.is_multiple_of(restart_interval)
1849                {
1850                    encoder.emit_restart(restart_num)?;
1851                    restart_num = restart_num.wrapping_add(1) & 0x07;
1852                }
1853                encoder.encode_block(block, 0, &opt_dc_derived, &opt_ac_derived)?;
1854            }
1855
1856            bit_writer.flush()?;
1857            let mut output = bit_writer.into_inner();
1858            output.write_all(&[0xFF, 0xD9])?; // EOI
1859        } else {
1860            // Single-pass encoding
1861            let scans = generate_baseline_scan(1);
1862            marker_writer.write_sos(&scans[0], &components)?;
1863
1864            let output = marker_writer.into_inner();
1865            let mut bit_writer = BitWriter::new(output);
1866            let mut encoder = EntropyEncoder::new(&mut bit_writer);
1867            let mut dct_block = [0i16; DCTSIZE2];
1868            let mut quant_block = [0i16; DCTSIZE2];
1869
1870            // Restart marker support
1871            let restart_interval = self.restart_interval as usize;
1872            let mut mcu_count = 0usize;
1873            let mut restart_num = 0u8;
1874
1875            for mcu_row in 0..mcu_rows {
1876                for mcu_col in 0..mcu_cols {
1877                    // Emit restart marker if needed
1878                    if restart_interval > 0
1879                        && mcu_count > 0
1880                        && mcu_count.is_multiple_of(restart_interval)
1881                    {
1882                        encoder.emit_restart(restart_num)?;
1883                        restart_num = restart_num.wrapping_add(1) & 0x07;
1884                    }
1885
1886                    // Process block directly to quant_block
1887                    self.process_block_to_storage_with_raw(
1888                        &y_mcu,
1889                        mcu_width,
1890                        mcu_row,
1891                        mcu_col,
1892                        &luma_qtable.values,
1893                        &ac_luma_derived,
1894                        &mut quant_block,
1895                        &mut dct_block,
1896                        None,
1897                    )?;
1898                    encoder.encode_block(&quant_block, 0, &dc_luma_derived, &ac_luma_derived)?;
1899                    mcu_count += 1;
1900                }
1901            }
1902
1903            bit_writer.flush()?;
1904            let mut output = bit_writer.into_inner();
1905            output.write_all(&[0xFF, 0xD9])?; // EOI
1906        }
1907
1908        Ok(())
1909    }
1910
1911    /// Encode pre-converted planar YCbCr image data to JPEG.
1912    ///
1913    /// This method accepts tightly packed YCbCr data (no row padding).
1914    /// For strided data, use [`encode_ycbcr_planar_strided`](Self::encode_ycbcr_planar_strided).
1915    ///
1916    /// # Arguments
1917    /// * `y` - Luma plane (width × height bytes, tightly packed)
1918    /// * `cb` - Cb chroma plane (chroma_width × chroma_height bytes)
1919    /// * `cr` - Cr chroma plane (chroma_width × chroma_height bytes)
1920    /// * `width` - Image width in pixels
1921    /// * `height` - Image height in pixels
1922    ///
1923    /// The chroma plane dimensions depend on the subsampling mode:
1924    /// - 4:4:4: chroma_width = width, chroma_height = height
1925    /// - 4:2:2: chroma_width = ceil(width/2), chroma_height = height
1926    /// - 4:2:0: chroma_width = ceil(width/2), chroma_height = ceil(height/2)
1927    ///
1928    /// # Returns
1929    /// JPEG-encoded data as a `Vec<u8>`.
1930    ///
1931    /// # Errors
1932    /// Returns an error if plane sizes don't match expected dimensions.
1933    pub fn encode_ycbcr_planar(
1934        &self,
1935        y: &[u8],
1936        cb: &[u8],
1937        cr: &[u8],
1938        width: u32,
1939        height: u32,
1940    ) -> Result<Vec<u8>> {
1941        // For packed data, stride equals width
1942        let (luma_h, luma_v) = self.subsampling.luma_factors();
1943        let (chroma_width, _) = sample::subsampled_dimensions(
1944            width as usize,
1945            height as usize,
1946            luma_h as usize,
1947            luma_v as usize,
1948        );
1949        self.encode_ycbcr_planar_strided(
1950            y,
1951            width as usize,
1952            cb,
1953            chroma_width,
1954            cr,
1955            chroma_width,
1956            width,
1957            height,
1958        )
1959    }
1960
1961    /// Encode pre-converted planar YCbCr image data to a writer.
1962    ///
1963    /// See [`encode_ycbcr_planar`](Self::encode_ycbcr_planar) for details.
1964    pub fn encode_ycbcr_planar_to_writer<W: Write>(
1965        &self,
1966        y: &[u8],
1967        cb: &[u8],
1968        cr: &[u8],
1969        width: u32,
1970        height: u32,
1971        output: W,
1972    ) -> Result<()> {
1973        // For packed data, stride equals width
1974        let (luma_h, luma_v) = self.subsampling.luma_factors();
1975        let (chroma_width, _) = sample::subsampled_dimensions(
1976            width as usize,
1977            height as usize,
1978            luma_h as usize,
1979            luma_v as usize,
1980        );
1981        self.encode_ycbcr_planar_strided_to_writer(
1982            y,
1983            width as usize,
1984            cb,
1985            chroma_width,
1986            cr,
1987            chroma_width,
1988            width,
1989            height,
1990            output,
1991        )
1992    }
1993
1994    /// Encode pre-converted planar YCbCr image data with arbitrary strides.
1995    ///
1996    /// This method accepts YCbCr data that has already been:
1997    /// 1. Converted from RGB to YCbCr color space
1998    /// 2. Downsampled according to the encoder's subsampling mode
1999    ///
2000    /// Use this when you have YCbCr data from video decoders or other sources
2001    /// that may have row padding (stride > width).
2002    ///
2003    /// # Arguments
2004    /// * `y` - Luma plane data
2005    /// * `y_stride` - Bytes per row in luma plane (must be >= width)
2006    /// * `cb` - Cb chroma plane data
2007    /// * `cb_stride` - Bytes per row in Cb plane (must be >= chroma_width)
2008    /// * `cr` - Cr chroma plane data
2009    /// * `cr_stride` - Bytes per row in Cr plane (must be >= chroma_width)
2010    /// * `width` - Image width in pixels
2011    /// * `height` - Image height in pixels
2012    ///
2013    /// The chroma plane dimensions depend on the subsampling mode:
2014    /// - 4:4:4: chroma_width = width, chroma_height = height
2015    /// - 4:2:2: chroma_width = ceil(width/2), chroma_height = height
2016    /// - 4:2:0: chroma_width = ceil(width/2), chroma_height = ceil(height/2)
2017    ///
2018    /// # Returns
2019    /// JPEG-encoded data as a `Vec<u8>`.
2020    ///
2021    /// # Errors
2022    /// Returns an error if:
2023    /// - Strides are less than the required width
2024    /// - Plane sizes don't match stride × height
2025    #[allow(clippy::too_many_arguments)]
2026    pub fn encode_ycbcr_planar_strided(
2027        &self,
2028        y: &[u8],
2029        y_stride: usize,
2030        cb: &[u8],
2031        cb_stride: usize,
2032        cr: &[u8],
2033        cr_stride: usize,
2034        width: u32,
2035        height: u32,
2036    ) -> Result<Vec<u8>> {
2037        let mut output = Vec::new();
2038        self.encode_ycbcr_planar_strided_to_writer(
2039            y,
2040            y_stride,
2041            cb,
2042            cb_stride,
2043            cr,
2044            cr_stride,
2045            width,
2046            height,
2047            &mut output,
2048        )?;
2049        Ok(output)
2050    }
2051
2052    /// Encode pre-converted planar YCbCr image data with arbitrary strides to a writer.
2053    ///
2054    /// See [`encode_ycbcr_planar_strided`](Self::encode_ycbcr_planar_strided) for details.
2055    #[allow(clippy::too_many_arguments)]
2056    pub fn encode_ycbcr_planar_strided_to_writer<W: Write>(
2057        &self,
2058        y: &[u8],
2059        y_stride: usize,
2060        cb: &[u8],
2061        cb_stride: usize,
2062        cr: &[u8],
2063        cr_stride: usize,
2064        width: u32,
2065        height: u32,
2066        output: W,
2067    ) -> Result<()> {
2068        let width = width as usize;
2069        let height = height as usize;
2070
2071        // Validate dimensions
2072        if width == 0 || height == 0 {
2073            return Err(Error::InvalidDimensions {
2074                width: width as u32,
2075                height: height as u32,
2076            });
2077        }
2078
2079        // Validate Y stride
2080        if y_stride < width {
2081            return Err(Error::InvalidSamplingFactor {
2082                h: y_stride as u8,
2083                v: width as u8,
2084            });
2085        }
2086
2087        let (luma_h, luma_v) = self.subsampling.luma_factors();
2088        let (chroma_width, chroma_height) =
2089            sample::subsampled_dimensions(width, height, luma_h as usize, luma_v as usize);
2090
2091        // Validate chroma strides
2092        if cb_stride < chroma_width {
2093            return Err(Error::InvalidSamplingFactor {
2094                h: cb_stride as u8,
2095                v: chroma_width as u8,
2096            });
2097        }
2098        if cr_stride < chroma_width {
2099            return Err(Error::InvalidSamplingFactor {
2100                h: cr_stride as u8,
2101                v: chroma_width as u8,
2102            });
2103        }
2104
2105        // Calculate expected plane sizes (stride × height)
2106        let y_size = y_stride
2107            .checked_mul(height)
2108            .ok_or(Error::InvalidDimensions {
2109                width: width as u32,
2110                height: height as u32,
2111            })?;
2112        let cb_size = cb_stride
2113            .checked_mul(chroma_height)
2114            .ok_or(Error::AllocationFailed)?;
2115        let cr_size = cr_stride
2116            .checked_mul(chroma_height)
2117            .ok_or(Error::AllocationFailed)?;
2118
2119        // Validate Y plane size
2120        if y.len() < y_size {
2121            return Err(Error::BufferSizeMismatch {
2122                expected: y_size,
2123                actual: y.len(),
2124            });
2125        }
2126
2127        // Validate Cb plane size
2128        if cb.len() < cb_size {
2129            return Err(Error::BufferSizeMismatch {
2130                expected: cb_size,
2131                actual: cb.len(),
2132            });
2133        }
2134
2135        // Validate Cr plane size
2136        if cr.len() < cr_size {
2137            return Err(Error::BufferSizeMismatch {
2138                expected: cr_size,
2139                actual: cr.len(),
2140            });
2141        }
2142
2143        // Expand planes to MCU-aligned dimensions
2144        let (mcu_width, mcu_height) =
2145            sample::mcu_aligned_dimensions(width, height, luma_h as usize, luma_v as usize);
2146        let (mcu_chroma_w, mcu_chroma_h) =
2147            (mcu_width / luma_h as usize, mcu_height / luma_v as usize);
2148
2149        let mcu_y_size = mcu_width
2150            .checked_mul(mcu_height)
2151            .ok_or(Error::AllocationFailed)?;
2152        let mcu_chroma_size = mcu_chroma_w
2153            .checked_mul(mcu_chroma_h)
2154            .ok_or(Error::AllocationFailed)?;
2155        let mut y_mcu = try_alloc_vec(0u8, mcu_y_size)?;
2156        let mut cb_mcu = try_alloc_vec(0u8, mcu_chroma_size)?;
2157        let mut cr_mcu = try_alloc_vec(0u8, mcu_chroma_size)?;
2158
2159        sample::expand_to_mcu_strided(
2160            y, width, y_stride, height, &mut y_mcu, mcu_width, mcu_height,
2161        );
2162        sample::expand_to_mcu_strided(
2163            cb,
2164            chroma_width,
2165            cb_stride,
2166            chroma_height,
2167            &mut cb_mcu,
2168            mcu_chroma_w,
2169            mcu_chroma_h,
2170        );
2171        sample::expand_to_mcu_strided(
2172            cr,
2173            chroma_width,
2174            cr_stride,
2175            chroma_height,
2176            &mut cr_mcu,
2177            mcu_chroma_w,
2178            mcu_chroma_h,
2179        );
2180
2181        // Encode using shared helper
2182        self.encode_ycbcr_mcu_to_writer(
2183            &y_mcu,
2184            &cb_mcu,
2185            &cr_mcu,
2186            width,
2187            height,
2188            mcu_width,
2189            mcu_height,
2190            chroma_width,
2191            chroma_height,
2192            mcu_chroma_w,
2193            mcu_chroma_h,
2194            output,
2195        )
2196    }
2197
2198    /// Encode RGB image data to a writer.
2199    pub fn encode_rgb_to_writer<W: Write>(
2200        &self,
2201        rgb_data: &[u8],
2202        width: u32,
2203        height: u32,
2204        output: W,
2205    ) -> Result<()> {
2206        let width = width as usize;
2207        let height = height as usize;
2208
2209        // Step 1: Convert RGB to YCbCr
2210        // Use checked arithmetic for num_pixels calculation
2211        let num_pixels = width.checked_mul(height).ok_or(Error::InvalidDimensions {
2212            width: width as u32,
2213            height: height as u32,
2214        })?;
2215
2216        let mut y_plane = try_alloc_vec(0u8, num_pixels)?;
2217        let mut cb_plane = try_alloc_vec(0u8, num_pixels)?;
2218        let mut cr_plane = try_alloc_vec(0u8, num_pixels)?;
2219
2220        if self.c_compat_color {
2221            // Use C mozjpeg-compatible color conversion for exact baseline parity
2222            convert_rgb_to_ycbcr_c_compat(
2223                rgb_data,
2224                &mut y_plane,
2225                &mut cb_plane,
2226                &mut cr_plane,
2227                num_pixels,
2228            );
2229        } else {
2230            // Use fast SIMD color conversion (default)
2231            (self.simd.color_convert_rgb_to_ycbcr)(
2232                rgb_data,
2233                &mut y_plane,
2234                &mut cb_plane,
2235                &mut cr_plane,
2236                num_pixels,
2237            );
2238        }
2239
2240        // Step 2: Downsample chroma if needed
2241        let (luma_h, luma_v) = self.subsampling.luma_factors();
2242        let (chroma_width, chroma_height) =
2243            sample::subsampled_dimensions(width, height, luma_h as usize, luma_v as usize);
2244
2245        let chroma_size = chroma_width
2246            .checked_mul(chroma_height)
2247            .ok_or(Error::AllocationFailed)?;
2248        let mut cb_subsampled = try_alloc_vec(0u8, chroma_size)?;
2249        let mut cr_subsampled = try_alloc_vec(0u8, chroma_size)?;
2250
2251        sample::downsample_plane(
2252            &cb_plane,
2253            width,
2254            height,
2255            luma_h as usize,
2256            luma_v as usize,
2257            &mut cb_subsampled,
2258        );
2259        sample::downsample_plane(
2260            &cr_plane,
2261            width,
2262            height,
2263            luma_h as usize,
2264            luma_v as usize,
2265            &mut cr_subsampled,
2266        );
2267
2268        // Step 3: Expand planes to MCU-aligned dimensions
2269        let (mcu_width, mcu_height) =
2270            sample::mcu_aligned_dimensions(width, height, luma_h as usize, luma_v as usize);
2271        let (mcu_chroma_w, mcu_chroma_h) =
2272            (mcu_width / luma_h as usize, mcu_height / luma_v as usize);
2273
2274        let mcu_y_size = mcu_width
2275            .checked_mul(mcu_height)
2276            .ok_or(Error::AllocationFailed)?;
2277        let mcu_chroma_size = mcu_chroma_w
2278            .checked_mul(mcu_chroma_h)
2279            .ok_or(Error::AllocationFailed)?;
2280        let mut y_mcu = try_alloc_vec(0u8, mcu_y_size)?;
2281        let mut cb_mcu = try_alloc_vec(0u8, mcu_chroma_size)?;
2282        let mut cr_mcu = try_alloc_vec(0u8, mcu_chroma_size)?;
2283
2284        sample::expand_to_mcu(&y_plane, width, height, &mut y_mcu, mcu_width, mcu_height);
2285        sample::expand_to_mcu(
2286            &cb_subsampled,
2287            chroma_width,
2288            chroma_height,
2289            &mut cb_mcu,
2290            mcu_chroma_w,
2291            mcu_chroma_h,
2292        );
2293        sample::expand_to_mcu(
2294            &cr_subsampled,
2295            chroma_width,
2296            chroma_height,
2297            &mut cr_mcu,
2298            mcu_chroma_w,
2299            mcu_chroma_h,
2300        );
2301
2302        // Encode using shared helper
2303        self.encode_ycbcr_mcu_to_writer(
2304            &y_mcu,
2305            &cb_mcu,
2306            &cr_mcu,
2307            width,
2308            height,
2309            mcu_width,
2310            mcu_height,
2311            chroma_width,
2312            chroma_height,
2313            mcu_chroma_w,
2314            mcu_chroma_h,
2315            output,
2316        )
2317    }
2318
2319    /// Internal helper: Encode MCU-aligned YCbCr planes to JPEG.
2320    ///
2321    /// This is the shared encoding logic used by both `encode_rgb_to_writer`
2322    /// and `encode_ycbcr_planar_to_writer`.
2323    #[allow(clippy::too_many_arguments)]
2324    fn encode_ycbcr_mcu_to_writer<W: Write>(
2325        &self,
2326        y_mcu: &[u8],
2327        cb_mcu: &[u8],
2328        cr_mcu: &[u8],
2329        width: usize,
2330        height: usize,
2331        mcu_width: usize,
2332        mcu_height: usize,
2333        chroma_width: usize,
2334        chroma_height: usize,
2335        mcu_chroma_w: usize,
2336        mcu_chroma_h: usize,
2337        output: W,
2338    ) -> Result<()> {
2339        let (luma_h, luma_v) = self.subsampling.luma_factors();
2340
2341        // Step 4: Create quantization tables
2342        let (luma_qtable, chroma_qtable) = {
2343            let (default_luma, default_chroma) =
2344                create_quant_tables(self.quality, self.quant_table_idx, self.force_baseline);
2345            let luma = if let Some(ref custom) = self.custom_luma_qtable {
2346                crate::quant::create_quant_table(custom, self.quality, self.force_baseline)
2347            } else {
2348                default_luma
2349            };
2350            let chroma = if let Some(ref custom) = self.custom_chroma_qtable {
2351                crate::quant::create_quant_table(custom, self.quality, self.force_baseline)
2352            } else {
2353                default_chroma
2354            };
2355            (luma, chroma)
2356        };
2357
2358        // Step 5: Create Huffman tables (standard tables)
2359        let dc_luma_huff = create_std_dc_luma_table();
2360        let dc_chroma_huff = create_std_dc_chroma_table();
2361        let ac_luma_huff = create_std_ac_luma_table();
2362        let ac_chroma_huff = create_std_ac_chroma_table();
2363
2364        let dc_luma_derived = DerivedTable::from_huff_table(&dc_luma_huff, true)?;
2365        let dc_chroma_derived = DerivedTable::from_huff_table(&dc_chroma_huff, true)?;
2366        let ac_luma_derived = DerivedTable::from_huff_table(&ac_luma_huff, false)?;
2367        let ac_chroma_derived = DerivedTable::from_huff_table(&ac_chroma_huff, false)?;
2368
2369        // Step 6: Set up components
2370        let components = create_ycbcr_components(self.subsampling);
2371
2372        // Step 7: Write JPEG file
2373        let mut marker_writer = MarkerWriter::new(output);
2374
2375        // SOI
2376        marker_writer.write_soi()?;
2377
2378        // APP0 (JFIF) with pixel density
2379        marker_writer.write_jfif_app0(
2380            self.pixel_density.unit as u8,
2381            self.pixel_density.x,
2382            self.pixel_density.y,
2383        )?;
2384
2385        // APP1 (EXIF) - if present
2386        if let Some(ref exif) = self.exif_data {
2387            marker_writer.write_app1_exif(exif)?;
2388        }
2389
2390        // ICC profile (if present)
2391        if let Some(ref icc) = self.icc_profile {
2392            marker_writer.write_icc_profile(icc)?;
2393        }
2394
2395        // Custom APP markers
2396        for (app_num, data) in &self.custom_markers {
2397            marker_writer.write_app(*app_num, data)?;
2398        }
2399
2400        // DQT (quantization tables in zigzag order) - combined into single marker
2401        let luma_qtable_zz = natural_to_zigzag(&luma_qtable.values);
2402        let chroma_qtable_zz = natural_to_zigzag(&chroma_qtable.values);
2403        marker_writer
2404            .write_dqt_multiple(&[(0, &luma_qtable_zz, false), (1, &chroma_qtable_zz, false)])?;
2405
2406        // SOF
2407        marker_writer.write_sof(
2408            self.progressive,
2409            8,
2410            height as u16,
2411            width as u16,
2412            &components,
2413        )?;
2414
2415        // DRI (restart interval) - if enabled
2416        if self.restart_interval > 0 {
2417            marker_writer.write_dri(self.restart_interval)?;
2418        }
2419
2420        // DHT (Huffman tables) - written here for non-optimized modes,
2421        // or later after frequency counting for optimized modes
2422        if !self.optimize_huffman {
2423            // Combine all tables into single DHT marker for smaller file size
2424            marker_writer.write_dht_multiple(&[
2425                (0, false, &dc_luma_huff),
2426                (1, false, &dc_chroma_huff),
2427                (0, true, &ac_luma_huff),
2428                (1, true, &ac_chroma_huff),
2429            ])?;
2430        }
2431
2432        if self.progressive {
2433            // Progressive mode: Store all blocks, then encode multiple scans
2434            let mcu_rows = mcu_height / (DCTSIZE * luma_v as usize);
2435            let mcu_cols = mcu_width / (DCTSIZE * luma_h as usize);
2436            let num_y_blocks = mcu_rows
2437                .checked_mul(mcu_cols)
2438                .and_then(|n| n.checked_mul(luma_h as usize))
2439                .and_then(|n| n.checked_mul(luma_v as usize))
2440                .ok_or(Error::AllocationFailed)?;
2441            let num_chroma_blocks = mcu_rows
2442                .checked_mul(mcu_cols)
2443                .ok_or(Error::AllocationFailed)?;
2444
2445            // Collect all quantized blocks
2446            let mut y_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_y_blocks)?;
2447            let mut cb_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_chroma_blocks)?;
2448            let mut cr_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_chroma_blocks)?;
2449
2450            // Optionally collect raw DCT for DC trellis
2451            let dc_trellis_enabled = self.trellis.enabled && self.trellis.dc_enabled;
2452            let mut y_raw_dct = if dc_trellis_enabled {
2453                Some(try_alloc_vec_array::<i32, DCTSIZE2>(num_y_blocks)?)
2454            } else {
2455                None
2456            };
2457            let mut cb_raw_dct = if dc_trellis_enabled {
2458                Some(try_alloc_vec_array::<i32, DCTSIZE2>(num_chroma_blocks)?)
2459            } else {
2460                None
2461            };
2462            let mut cr_raw_dct = if dc_trellis_enabled {
2463                Some(try_alloc_vec_array::<i32, DCTSIZE2>(num_chroma_blocks)?)
2464            } else {
2465                None
2466            };
2467
2468            self.collect_blocks(
2469                y_mcu,
2470                mcu_width,
2471                mcu_height,
2472                cb_mcu,
2473                cr_mcu,
2474                mcu_chroma_w,
2475                mcu_chroma_h,
2476                &luma_qtable.values,
2477                &chroma_qtable.values,
2478                &ac_luma_derived,
2479                &ac_chroma_derived,
2480                &mut y_blocks,
2481                &mut cb_blocks,
2482                &mut cr_blocks,
2483                y_raw_dct.as_deref_mut(),
2484                cb_raw_dct.as_deref_mut(),
2485                cr_raw_dct.as_deref_mut(),
2486                luma_h,
2487                luma_v,
2488            )?;
2489
2490            // Run DC trellis optimization if enabled
2491            // C mozjpeg processes DC trellis row by row (each row is an independent chain)
2492            if dc_trellis_enabled {
2493                let h = luma_h as usize;
2494                let v = luma_v as usize;
2495                let y_block_cols = mcu_cols * h;
2496                let y_block_rows = mcu_rows * v;
2497
2498                if let Some(ref y_raw) = y_raw_dct {
2499                    run_dc_trellis_by_row(
2500                        y_raw,
2501                        &mut y_blocks,
2502                        luma_qtable.values[0],
2503                        &dc_luma_derived,
2504                        self.trellis.lambda_log_scale1,
2505                        self.trellis.lambda_log_scale2,
2506                        y_block_rows,
2507                        y_block_cols,
2508                        mcu_cols,
2509                        h,
2510                        v,
2511                        self.trellis.delta_dc_weight,
2512                    );
2513                }
2514                // Chroma has 1x1 per MCU, so MCU order = row order
2515                if let Some(ref cb_raw) = cb_raw_dct {
2516                    run_dc_trellis_by_row(
2517                        cb_raw,
2518                        &mut cb_blocks,
2519                        chroma_qtable.values[0],
2520                        &dc_chroma_derived,
2521                        self.trellis.lambda_log_scale1,
2522                        self.trellis.lambda_log_scale2,
2523                        mcu_rows,
2524                        mcu_cols,
2525                        mcu_cols,
2526                        1,
2527                        1,
2528                        self.trellis.delta_dc_weight,
2529                    );
2530                }
2531                if let Some(ref cr_raw) = cr_raw_dct {
2532                    run_dc_trellis_by_row(
2533                        cr_raw,
2534                        &mut cr_blocks,
2535                        chroma_qtable.values[0],
2536                        &dc_chroma_derived,
2537                        self.trellis.lambda_log_scale1,
2538                        self.trellis.lambda_log_scale2,
2539                        mcu_rows,
2540                        mcu_cols,
2541                        mcu_cols,
2542                        1,
2543                        1,
2544                        self.trellis.delta_dc_weight,
2545                    );
2546                }
2547            }
2548
2549            // Run EOB optimization if enabled (cross-block EOBRUN optimization)
2550            if self.trellis.enabled && self.trellis.eob_opt {
2551                use crate::trellis::{estimate_block_eob_info, optimize_eob_runs};
2552
2553                // Y component
2554                let y_eob_info: Vec<_> = y_blocks
2555                    .iter()
2556                    .map(|block| estimate_block_eob_info(block, &ac_luma_derived, 1, 63))
2557                    .collect();
2558                optimize_eob_runs(&mut y_blocks, &y_eob_info, &ac_luma_derived, 1, 63);
2559
2560                // Cb component
2561                let cb_eob_info: Vec<_> = cb_blocks
2562                    .iter()
2563                    .map(|block| estimate_block_eob_info(block, &ac_chroma_derived, 1, 63))
2564                    .collect();
2565                optimize_eob_runs(&mut cb_blocks, &cb_eob_info, &ac_chroma_derived, 1, 63);
2566
2567                // Cr component
2568                let cr_eob_info: Vec<_> = cr_blocks
2569                    .iter()
2570                    .map(|block| estimate_block_eob_info(block, &ac_chroma_derived, 1, 63))
2571                    .collect();
2572                optimize_eob_runs(&mut cr_blocks, &cr_eob_info, &ac_chroma_derived, 1, 63);
2573            }
2574
2575            // Generate progressive scan script
2576            let scans = if self.optimize_scans {
2577                // When optimize_scans is enabled, use the scan optimizer to find
2578                // the best frequency split and Al levels, including SA refinement.
2579                self.optimize_progressive_scans(
2580                    3, // num_components
2581                    &y_blocks,
2582                    &cb_blocks,
2583                    &cr_blocks,
2584                    mcu_rows,
2585                    mcu_cols,
2586                    luma_h,
2587                    luma_v,
2588                    width,
2589                    height,
2590                    chroma_width,
2591                    chroma_height,
2592                    &dc_luma_derived,
2593                    &dc_chroma_derived,
2594                    &ac_luma_derived,
2595                    &ac_chroma_derived,
2596                )?
2597            } else {
2598                // Use C mozjpeg's 9-scan JCP_MAX_COMPRESSION script.
2599                // This matches jcparam.c lines 932-947 (the JCP_MAX_COMPRESSION branch).
2600                // mozjpeg-sys defaults to JCP_MAX_COMPRESSION profile, which uses:
2601                // - DC with no successive approximation (Al=0)
2602                // - 8/9 frequency split for luma with successive approximation
2603                // - No successive approximation for chroma
2604                generate_mozjpeg_max_compression_scans(3)
2605            };
2606
2607            // Build Huffman tables and encode scans
2608            //
2609            // When optimize_scans=true, each AC scan gets its own optimal Huffman table
2610            // written immediately before the scan. This matches C mozjpeg behavior and
2611            // ensures the trial encoder's size estimates match actual encoded sizes.
2612            //
2613            // When optimize_huffman=true, use per-scan AC tables (matching C mozjpeg).
2614            // C automatically enables optimize_coding for progressive mode and does
2615            // 2 passes per scan: gather statistics, then output with optimal tables.
2616
2617            if self.optimize_huffman {
2618                // Per-scan AC tables mode: DC tables global, AC tables per-scan
2619                // This matches C mozjpeg's progressive behavior
2620
2621                // Count DC frequencies for first-pass DC scans only (Ah == 0)
2622                // DC refinement scans (Ah > 0) don't use Huffman coding - they output raw bits
2623                let mut dc_luma_freq = FrequencyCounter::new();
2624                let mut dc_chroma_freq = FrequencyCounter::new();
2625
2626                for scan in &scans {
2627                    let is_dc_first_scan = scan.ss == 0 && scan.se == 0 && scan.ah == 0;
2628                    if is_dc_first_scan {
2629                        self.count_dc_scan_symbols(
2630                            scan,
2631                            &y_blocks,
2632                            &cb_blocks,
2633                            &cr_blocks,
2634                            mcu_rows,
2635                            mcu_cols,
2636                            luma_h,
2637                            luma_v,
2638                            &mut dc_luma_freq,
2639                            &mut dc_chroma_freq,
2640                        );
2641                    }
2642                }
2643
2644                // Generate and write DC tables upfront
2645                let opt_dc_luma_huff = dc_luma_freq.generate_table()?;
2646                let opt_dc_chroma_huff = dc_chroma_freq.generate_table()?;
2647                marker_writer.write_dht_multiple(&[
2648                    (0, false, &opt_dc_luma_huff),
2649                    (1, false, &opt_dc_chroma_huff),
2650                ])?;
2651
2652                let opt_dc_luma = DerivedTable::from_huff_table(&opt_dc_luma_huff, true)?;
2653                let opt_dc_chroma = DerivedTable::from_huff_table(&opt_dc_chroma_huff, true)?;
2654
2655                // Get output writer from marker_writer
2656                let output = marker_writer.into_inner();
2657                let mut bit_writer = BitWriter::new(output);
2658
2659                // Encode each scan with per-scan AC tables
2660                for scan in &scans {
2661                    bit_writer.flush()?;
2662                    let mut inner = bit_writer.into_inner();
2663
2664                    let is_dc_scan = scan.ss == 0 && scan.se == 0;
2665
2666                    if !is_dc_scan {
2667                        // AC scan: build per-scan optimal Huffman table
2668                        let comp_idx = scan.component_index[0] as usize;
2669                        let blocks = match comp_idx {
2670                            0 => &y_blocks,
2671                            1 => &cb_blocks,
2672                            2 => &cr_blocks,
2673                            _ => &y_blocks,
2674                        };
2675                        let (block_cols, block_rows) = if comp_idx == 0 {
2676                            (width.div_ceil(DCTSIZE), height.div_ceil(DCTSIZE))
2677                        } else {
2678                            (
2679                                chroma_width.div_ceil(DCTSIZE),
2680                                chroma_height.div_ceil(DCTSIZE),
2681                            )
2682                        };
2683
2684                        // Count frequencies for this scan only
2685                        let mut ac_freq = FrequencyCounter::new();
2686                        self.count_ac_scan_symbols(
2687                            scan,
2688                            blocks,
2689                            mcu_rows,
2690                            mcu_cols,
2691                            luma_h,
2692                            luma_v,
2693                            comp_idx,
2694                            block_cols,
2695                            block_rows,
2696                            &mut ac_freq,
2697                        );
2698
2699                        // Build optimal table and write DHT
2700                        let ac_huff = ac_freq.generate_table()?;
2701                        let table_idx = if comp_idx == 0 { 0 } else { 1 };
2702                        write_dht_marker(&mut inner, table_idx, true, &ac_huff)?;
2703
2704                        // Write SOS and encode
2705                        write_sos_marker(&mut inner, scan, &components)?;
2706                        bit_writer = BitWriter::new(inner);
2707
2708                        let ac_derived = DerivedTable::from_huff_table(&ac_huff, false)?;
2709                        let mut prog_encoder = ProgressiveEncoder::new(&mut bit_writer);
2710
2711                        self.encode_progressive_scan(
2712                            scan,
2713                            &y_blocks,
2714                            &cb_blocks,
2715                            &cr_blocks,
2716                            mcu_rows,
2717                            mcu_cols,
2718                            luma_h,
2719                            luma_v,
2720                            width,
2721                            height,
2722                            chroma_width,
2723                            chroma_height,
2724                            &opt_dc_luma,
2725                            &opt_dc_chroma,
2726                            &ac_derived,
2727                            &ac_derived, // Not used for AC scans, but needed for signature
2728                            &mut prog_encoder,
2729                        )?;
2730                        prog_encoder.finish_scan(Some(&ac_derived))?;
2731                    } else {
2732                        // DC scan: use global DC tables
2733                        write_sos_marker(&mut inner, scan, &components)?;
2734                        bit_writer = BitWriter::new(inner);
2735
2736                        let mut prog_encoder = ProgressiveEncoder::new(&mut bit_writer);
2737                        self.encode_progressive_scan(
2738                            scan,
2739                            &y_blocks,
2740                            &cb_blocks,
2741                            &cr_blocks,
2742                            mcu_rows,
2743                            mcu_cols,
2744                            luma_h,
2745                            luma_v,
2746                            width,
2747                            height,
2748                            chroma_width,
2749                            chroma_height,
2750                            &opt_dc_luma,
2751                            &opt_dc_chroma,
2752                            &ac_luma_derived, // Not used for DC scans
2753                            &ac_chroma_derived,
2754                            &mut prog_encoder,
2755                        )?;
2756                        prog_encoder.finish_scan(None)?;
2757                    }
2758                }
2759
2760                // Flush and write EOI
2761                bit_writer.flush()?;
2762                let mut output = bit_writer.into_inner();
2763                output.write_all(&[0xFF, 0xD9])?;
2764            } else {
2765                // Standard tables mode (no optimization)
2766                let output = marker_writer.into_inner();
2767                let mut bit_writer = BitWriter::new(output);
2768
2769                for scan in &scans {
2770                    bit_writer.flush()?;
2771                    let mut inner = bit_writer.into_inner();
2772                    write_sos_marker(&mut inner, scan, &components)?;
2773
2774                    bit_writer = BitWriter::new(inner);
2775                    let mut prog_encoder = ProgressiveEncoder::new_standard_tables(&mut bit_writer);
2776
2777                    self.encode_progressive_scan(
2778                        scan,
2779                        &y_blocks,
2780                        &cb_blocks,
2781                        &cr_blocks,
2782                        mcu_rows,
2783                        mcu_cols,
2784                        luma_h,
2785                        luma_v,
2786                        width,
2787                        height,
2788                        chroma_width,
2789                        chroma_height,
2790                        &dc_luma_derived,
2791                        &dc_chroma_derived,
2792                        &ac_luma_derived,
2793                        &ac_chroma_derived,
2794                        &mut prog_encoder,
2795                    )?;
2796
2797                    let ac_table = if scan.ss > 0 {
2798                        if scan.component_index[0] == 0 {
2799                            Some(&ac_luma_derived)
2800                        } else {
2801                            Some(&ac_chroma_derived)
2802                        }
2803                    } else {
2804                        None
2805                    };
2806                    prog_encoder.finish_scan(ac_table)?;
2807                }
2808
2809                bit_writer.flush()?;
2810                let mut output = bit_writer.into_inner();
2811                output.write_all(&[0xFF, 0xD9])?;
2812            }
2813        } else if self.optimize_huffman {
2814            // Baseline mode with Huffman optimization (2-pass)
2815            // Pass 1: Collect blocks and count frequencies
2816            let mcu_rows = mcu_height / (DCTSIZE * luma_v as usize);
2817            let mcu_cols = mcu_width / (DCTSIZE * luma_h as usize);
2818            let num_y_blocks = mcu_rows
2819                .checked_mul(mcu_cols)
2820                .and_then(|n| n.checked_mul(luma_h as usize))
2821                .and_then(|n| n.checked_mul(luma_v as usize))
2822                .ok_or(Error::AllocationFailed)?;
2823            let num_chroma_blocks = mcu_rows
2824                .checked_mul(mcu_cols)
2825                .ok_or(Error::AllocationFailed)?;
2826
2827            let mut y_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_y_blocks)?;
2828            let mut cb_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_chroma_blocks)?;
2829            let mut cr_blocks = try_alloc_vec_array::<i16, DCTSIZE2>(num_chroma_blocks)?;
2830
2831            // Optionally collect raw DCT for DC trellis
2832            let dc_trellis_enabled = self.trellis.enabled && self.trellis.dc_enabled;
2833            let mut y_raw_dct = if dc_trellis_enabled {
2834                Some(try_alloc_vec_array::<i32, DCTSIZE2>(num_y_blocks)?)
2835            } else {
2836                None
2837            };
2838            let mut cb_raw_dct = if dc_trellis_enabled {
2839                Some(try_alloc_vec_array::<i32, DCTSIZE2>(num_chroma_blocks)?)
2840            } else {
2841                None
2842            };
2843            let mut cr_raw_dct = if dc_trellis_enabled {
2844                Some(try_alloc_vec_array::<i32, DCTSIZE2>(num_chroma_blocks)?)
2845            } else {
2846                None
2847            };
2848
2849            self.collect_blocks(
2850                y_mcu,
2851                mcu_width,
2852                mcu_height,
2853                cb_mcu,
2854                cr_mcu,
2855                mcu_chroma_w,
2856                mcu_chroma_h,
2857                &luma_qtable.values,
2858                &chroma_qtable.values,
2859                &ac_luma_derived,
2860                &ac_chroma_derived,
2861                &mut y_blocks,
2862                &mut cb_blocks,
2863                &mut cr_blocks,
2864                y_raw_dct.as_deref_mut(),
2865                cb_raw_dct.as_deref_mut(),
2866                cr_raw_dct.as_deref_mut(),
2867                luma_h,
2868                luma_v,
2869            )?;
2870
2871            // Run DC trellis optimization if enabled
2872            // C mozjpeg processes DC trellis row by row (each row is an independent chain)
2873            if dc_trellis_enabled {
2874                let h = luma_h as usize;
2875                let v = luma_v as usize;
2876                let y_block_cols = mcu_cols * h;
2877                let y_block_rows = mcu_rows * v;
2878
2879                if let Some(ref y_raw) = y_raw_dct {
2880                    run_dc_trellis_by_row(
2881                        y_raw,
2882                        &mut y_blocks,
2883                        luma_qtable.values[0],
2884                        &dc_luma_derived,
2885                        self.trellis.lambda_log_scale1,
2886                        self.trellis.lambda_log_scale2,
2887                        y_block_rows,
2888                        y_block_cols,
2889                        mcu_cols,
2890                        h,
2891                        v,
2892                        self.trellis.delta_dc_weight,
2893                    );
2894                }
2895                // Chroma has 1x1 per MCU, so MCU order = row order
2896                if let Some(ref cb_raw) = cb_raw_dct {
2897                    run_dc_trellis_by_row(
2898                        cb_raw,
2899                        &mut cb_blocks,
2900                        chroma_qtable.values[0],
2901                        &dc_chroma_derived,
2902                        self.trellis.lambda_log_scale1,
2903                        self.trellis.lambda_log_scale2,
2904                        mcu_rows,
2905                        mcu_cols,
2906                        mcu_cols,
2907                        1,
2908                        1,
2909                        self.trellis.delta_dc_weight,
2910                    );
2911                }
2912                if let Some(ref cr_raw) = cr_raw_dct {
2913                    run_dc_trellis_by_row(
2914                        cr_raw,
2915                        &mut cr_blocks,
2916                        chroma_qtable.values[0],
2917                        &dc_chroma_derived,
2918                        self.trellis.lambda_log_scale1,
2919                        self.trellis.lambda_log_scale2,
2920                        mcu_rows,
2921                        mcu_cols,
2922                        mcu_cols,
2923                        1,
2924                        1,
2925                        self.trellis.delta_dc_weight,
2926                    );
2927                }
2928            }
2929
2930            // Count symbol frequencies
2931            let mut dc_luma_freq = FrequencyCounter::new();
2932            let mut dc_chroma_freq = FrequencyCounter::new();
2933            let mut ac_luma_freq = FrequencyCounter::new();
2934            let mut ac_chroma_freq = FrequencyCounter::new();
2935
2936            let mut counter = SymbolCounter::new();
2937            let blocks_per_mcu_y = (luma_h * luma_v) as usize;
2938            let mut y_idx = 0;
2939            let mut c_idx = 0;
2940
2941            for _mcu_row in 0..mcu_rows {
2942                for _mcu_col in 0..mcu_cols {
2943                    // Y blocks
2944                    for _ in 0..blocks_per_mcu_y {
2945                        counter.count_block(
2946                            &y_blocks[y_idx],
2947                            0,
2948                            &mut dc_luma_freq,
2949                            &mut ac_luma_freq,
2950                        );
2951                        y_idx += 1;
2952                    }
2953                    // Cb block
2954                    counter.count_block(
2955                        &cb_blocks[c_idx],
2956                        1,
2957                        &mut dc_chroma_freq,
2958                        &mut ac_chroma_freq,
2959                    );
2960                    // Cr block
2961                    counter.count_block(
2962                        &cr_blocks[c_idx],
2963                        2,
2964                        &mut dc_chroma_freq,
2965                        &mut ac_chroma_freq,
2966                    );
2967                    c_idx += 1;
2968                }
2969            }
2970
2971            // Generate optimized Huffman tables
2972            let opt_dc_luma_huff = dc_luma_freq.generate_table()?;
2973            let opt_dc_chroma_huff = dc_chroma_freq.generate_table()?;
2974            let opt_ac_luma_huff = ac_luma_freq.generate_table()?;
2975            let opt_ac_chroma_huff = ac_chroma_freq.generate_table()?;
2976
2977            let opt_dc_luma = DerivedTable::from_huff_table(&opt_dc_luma_huff, true)?;
2978            let opt_dc_chroma = DerivedTable::from_huff_table(&opt_dc_chroma_huff, true)?;
2979            let opt_ac_luma = DerivedTable::from_huff_table(&opt_ac_luma_huff, false)?;
2980            let opt_ac_chroma = DerivedTable::from_huff_table(&opt_ac_chroma_huff, false)?;
2981
2982            // Write DHT with optimized tables - combined into single marker
2983            marker_writer.write_dht_multiple(&[
2984                (0, false, &opt_dc_luma_huff),
2985                (1, false, &opt_dc_chroma_huff),
2986                (0, true, &opt_ac_luma_huff),
2987                (1, true, &opt_ac_chroma_huff),
2988            ])?;
2989
2990            // Write SOS and encode
2991            let scans = generate_baseline_scan(3);
2992            let scan = &scans[0];
2993            marker_writer.write_sos(scan, &components)?;
2994
2995            let mut output = marker_writer.into_inner();
2996
2997            // Use SIMD entropy encoder on x86_64 for ~2x faster encoding
2998            #[cfg(target_arch = "x86_64")]
2999            {
3000                let mut simd_entropy = SimdEntropyEncoder::new();
3001
3002                // Encode from stored blocks with restart marker support
3003                y_idx = 0;
3004                c_idx = 0;
3005                let restart_interval = self.restart_interval as usize;
3006                let mut mcu_count = 0usize;
3007                let mut restart_num = 0u8;
3008
3009                for _mcu_row in 0..mcu_rows {
3010                    for _mcu_col in 0..mcu_cols {
3011                        // Emit restart marker if needed (before this MCU, not first)
3012                        if restart_interval > 0
3013                            && mcu_count > 0
3014                            && mcu_count.is_multiple_of(restart_interval)
3015                        {
3016                            simd_entropy.emit_restart(restart_num);
3017                            restart_num = restart_num.wrapping_add(1) & 0x07;
3018                        }
3019
3020                        // Y blocks
3021                        for _ in 0..blocks_per_mcu_y {
3022                            simd_entropy.encode_block(
3023                                &y_blocks[y_idx],
3024                                0,
3025                                &opt_dc_luma,
3026                                &opt_ac_luma,
3027                            );
3028                            y_idx += 1;
3029                        }
3030                        // Cb block
3031                        simd_entropy.encode_block(
3032                            &cb_blocks[c_idx],
3033                            1,
3034                            &opt_dc_chroma,
3035                            &opt_ac_chroma,
3036                        );
3037                        // Cr block
3038                        simd_entropy.encode_block(
3039                            &cr_blocks[c_idx],
3040                            2,
3041                            &opt_dc_chroma,
3042                            &opt_ac_chroma,
3043                        );
3044                        c_idx += 1;
3045                        mcu_count += 1;
3046                    }
3047                }
3048
3049                simd_entropy.flush();
3050                output.write_all(simd_entropy.get_buffer())?;
3051            }
3052
3053            // Fallback for non-x86_64 platforms
3054            #[cfg(not(target_arch = "x86_64"))]
3055            {
3056                let mut bit_writer = BitWriter::new(output);
3057                let mut entropy = EntropyEncoder::new(&mut bit_writer);
3058
3059                // Encode from stored blocks with restart marker support
3060                y_idx = 0;
3061                c_idx = 0;
3062                let restart_interval = self.restart_interval as usize;
3063                let mut mcu_count = 0usize;
3064                let mut restart_num = 0u8;
3065
3066                for _mcu_row in 0..mcu_rows {
3067                    for _mcu_col in 0..mcu_cols {
3068                        // Emit restart marker if needed (before this MCU, not first)
3069                        if restart_interval > 0
3070                            && mcu_count > 0
3071                            && mcu_count.is_multiple_of(restart_interval)
3072                        {
3073                            entropy.emit_restart(restart_num)?;
3074                            restart_num = restart_num.wrapping_add(1) & 0x07;
3075                        }
3076
3077                        // Y blocks
3078                        for _ in 0..blocks_per_mcu_y {
3079                            entropy.encode_block(
3080                                &y_blocks[y_idx],
3081                                0,
3082                                &opt_dc_luma,
3083                                &opt_ac_luma,
3084                            )?;
3085                            y_idx += 1;
3086                        }
3087                        // Cb block
3088                        entropy.encode_block(
3089                            &cb_blocks[c_idx],
3090                            1,
3091                            &opt_dc_chroma,
3092                            &opt_ac_chroma,
3093                        )?;
3094                        // Cr block
3095                        entropy.encode_block(
3096                            &cr_blocks[c_idx],
3097                            2,
3098                            &opt_dc_chroma,
3099                            &opt_ac_chroma,
3100                        )?;
3101                        c_idx += 1;
3102                        mcu_count += 1;
3103                    }
3104                }
3105
3106                bit_writer.flush()?;
3107                output = bit_writer.into_inner();
3108            }
3109
3110            output.write_all(&[0xFF, 0xD9])?;
3111        } else {
3112            // Baseline mode: Encode directly (streaming)
3113            let scans = generate_baseline_scan(3);
3114            let scan = &scans[0]; // Baseline has only one scan
3115            marker_writer.write_sos(scan, &components)?;
3116
3117            // Encode MCU data
3118            let output = marker_writer.into_inner();
3119            let mut bit_writer = BitWriter::new(output);
3120            let mut entropy = EntropyEncoder::new(&mut bit_writer);
3121
3122            self.encode_mcus(
3123                y_mcu,
3124                mcu_width,
3125                mcu_height,
3126                cb_mcu,
3127                cr_mcu,
3128                mcu_chroma_w,
3129                mcu_chroma_h,
3130                &luma_qtable.values,
3131                &chroma_qtable.values,
3132                &dc_luma_derived,
3133                &dc_chroma_derived,
3134                &ac_luma_derived,
3135                &ac_chroma_derived,
3136                &mut entropy,
3137                luma_h,
3138                luma_v,
3139            )?;
3140
3141            // Flush bits and get output back
3142            bit_writer.flush()?;
3143            let mut output = bit_writer.into_inner();
3144
3145            // EOI
3146            output.write_all(&[0xFF, 0xD9])?;
3147        }
3148
3149        Ok(())
3150    }
3151
3152    /// Encode all MCUs (Minimum Coded Units).
3153    #[allow(clippy::too_many_arguments)]
3154    fn encode_mcus<W: Write>(
3155        &self,
3156        y_plane: &[u8],
3157        y_width: usize,
3158        y_height: usize,
3159        cb_plane: &[u8],
3160        cr_plane: &[u8],
3161        chroma_width: usize,
3162        _chroma_height: usize,
3163        luma_qtable: &[u16; DCTSIZE2],
3164        chroma_qtable: &[u16; DCTSIZE2],
3165        dc_luma: &DerivedTable,
3166        dc_chroma: &DerivedTable,
3167        ac_luma: &DerivedTable,
3168        ac_chroma: &DerivedTable,
3169        entropy: &mut EntropyEncoder<W>,
3170        h_samp: u8,
3171        v_samp: u8,
3172    ) -> Result<()> {
3173        let mcu_rows = y_height / (DCTSIZE * v_samp as usize);
3174        let mcu_cols = y_width / (DCTSIZE * h_samp as usize);
3175        let total_mcus = mcu_rows * mcu_cols;
3176
3177        let mut dct_block = [0i16; DCTSIZE2];
3178        let mut quant_block = [0i16; DCTSIZE2];
3179
3180        // Restart marker tracking
3181        let restart_interval = self.restart_interval as usize;
3182        let mut mcu_count = 0usize;
3183        let mut restart_num = 0u8;
3184
3185        for mcu_row in 0..mcu_rows {
3186            for mcu_col in 0..mcu_cols {
3187                // Check if we need to emit a restart marker BEFORE this MCU
3188                // (except for the first MCU)
3189                if restart_interval > 0
3190                    && mcu_count > 0
3191                    && mcu_count.is_multiple_of(restart_interval)
3192                {
3193                    entropy.emit_restart(restart_num)?;
3194                    restart_num = restart_num.wrapping_add(1) & 0x07;
3195                }
3196
3197                // Encode Y blocks (may be multiple per MCU for subsampling)
3198                for v in 0..v_samp as usize {
3199                    for h in 0..h_samp as usize {
3200                        let block_row = mcu_row * v_samp as usize + v;
3201                        let block_col = mcu_col * h_samp as usize + h;
3202
3203                        self.encode_block(
3204                            y_plane,
3205                            y_width,
3206                            block_row,
3207                            block_col,
3208                            luma_qtable,
3209                            dc_luma,
3210                            ac_luma,
3211                            0, // Y component
3212                            entropy,
3213                            &mut dct_block,
3214                            &mut quant_block,
3215                        )?;
3216                    }
3217                }
3218
3219                // Encode Cb block
3220                self.encode_block(
3221                    cb_plane,
3222                    chroma_width,
3223                    mcu_row,
3224                    mcu_col,
3225                    chroma_qtable,
3226                    dc_chroma,
3227                    ac_chroma,
3228                    1, // Cb component
3229                    entropy,
3230                    &mut dct_block,
3231                    &mut quant_block,
3232                )?;
3233
3234                // Encode Cr block
3235                self.encode_block(
3236                    cr_plane,
3237                    chroma_width,
3238                    mcu_row,
3239                    mcu_col,
3240                    chroma_qtable,
3241                    dc_chroma,
3242                    ac_chroma,
3243                    2, // Cr component
3244                    entropy,
3245                    &mut dct_block,
3246                    &mut quant_block,
3247                )?;
3248
3249                mcu_count += 1;
3250            }
3251        }
3252
3253        // Suppress unused variable warning
3254        let _ = total_mcus;
3255
3256        Ok(())
3257    }
3258
3259    /// Encode a single 8x8 block.
3260    #[allow(clippy::too_many_arguments)]
3261    fn encode_block<W: Write>(
3262        &self,
3263        plane: &[u8],
3264        plane_width: usize,
3265        block_row: usize,
3266        block_col: usize,
3267        qtable: &[u16; DCTSIZE2],
3268        dc_table: &DerivedTable,
3269        ac_table: &DerivedTable,
3270        component: usize,
3271        entropy: &mut EntropyEncoder<W>,
3272        dct_block: &mut [i16; DCTSIZE2],
3273        quant_block: &mut [i16; DCTSIZE2],
3274    ) -> Result<()> {
3275        // Extract 8x8 block from plane
3276        let mut samples = [0u8; DCTSIZE2];
3277        let base_y = block_row * DCTSIZE;
3278        let base_x = block_col * DCTSIZE;
3279
3280        for row in 0..DCTSIZE {
3281            let src_offset = (base_y + row) * plane_width + base_x;
3282            let dst_offset = row * DCTSIZE;
3283            samples[dst_offset..dst_offset + DCTSIZE]
3284                .copy_from_slice(&plane[src_offset..src_offset + DCTSIZE]);
3285        }
3286
3287        // Level shift (center around 0 for DCT)
3288        let mut shifted = [0i16; DCTSIZE2];
3289        for i in 0..DCTSIZE2 {
3290            shifted[i] = (samples[i] as i16) - 128;
3291        }
3292
3293        // Apply overshoot deringing if enabled (reduces ringing on white backgrounds)
3294        if self.overshoot_deringing {
3295            preprocess_deringing(&mut shifted, qtable[0]);
3296        }
3297
3298        // Forward DCT (output scaled by factor of 8)
3299        self.simd.do_forward_dct(&shifted, dct_block);
3300
3301        // Convert to i32 for quantization
3302        let mut dct_i32 = [0i32; DCTSIZE2];
3303        for i in 0..DCTSIZE2 {
3304            dct_i32[i] = dct_block[i] as i32;
3305        }
3306
3307        // Use trellis quantization if enabled
3308        // Both paths expect raw DCT (scaled by 8) and handle the scaling internally
3309        if self.trellis.enabled {
3310            trellis_quantize_block(&dct_i32, quant_block, qtable, ac_table, &self.trellis);
3311        } else {
3312            // Non-trellis path: use single-step quantization matching C mozjpeg
3313            // This takes raw DCT (scaled by 8) and uses q_scaled = 8 * qtable[i]
3314            quantize_block_raw(&dct_i32, qtable, quant_block);
3315        }
3316
3317        // Entropy encode
3318        entropy.encode_block(quant_block, component, dc_table, ac_table)?;
3319
3320        Ok(())
3321    }
3322
3323    /// Collect all quantized DCT blocks for progressive encoding.
3324    /// Also collects raw DCT blocks if DC trellis is enabled.
3325    #[allow(clippy::too_many_arguments)]
3326    fn collect_blocks(
3327        &self,
3328        y_plane: &[u8],
3329        y_width: usize,
3330        y_height: usize,
3331        cb_plane: &[u8],
3332        cr_plane: &[u8],
3333        chroma_width: usize,
3334        _chroma_height: usize,
3335        luma_qtable: &[u16; DCTSIZE2],
3336        chroma_qtable: &[u16; DCTSIZE2],
3337        ac_luma: &DerivedTable,
3338        ac_chroma: &DerivedTable,
3339        y_blocks: &mut [[i16; DCTSIZE2]],
3340        cb_blocks: &mut [[i16; DCTSIZE2]],
3341        cr_blocks: &mut [[i16; DCTSIZE2]],
3342        mut y_raw_dct: Option<&mut [[i32; DCTSIZE2]]>,
3343        mut cb_raw_dct: Option<&mut [[i32; DCTSIZE2]]>,
3344        mut cr_raw_dct: Option<&mut [[i32; DCTSIZE2]]>,
3345        h_samp: u8,
3346        v_samp: u8,
3347    ) -> Result<()> {
3348        let mcu_rows = y_height / (DCTSIZE * v_samp as usize);
3349        let mcu_cols = y_width / (DCTSIZE * h_samp as usize);
3350
3351        let mut y_idx = 0;
3352        let mut c_idx = 0;
3353        let mut dct_block = [0i16; DCTSIZE2];
3354
3355        for mcu_row in 0..mcu_rows {
3356            for mcu_col in 0..mcu_cols {
3357                // Collect Y blocks (may be multiple per MCU for subsampling)
3358                for v in 0..v_samp as usize {
3359                    for h in 0..h_samp as usize {
3360                        let block_row = mcu_row * v_samp as usize + v;
3361                        let block_col = mcu_col * h_samp as usize + h;
3362
3363                        // Get mutable reference to raw DCT output if collecting
3364                        let raw_dct_out = y_raw_dct.as_mut().map(|arr| &mut arr[y_idx][..]);
3365                        self.process_block_to_storage_with_raw(
3366                            y_plane,
3367                            y_width,
3368                            block_row,
3369                            block_col,
3370                            luma_qtable,
3371                            ac_luma,
3372                            &mut y_blocks[y_idx],
3373                            &mut dct_block,
3374                            raw_dct_out,
3375                        )?;
3376                        y_idx += 1;
3377                    }
3378                }
3379
3380                // Collect Cb block
3381                let raw_dct_out = cb_raw_dct.as_mut().map(|arr| &mut arr[c_idx][..]);
3382                self.process_block_to_storage_with_raw(
3383                    cb_plane,
3384                    chroma_width,
3385                    mcu_row,
3386                    mcu_col,
3387                    chroma_qtable,
3388                    ac_chroma,
3389                    &mut cb_blocks[c_idx],
3390                    &mut dct_block,
3391                    raw_dct_out,
3392                )?;
3393
3394                // Collect Cr block
3395                let raw_dct_out = cr_raw_dct.as_mut().map(|arr| &mut arr[c_idx][..]);
3396                self.process_block_to_storage_with_raw(
3397                    cr_plane,
3398                    chroma_width,
3399                    mcu_row,
3400                    mcu_col,
3401                    chroma_qtable,
3402                    ac_chroma,
3403                    &mut cr_blocks[c_idx],
3404                    &mut dct_block,
3405                    raw_dct_out,
3406                )?;
3407
3408                c_idx += 1;
3409            }
3410        }
3411
3412        Ok(())
3413    }
3414
3415    /// Process a block: DCT + quantize, storing the result.
3416    /// Optionally stores raw DCT coefficients for DC trellis.
3417    #[allow(clippy::too_many_arguments)]
3418    fn process_block_to_storage_with_raw(
3419        &self,
3420        plane: &[u8],
3421        plane_width: usize,
3422        block_row: usize,
3423        block_col: usize,
3424        qtable: &[u16; DCTSIZE2],
3425        ac_table: &DerivedTable,
3426        out_block: &mut [i16; DCTSIZE2],
3427        dct_block: &mut [i16; DCTSIZE2],
3428        raw_dct_out: Option<&mut [i32]>,
3429    ) -> Result<()> {
3430        // Extract 8x8 block from plane
3431        let mut samples = [0u8; DCTSIZE2];
3432        let base_y = block_row * DCTSIZE;
3433        let base_x = block_col * DCTSIZE;
3434
3435        for row in 0..DCTSIZE {
3436            let src_offset = (base_y + row) * plane_width + base_x;
3437            let dst_offset = row * DCTSIZE;
3438            samples[dst_offset..dst_offset + DCTSIZE]
3439                .copy_from_slice(&plane[src_offset..src_offset + DCTSIZE]);
3440        }
3441
3442        // Level shift (center around 0 for DCT)
3443        let mut shifted = [0i16; DCTSIZE2];
3444        for i in 0..DCTSIZE2 {
3445            shifted[i] = (samples[i] as i16) - 128;
3446        }
3447
3448        // Apply overshoot deringing if enabled (reduces ringing on white backgrounds)
3449        if self.overshoot_deringing {
3450            preprocess_deringing(&mut shifted, qtable[0]);
3451        }
3452
3453        // Forward DCT (output scaled by factor of 8)
3454        self.simd.do_forward_dct(&shifted, dct_block);
3455
3456        // Convert to i32 for quantization
3457        let mut dct_i32 = [0i32; DCTSIZE2];
3458        for i in 0..DCTSIZE2 {
3459            dct_i32[i] = dct_block[i] as i32;
3460        }
3461
3462        // Store raw DCT if requested (for DC trellis)
3463        if let Some(raw_out) = raw_dct_out {
3464            raw_out.copy_from_slice(&dct_i32);
3465        }
3466
3467        // Use trellis quantization if enabled
3468        // Both paths expect raw DCT (scaled by 8) and handle the scaling internally
3469        if self.trellis.enabled {
3470            trellis_quantize_block(&dct_i32, out_block, qtable, ac_table, &self.trellis);
3471        } else {
3472            // Non-trellis path: use single-step quantization matching C mozjpeg
3473            // This takes raw DCT (scaled by 8) and uses q_scaled = 8 * qtable[i]
3474            quantize_block_raw(&dct_i32, qtable, out_block);
3475        }
3476
3477        Ok(())
3478    }
3479
3480    /// Optimize progressive scan configuration (C mozjpeg-compatible).
3481    ///
3482    /// This implements the optimize_scans feature from C mozjpeg:
3483    /// 1. Generate 64 individual candidate scans
3484    /// 2. Trial-encode scans SEQUENTIALLY to get accurate sizes
3485    /// 3. Use ScanSelector to find optimal Al levels and frequency splits
3486    /// 4. Build the final scan script from the selection
3487    ///
3488    /// IMPORTANT: Scans must be encoded sequentially (not independently) because
3489    /// refinement scans (Ah > 0) need context from previous scans to produce
3490    /// correct output sizes.
3491    #[allow(clippy::too_many_arguments)]
3492    fn optimize_progressive_scans(
3493        &self,
3494        num_components: u8,
3495        y_blocks: &[[i16; DCTSIZE2]],
3496        cb_blocks: &[[i16; DCTSIZE2]],
3497        cr_blocks: &[[i16; DCTSIZE2]],
3498        mcu_rows: usize,
3499        mcu_cols: usize,
3500        h_samp: u8,
3501        v_samp: u8,
3502        actual_width: usize,
3503        actual_height: usize,
3504        chroma_width: usize,
3505        chroma_height: usize,
3506        dc_luma: &DerivedTable,
3507        dc_chroma: &DerivedTable,
3508        ac_luma: &DerivedTable,
3509        ac_chroma: &DerivedTable,
3510    ) -> Result<Vec<crate::types::ScanInfo>> {
3511        let config = ScanSearchConfig::default();
3512        let candidate_scans = generate_search_scans(num_components, &config);
3513
3514        // Use ScanTrialEncoder for sequential trial encoding with proper state tracking
3515        let mut trial_encoder = ScanTrialEncoder::new(
3516            y_blocks,
3517            cb_blocks,
3518            cr_blocks,
3519            dc_luma,
3520            dc_chroma,
3521            ac_luma,
3522            ac_chroma,
3523            mcu_rows,
3524            mcu_cols,
3525            h_samp,
3526            v_samp,
3527            actual_width,
3528            actual_height,
3529            chroma_width,
3530            chroma_height,
3531        );
3532
3533        // Trial-encode all scans sequentially to get accurate sizes
3534        let scan_sizes = trial_encoder.encode_all_scans(&candidate_scans)?;
3535
3536        // Use ScanSelector to find the optimal configuration
3537        let selector = ScanSelector::new(num_components, config.clone());
3538        let result = selector.select_best(&scan_sizes);
3539
3540        // Build the final scan script from the selection
3541        Ok(result.build_final_scans(num_components, &config))
3542    }
3543
3544    /// Encode a single progressive scan.
3545    #[allow(clippy::too_many_arguments)]
3546    fn encode_progressive_scan<W: Write>(
3547        &self,
3548        scan: &crate::types::ScanInfo,
3549        y_blocks: &[[i16; DCTSIZE2]],
3550        cb_blocks: &[[i16; DCTSIZE2]],
3551        cr_blocks: &[[i16; DCTSIZE2]],
3552        mcu_rows: usize,
3553        mcu_cols: usize,
3554        h_samp: u8,
3555        v_samp: u8,
3556        actual_width: usize,
3557        actual_height: usize,
3558        chroma_width: usize,
3559        chroma_height: usize,
3560        dc_luma: &DerivedTable,
3561        dc_chroma: &DerivedTable,
3562        ac_luma: &DerivedTable,
3563        ac_chroma: &DerivedTable,
3564        encoder: &mut ProgressiveEncoder<W>,
3565    ) -> Result<()> {
3566        let is_dc_scan = scan.ss == 0 && scan.se == 0;
3567        let is_refinement = scan.ah != 0;
3568
3569        if is_dc_scan {
3570            // DC scan - can be interleaved (multiple components)
3571            self.encode_dc_scan(
3572                scan,
3573                y_blocks,
3574                cb_blocks,
3575                cr_blocks,
3576                mcu_rows,
3577                mcu_cols,
3578                h_samp,
3579                v_samp,
3580                dc_luma,
3581                dc_chroma,
3582                is_refinement,
3583                encoder,
3584            )?;
3585        } else {
3586            // AC scan - single component only (non-interleaved)
3587            // For non-interleaved scans, use actual component block dimensions
3588            let comp_idx = scan.component_index[0] as usize;
3589            let blocks = match comp_idx {
3590                0 => y_blocks,
3591                1 => cb_blocks,
3592                2 => cr_blocks,
3593                _ => return Err(Error::InvalidComponentIndex(comp_idx)),
3594            };
3595            let ac_table = if comp_idx == 0 { ac_luma } else { ac_chroma };
3596
3597            // Calculate actual block dimensions for this component.
3598            // Non-interleaved AC scans encode only the actual image blocks, not MCU padding.
3599            // This differs from interleaved DC scans which encode all MCU blocks.
3600            // Reference: ITU-T T.81 Section F.2.3
3601            let (block_cols, block_rows) = if comp_idx == 0 {
3602                // Y component: full resolution
3603                (
3604                    actual_width.div_ceil(DCTSIZE),
3605                    actual_height.div_ceil(DCTSIZE),
3606                )
3607            } else {
3608                // Chroma components: subsampled resolution
3609                (
3610                    chroma_width.div_ceil(DCTSIZE),
3611                    chroma_height.div_ceil(DCTSIZE),
3612                )
3613            };
3614
3615            self.encode_ac_scan(
3616                scan,
3617                blocks,
3618                mcu_rows,
3619                mcu_cols,
3620                h_samp,
3621                v_samp,
3622                comp_idx,
3623                block_cols,
3624                block_rows,
3625                ac_table,
3626                is_refinement,
3627                encoder,
3628            )?;
3629        }
3630
3631        Ok(())
3632    }
3633
3634    /// Encode a DC scan (Ss=Se=0).
3635    #[allow(clippy::too_many_arguments)]
3636    fn encode_dc_scan<W: Write>(
3637        &self,
3638        scan: &crate::types::ScanInfo,
3639        y_blocks: &[[i16; DCTSIZE2]],
3640        cb_blocks: &[[i16; DCTSIZE2]],
3641        cr_blocks: &[[i16; DCTSIZE2]],
3642        mcu_rows: usize,
3643        mcu_cols: usize,
3644        h_samp: u8,
3645        v_samp: u8,
3646        dc_luma: &DerivedTable,
3647        dc_chroma: &DerivedTable,
3648        is_refinement: bool,
3649        encoder: &mut ProgressiveEncoder<W>,
3650    ) -> Result<()> {
3651        let blocks_per_mcu_y = (h_samp * v_samp) as usize;
3652        let mut y_idx = 0;
3653        let mut c_idx = 0;
3654
3655        for _mcu_row in 0..mcu_rows {
3656            for _mcu_col in 0..mcu_cols {
3657                // Encode Y blocks
3658                for _ in 0..blocks_per_mcu_y {
3659                    if is_refinement {
3660                        encoder.encode_dc_refine(&y_blocks[y_idx], scan.al)?;
3661                    } else {
3662                        encoder.encode_dc_first(&y_blocks[y_idx], 0, dc_luma, scan.al)?;
3663                    }
3664                    y_idx += 1;
3665                }
3666
3667                // Encode Cb
3668                if is_refinement {
3669                    encoder.encode_dc_refine(&cb_blocks[c_idx], scan.al)?;
3670                } else {
3671                    encoder.encode_dc_first(&cb_blocks[c_idx], 1, dc_chroma, scan.al)?;
3672                }
3673
3674                // Encode Cr
3675                if is_refinement {
3676                    encoder.encode_dc_refine(&cr_blocks[c_idx], scan.al)?;
3677                } else {
3678                    encoder.encode_dc_first(&cr_blocks[c_idx], 2, dc_chroma, scan.al)?;
3679                }
3680
3681                c_idx += 1;
3682            }
3683        }
3684
3685        Ok(())
3686    }
3687
3688    /// Encode an AC scan (Ss > 0).
3689    ///
3690    /// **IMPORTANT**: Progressive AC scans are always non-interleaved, meaning blocks
3691    /// must be encoded in component raster order (row-major within the component's
3692    /// block grid), NOT in MCU-interleaved order.
3693    ///
3694    /// For non-interleaved scans, the number of blocks is determined by the actual
3695    /// component dimensions (ceil(width/8) × ceil(height/8)), NOT the MCU-padded
3696    /// dimensions. This is different from interleaved DC scans which use MCU order.
3697    /// The padding blocks (beyond actual image dimensions) have DC coefficients but
3698    /// no AC coefficients - the decoder only outputs the actual image dimensions.
3699    ///
3700    /// Reference: ITU-T T.81 Section F.2.3 - "The scan data for a non-interleaved
3701    /// scan shall consist of a sequence of entropy-coded segments... The data units
3702    /// are processed in the order defined by the scan component."
3703    #[allow(clippy::too_many_arguments)]
3704    fn encode_ac_scan<W: Write>(
3705        &self,
3706        scan: &crate::types::ScanInfo,
3707        blocks: &[[i16; DCTSIZE2]],
3708        _mcu_rows: usize,
3709        mcu_cols: usize,
3710        h_samp: u8,
3711        v_samp: u8,
3712        comp_idx: usize,
3713        block_cols: usize,
3714        block_rows: usize,
3715        ac_table: &DerivedTable,
3716        is_refinement: bool,
3717        encoder: &mut ProgressiveEncoder<W>,
3718    ) -> Result<()> {
3719        // For Y component with subsampling, blocks are stored in MCU-interleaved order
3720        // but AC scans must encode them in component raster order.
3721        // For chroma components (1 block per MCU), the orders are identical.
3722        //
3723        // For non-interleaved scans, encode only the actual image blocks (block_rows × block_cols),
3724        // not all MCU-padded blocks. Padding blocks have DC coefficients but no AC coefficients.
3725
3726        let blocks_per_mcu = if comp_idx == 0 {
3727            (h_samp * v_samp) as usize
3728        } else {
3729            1
3730        };
3731
3732        if blocks_per_mcu == 1 {
3733            // Chroma or 4:4:4 Y: storage order = raster order
3734            let total_blocks = block_rows * block_cols;
3735            for block in blocks.iter().take(total_blocks) {
3736                if is_refinement {
3737                    encoder
3738                        .encode_ac_refine(block, scan.ss, scan.se, scan.ah, scan.al, ac_table)?;
3739                } else {
3740                    encoder.encode_ac_first(block, scan.ss, scan.se, scan.al, ac_table)?;
3741                }
3742            }
3743        } else {
3744            // Y component with subsampling (h_samp > 1 or v_samp > 1)
3745            // Convert from MCU-interleaved storage to component raster order
3746            let h = h_samp as usize;
3747            let v = v_samp as usize;
3748
3749            for block_row in 0..block_rows {
3750                for block_col in 0..block_cols {
3751                    // Convert raster position to MCU-interleaved storage index
3752                    let mcu_row = block_row / v;
3753                    let mcu_col = block_col / h;
3754                    let v_idx = block_row % v;
3755                    let h_idx = block_col % h;
3756                    let storage_idx = mcu_row * (mcu_cols * blocks_per_mcu)
3757                        + mcu_col * blocks_per_mcu
3758                        + v_idx * h
3759                        + h_idx;
3760
3761                    if is_refinement {
3762                        encoder.encode_ac_refine(
3763                            &blocks[storage_idx],
3764                            scan.ss,
3765                            scan.se,
3766                            scan.ah,
3767                            scan.al,
3768                            ac_table,
3769                        )?;
3770                    } else {
3771                        encoder.encode_ac_first(
3772                            &blocks[storage_idx],
3773                            scan.ss,
3774                            scan.se,
3775                            scan.al,
3776                            ac_table,
3777                        )?;
3778                    }
3779                }
3780            }
3781        }
3782
3783        Ok(())
3784    }
3785
3786    /// Count DC symbols for a progressive DC scan.
3787    #[allow(clippy::too_many_arguments)]
3788    fn count_dc_scan_symbols(
3789        &self,
3790        scan: &crate::types::ScanInfo,
3791        y_blocks: &[[i16; DCTSIZE2]],
3792        cb_blocks: &[[i16; DCTSIZE2]],
3793        cr_blocks: &[[i16; DCTSIZE2]],
3794        mcu_rows: usize,
3795        mcu_cols: usize,
3796        h_samp: u8,
3797        v_samp: u8,
3798        dc_luma_freq: &mut FrequencyCounter,
3799        dc_chroma_freq: &mut FrequencyCounter,
3800    ) {
3801        let blocks_per_mcu_y = (h_samp * v_samp) as usize;
3802        let mut y_idx = 0;
3803        let mut c_idx = 0;
3804        let mut counter = ProgressiveSymbolCounter::new();
3805
3806        for _mcu_row in 0..mcu_rows {
3807            for _mcu_col in 0..mcu_cols {
3808                // Y blocks
3809                for _ in 0..blocks_per_mcu_y {
3810                    counter.count_dc_first(&y_blocks[y_idx], 0, scan.al, dc_luma_freq);
3811                    y_idx += 1;
3812                }
3813                // Cb block
3814                counter.count_dc_first(&cb_blocks[c_idx], 1, scan.al, dc_chroma_freq);
3815                // Cr block
3816                counter.count_dc_first(&cr_blocks[c_idx], 2, scan.al, dc_chroma_freq);
3817                c_idx += 1;
3818            }
3819        }
3820    }
3821
3822    /// Count AC symbols for a progressive AC scan.
3823    ///
3824    /// Must iterate blocks in the same order as `encode_ac_scan` (component raster order)
3825    /// to ensure EOBRUN counts match and Huffman tables are correct.
3826    ///
3827    /// Uses actual block dimensions (not MCU-padded) for non-interleaved scans.
3828    #[allow(clippy::too_many_arguments)]
3829    fn count_ac_scan_symbols(
3830        &self,
3831        scan: &crate::types::ScanInfo,
3832        blocks: &[[i16; DCTSIZE2]],
3833        _mcu_rows: usize,
3834        mcu_cols: usize,
3835        h_samp: u8,
3836        v_samp: u8,
3837        comp_idx: usize,
3838        block_cols: usize,
3839        block_rows: usize,
3840        ac_freq: &mut FrequencyCounter,
3841    ) {
3842        let blocks_per_mcu = if comp_idx == 0 {
3843            (h_samp * v_samp) as usize
3844        } else {
3845            1
3846        };
3847
3848        let mut counter = ProgressiveSymbolCounter::new();
3849        let is_refinement = scan.ah != 0;
3850
3851        if blocks_per_mcu == 1 {
3852            // Chroma or 4:4:4 Y: storage order = raster order
3853            let total_blocks = block_rows * block_cols;
3854            for block in blocks.iter().take(total_blocks) {
3855                if is_refinement {
3856                    counter.count_ac_refine(block, scan.ss, scan.se, scan.ah, scan.al, ac_freq);
3857                } else {
3858                    counter.count_ac_first(block, scan.ss, scan.se, scan.al, ac_freq);
3859                }
3860            }
3861        } else {
3862            // Y component with subsampling - iterate in raster order (matching encode_ac_scan)
3863            let h = h_samp as usize;
3864            let v = v_samp as usize;
3865
3866            for block_row in 0..block_rows {
3867                for block_col in 0..block_cols {
3868                    // Convert raster position to MCU-interleaved storage index
3869                    let mcu_row = block_row / v;
3870                    let mcu_col = block_col / h;
3871                    let v_idx = block_row % v;
3872                    let h_idx = block_col % h;
3873                    let storage_idx = mcu_row * (mcu_cols * blocks_per_mcu)
3874                        + mcu_col * blocks_per_mcu
3875                        + v_idx * h
3876                        + h_idx;
3877
3878                    if is_refinement {
3879                        counter.count_ac_refine(
3880                            &blocks[storage_idx],
3881                            scan.ss,
3882                            scan.se,
3883                            scan.ah,
3884                            scan.al,
3885                            ac_freq,
3886                        );
3887                    } else {
3888                        counter.count_ac_first(
3889                            &blocks[storage_idx],
3890                            scan.ss,
3891                            scan.se,
3892                            scan.al,
3893                            ac_freq,
3894                        );
3895                    }
3896                }
3897            }
3898        }
3899
3900        // Flush any pending EOBRUN
3901        counter.finish_scan(Some(ac_freq));
3902    }
3903}
3904
3905// ============================================================================
3906// Encode Trait Implementation
3907// ============================================================================
3908
3909impl Encode for Encoder {
3910    fn encode_rgb(&self, rgb_data: &[u8], width: u32, height: u32) -> Result<Vec<u8>> {
3911        self.encode_rgb(rgb_data, width, height)
3912    }
3913
3914    fn encode_gray(&self, gray_data: &[u8], width: u32, height: u32) -> Result<Vec<u8>> {
3915        self.encode_gray(gray_data, width, height)
3916    }
3917}
3918
3919// Note: StreamingEncoder and EncodingStream are in the `streaming` module.
3920
3921// Add streaming() method to Encoder
3922impl Encoder {
3923    /// Create a streaming encoder.
3924    ///
3925    /// Returns a [`StreamingEncoder`] which supports scanline-by-scanline encoding.
3926    /// Note that streaming mode does NOT support trellis quantization, progressive
3927    /// mode, or Huffman optimization (these require buffering the entire image).
3928    ///
3929    /// For full-featured encoding with all mozjpeg optimizations, use [`Encoder::new()`]
3930    /// with [`encode_rgb()`](Encoder::encode_rgb) or [`encode_gray()`](Encoder::encode_gray).
3931    ///
3932    /// # Example
3933    ///
3934    /// ```ignore
3935    /// use mozjpeg_rs::Encoder;
3936    /// use std::fs::File;
3937    ///
3938    /// let file = File::create("output.jpg")?;
3939    /// let mut stream = Encoder::streaming()
3940    ///     .quality(85)
3941    ///     .start_rgb(1920, 1080, file)?;
3942    ///
3943    /// // Write scanlines...
3944    /// stream.finish()?;
3945    /// ```
3946    pub fn streaming() -> StreamingEncoder {
3947        StreamingEncoder::baseline_fastest()
3948    }
3949}
3950
3951// ============================================================================
3952// C mozjpeg encoding (optional feature)
3953// ============================================================================
3954
#[cfg(feature = "mozjpeg-sys-config")]
impl Encoder {
    /// Build a C mozjpeg encoder configured like this Rust encoder.
    ///
    /// Every setting is copied (or cloned, for EXIF data, ICC profile and
    /// custom markers) into a [`CMozjpeg`](crate::CMozjpeg). Custom
    /// quantization tables are not copied; only the fact that at least one
    /// custom table is set is recorded in `has_custom_qtables`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use mozjpeg_rs::{Encoder, Preset};
    ///
    /// let pixels: Vec<u8> = vec![128; 64 * 64 * 3];
    /// let encoder = Encoder::new(Preset::ProgressiveBalanced).quality(85);
    ///
    /// // Output of the C implementation
    /// let c_jpeg = encoder.to_c_mozjpeg().encode_rgb(&pixels, 64, 64)?;
    ///
    /// // Output of the Rust implementation, for comparison
    /// let rust_jpeg = encoder.encode_rgb(&pixels, 64, 64)?;
    /// # Ok::<(), mozjpeg_rs::Error>(())
    /// ```
    pub fn to_c_mozjpeg(&self) -> crate::compat::CMozjpeg {
        // True when either the luma or the chroma table has been overridden.
        let has_custom_qtables =
            self.custom_luma_qtable.is_some() || self.custom_chroma_qtable.is_some();

        crate::compat::CMozjpeg {
            // Quality and output mode
            quality: self.quality,
            force_baseline: self.force_baseline,
            subsampling: self.subsampling,
            progressive: self.progressive,
            // Optimization switches
            optimize_huffman: self.optimize_huffman,
            optimize_scans: self.optimize_scans,
            trellis: self.trellis,
            overshoot_deringing: self.overshoot_deringing,
            smoothing: self.smoothing,
            restart_interval: self.restart_interval,
            // Quantization tables
            quant_table_idx: self.quant_table_idx,
            has_custom_qtables,
            // Metadata payloads
            exif_data: self.exif_data.clone(),
            icc_profile: self.icc_profile.clone(),
            custom_markers: self.custom_markers.clone(),
        }
    }
}
3998
3999/// Unit tests for private encoder internals.
4000/// Public API tests are in tests/encode_tests.rs.
4001#[cfg(test)]
4002mod tests {
4003    use super::*;
4004
4005    #[test]
4006    fn test_encoder_defaults() {
4007        // Default preset is ProgressiveBalanced
4008        let enc = Encoder::default();
4009        assert_eq!(enc.quality, 75);
4010        assert!(enc.progressive); // ProgressiveBalanced is progressive
4011        assert_eq!(enc.subsampling, Subsampling::S420);
4012        assert!(enc.trellis.enabled);
4013        assert!(enc.optimize_huffman);
4014        assert!(!enc.optimize_scans); // ProgressiveBalanced does NOT include optimize_scans
4015    }
4016
4017    #[test]
4018    fn test_encoder_presets() {
4019        let fastest = Encoder::new(Preset::BaselineFastest);
4020        assert!(!fastest.progressive);
4021        assert!(!fastest.trellis.enabled);
4022        assert!(!fastest.optimize_huffman);
4023
4024        let baseline = Encoder::new(Preset::BaselineBalanced);
4025        assert!(!baseline.progressive);
4026        assert!(baseline.trellis.enabled);
4027        assert!(baseline.optimize_huffman);
4028
4029        let prog_balanced = Encoder::new(Preset::ProgressiveBalanced);
4030        assert!(prog_balanced.progressive);
4031        assert!(prog_balanced.trellis.enabled);
4032        assert!(!prog_balanced.optimize_scans);
4033
4034        let prog_smallest = Encoder::new(Preset::ProgressiveSmallest);
4035        assert!(prog_smallest.progressive);
4036        assert!(prog_smallest.optimize_scans);
4037    }
4038
4039    #[test]
4040    fn test_encoder_builder_fields() {
4041        let enc = Encoder::baseline_optimized()
4042            .quality(90)
4043            .progressive(true)
4044            .subsampling(Subsampling::S444);
4045
4046        assert_eq!(enc.quality, 90);
4047        assert!(enc.progressive);
4048        assert_eq!(enc.subsampling, Subsampling::S444);
4049    }
4050
4051    #[test]
4052    fn test_quality_clamping() {
4053        let enc = Encoder::baseline_optimized().quality(0);
4054        assert_eq!(enc.quality, 1);
4055
4056        let enc = Encoder::baseline_optimized().quality(150);
4057        assert_eq!(enc.quality, 100);
4058    }
4059
4060    #[test]
4061    fn test_natural_to_zigzag() {
4062        let mut natural = [0u16; 64];
4063        for i in 0..64 {
4064            natural[i] = i as u16;
4065        }
4066        let zigzag = natural_to_zigzag(&natural);
4067
4068        assert_eq!(zigzag[0], 0);
4069        assert_eq!(zigzag[1], 1);
4070    }
4071
4072    #[test]
4073    fn test_max_compression_uses_all_optimizations() {
4074        let encoder = Encoder::max_compression();
4075        assert!(encoder.trellis.enabled);
4076        assert!(encoder.progressive);
4077        assert!(encoder.optimize_huffman);
4078        assert!(encoder.optimize_scans);
4079    }
4080
4081    #[test]
4082    fn test_encode_ycbcr_planar_444() {
4083        let width = 32u32;
4084        let height = 32u32;
4085
4086        // Create test image with gradient pattern
4087        let y_plane: Vec<u8> = (0..width * height)
4088            .map(|i| ((i % width) * 255 / width) as u8)
4089            .collect();
4090        let cb_plane: Vec<u8> = (0..width * height)
4091            .map(|i| ((i / width) * 255 / height) as u8)
4092            .collect();
4093        let cr_plane: Vec<u8> = vec![128u8; (width * height) as usize];
4094
4095        let encoder = Encoder::new(Preset::BaselineBalanced)
4096            .quality(85)
4097            .subsampling(Subsampling::S444);
4098
4099        let jpeg_data = encoder
4100            .encode_ycbcr_planar(&y_plane, &cb_plane, &cr_plane, width, height)
4101            .expect("encode_ycbcr_planar should succeed");
4102
4103        // Verify it's a valid JPEG
4104        assert!(jpeg_data.starts_with(&[0xFF, 0xD8, 0xFF])); // SOI + marker
4105        assert!(jpeg_data.ends_with(&[0xFF, 0xD9])); // EOI
4106        assert!(jpeg_data.len() > 200); // Reasonable size for 32x32
4107    }
4108
4109    #[test]
4110    fn test_encode_ycbcr_planar_420() {
4111        let width = 32u32;
4112        let height = 32u32;
4113
4114        // For 4:2:0, chroma planes are half resolution in each dimension
4115        let chroma_w = (width + 1) / 2;
4116        let chroma_h = (height + 1) / 2;
4117
4118        let y_plane: Vec<u8> = vec![128u8; (width * height) as usize];
4119        let cb_plane: Vec<u8> = vec![100u8; (chroma_w * chroma_h) as usize];
4120        let cr_plane: Vec<u8> = vec![150u8; (chroma_w * chroma_h) as usize];
4121
4122        let encoder = Encoder::new(Preset::BaselineBalanced)
4123            .quality(85)
4124            .subsampling(Subsampling::S420);
4125
4126        let jpeg_data = encoder
4127            .encode_ycbcr_planar(&y_plane, &cb_plane, &cr_plane, width, height)
4128            .expect("encode_ycbcr_planar with 4:2:0 should succeed");
4129
4130        // Verify it's a valid JPEG
4131        assert!(jpeg_data.starts_with(&[0xFF, 0xD8, 0xFF]));
4132        assert!(jpeg_data.ends_with(&[0xFF, 0xD9]));
4133    }
4134
4135    #[test]
4136    fn test_encode_ycbcr_planar_422() {
4137        let width = 32u32;
4138        let height = 32u32;
4139
4140        // For 4:2:2, chroma is half width, full height
4141        let chroma_w = (width + 1) / 2;
4142
4143        let y_plane: Vec<u8> = vec![128u8; (width * height) as usize];
4144        let cb_plane: Vec<u8> = vec![100u8; (chroma_w * height) as usize];
4145        let cr_plane: Vec<u8> = vec![150u8; (chroma_w * height) as usize];
4146
4147        let encoder = Encoder::new(Preset::BaselineBalanced)
4148            .quality(85)
4149            .subsampling(Subsampling::S422);
4150
4151        let jpeg_data = encoder
4152            .encode_ycbcr_planar(&y_plane, &cb_plane, &cr_plane, width, height)
4153            .expect("encode_ycbcr_planar with 4:2:2 should succeed");
4154
4155        assert!(jpeg_data.starts_with(&[0xFF, 0xD8, 0xFF]));
4156        assert!(jpeg_data.ends_with(&[0xFF, 0xD9]));
4157    }
4158
4159    #[test]
4160    fn test_encode_ycbcr_planar_wrong_size() {
4161        let width = 32u32;
4162        let height = 32u32;
4163
4164        // Correct Y plane but wrong chroma plane sizes for 4:2:0
4165        let y_plane: Vec<u8> = vec![128u8; (width * height) as usize];
4166        let cb_plane: Vec<u8> = vec![100u8; 10]; // Too small!
4167        let cr_plane: Vec<u8> = vec![150u8; 10]; // Too small!
4168
4169        let encoder = Encoder::new(Preset::BaselineBalanced)
4170            .quality(85)
4171            .subsampling(Subsampling::S420);
4172
4173        let result = encoder.encode_ycbcr_planar(&y_plane, &cb_plane, &cr_plane, width, height);
4174
4175        assert!(result.is_err());
4176    }
4177
4178    #[test]
4179    fn test_encode_ycbcr_planar_strided() {
4180        let width = 30u32; // Not a multiple of stride
4181        let height = 20u32;
4182        let y_stride = 32usize; // Stride with 2 bytes padding per row
4183
4184        // For 4:2:0, chroma is half resolution
4185        let chroma_width = 15usize;
4186        let chroma_height = 10usize;
4187        let cb_stride = 16usize; // Stride with 1 byte padding per row
4188
4189        // Create Y plane with stride (fill with gradient, padding with zeros)
4190        let mut y_plane = vec![0u8; y_stride * height as usize];
4191        for row in 0..height as usize {
4192            for col in 0..width as usize {
4193                y_plane[row * y_stride + col] = ((col * 255) / width as usize) as u8;
4194            }
4195        }
4196
4197        // Create chroma planes with stride
4198        let mut cb_plane = vec![0u8; cb_stride * chroma_height];
4199        let mut cr_plane = vec![0u8; cb_stride * chroma_height];
4200        for row in 0..chroma_height {
4201            for col in 0..chroma_width {
4202                cb_plane[row * cb_stride + col] = 100;
4203                cr_plane[row * cb_stride + col] = 150;
4204            }
4205        }
4206
4207        let encoder = Encoder::new(Preset::BaselineBalanced)
4208            .quality(85)
4209            .subsampling(Subsampling::S420);
4210
4211        let jpeg_data = encoder
4212            .encode_ycbcr_planar_strided(
4213                &y_plane, y_stride, &cb_plane, cb_stride, &cr_plane, cb_stride, width, height,
4214            )
4215            .expect("strided encoding should succeed");
4216
4217        // Verify it's a valid JPEG
4218        assert!(jpeg_data.starts_with(&[0xFF, 0xD8, 0xFF]));
4219        assert!(jpeg_data.ends_with(&[0xFF, 0xD9]));
4220    }
4221
4222    #[test]
4223    fn test_encode_ycbcr_planar_strided_matches_packed() {
4224        let width = 32u32;
4225        let height = 32u32;
4226
4227        // Create packed plane data
4228        let y_packed: Vec<u8> = (0..width * height).map(|i| (i % 256) as u8).collect();
4229        let chroma_w = (width + 1) / 2;
4230        let chroma_h = (height + 1) / 2;
4231        let cb_packed: Vec<u8> = vec![100u8; (chroma_w * chroma_h) as usize];
4232        let cr_packed: Vec<u8> = vec![150u8; (chroma_w * chroma_h) as usize];
4233
4234        let encoder = Encoder::new(Preset::BaselineBalanced)
4235            .quality(85)
4236            .subsampling(Subsampling::S420);
4237
4238        // Encode with packed API
4239        let jpeg_packed = encoder
4240            .encode_ycbcr_planar(&y_packed, &cb_packed, &cr_packed, width, height)
4241            .expect("packed encoding should succeed");
4242
4243        // Encode with strided API (stride == width means packed)
4244        let jpeg_strided = encoder
4245            .encode_ycbcr_planar_strided(
4246                &y_packed,
4247                width as usize,
4248                &cb_packed,
4249                chroma_w as usize,
4250                &cr_packed,
4251                chroma_w as usize,
4252                width,
4253                height,
4254            )
4255            .expect("strided encoding should succeed");
4256
4257        // Both should produce identical output
4258        assert_eq!(jpeg_packed, jpeg_strided);
4259    }
4260
4261    // =========================================================================
4262    // Resource Estimation Tests
4263    // =========================================================================
4264
4265    #[test]
4266    fn test_estimate_resources_basic() {
4267        let encoder = Encoder::new(Preset::BaselineBalanced);
4268        let estimate = encoder.estimate_resources(1920, 1080);
4269
4270        // Should have reasonable memory estimate (> input size)
4271        let input_size = 1920 * 1080 * 3;
4272        assert!(
4273            estimate.peak_memory_bytes > input_size,
4274            "Peak memory {} should exceed input size {}",
4275            estimate.peak_memory_bytes,
4276            input_size
4277        );
4278
4279        // Should have reasonable CPU cost (> 1.0 due to trellis)
4280        assert!(
4281            estimate.cpu_cost_multiplier > 1.0,
4282            "CPU cost {} should be > 1.0 for BaselineBalanced",
4283            estimate.cpu_cost_multiplier
4284        );
4285
4286        // Block count should match expected
4287        assert!(estimate.block_count > 0, "Block count should be > 0");
4288    }
4289
4290    #[test]
4291    fn test_estimate_resources_fastest_has_lower_cpu() {
4292        let fastest = Encoder::new(Preset::BaselineFastest);
4293        let balanced = Encoder::new(Preset::BaselineBalanced);
4294
4295        let est_fast = fastest.estimate_resources(512, 512);
4296        let est_balanced = balanced.estimate_resources(512, 512);
4297
4298        // Fastest should have lower CPU cost (no trellis)
4299        assert!(
4300            est_fast.cpu_cost_multiplier < est_balanced.cpu_cost_multiplier,
4301            "Fastest ({:.2}) should have lower CPU cost than Balanced ({:.2})",
4302            est_fast.cpu_cost_multiplier,
4303            est_balanced.cpu_cost_multiplier
4304        );
4305    }
4306
4307    #[test]
4308    fn test_estimate_resources_progressive_has_higher_cpu() {
4309        let baseline = Encoder::new(Preset::BaselineBalanced);
4310        let progressive = Encoder::new(Preset::ProgressiveBalanced);
4311
4312        let est_baseline = baseline.estimate_resources(512, 512);
4313        let est_prog = progressive.estimate_resources(512, 512);
4314
4315        // Progressive should have higher CPU cost (multiple scans)
4316        assert!(
4317            est_prog.cpu_cost_multiplier > est_baseline.cpu_cost_multiplier,
4318            "Progressive ({:.2}) should have higher CPU cost than Baseline ({:.2})",
4319            est_prog.cpu_cost_multiplier,
4320            est_baseline.cpu_cost_multiplier
4321        );
4322    }
4323
4324    #[test]
4325    fn test_estimate_resources_gray() {
4326        let encoder = Encoder::new(Preset::BaselineBalanced);
4327        let rgb_estimate = encoder.estimate_resources(512, 512);
4328        let gray_estimate = encoder.estimate_resources_gray(512, 512);
4329
4330        // Grayscale should use less memory (1 channel vs 3)
4331        assert!(
4332            gray_estimate.peak_memory_bytes < rgb_estimate.peak_memory_bytes,
4333            "Grayscale memory {} should be less than RGB {}",
4334            gray_estimate.peak_memory_bytes,
4335            rgb_estimate.peak_memory_bytes
4336        );
4337
4338        // Grayscale should have lower CPU cost
4339        assert!(
4340            gray_estimate.cpu_cost_multiplier < rgb_estimate.cpu_cost_multiplier,
4341            "Grayscale CPU {:.2} should be less than RGB {:.2}",
4342            gray_estimate.cpu_cost_multiplier,
4343            rgb_estimate.cpu_cost_multiplier
4344        );
4345    }
4346
4347    // =========================================================================
4348    // Resource Limit Tests
4349    // =========================================================================
4350
4351    #[test]
4352    fn test_dimension_limit_width() {
4353        let limits = Limits::default().max_width(100).max_height(100);
4354        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4355
4356        let pixels = vec![128u8; 200 * 50 * 3];
4357        let result = encoder.encode_rgb(&pixels, 200, 50);
4358
4359        assert!(matches!(result, Err(Error::DimensionLimitExceeded { .. })));
4360    }
4361
4362    #[test]
4363    fn test_dimension_limit_height() {
4364        let limits = Limits::default().max_width(100).max_height(100);
4365        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4366
4367        let pixels = vec![128u8; 50 * 200 * 3];
4368        let result = encoder.encode_rgb(&pixels, 50, 200);
4369
4370        assert!(matches!(result, Err(Error::DimensionLimitExceeded { .. })));
4371    }
4372
4373    #[test]
4374    fn test_dimension_limit_passes_when_within() {
4375        let limits = Limits::default().max_width(100).max_height(100);
4376        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4377
4378        let pixels = vec![128u8; 64 * 64 * 3];
4379        let result = encoder.encode_rgb(&pixels, 64, 64);
4380
4381        assert!(result.is_ok());
4382    }
4383
4384    #[test]
4385    fn test_allocation_limit() {
4386        let limits = Limits::default().max_alloc_bytes(1000); // Very small limit
4387        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4388
4389        let pixels = vec![128u8; 256 * 256 * 3];
4390        let result = encoder.encode_rgb(&pixels, 256, 256);
4391
4392        assert!(matches!(result, Err(Error::AllocationLimitExceeded { .. })));
4393    }
4394
4395    #[test]
4396    fn test_allocation_limit_passes_when_within() {
4397        let limits = Limits::default().max_alloc_bytes(10_000_000); // 10 MB limit
4398        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4399
4400        let pixels = vec![128u8; 64 * 64 * 3];
4401        let result = encoder.encode_rgb(&pixels, 64, 64);
4402
4403        assert!(result.is_ok());
4404    }
4405
4406    #[test]
4407    fn test_pixel_count_limit() {
4408        let limits = Limits::default().max_pixel_count(1000); // Very small limit
4409        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4410
4411        let pixels = vec![128u8; 64 * 64 * 3]; // 4096 pixels
4412        let result = encoder.encode_rgb(&pixels, 64, 64);
4413
4414        assert!(matches!(result, Err(Error::PixelCountExceeded { .. })));
4415    }
4416
4417    #[test]
4418    fn test_pixel_count_limit_passes_when_within() {
4419        let limits = Limits::default().max_pixel_count(10000); // 10000 pixels
4420        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4421
4422        let pixels = vec![128u8; 64 * 64 * 3]; // 4096 pixels
4423        let result = encoder.encode_rgb(&pixels, 64, 64);
4424
4425        assert!(result.is_ok());
4426    }
4427
4428    #[test]
4429    fn test_icc_profile_size_limit() {
4430        let limits = Limits::default().max_icc_profile_bytes(100);
4431        let encoder = Encoder::new(Preset::BaselineFastest)
4432            .limits(limits)
4433            .icc_profile(vec![0u8; 1000]); // 1000 byte ICC profile
4434
4435        let pixels = vec![128u8; 64 * 64 * 3];
4436        let result = encoder.encode_rgb(&pixels, 64, 64);
4437
4438        assert!(matches!(result, Err(Error::IccProfileTooLarge { .. })));
4439    }
4440
4441    #[test]
4442    fn test_icc_profile_size_limit_passes_when_within() {
4443        let limits = Limits::default().max_icc_profile_bytes(2000);
4444        let encoder = Encoder::new(Preset::BaselineFastest)
4445            .limits(limits)
4446            .icc_profile(vec![0u8; 1000]); // 1000 byte ICC profile
4447
4448        let pixels = vec![128u8; 64 * 64 * 3];
4449        let result = encoder.encode_rgb(&pixels, 64, 64);
4450
4451        assert!(result.is_ok());
4452    }
4453
4454    #[test]
4455    fn test_limits_disabled_by_default() {
4456        let encoder = Encoder::new(Preset::BaselineFastest);
4457        assert_eq!(encoder.limits, Limits::none());
4458    }
4459
4460    #[test]
4461    fn test_limits_has_limits() {
4462        assert!(!Limits::none().has_limits());
4463        assert!(Limits::default().max_width(100).has_limits());
4464        assert!(Limits::default().max_height(100).has_limits());
4465        assert!(Limits::default().max_pixel_count(1000).has_limits());
4466        assert!(Limits::default().max_alloc_bytes(1000).has_limits());
4467        assert!(Limits::default().max_icc_profile_bytes(1000).has_limits());
4468    }
4469
4470    // =========================================================================
4471    // Cancellation Tests
4472    // =========================================================================
4473
4474    #[test]
4475    fn test_cancellable_with_no_cancellation() {
4476        let encoder = Encoder::new(Preset::BaselineFastest);
4477        let pixels = vec![128u8; 64 * 64 * 3];
4478
4479        let result = encoder.encode_rgb_cancellable(&pixels, 64, 64, None, None);
4480
4481        assert!(result.is_ok());
4482    }
4483
4484    #[test]
4485    fn test_cancellable_immediate_cancel() {
4486        let encoder = Encoder::new(Preset::BaselineFastest);
4487        let pixels = vec![128u8; 64 * 64 * 3];
4488        let cancel = AtomicBool::new(true); // Already cancelled
4489
4490        let result = encoder.encode_rgb_cancellable(&pixels, 64, 64, Some(&cancel), None);
4491
4492        assert!(matches!(result, Err(Error::Cancelled)));
4493    }
4494
4495    #[test]
4496    fn test_cancellable_with_timeout() {
4497        let encoder = Encoder::new(Preset::BaselineFastest);
4498        let pixels = vec![128u8; 64 * 64 * 3];
4499
4500        // 10 second timeout - should complete well within this
4501        let result =
4502            encoder.encode_rgb_cancellable(&pixels, 64, 64, None, Some(Duration::from_secs(10)));
4503
4504        assert!(result.is_ok());
4505    }
4506
4507    #[test]
4508    fn test_cancellable_gray() {
4509        let encoder = Encoder::new(Preset::BaselineFastest);
4510        let pixels = vec![128u8; 64 * 64];
4511
4512        let result = encoder.encode_gray_cancellable(&pixels, 64, 64, None, None);
4513
4514        assert!(result.is_ok());
4515    }
4516
4517    #[test]
4518    fn test_cancellable_with_limits() {
4519        // Test that limits work in cancellable method too
4520        let limits = Limits::default().max_width(32);
4521        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4522
4523        let pixels = vec![128u8; 64 * 64 * 3];
4524        let result = encoder.encode_rgb_cancellable(&pixels, 64, 64, None, None);
4525
4526        assert!(matches!(result, Err(Error::DimensionLimitExceeded { .. })));
4527    }
4528
4529    #[test]
4530    fn test_cancellation_context_none() {
4531        let ctx = CancellationContext::none();
4532        assert!(ctx.check().is_ok());
4533    }
4534
4535    #[test]
4536    fn test_cancellation_context_with_cancel_flag() {
4537        use std::sync::atomic::Ordering;
4538
4539        let cancel = AtomicBool::new(false);
4540        let ctx = CancellationContext::new(Some(&cancel), None);
4541        assert!(ctx.check().is_ok());
4542
4543        cancel.store(true, Ordering::Relaxed);
4544        assert!(matches!(ctx.check(), Err(Error::Cancelled)));
4545    }
4546
4547    #[test]
4548    fn test_cancellation_context_with_expired_deadline() {
4549        // Create a deadline that's already passed
4550        let ctx = CancellationContext {
4551            cancel: None,
4552            deadline: Some(Instant::now() - Duration::from_secs(1)),
4553        };
4554
4555        assert!(matches!(ctx.check(), Err(Error::TimedOut)));
4556    }
4557
4558    #[test]
4559    fn test_dimension_exact_at_limit_passes() {
4560        // Dimensions exactly at limit should pass
4561        let limits = Limits::default().max_width(64).max_height(64);
4562        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4563
4564        let pixels = vec![128u8; 64 * 64 * 3];
4565        let result = encoder.encode_rgb(&pixels, 64, 64);
4566
4567        assert!(result.is_ok());
4568    }
4569
4570    #[test]
4571    fn test_pixel_count_exact_at_limit_passes() {
4572        // Pixel count exactly at limit should pass
4573        let limits = Limits::default().max_pixel_count(4096); // Exactly 64*64
4574        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4575
4576        let pixels = vec![128u8; 64 * 64 * 3];
4577        let result = encoder.encode_rgb(&pixels, 64, 64);
4578
4579        assert!(result.is_ok());
4580    }
4581
4582    #[test]
4583    fn test_multiple_limits_all_checked() {
4584        // Test that all limits are checked, not just the first
4585        let limits = Limits::default()
4586            .max_width(1000)
4587            .max_height(1000)
4588            .max_pixel_count(100); // This should fail
4589
4590        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4591        let pixels = vec![128u8; 64 * 64 * 3]; // 4096 pixels
4592
4593        let result = encoder.encode_rgb(&pixels, 64, 64);
4594        assert!(matches!(result, Err(Error::PixelCountExceeded { .. })));
4595    }
4596
4597    #[test]
4598    fn test_limits_with_grayscale() {
4599        let limits = Limits::default().max_pixel_count(100);
4600        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4601
4602        let pixels = vec![128u8; 64 * 64]; // Grayscale, 4096 pixels
4603        let result = encoder.encode_gray(&pixels, 64, 64);
4604
4605        assert!(matches!(result, Err(Error::PixelCountExceeded { .. })));
4606    }
4607
4608    #[test]
4609    fn test_estimate_resources_with_subsampling() {
4610        let encoder_444 = Encoder::new(Preset::BaselineBalanced).subsampling(Subsampling::S444);
4611        let encoder_420 = Encoder::new(Preset::BaselineBalanced).subsampling(Subsampling::S420);
4612
4613        let est_444 = encoder_444.estimate_resources(512, 512);
4614        let est_420 = encoder_420.estimate_resources(512, 512);
4615
4616        // 4:4:4 should use more memory than 4:2:0 (no chroma downsampling)
4617        assert!(
4618            est_444.peak_memory_bytes > est_420.peak_memory_bytes,
4619            "4:4:4 memory {} should exceed 4:2:0 memory {}",
4620            est_444.peak_memory_bytes,
4621            est_420.peak_memory_bytes
4622        );
4623    }
4624
4625    #[test]
4626    fn test_estimate_resources_block_count() {
4627        // With 4:2:0 subsampling (default): Y gets full blocks, chroma gets 1/4
4628        let encoder = Encoder::new(Preset::BaselineFastest);
4629
4630        // 64x64 image with 4:2:0:
4631        // Y blocks: 8x8 = 64
4632        // Chroma: 32x32 pixels, 4x4 blocks each = 16 per component
4633        // Total: 64 + 16 + 16 = 96
4634        let estimate = encoder.estimate_resources(64, 64);
4635        assert_eq!(estimate.block_count, 96);
4636
4637        // With 4:4:4 subsampling: all components get full blocks
4638        let encoder_444 = Encoder::new(Preset::BaselineFastest).subsampling(Subsampling::S444);
4639        let estimate_444 = encoder_444.estimate_resources(64, 64);
4640        // 64 blocks * 3 components = 192
4641        assert_eq!(estimate_444.block_count, 192);
4642    }
4643
4644    #[test]
4645    fn test_cancellable_gray_with_limits() {
4646        let limits = Limits::default().max_width(32);
4647        let encoder = Encoder::new(Preset::BaselineFastest).limits(limits);
4648
4649        let pixels = vec![128u8; 64 * 64];
4650        let result = encoder.encode_gray_cancellable(&pixels, 64, 64, None, None);
4651
4652        assert!(matches!(result, Err(Error::DimensionLimitExceeded { .. })));
4653    }
4654}