jpegli/
encode.rs

//! JPEG encoder implementation.
//!
//! This module provides the main encoder interface for creating JPEG images.
4
5use crate::adaptive_quant::compute_aq_strength_map;
6use crate::alloc::{
7    checked_size_2d, try_alloc_filled, try_alloc_zeroed_f32, validate_dimensions,
8    DEFAULT_MAX_PIXELS,
9};
10use crate::color;
11use crate::consts::{
12    DCT_BLOCK_SIZE, DCT_SIZE, ICC_PROFILE_SIGNATURE, JPEG_NATURAL_ORDER, JPEG_ZIGZAG_ORDER,
13    MARKER_APP14, MARKER_APP2, MARKER_DHT, MARKER_DQT, MARKER_DRI, MARKER_EOI, MARKER_SOF0,
14    MARKER_SOF2, MARKER_SOI, MARKER_SOS, MAX_ICC_BYTES_PER_MARKER, XYB_ICC_PROFILE,
15};
16use crate::dct::forward_dct_8x8;
17use crate::entropy::{self, EntropyEncoder};
18use crate::error::{Error, Result};
19use crate::huffman::HuffmanEncodeTable;
20use crate::huffman_opt::{
21    FrequencyCounter, OptimizedHuffmanTables, OptimizedTable, ProgressiveTokenBuffer,
22};
23use crate::quant::{self, Quality, QuantTable, ZeroBiasParams};
24use crate::types::{ColorSpace, JpegMode, PixelFormat, Subsampling};
25use crate::xyb::srgb_to_scaled_xyb;
26
27#[cfg(feature = "hybrid-trellis")]
28use crate::hybrid::{hybrid_quantize_block, StandardHuffmanTables};
29
/// Parameters describing one scan of a progressive JPEG.
#[derive(Debug, Clone)]
struct ProgressiveScan {
    /// Indices of the components coded in this scan (0 = Y, 1 = Cb, 2 = Cr).
    components: Vec<u8>,
    /// First coefficient of the spectral selection (0 = DC, 1-63 = AC).
    ss: u8,
    /// Last coefficient of the spectral selection (0-63).
    se: u8,
    /// Successive-approximation high bit (bit position of the previous pass).
    ah: u8,
    /// Successive-approximation low bit (bit position of the current pass).
    al: u8,
}
44
45/// Encoder configuration.
46#[derive(Debug, Clone)]
47pub struct EncoderConfig {
48    /// Image width
49    pub width: u32,
50    /// Image height
51    pub height: u32,
52    /// Input pixel format
53    pub pixel_format: PixelFormat,
54    /// Quality setting
55    pub quality: Quality,
56    /// Encoding mode
57    pub mode: JpegMode,
58    /// Chroma subsampling
59    pub subsampling: Subsampling,
60    /// Use XYB color space
61    pub use_xyb: bool,
62    /// Restart interval (0 = disabled)
63    pub restart_interval: u16,
64    /// Use optimized Huffman tables
65    pub optimize_huffman: bool,
66    /// Hybrid quantization configuration (jpegli AQ + mozjpeg trellis)
67    /// Requires the `hybrid-trellis` feature
68    #[cfg(feature = "hybrid-trellis")]
69    pub hybrid_config: crate::hybrid_config::HybridConfig,
70    /// Custom AQ map (optional). If None, computed automatically.
71    /// Allows pre-scaling the AQ map for size control.
72    #[cfg(feature = "hybrid-trellis")]
73    pub custom_aq_map: Option<crate::adaptive_quant::AQStrengthMap>,
74}
75
76impl Default for EncoderConfig {
77    fn default() -> Self {
78        Self {
79            width: 0,
80            height: 0,
81            pixel_format: PixelFormat::Rgb,
82            quality: Quality::default(),
83            mode: JpegMode::Baseline,
84            // Use 4:4:4 - this is what the encoder actually supports currently
85            subsampling: Subsampling::S444,
86            use_xyb: false,
87            restart_interval: 0,
88            // Match C++ jpegli default: optimize_coding = true
89            optimize_huffman: true,
90            #[cfg(feature = "hybrid-trellis")]
91            hybrid_config: crate::hybrid_config::HybridConfig::disabled(),
92            #[cfg(feature = "hybrid-trellis")]
93            custom_aq_map: None,
94        }
95    }
96}
97
/// State shared by all blocks when quantizing in hybrid trellis mode.
///
/// Bundles the pre-built Huffman tables with the hybrid configuration so both
/// are available during hybrid quantization (jpegli AQ + mozjpeg trellis).
#[cfg(feature = "hybrid-trellis")]
struct HybridQuantContext {
    /// Huffman tables handed to the trellis quantizer.
    huff_tables: StandardHuffmanTables,
    /// Hybrid settings from which the per-block trellis configuration is derived.
    config: crate::hybrid_config::HybridConfig,
}
107
#[cfg(feature = "hybrid-trellis")]
impl HybridQuantContext {
    /// Builds a context for `config`, constructing the Huffman tables once up front.
    fn new(config: crate::hybrid_config::HybridConfig) -> Self {
        let huff_tables = StandardHuffmanTables::new();
        Self {
            huff_tables,
            config,
        }
    }

    /// Quantizes one block with hybrid AQ + trellis.
    ///
    /// # Arguments
    /// * `dct_coeffs` - DCT coefficients of the block
    /// * `quant` - Quantization table
    /// * `aq_strength` - AQ strength for this block
    /// * `dampen` - Quality-based AQ dampen factor (0-1)
    /// * `is_luma` - `true` for the Y component, `false` for Cb/Cr
    ///
    /// Returns the quantized coefficients of the block.
    fn quantize_block(
        &self,
        dct_coeffs: &[f32; DCT_BLOCK_SIZE],
        quant: &[u16; DCT_BLOCK_SIZE],
        aq_strength: f32,
        dampen: f32,
        is_luma: bool,
    ) -> [i16; DCT_BLOCK_SIZE] {
        // The trellis configuration is derived per block from the AQ strength and
        // the hybrid settings. The last argument is `!is_luma` (presumably an
        // "is chroma" flag - confirm against `to_trellis_config`).
        let is_chroma = !is_luma;
        let trellis_config = self
            .config
            .to_trellis_config(aq_strength, dampen, is_chroma);

        // Luma and chroma use separate AC Huffman tables.
        let tables = &self.huff_tables;
        let ac_table = if is_chroma {
            &tables.chroma_ac
        } else {
            &tables.luma_ac
        };

        hybrid_quantize_block(dct_coeffs, quant, aq_strength, ac_table, &trellis_config)
    }
}
146
147/// JPEG encoder.
148pub struct Encoder {
149    config: EncoderConfig,
150}
151
152impl Encoder {
153    /// Creates a new encoder with default settings.
154    #[must_use]
155    pub fn new() -> Self {
156        Self {
157            config: EncoderConfig::default(),
158        }
159    }
160
161    /// Creates an encoder from configuration.
162    #[must_use]
163    pub fn from_config(config: EncoderConfig) -> Self {
164        Self { config }
165    }
166
167    /// Sets the image width.
168    #[must_use]
169    pub fn width(mut self, width: u32) -> Self {
170        self.config.width = width;
171        self
172    }
173
174    /// Sets the image height.
175    #[must_use]
176    pub fn height(mut self, height: u32) -> Self {
177        self.config.height = height;
178        self
179    }
180
181    /// Sets the pixel format.
182    #[must_use]
183    pub fn pixel_format(mut self, format: PixelFormat) -> Self {
184        self.config.pixel_format = format;
185        self
186    }
187
188    /// Sets the quality.
189    #[must_use]
190    pub fn quality(mut self, quality: Quality) -> Self {
191        self.config.quality = quality;
192        self
193    }
194
195    /// Sets the encoding mode.
196    #[must_use]
197    pub fn mode(mut self, mode: JpegMode) -> Self {
198        self.config.mode = mode;
199        self
200    }
201
202    /// Sets chroma subsampling.
203    #[must_use]
204    pub fn subsampling(mut self, subsampling: Subsampling) -> Self {
205        self.config.subsampling = subsampling;
206        self
207    }
208
209    /// Enables XYB-optimized encoding mode.
210    ///
211    /// XYB mode encodes images using the perceptually-optimized XYB color space
212    /// from JPEG XL. This provides better quality at the same file size compared
213    /// to standard YCbCr encoding.
214    ///
215    /// The implementation includes:
216    /// 1. Full sRGB → linear RGB → XYB color space conversion
217    /// 2. XYB value scaling for optimal quantization
218    /// 3. Embedded ICC profile for decoder color interpretation
219    /// 4. Blue channel subsampling (R:2×2, G:2×2, B:1×1)
220    /// 5. Separate XYB-optimized quant tables per component
221    ///
222    /// The ICC profile allows any ICC-aware decoder (including djpegli, ImageMagick,
223    /// and most image viewers) to correctly interpret the XYB values back to sRGB.
224    ///
225    /// Note: Without ICC profile support in the decoder, images will display with
226    /// incorrect colors. Use standard YCbCr mode for maximum compatibility.
227    #[must_use]
228    pub fn use_xyb(mut self, enable: bool) -> Self {
229        self.config.use_xyb = enable;
230        self
231    }
232
233    /// Sets the restart interval.
234    #[must_use]
235    pub fn restart_interval(mut self, interval: u16) -> Self {
236        self.config.restart_interval = interval;
237        self
238    }
239
240    /// Enables optimized Huffman tables.
241    #[must_use]
242    pub fn optimize_huffman(mut self, enable: bool) -> Self {
243        self.config.optimize_huffman = enable;
244        self
245    }
246
247    /// Enable hybrid quantization (jpegli AQ + mozjpeg trellis).
248    ///
249    /// This combines jpegli's adaptive quantization (which determines WHERE
250    /// to spend bits based on image content) with mozjpeg's trellis quantization
251    /// (which optimizes HOW to spend bits via rate-distortion optimization).
252    ///
253    /// Requires the `hybrid-trellis` feature.
254    #[cfg(feature = "hybrid-trellis")]
255    #[must_use]
256    pub fn hybrid_trellis(mut self, enable: bool) -> Self {
257        if enable {
258            self.config.hybrid_config = crate::hybrid_config::HybridConfig::default();
259        } else {
260            self.config.hybrid_config = crate::hybrid_config::HybridConfig::disabled();
261        }
262        self
263    }
264
265    /// Set custom hybrid quantization configuration.
266    ///
267    /// Allows fine-tuning all hybrid AQ+trellis parameters.
268    /// See [`HybridConfig`](crate::hybrid_config::HybridConfig) for available options.
269    ///
270    /// Requires the `hybrid-trellis` feature.
271    #[cfg(feature = "hybrid-trellis")]
272    #[must_use]
273    pub fn hybrid_config(mut self, config: crate::hybrid_config::HybridConfig) -> Self {
274        self.config.hybrid_config = config;
275        self
276    }
277
278    /// Sets a custom AQ (adaptive quantization) strength map.
279    ///
280    /// This allows pre-scaling the AQ map to control file size. When the AQ map
281    /// is scaled up, more bits are allocated to complex regions (larger files).
282    /// When scaled down, fewer bits are allocated (smaller files).
283    ///
284    /// If not provided, the AQ map is computed automatically from the image.
285    ///
286    /// # Example
287    /// ```ignore
288    /// use jpegli::adaptive_quant::compute_aq_strength_map;
289    ///
290    /// // Compute AQ map from Y plane
291    /// let mut aq_map = compute_aq_strength_map(&y_plane, width, height, 8);
292    ///
293    /// // Scale down to reduce file size by ~16%
294    /// let scale = aq_map.scale_for_size_reduction(16.0);
295    /// aq_map.scale(scale);
296    ///
297    /// // Use the scaled map
298    /// let jpeg = Encoder::new()
299    ///     .width(width as u32)
300    ///     .height(height as u32)
301    ///     .hybrid_config(HybridConfig::default())
302    ///     .aq_map(aq_map)
303    ///     .encode(&pixels)?;
304    /// ```
305    ///
306    /// Requires the `hybrid-trellis` feature.
307    #[cfg(feature = "hybrid-trellis")]
308    #[must_use]
309    pub fn aq_map(mut self, map: crate::adaptive_quant::AQStrengthMap) -> Self {
310        self.config.custom_aq_map = Some(map);
311        self
312    }
313
314    /// Validates the configuration.
315    fn validate(&self) -> Result<()> {
316        // Use validate_dimensions for comprehensive checks (zero, max dimension, max pixels)
317        validate_dimensions(self.config.width, self.config.height, DEFAULT_MAX_PIXELS)?;
318        Ok(())
319    }
320
321    /// Encodes the image data.
322    pub fn encode(&self, data: &[u8]) -> Result<Vec<u8>> {
323        self.validate()?;
324
325        // Calculate expected size with overflow checking
326        let expected_size =
327            checked_size_2d(self.config.width as usize, self.config.height as usize)?;
328        let expected_size =
329            checked_size_2d(expected_size, self.config.pixel_format.bytes_per_pixel())?;
330
331        if data.len() != expected_size {
332            return Err(Error::InvalidBufferSize {
333                expected: expected_size,
334                actual: data.len(),
335            });
336        }
337
338        // For now, implement baseline encoding only
339        match self.config.mode {
340            JpegMode::Baseline => self.encode_baseline(data),
341            JpegMode::Progressive => self.encode_progressive(data),
342            _ => Err(Error::UnsupportedFeature {
343                feature: "extended/lossless encoding",
344            }),
345        }
346    }
347
348    /// Encodes as baseline JPEG.
349    fn encode_baseline(&self, data: &[u8]) -> Result<Vec<u8>> {
350        let mut output = Vec::with_capacity(data.len() / 4);
351
352        if self.config.use_xyb {
353            self.encode_baseline_xyb(data, &mut output)
354        } else {
355            self.encode_baseline_ycbcr(data, &mut output)
356        }
357    }
358
359    /// Encodes using standard YCbCr color space.
360    fn encode_baseline_ycbcr(&self, data: &[u8], output: &mut Vec<u8>) -> Result<Vec<u8>> {
361        // Convert to YCbCr using f32 precision throughout (matches C++ jpegli)
362        let (y_plane, cb_plane, cr_plane) = self.convert_to_ycbcr_f32(data)?;
363
364        let width = self.config.width as usize;
365        let height = self.config.height as usize;
366
367        // Handle chroma subsampling
368        let (cb_plane_final, cr_plane_final, c_width, c_height) = match self.config.subsampling {
369            Subsampling::S420 => {
370                // 4:2:0: Downsample both Cb and Cr by 2x2
371                let cb_down = self.downsample_2x2_f32(&cb_plane, width, height)?;
372                let cr_down = self.downsample_2x2_f32(&cr_plane, width, height)?;
373                let c_w = (width + 1) / 2;
374                let c_h = (height + 1) / 2;
375                (cb_down, cr_down, c_w, c_h)
376            }
377            Subsampling::S422 => {
378                // 4:2:2: Downsample horizontally only
379                let cb_down = self.downsample_2x1_f32(&cb_plane, width, height)?;
380                let cr_down = self.downsample_2x1_f32(&cr_plane, width, height)?;
381                let c_w = (width + 1) / 2;
382                (cb_down, cr_down, c_w, height)
383            }
384            Subsampling::S440 => {
385                // 4:4:0: Downsample vertically only
386                let cb_down = self.downsample_1x2_f32(&cb_plane, width, height)?;
387                let cr_down = self.downsample_1x2_f32(&cr_plane, width, height)?;
388                let c_h = (height + 1) / 2;
389                (cb_down, cr_down, width, c_h)
390            }
391            Subsampling::S444 => {
392                // 4:4:4: No subsampling
393                (cb_plane, cr_plane, width, height)
394            }
395        };
396
397        // Generate quantization tables (3 separate tables like C++ cjpegli)
398        // Apply 4:2:0 quality compensation if using 4:2:0 subsampling
399        let is_420 = self.config.subsampling == Subsampling::S420;
400        let y_quant =
401            quant::generate_quant_table(self.config.quality, 0, ColorSpace::YCbCr, false, is_420);
402        let cb_quant =
403            quant::generate_quant_table(self.config.quality, 1, ColorSpace::YCbCr, false, is_420);
404        let cr_quant =
405            quant::generate_quant_table(self.config.quality, 2, ColorSpace::YCbCr, false, is_420);
406
407        // Quantize all blocks first (needed for both standard and optimized encoding)
408        let (y_blocks, cb_blocks, cr_blocks) = self.quantize_all_blocks_subsampled(
409            &y_plane,
410            width,
411            height,
412            &cb_plane_final,
413            &cr_plane_final,
414            c_width,
415            c_height,
416            &y_quant,
417            &cb_quant,
418            &cr_quant,
419        )?;
420        let is_color = self.config.pixel_format != PixelFormat::Gray;
421
422        // Write JPEG structure
423        self.write_header(output)?;
424        self.write_quant_tables(output, &y_quant, &cb_quant, &cr_quant)?;
425        self.write_frame_header(output)?;
426
427        // For optimized Huffman, build tables from block frequencies before writing DHT
428        let scan_data = if self.config.optimize_huffman {
429            let tables =
430                self.build_optimized_tables(&y_blocks, &cb_blocks, &cr_blocks, is_color)?;
431            self.write_huffman_tables_optimized(output, &tables)?;
432
433            if self.config.restart_interval > 0 {
434                self.write_restart_interval(output)?;
435            }
436            self.write_scan_header(output)?;
437
438            // Encode with optimized tables
439            self.encode_with_tables(&y_blocks, &cb_blocks, &cr_blocks, is_color, &tables)?
440        } else {
441            self.write_huffman_tables(output)?;
442
443            if self.config.restart_interval > 0 {
444                self.write_restart_interval(output)?;
445            }
446            self.write_scan_header(output)?;
447
448            // Encode with standard tables
449            self.encode_blocks_standard(&y_blocks, &cb_blocks, &cr_blocks, is_color)?
450        };
451
452        output.extend_from_slice(&scan_data);
453
454        // Write EOI
455        output.push(0xFF);
456        output.push(MARKER_EOI);
457
458        Ok(std::mem::take(output))
459    }
460
461    /// Encodes using XYB mode (perceptually optimized color space).
462    ///
463    /// XYB encoding pipeline:
464    /// 1. sRGB → linear RGB → XYB → scaled XYB (values in [0, 1])
465    /// 2. Multiply by 255 for JPEG sample range
466    /// 3. Level shift by subtracting 128 for DCT
467    fn encode_baseline_xyb(&self, data: &[u8], output: &mut Vec<u8>) -> Result<Vec<u8>> {
468        let width = self.config.width as usize;
469        let height = self.config.height as usize;
470
471        // Convert sRGB to scaled XYB (full color conversion pipeline)
472        let (x_plane, y_plane, b_plane) = self.convert_to_scaled_xyb(data)?;
473
474        // Downsample B channel (XYB subsamples B to 1/4 resolution)
475        let b_downsampled = self.downsample_2x2_f32(&b_plane, width, height)?;
476        let b_width = (width + 1) / 2;
477        let b_height = (height + 1) / 2;
478
479        // Generate XYB quantization tables (one per component)
480        // XYB mode doesn't use 4:2:0 quality compensation
481        let x_quant = quant::generate_quant_table(
482            self.config.quality,
483            0, // X component
484            ColorSpace::Rgb,
485            true,
486            false, // is_420
487        );
488        let y_quant = quant::generate_quant_table(
489            self.config.quality,
490            1, // Y component (luma-like)
491            ColorSpace::Rgb,
492            true,
493            false, // is_420
494        );
495        let b_quant = quant::generate_quant_table(
496            self.config.quality,
497            2, // B component
498            ColorSpace::Rgb,
499            true,
500            false, // is_420
501        );
502
503        // Compute AQ map from Y plane (XYB's Y is the luma-like channel)
504        // Scale Y plane from [0,1] to [0,255] range for AQ computation
505        let y_plane_scaled: Vec<f32> = y_plane.iter().map(|&v| v * 255.0).collect();
506        let y_quant_01 = y_quant.values[1];
507        #[cfg(feature = "hybrid-trellis")]
508        let aq_map = if let Some(ref custom) = self.config.custom_aq_map {
509            custom.clone()
510        } else {
511            compute_aq_strength_map(&y_plane_scaled, width, height, y_quant_01)
512        };
513        #[cfg(not(feature = "hybrid-trellis"))]
514        let aq_map = compute_aq_strength_map(&y_plane_scaled, width, height, y_quant_01);
515
516        // Zero-bias parameters for XYB (use YCbCr tables as approximation)
517        // X and Y are luma-like (full-res), B is chroma-like (downsampled)
518        let effective_distance = quant::quant_vals_to_distance(&x_quant, &y_quant, &b_quant);
519        let x_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 0); // X uses luma params
520        let y_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 0); // Y uses luma params
521        let b_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 1); // B uses chroma params
522
523        // Create hybrid quantization context if enabled
524        #[cfg(feature = "hybrid-trellis")]
525        let hybrid_ctx = if self.config.hybrid_config.enabled {
526            Some(HybridQuantContext::new(self.config.hybrid_config))
527        } else {
528            None
529        };
530
531        // Write JPEG structure for XYB mode (no JFIF, just ICC profile)
532        self.write_header_xyb(output)?;
533        // Write APP14 Adobe marker for RGB colorspace (required by some decoders)
534        // See: https://github.com/google/jpegli/pull/135
535        self.write_app14_adobe(output, 0)?; // 0 = RGB (no transform)
536                                            // Write XYB ICC profile so decoders can interpret the colors correctly
537        self.write_icc_profile(output, &XYB_ICC_PROFILE)?;
538        self.write_quant_tables_xyb(output, &x_quant, &y_quant, &b_quant)?;
539        self.write_frame_header_xyb(output)?;
540
541        // For optimized Huffman, quantize all blocks first to collect frequencies
542        let scan_data = if self.config.optimize_huffman {
543            #[cfg(feature = "hybrid-trellis")]
544            let (x_blocks, y_blocks, b_blocks) = self.quantize_all_blocks_xyb_with_aq(
545                &x_plane,
546                &y_plane,
547                &b_downsampled,
548                width,
549                height,
550                b_width,
551                b_height,
552                &x_quant,
553                &y_quant,
554                &b_quant,
555                &aq_map,
556                hybrid_ctx.as_ref(),
557            );
558            #[cfg(not(feature = "hybrid-trellis"))]
559            let (x_blocks, y_blocks, b_blocks) = self.quantize_all_blocks_xyb_with_aq_simple(
560                &x_plane,
561                &y_plane,
562                &b_downsampled,
563                width,
564                height,
565                b_width,
566                b_height,
567                &x_quant,
568                &y_quant,
569                &b_quant,
570                &aq_map,
571                &x_zero_bias,
572                &y_zero_bias,
573                &b_zero_bias,
574            );
575            let (dc_table, ac_table) =
576                self.build_optimized_tables_xyb(&x_blocks, &y_blocks, &b_blocks)?;
577            self.write_huffman_tables_xyb_optimized(output, &dc_table, &ac_table);
578
579            if self.config.restart_interval > 0 {
580                self.write_restart_interval(output)?;
581            }
582            self.write_scan_header_xyb(output)?;
583
584            // Encode with optimized tables
585            self.encode_with_tables_xyb(&x_blocks, &y_blocks, &b_blocks, &dc_table, &ac_table)?
586        } else {
587            self.write_huffman_tables(output)?;
588
589            if self.config.restart_interval > 0 {
590                self.write_restart_interval(output)?;
591            }
592            self.write_scan_header_xyb(output)?;
593
594            // Encode with standard tables
595            self.encode_scan_xyb_float(
596                &x_plane,
597                &y_plane,
598                &b_downsampled,
599                width,
600                height,
601                b_width,
602                b_height,
603                &x_quant,
604                &y_quant,
605                &b_quant,
606            )?
607        };
608
609        output.extend_from_slice(&scan_data);
610
611        // Write EOI
612        output.push(0xFF);
613        output.push(MARKER_EOI);
614
615        Ok(std::mem::take(output))
616    }
617
618    /// Encodes progressive JPEG using XYB color space.
619    ///
620    /// This uses the same progressive scan structure as YCbCr encoding
621    /// but with XYB color conversion and appropriate headers (ICC profile, APP14).
622    fn encode_progressive_xyb(&self, data: &[u8]) -> Result<Vec<u8>> {
623        let mut output = Vec::with_capacity(data.len() / 4);
624
625        // Convert sRGB to scaled XYB
626        let (x_plane, y_plane, b_plane) = self.convert_to_scaled_xyb(data)?;
627
628        // XYB progressive uses 4:4:4 (no B channel downsampling unlike baseline XYB)
629        // This is because progressive scans work best with same-size components
630
631        // Generate XYB quantization tables
632        let x_quant =
633            quant::generate_quant_table(self.config.quality, 0, ColorSpace::Rgb, true, false);
634        let y_quant =
635            quant::generate_quant_table(self.config.quality, 1, ColorSpace::Rgb, true, false);
636        let b_quant =
637            quant::generate_quant_table(self.config.quality, 2, ColorSpace::Rgb, true, false);
638
639        // Quantize all blocks for progressive encoding
640        // Use X, Y, B as if they were Y, Cb, Cr for the progressive structure
641        let (x_blocks, y_blocks, b_blocks) =
642            self.quantize_all_blocks(&x_plane, &y_plane, &b_plane, &x_quant, &y_quant, &b_quant)?;
643        let is_color = self.config.pixel_format != PixelFormat::Gray;
644
645        // Write XYB-specific headers
646        self.write_header_xyb(&mut output)?;
647        // Write APP14 Adobe marker for RGB (required by some decoders)
648        self.write_app14_adobe(&mut output, 0)?; // 0 = RGB (no transform)
649                                                 // Write XYB ICC profile
650        self.write_icc_profile(&mut output, &XYB_ICC_PROFILE)?;
651        // Write quantization tables
652        self.write_quant_tables(&mut output, &x_quant, &y_quant, &b_quant)?;
653        // Write SOF2 frame header for progressive
654        self.write_frame_header(&mut output)?;
655
656        // Use standard Huffman tables (optimized tables could be added later)
657        self.write_huffman_tables(&mut output)?;
658        let tables: Option<OptimizedHuffmanTables> = None;
659
660        if self.config.restart_interval > 0 {
661            self.write_restart_interval(&mut output)?;
662        }
663
664        // Get progressive scan script
665        let scans = self.get_progressive_scan_script(is_color);
666
667        // Encode each scan (reusing the YCbCr progressive scan logic)
668        for scan in &scans {
669            self.write_progressive_scan_header(&mut output, scan, is_color)?;
670            let scan_data = self.encode_progressive_scan(
671                &x_blocks, &y_blocks, &b_blocks, scan, is_color, &tables,
672            )?;
673            output.extend_from_slice(&scan_data);
674        }
675
676        // Write EOI
677        output.push(0xFF);
678        output.push(MARKER_EOI);
679
680        Ok(output)
681    }
682
683    /// Converts input data to scaled XYB planes.
684    ///
685    /// Performs the full conversion: sRGB u8 → linear RGB → XYB → scaled XYB
686    /// Output values are in [0, 1] range, ready to be scaled to [0, 255] for JPEG.
687    fn convert_to_scaled_xyb(&self, data: &[u8]) -> Result<(Vec<f32>, Vec<f32>, Vec<f32>)> {
688        let width = self.config.width as usize;
689        let height = self.config.height as usize;
690        let num_pixels = checked_size_2d(width, height)?;
691
692        let mut x_plane = try_alloc_zeroed_f32(num_pixels, "allocating XYB X plane")?;
693        let mut y_plane = try_alloc_zeroed_f32(num_pixels, "allocating XYB Y plane")?;
694        let mut b_plane = try_alloc_zeroed_f32(num_pixels, "allocating XYB B plane")?;
695
696        match self.config.pixel_format {
697            PixelFormat::Rgb => {
698                for i in 0..num_pixels {
699                    let (x, y, b) =
700                        srgb_to_scaled_xyb(data[i * 3], data[i * 3 + 1], data[i * 3 + 2]);
701                    x_plane[i] = x;
702                    y_plane[i] = y;
703                    b_plane[i] = b;
704                }
705            }
706            PixelFormat::Rgba => {
707                for i in 0..num_pixels {
708                    let (x, y, b) =
709                        srgb_to_scaled_xyb(data[i * 4], data[i * 4 + 1], data[i * 4 + 2]);
710                    x_plane[i] = x;
711                    y_plane[i] = y;
712                    b_plane[i] = b;
713                }
714            }
715            PixelFormat::Gray => {
716                // Grayscale: R=G=B
717                for i in 0..num_pixels {
718                    let (x, y, b) = srgb_to_scaled_xyb(data[i], data[i], data[i]);
719                    x_plane[i] = x;
720                    y_plane[i] = y;
721                    b_plane[i] = b;
722                }
723            }
724            PixelFormat::Bgr => {
725                for i in 0..num_pixels {
726                    let (x, y, b) =
727                        srgb_to_scaled_xyb(data[i * 3 + 2], data[i * 3 + 1], data[i * 3]);
728                    x_plane[i] = x;
729                    y_plane[i] = y;
730                    b_plane[i] = b;
731                }
732            }
733            PixelFormat::Bgra => {
734                for i in 0..num_pixels {
735                    let (x, y, b) =
736                        srgb_to_scaled_xyb(data[i * 4 + 2], data[i * 4 + 1], data[i * 4]);
737                    x_plane[i] = x;
738                    y_plane[i] = y;
739                    b_plane[i] = b;
740                }
741            }
742            PixelFormat::Cmyk => {
743                return Err(Error::UnsupportedFeature {
744                    feature: "CMYK with XYB mode",
745                });
746            }
747        }
748
749        Ok((x_plane, y_plane, b_plane))
750    }
751
752    /// Downsamples a float plane by 2x2 (box filter averaging).
753    fn downsample_2x2_f32(&self, plane: &[f32], width: usize, height: usize) -> Result<Vec<f32>> {
754        let new_width = (width + 1) / 2;
755        let new_height = (height + 1) / 2;
756        let result_size = checked_size_2d(new_width, new_height)?;
757        let mut result = try_alloc_zeroed_f32(result_size, "allocating downsampled plane")?;
758
759        for y in 0..new_height {
760            for x in 0..new_width {
761                let x0 = x * 2;
762                let y0 = y * 2;
763                let x1 = (x0 + 1).min(width - 1);
764                let y1 = (y0 + 1).min(height - 1);
765
766                let p00 = plane[y0 * width + x0];
767                let p10 = plane[y0 * width + x1];
768                let p01 = plane[y1 * width + x0];
769                let p11 = plane[y1 * width + x1];
770
771                result[y * new_width + x] = (p00 + p10 + p01 + p11) * 0.25;
772            }
773        }
774
775        Ok(result)
776    }
777
778    /// Downsamples a float plane by 2x1 (horizontal only, box filter averaging).
779    fn downsample_2x1_f32(&self, plane: &[f32], width: usize, height: usize) -> Result<Vec<f32>> {
780        let new_width = (width + 1) / 2;
781        let result_size = checked_size_2d(new_width, height)?;
782        let mut result = try_alloc_zeroed_f32(result_size, "allocating downsampled plane")?;
783
784        for y in 0..height {
785            for x in 0..new_width {
786                let x0 = x * 2;
787                let x1 = (x0 + 1).min(width - 1);
788
789                let p0 = plane[y * width + x0];
790                let p1 = plane[y * width + x1];
791
792                result[y * new_width + x] = (p0 + p1) * 0.5;
793            }
794        }
795
796        Ok(result)
797    }
798
799    /// Downsamples a float plane by 1x2 (vertical only, box filter averaging).
800    fn downsample_1x2_f32(&self, plane: &[f32], width: usize, height: usize) -> Result<Vec<f32>> {
801        let new_height = (height + 1) / 2;
802        let result_size = checked_size_2d(width, new_height)?;
803        let mut result = try_alloc_zeroed_f32(result_size, "allocating downsampled plane")?;
804
805        for y in 0..new_height {
806            for x in 0..width {
807                let y0 = y * 2;
808                let y1 = (y0 + 1).min(height - 1);
809
810                let p0 = plane[y0 * width + x];
811                let p1 = plane[y1 * width + x];
812
813                result[y * width + x] = (p0 + p1) * 0.5;
814            }
815        }
816
817        Ok(result)
818    }
819
820    /// Encodes as progressive JPEG (level 2, matching cjpegli default).
821    ///
822    /// Progressive level 2 uses the following scan script:
823    /// 1. DC first: Ss=0, Se=0, Ah=0, Al=0 (DC only, full precision)
824    /// 2. AC 1-2: Ss=1, Se=2, Ah=0, Al=0 (low AC, full precision)
825    /// 3. AC 3-63 first: Ss=3, Se=63, Ah=0, Al=2 (high AC, top bits)
826    /// 4. AC 3-63 refine: Ss=3, Se=63, Ah=2, Al=1 (bit 1 refinement)
827    /// 5. AC 3-63 refine: Ss=3, Se=63, Ah=1, Al=0 (bit 0 refinement)
828    fn encode_progressive(&self, data: &[u8]) -> Result<Vec<u8>> {
829        // XYB progressive mode - route to specialized encoder
830        if self.config.use_xyb {
831            return self.encode_progressive_xyb(data);
832        }
833
834        // Use tokenization-based approach when optimizing Huffman tables
835        if self.config.optimize_huffman {
836            return self.encode_progressive_optimized(data);
837        }
838
839        let mut output = Vec::with_capacity(data.len() / 4);
840
841        // Convert to YCbCr using f32 precision
842        let (y_plane, cb_plane, cr_plane) = self.convert_to_ycbcr_f32(data)?;
843
844        // Generate quantization tables (3 separate tables like C++ cjpegli)
845        // Progressive mode uses 4:4:4, so is_420 = false
846        let y_quant =
847            quant::generate_quant_table(self.config.quality, 0, ColorSpace::YCbCr, false, false);
848        let cb_quant =
849            quant::generate_quant_table(self.config.quality, 1, ColorSpace::YCbCr, false, false);
850        let cr_quant =
851            quant::generate_quant_table(self.config.quality, 2, ColorSpace::YCbCr, false, false);
852
853        // Quantize all blocks to get full-precision coefficients
854        let (y_blocks, cb_blocks, cr_blocks) = self.quantize_all_blocks(
855            &y_plane, &cb_plane, &cr_plane, &y_quant, &cb_quant, &cr_quant,
856        )?;
857        let is_color = self.config.pixel_format != PixelFormat::Gray;
858
859        // Write JPEG structure
860        self.write_header(&mut output)?;
861        self.write_quant_tables(&mut output, &y_quant, &cb_quant, &cr_quant)?;
862        self.write_frame_header(&mut output)?; // Uses SOF2 for progressive
863
864        // For non-optimized progressive, use standard Huffman tables
865        self.write_huffman_tables(&mut output)?;
866        let tables: Option<OptimizedHuffmanTables> = None;
867
868        if self.config.restart_interval > 0 {
869            self.write_restart_interval(&mut output)?;
870        }
871
872        // Define progressive scan script (level 2)
873        // For 4:4:4 (no subsampling), DC can be interleaved
874        let scans = self.get_progressive_scan_script(is_color);
875
876        // Encode each scan
877        for scan in &scans {
878            // Write SOS header for this scan
879            self.write_progressive_scan_header(&mut output, scan, is_color)?;
880
881            // Encode the scan data
882            let scan_data = self.encode_progressive_scan(
883                &y_blocks, &cb_blocks, &cr_blocks, scan, is_color, &tables,
884            )?;
885            output.extend_from_slice(&scan_data);
886        }
887
888        // Write EOI
889        output.push(0xFF);
890        output.push(MARKER_EOI);
891
892        Ok(output)
893    }
894
895    /// Encodes progressive JPEG with optimized Huffman tables using two-pass tokenization.
896    ///
897    /// This approach:
898    /// 1. Tokenizes all scans first to collect actual symbol usage
899    /// 2. Builds histograms from actual tokens (not estimated baseline statistics)
900    /// 3. Clusters similar histograms to minimize table overhead
901    /// 4. Generates optimal Huffman tables from clustered histograms
902    /// 5. Replays tokens with optimized tables
903    fn encode_progressive_optimized(&self, data: &[u8]) -> Result<Vec<u8>> {
904        let mut output = Vec::with_capacity(data.len() / 4);
905
906        // Convert to YCbCr using f32 precision
907        let (y_plane, cb_plane, cr_plane) = self.convert_to_ycbcr_f32(data)?;
908
909        // Generate quantization tables (3 separate tables like C++ cjpegli)
910        // Progressive mode uses 4:4:4, so is_420 = false
911        let y_quant =
912            quant::generate_quant_table(self.config.quality, 0, ColorSpace::YCbCr, false, false);
913        let cb_quant =
914            quant::generate_quant_table(self.config.quality, 1, ColorSpace::YCbCr, false, false);
915        let cr_quant =
916            quant::generate_quant_table(self.config.quality, 2, ColorSpace::YCbCr, false, false);
917
918        // Quantize all blocks to get full-precision coefficients
919        let (y_blocks, cb_blocks, cr_blocks) = self.quantize_all_blocks(
920            &y_plane, &cb_plane, &cr_plane, &y_quant, &cb_quant, &cr_quant,
921        )?;
922        let is_color = self.config.pixel_format != PixelFormat::Gray;
923        let num_components = if is_color { 3 } else { 1 };
924
925        // Define progressive scan script
926        let scans = self.get_progressive_scan_script(is_color);
927
928        // ========== PASS 1: TOKENIZATION ==========
929        // Tokenize all scans to collect symbol statistics
930        let mut token_buffer = ProgressiveTokenBuffer::new(num_components, scans.len());
931
932        for scan in scans.iter() {
933            // Calculate context for this scan
934            // Context determines which Huffman table histogram to use
935            let context = if scan.ss == 0 && scan.se == 0 {
936                // DC scan: use component index as context (0=Y, 1=Cb, 2=Cr)
937                scan.components[0]
938            } else {
939                // AC scan: use num_components + component_index as context
940                // This ensures Y always uses luma table, Cb/Cr use chroma table
941                // regardless of scan order (which varies with subsampling mode)
942                (num_components as u8) + scan.components[0]
943            };
944
945            if scan.ss == 0 && scan.se == 0 {
946                // DC scan
947                let blocks: Vec<&[[i16; DCT_BLOCK_SIZE]]> = scan
948                    .components
949                    .iter()
950                    .map(|&c| match c {
951                        0 => y_blocks.as_slice(),
952                        1 => cb_blocks.as_slice(),
953                        2 => cr_blocks.as_slice(),
954                        _ => &[][..],
955                    })
956                    .collect();
957                let component_indices: Vec<usize> =
958                    scan.components.iter().map(|&c| c as usize).collect();
959                token_buffer.tokenize_dc_scan(&blocks, &component_indices, scan.al, scan.ah);
960            } else if scan.ah == 0 {
961                // AC first scan
962                let blocks: &[[i16; DCT_BLOCK_SIZE]] = match scan.components[0] {
963                    0 => &y_blocks,
964                    1 => &cb_blocks,
965                    2 => &cr_blocks,
966                    _ => {
967                        return Err(Error::InternalError {
968                            reason: "Invalid component",
969                        })
970                    }
971                };
972                token_buffer.tokenize_ac_first_scan(blocks, context, scan.ss, scan.se, scan.al);
973            } else {
974                // AC refinement scan
975                let blocks: &[[i16; DCT_BLOCK_SIZE]] = match scan.components[0] {
976                    0 => &y_blocks,
977                    1 => &cb_blocks,
978                    2 => &cr_blocks,
979                    _ => {
980                        return Err(Error::InternalError {
981                            reason: "Invalid component",
982                        })
983                    }
984                };
985                token_buffer.tokenize_ac_refinement_scan(
986                    blocks, context, scan.ss, scan.se, scan.ah, scan.al,
987                );
988            }
989        }
990
991        // ========== GENERATE OPTIMIZED TABLES ==========
992        // Use explicit luma/chroma grouping to ensure table assignment matches
993        // what the replay code expects (luma=0, chroma=1)
994        let (num_dc_tables, tables) = token_buffer.generate_luma_chroma_tables(num_components)?;
995
996        // Convert to OptimizedHuffmanTables format for compatibility
997        let opt_tables =
998            self.build_progressive_huffman_tables(&tables, num_components, num_dc_tables)?;
999
1000        // ========== WRITE JPEG STRUCTURE ==========
1001        self.write_header(&mut output)?;
1002        self.write_quant_tables(&mut output, &y_quant, &cb_quant, &cr_quant)?;
1003        self.write_frame_header(&mut output)?; // Uses SOF2 for progressive
1004
1005        // Write optimized Huffman tables
1006        self.write_huffman_tables_optimized(&mut output, &opt_tables)?;
1007
1008        if self.config.restart_interval > 0 {
1009            self.write_restart_interval(&mut output)?;
1010        }
1011
1012        // ========== PASS 2: REPLAY TOKENS ==========
1013        // Encode each scan by replaying tokens with optimized tables
1014        for (scan_idx, scan) in scans.iter().enumerate() {
1015            // Write SOS header
1016            self.write_progressive_scan_header(&mut output, scan, is_color)?;
1017
1018            // Replay tokens for this scan
1019            let scan_data =
1020                self.replay_progressive_scan(&token_buffer, scan_idx, scan, is_color, &opt_tables)?;
1021            output.extend_from_slice(&scan_data);
1022        }
1023
1024        // Write EOI
1025        output.push(0xFF);
1026        output.push(MARKER_EOI);
1027
1028        Ok(output)
1029    }
1030
1031    /// Builds OptimizedHuffmanTables from the clustered tables.
1032    fn build_progressive_huffman_tables(
1033        &self,
1034        tables: &[OptimizedTable],
1035        num_components: usize,
1036        num_dc_tables: usize,
1037    ) -> Result<OptimizedHuffmanTables> {
1038        // Tables are arranged: DC clusters first, then AC clusters
1039        // num_dc_tables tells us where DC ends and AC begins
1040
1041        let dc_luma = tables.first().cloned().unwrap_or_else(|| {
1042            // Create a minimal default table
1043            let mut counter = FrequencyCounter::new();
1044            counter.count(0);
1045            counter.generate_table_with_dht().unwrap()
1046        });
1047
1048        // DC chroma is the second DC table if it exists
1049        let dc_chroma = if num_components > 1 && num_dc_tables > 1 {
1050            tables.get(1).cloned().unwrap_or_else(|| dc_luma.clone())
1051        } else {
1052            dc_luma.clone()
1053        };
1054
1055        // AC tables start after DC tables
1056        let ac_luma = tables.get(num_dc_tables).cloned().unwrap_or_else(|| {
1057            let mut counter = FrequencyCounter::new();
1058            counter.count(0);
1059            counter.generate_table_with_dht().unwrap()
1060        });
1061
1062        // AC chroma is the second AC table if it exists
1063        let ac_chroma = if num_components > 1 && tables.len() > num_dc_tables + 1 {
1064            tables
1065                .get(num_dc_tables + 1)
1066                .cloned()
1067                .unwrap_or_else(|| ac_luma.clone())
1068        } else {
1069            ac_luma.clone()
1070        };
1071
1072        Ok(OptimizedHuffmanTables {
1073            dc_luma,
1074            ac_luma,
1075            dc_chroma,
1076            ac_chroma,
1077        })
1078    }
1079
1080    /// Replays tokens for a progressive scan with optimized tables.
1081    fn replay_progressive_scan(
1082        &self,
1083        token_buffer: &ProgressiveTokenBuffer,
1084        scan_idx: usize,
1085        scan: &ProgressiveScan,
1086        is_color: bool,
1087        tables: &OptimizedHuffmanTables,
1088    ) -> Result<Vec<u8>> {
1089        let mut encoder = EntropyEncoder::new();
1090
1091        // Set up Huffman tables
1092        encoder.set_dc_table(0, tables.dc_luma.table.clone());
1093        encoder.set_ac_table(0, tables.ac_luma.table.clone());
1094        if is_color {
1095            encoder.set_dc_table(1, tables.dc_chroma.table.clone());
1096            encoder.set_ac_table(1, tables.ac_chroma.table.clone());
1097        }
1098
1099        if self.config.restart_interval > 0 {
1100            encoder.set_restart_interval(self.config.restart_interval);
1101        }
1102
1103        // Get scan info
1104        let scan_info = token_buffer
1105            .scan_info
1106            .get(scan_idx)
1107            .ok_or(Error::InternalError {
1108                reason: "Scan info not found",
1109            })?;
1110
1111        if scan.ss == 0 && scan.se == 0 {
1112            // DC scan: replay DC tokens
1113            let tokens = token_buffer.scan_tokens(scan_idx);
1114            // Create context map for DC (component index -> table index)
1115            let context_to_table: Vec<usize> = (0..4)
1116                .map(|c| if is_color && c > 0 { 1 } else { 0 })
1117                .collect();
1118            encoder.write_dc_tokens(tokens, &context_to_table)?;
1119        } else if scan.ah == 0 {
1120            // AC first scan: replay AC tokens
1121            let tokens = token_buffer.scan_tokens(scan_idx);
1122            let table_idx = if is_color && scan.components[0] > 0 {
1123                1
1124            } else {
1125                0
1126            };
1127            encoder.write_ac_first_tokens(tokens, table_idx)?;
1128        } else {
1129            // AC refinement scan: replay refinement tokens
1130            let table_idx = if is_color && scan.components[0] > 0 {
1131                1
1132            } else {
1133                0
1134            };
1135            encoder.write_ac_refinement_tokens(scan_info, table_idx)?;
1136        }
1137
1138        Ok(encoder.finish())
1139    }
1140
1141    /// Returns the progressive scan script for level 2.
1142    fn get_progressive_scan_script(&self, is_color: bool) -> Vec<ProgressiveScan> {
1143        let num_components = if is_color { 3 } else { 1 };
1144        let mut scans = Vec::new();
1145
1146        // For 4:4:4 subsampling, DC can be interleaved
1147        let dc_interleaved = matches!(self.config.subsampling, Subsampling::S444);
1148
1149        // DC first scan
1150        if dc_interleaved && is_color {
1151            // Interleaved DC for all components
1152            scans.push(ProgressiveScan {
1153                components: vec![0, 1, 2],
1154                ss: 0,
1155                se: 0,
1156                ah: 0,
1157                al: 0,
1158            });
1159        } else {
1160            // Non-interleaved DC
1161            for c in 0..num_components {
1162                scans.push(ProgressiveScan {
1163                    components: vec![c],
1164                    ss: 0,
1165                    se: 0,
1166                    ah: 0,
1167                    al: 0,
1168                });
1169            }
1170        }
1171
1172        // AC scans are always non-interleaved
1173        // Progressive Level 2 with successive approximation (matches C++ jpegli)
1174        let use_refinement = true;
1175
1176        for c in 0..num_components {
1177            if use_refinement {
1178                // Level 2: with successive approximation
1179                // AC 1-2: full precision (low frequency, most visible)
1180                scans.push(ProgressiveScan {
1181                    components: vec![c],
1182                    ss: 1,
1183                    se: 2,
1184                    ah: 0,
1185                    al: 0,
1186                });
1187
1188                // AC 3-63 first pass: top bits only (Al=2 means bits 2+)
1189                scans.push(ProgressiveScan {
1190                    components: vec![c],
1191                    ss: 3,
1192                    se: 63,
1193                    ah: 0,
1194                    al: 2,
1195                });
1196
1197                // AC 3-63 refinement: bit 1 (Ah=2, Al=1)
1198                scans.push(ProgressiveScan {
1199                    components: vec![c],
1200                    ss: 3,
1201                    se: 63,
1202                    ah: 2,
1203                    al: 1,
1204                });
1205
1206                // AC 3-63 refinement: bit 0 (Ah=1, Al=0)
1207                scans.push(ProgressiveScan {
1208                    components: vec![c],
1209                    ss: 3,
1210                    se: 63,
1211                    ah: 1,
1212                    al: 0,
1213                });
1214            } else {
1215                // Level 0: no successive approximation (simpler, works)
1216                scans.push(ProgressiveScan {
1217                    components: vec![c],
1218                    ss: 1,
1219                    se: 63,
1220                    ah: 0,
1221                    al: 0,
1222                });
1223            }
1224        }
1225
1226        scans
1227    }
1228
1229    /// Writes SOS header for a progressive scan.
1230    fn write_progressive_scan_header(
1231        &self,
1232        output: &mut Vec<u8>,
1233        scan: &ProgressiveScan,
1234        is_color: bool,
1235    ) -> Result<()> {
1236        output.push(0xFF);
1237        output.push(MARKER_SOS);
1238
1239        let num_components = scan.components.len() as u8;
1240        let length = 6u16 + num_components as u16 * 2;
1241        output.push((length >> 8) as u8);
1242        output.push(length as u8);
1243
1244        output.push(num_components);
1245
1246        for &comp_idx in &scan.components {
1247            // Component ID (1-based for YCbCr)
1248            let comp_id = comp_idx + 1;
1249            output.push(comp_id);
1250
1251            // DC/AC table selectors
1252            // For DC scans (ss=0): use DC table for the component
1253            // For AC scans (ss>0): use AC table for the component
1254            let table_selector = if is_color && comp_idx > 0 {
1255                0x11 // DC table 1, AC table 1 for chroma
1256            } else {
1257                0x00 // DC table 0, AC table 0 for luma
1258            };
1259            output.push(table_selector);
1260        }
1261
1262        output.push(scan.ss); // Spectral selection start
1263        output.push(scan.se); // Spectral selection end
1264        output.push((scan.ah << 4) | scan.al); // Successive approximation
1265
1266        Ok(())
1267    }
1268
1269    /// Encodes a single progressive scan.
1270    fn encode_progressive_scan(
1271        &self,
1272        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
1273        cb_blocks: &[[i16; DCT_BLOCK_SIZE]],
1274        cr_blocks: &[[i16; DCT_BLOCK_SIZE]],
1275        scan: &ProgressiveScan,
1276        is_color: bool,
1277        tables: &Option<OptimizedHuffmanTables>,
1278    ) -> Result<Vec<u8>> {
1279        let mut encoder = EntropyEncoder::new();
1280
1281        // Set up Huffman tables
1282        if let Some(ref opt_tables) = tables {
1283            encoder.set_dc_table(0, opt_tables.dc_luma.table.clone());
1284            encoder.set_ac_table(0, opt_tables.ac_luma.table.clone());
1285            if is_color {
1286                encoder.set_dc_table(1, opt_tables.dc_chroma.table.clone());
1287                encoder.set_ac_table(1, opt_tables.ac_chroma.table.clone());
1288            }
1289        } else {
1290            encoder.set_dc_table(0, HuffmanEncodeTable::std_dc_luminance());
1291            encoder.set_ac_table(0, HuffmanEncodeTable::std_ac_luminance());
1292            if is_color {
1293                encoder.set_dc_table(1, HuffmanEncodeTable::std_dc_chrominance());
1294                encoder.set_ac_table(1, HuffmanEncodeTable::std_ac_chrominance());
1295            }
1296        }
1297
1298        if self.config.restart_interval > 0 {
1299            encoder.set_restart_interval(self.config.restart_interval);
1300        }
1301
1302        let width = self.config.width as usize;
1303        let height = self.config.height as usize;
1304        let blocks_h = (width + DCT_SIZE - 1) / DCT_SIZE;
1305        let blocks_v = (height + DCT_SIZE - 1) / DCT_SIZE;
1306
1307        // Determine scan type and encode accordingly
1308        if scan.ss == 0 && scan.se == 0 {
1309            // DC scan (first or refinement)
1310            self.encode_dc_scan(
1311                &mut encoder,
1312                y_blocks,
1313                cb_blocks,
1314                cr_blocks,
1315                scan,
1316                blocks_h,
1317                blocks_v,
1318                is_color,
1319            )?;
1320        } else if scan.ah == 0 {
1321            // AC first scan
1322            self.encode_ac_first_scan(
1323                &mut encoder,
1324                y_blocks,
1325                cb_blocks,
1326                cr_blocks,
1327                scan,
1328                blocks_h,
1329                blocks_v,
1330                is_color,
1331            )?;
1332        } else {
1333            // AC refinement scan
1334            self.encode_ac_refine_scan(
1335                &mut encoder,
1336                y_blocks,
1337                cb_blocks,
1338                cr_blocks,
1339                scan,
1340                blocks_h,
1341                blocks_v,
1342                is_color,
1343            )?;
1344        }
1345
1346        Ok(encoder.finish())
1347    }
1348
1349    /// Encodes DC scan (first or refinement).
1350    fn encode_dc_scan(
1351        &self,
1352        encoder: &mut EntropyEncoder,
1353        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
1354        cb_blocks: &[[i16; DCT_BLOCK_SIZE]],
1355        cr_blocks: &[[i16; DCT_BLOCK_SIZE]],
1356        scan: &ProgressiveScan,
1357        blocks_h: usize,
1358        blocks_v: usize,
1359        is_color: bool,
1360    ) -> Result<()> {
1361        for by in 0..blocks_v {
1362            for bx in 0..blocks_h {
1363                let block_idx = by * blocks_h + bx;
1364
1365                for (comp_num, &comp_idx) in scan.components.iter().enumerate() {
1366                    let blocks: &[[i16; DCT_BLOCK_SIZE]] = match comp_idx {
1367                        0 => y_blocks,
1368                        1 => cb_blocks,
1369                        2 => cr_blocks,
1370                        _ => {
1371                            return Err(Error::InternalError {
1372                                reason: "Invalid component index",
1373                            })
1374                        }
1375                    };
1376
1377                    if block_idx >= blocks.len() {
1378                        continue;
1379                    }
1380
1381                    let dc = blocks[block_idx][0];
1382                    let table = if is_color && comp_idx > 0 { 1 } else { 0 };
1383
1384                    encoder.encode_dc_progressive(dc, comp_num, table, scan.al, scan.ah)?;
1385                }
1386            }
1387        }
1388
1389        Ok(())
1390    }
1391
1392    /// Encodes AC first scan (Ah=0, ss>0).
1393    fn encode_ac_first_scan(
1394        &self,
1395        encoder: &mut EntropyEncoder,
1396        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
1397        cb_blocks: &[[i16; DCT_BLOCK_SIZE]],
1398        cr_blocks: &[[i16; DCT_BLOCK_SIZE]],
1399        scan: &ProgressiveScan,
1400        blocks_h: usize,
1401        blocks_v: usize,
1402        is_color: bool,
1403    ) -> Result<()> {
1404        // AC first scan is always non-interleaved (single component)
1405        assert_eq!(scan.components.len(), 1);
1406        let comp_idx = scan.components[0];
1407
1408        let blocks: &[[i16; DCT_BLOCK_SIZE]] = match comp_idx {
1409            0 => y_blocks,
1410            1 => cb_blocks,
1411            2 => cr_blocks,
1412            _ => {
1413                return Err(Error::InternalError {
1414                    reason: "Invalid component index",
1415                })
1416            }
1417        };
1418
1419        let table_idx = if is_color && comp_idx > 0 { 1 } else { 0 };
1420
1421        let mut eob_run = 0u16;
1422
1423        for by in 0..blocks_v {
1424            for bx in 0..blocks_h {
1425                let block_idx = by * blocks_h + bx;
1426
1427                if block_idx >= blocks.len() {
1428                    continue;
1429                }
1430
1431                encoder.encode_ac_progressive_first(
1432                    &blocks[block_idx],
1433                    table_idx,
1434                    scan.ss,
1435                    scan.se,
1436                    scan.al,
1437                    &mut eob_run,
1438                )?;
1439            }
1440        }
1441
1442        // Flush remaining EOB run
1443        encoder.flush_eob_run(table_idx, eob_run)?;
1444
1445        Ok(())
1446    }
1447
1448    /// Encodes AC refinement scan (Ah>0, ss>0).
1449    fn encode_ac_refine_scan(
1450        &self,
1451        encoder: &mut EntropyEncoder,
1452        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
1453        cb_blocks: &[[i16; DCT_BLOCK_SIZE]],
1454        cr_blocks: &[[i16; DCT_BLOCK_SIZE]],
1455        scan: &ProgressiveScan,
1456        blocks_h: usize,
1457        blocks_v: usize,
1458        is_color: bool,
1459    ) -> Result<()> {
1460        // AC refinement scan is always non-interleaved
1461        assert_eq!(scan.components.len(), 1);
1462        let comp_idx = scan.components[0];
1463
1464        let blocks: &[[i16; DCT_BLOCK_SIZE]] = match comp_idx {
1465            0 => y_blocks,
1466            1 => cb_blocks,
1467            2 => cr_blocks,
1468            _ => {
1469                return Err(Error::InternalError {
1470                    reason: "Invalid component index",
1471                })
1472            }
1473        };
1474
1475        let table_idx = if is_color && comp_idx > 0 { 1 } else { 0 };
1476
1477        let mut eob_run = 0u16;
1478
1479        for by in 0..blocks_v {
1480            for bx in 0..blocks_h {
1481                let block_idx = by * blocks_h + bx;
1482
1483                if block_idx >= blocks.len() {
1484                    continue;
1485                }
1486
1487                encoder.encode_ac_progressive_refine(
1488                    &blocks[block_idx],
1489                    table_idx,
1490                    scan.ss,
1491                    scan.se,
1492                    scan.ah,
1493                    scan.al,
1494                    &mut eob_run,
1495                )?;
1496            }
1497        }
1498
1499        // Flush remaining EOB run
1500        encoder.flush_refine_eob(table_idx, eob_run)?;
1501
1502        Ok(())
1503    }
1504
1505    /// Converts input data to YCbCr planes (u8 version - legacy).
1506    #[allow(dead_code)]
1507    fn convert_to_ycbcr(&self, data: &[u8]) -> Result<(Vec<u8>, Vec<u8>, Vec<u8>)> {
1508        let width = self.config.width as usize;
1509        let height = self.config.height as usize;
1510        let num_pixels = checked_size_2d(width, height)?;
1511
1512        match self.config.pixel_format {
1513            PixelFormat::Gray => {
1514                let y = data.to_vec();
1515                let cb = try_alloc_filled(num_pixels, 128u8, "YCbCr Cb plane")?;
1516                let cr = try_alloc_filled(num_pixels, 128u8, "YCbCr Cr plane")?;
1517                Ok((y, cb, cr))
1518            }
1519            PixelFormat::Rgb => color::rgb_to_ycbcr_planes(data, width, height),
1520            PixelFormat::Rgba => {
1521                // Strip alpha and convert
1522                let rgb: Vec<u8> = data
1523                    .chunks(4)
1524                    .flat_map(|chunk| [chunk[0], chunk[1], chunk[2]])
1525                    .collect();
1526                color::rgb_to_ycbcr_planes(&rgb, width, height)
1527            }
1528            PixelFormat::Bgr => {
1529                let rgb: Vec<u8> = data
1530                    .chunks(3)
1531                    .flat_map(|chunk| [chunk[2], chunk[1], chunk[0]])
1532                    .collect();
1533                color::rgb_to_ycbcr_planes(&rgb, width, height)
1534            }
1535            PixelFormat::Bgra => {
1536                let rgb: Vec<u8> = data
1537                    .chunks(4)
1538                    .flat_map(|chunk| [chunk[2], chunk[1], chunk[0]])
1539                    .collect();
1540                color::rgb_to_ycbcr_planes(&rgb, width, height)
1541            }
1542            PixelFormat::Cmyk => Err(Error::UnsupportedFeature {
1543                feature: "CMYK encoding",
1544            }),
1545        }
1546    }
1547
1548    /// Converts input data to YCbCr planes using full f32 precision.
1549    /// This matches C++ jpegli which uses float throughout the pipeline.
1550    /// Output values are in [0, 255] range (not level-shifted).
1551    fn convert_to_ycbcr_f32(&self, data: &[u8]) -> Result<(Vec<f32>, Vec<f32>, Vec<f32>)> {
1552        let width = self.config.width as usize;
1553        let height = self.config.height as usize;
1554        let num_pixels = checked_size_2d(width, height)?;
1555
1556        let mut y_plane = try_alloc_zeroed_f32(num_pixels, "YCbCr Y plane f32")?;
1557        let mut cb_plane = try_alloc_zeroed_f32(num_pixels, "YCbCr Cb plane f32")?;
1558        let mut cr_plane = try_alloc_zeroed_f32(num_pixels, "YCbCr Cr plane f32")?;
1559
1560        match self.config.pixel_format {
1561            PixelFormat::Gray => {
1562                for i in 0..num_pixels {
1563                    y_plane[i] = data[i] as f32;
1564                    cb_plane[i] = 128.0;
1565                    cr_plane[i] = 128.0;
1566                }
1567            }
1568            PixelFormat::Rgb => {
1569                for i in 0..num_pixels {
1570                    let (y, cb, cr) = color::rgb_to_ycbcr_f32(
1571                        data[i * 3] as f32,
1572                        data[i * 3 + 1] as f32,
1573                        data[i * 3 + 2] as f32,
1574                    );
1575                    y_plane[i] = y;
1576                    cb_plane[i] = cb;
1577                    cr_plane[i] = cr;
1578                }
1579            }
1580            PixelFormat::Rgba => {
1581                for i in 0..num_pixels {
1582                    let (y, cb, cr) = color::rgb_to_ycbcr_f32(
1583                        data[i * 4] as f32,
1584                        data[i * 4 + 1] as f32,
1585                        data[i * 4 + 2] as f32,
1586                    );
1587                    y_plane[i] = y;
1588                    cb_plane[i] = cb;
1589                    cr_plane[i] = cr;
1590                }
1591            }
1592            PixelFormat::Bgr => {
1593                for i in 0..num_pixels {
1594                    let (y, cb, cr) = color::rgb_to_ycbcr_f32(
1595                        data[i * 3 + 2] as f32,
1596                        data[i * 3 + 1] as f32,
1597                        data[i * 3] as f32,
1598                    );
1599                    y_plane[i] = y;
1600                    cb_plane[i] = cb;
1601                    cr_plane[i] = cr;
1602                }
1603            }
1604            PixelFormat::Bgra => {
1605                for i in 0..num_pixels {
1606                    let (y, cb, cr) = color::rgb_to_ycbcr_f32(
1607                        data[i * 4 + 2] as f32,
1608                        data[i * 4 + 1] as f32,
1609                        data[i * 4] as f32,
1610                    );
1611                    y_plane[i] = y;
1612                    cb_plane[i] = cb;
1613                    cr_plane[i] = cr;
1614                }
1615            }
1616            PixelFormat::Cmyk => {
1617                return Err(Error::UnsupportedFeature {
1618                    feature: "CMYK encoding",
1619                });
1620            }
1621        }
1622
1623        Ok((y_plane, cb_plane, cr_plane))
1624    }
1625
1626    /// Writes the JPEG header (SOI only, no JFIF APP0).
1627    ///
1628    /// Note: C++ jpegli does not write JFIF APP0, so we skip it for parity.
1629    /// The JFIF marker is optional and many modern decoders don't require it.
1630    fn write_header(&self, output: &mut Vec<u8>) -> Result<()> {
1631        // SOI only - no JFIF marker for C++ parity
1632        output.push(0xFF);
1633        output.push(MARKER_SOI);
1634        Ok(())
1635    }
1636
1637    /// Writes the JPEG header for XYB mode (SOI only, no JFIF).
1638    ///
1639    /// XYB mode uses RGB component IDs and an ICC profile for color interpretation.
1640    /// JFIF APP0 is not appropriate because it implies YCbCr colorspace.
1641    fn write_header_xyb(&self, output: &mut Vec<u8>) -> Result<()> {
1642        // SOI only - no JFIF marker for XYB mode
1643        output.push(0xFF);
1644        output.push(MARKER_SOI);
1645        Ok(())
1646    }
1647
1648    /// Writes an APP14 Adobe marker for RGB/CMYK/YCCK colorspaces.
1649    ///
1650    /// The APP14 marker is required by some decoders to properly interpret
1651    /// RGB (including XYB), CMYK, and YCCK colorspaces.
1652    ///
1653    /// See: https://github.com/google/jpegli/pull/135
1654    ///
1655    /// # Arguments
1656    /// * `transform` - Color transform type:
1657    ///   - 0 = RGB or CMYK (no transform)
1658    ///   - 1 = YCbCr
1659    ///   - 2 = YCCK
1660    fn write_app14_adobe(&self, output: &mut Vec<u8>, transform: u8) -> Result<()> {
1661        output.push(0xFF);
1662        output.push(MARKER_APP14);
1663        output.extend_from_slice(&[
1664            0x00, 0x0E, // Length: 14 bytes (includes length field)
1665            b'A', b'd', b'o', b'b', b'e', // Signature
1666            0x00, 0x64, // DCTEncodeVersion (100)
1667            0x00, 0x00, // APP14Flags0
1668            0x00, 0x00,      // APP14Flags1
1669            transform, // Color transform
1670        ]);
1671        Ok(())
1672    }
1673
1674    /// Writes an ICC profile to the JPEG output.
1675    ///
1676    /// ICC profiles are stored in APP2 marker segments with the signature "ICC_PROFILE\0".
1677    /// Large profiles are split into multiple segments (max ~65519 bytes per segment).
1678    fn write_icc_profile(&self, output: &mut Vec<u8>, icc_data: &[u8]) -> Result<()> {
1679        if icc_data.is_empty() {
1680            return Ok(());
1681        }
1682
1683        // Calculate number of chunks needed
1684        let num_chunks = (icc_data.len() + MAX_ICC_BYTES_PER_MARKER - 1) / MAX_ICC_BYTES_PER_MARKER;
1685
1686        let mut offset = 0;
1687        for chunk_num in 0..num_chunks {
1688            let chunk_size = (icc_data.len() - offset).min(MAX_ICC_BYTES_PER_MARKER);
1689
1690            // APP2 marker
1691            output.push(0xFF);
1692            output.push(MARKER_APP2);
1693
1694            // Length: 2 (length field) + 12 (signature) + 2 (chunk info) + data
1695            let segment_length = 2 + 12 + 2 + chunk_size;
1696            output.push((segment_length >> 8) as u8);
1697            output.push(segment_length as u8);
1698
1699            // ICC_PROFILE signature
1700            output.extend_from_slice(&ICC_PROFILE_SIGNATURE);
1701
1702            // Chunk number (1-based) and total chunks
1703            output.push((chunk_num + 1) as u8);
1704            output.push(num_chunks as u8);
1705
1706            // ICC data chunk
1707            output.extend_from_slice(&icc_data[offset..offset + chunk_size]);
1708
1709            offset += chunk_size;
1710        }
1711
1712        Ok(())
1713    }
1714
1715    /// Writes quantization tables (3 separate tables for Y, Cb, Cr).
1716    /// This matches C++ jpegli behavior with add_two_chroma_tables=true.
1717    fn write_quant_tables(
1718        &self,
1719        output: &mut Vec<u8>,
1720        y_quant: &QuantTable,
1721        cb_quant: &QuantTable,
1722        cr_quant: &QuantTable,
1723    ) -> Result<()> {
1724        // Write all 3 tables in one DQT segment
1725        // Length = 2 + 3 * (1 + 64) = 197 bytes
1726        output.push(0xFF);
1727        output.push(MARKER_DQT);
1728        output.push(0x00);
1729        output.push(0xC5); // Length: 197 bytes
1730
1731        // Table 0 (Y) - values must be written in zigzag order
1732        output.push(0x00); // 8-bit precision, table 0
1733        for i in 0..DCT_BLOCK_SIZE {
1734            output.push(y_quant.values[JPEG_NATURAL_ORDER[i] as usize] as u8);
1735        }
1736
1737        // Table 1 (Cb)
1738        output.push(0x01); // 8-bit precision, table 1
1739        for i in 0..DCT_BLOCK_SIZE {
1740            output.push(cb_quant.values[JPEG_NATURAL_ORDER[i] as usize] as u8);
1741        }
1742
1743        // Table 2 (Cr)
1744        output.push(0x02); // 8-bit precision, table 2
1745        for i in 0..DCT_BLOCK_SIZE {
1746            output.push(cr_quant.values[JPEG_NATURAL_ORDER[i] as usize] as u8);
1747        }
1748
1749        Ok(())
1750    }
1751
1752    /// Writes quantization tables for XYB mode (3 separate tables).
1753    fn write_quant_tables_xyb(
1754        &self,
1755        output: &mut Vec<u8>,
1756        r_quant: &QuantTable,
1757        g_quant: &QuantTable,
1758        b_quant: &QuantTable,
1759    ) -> Result<()> {
1760        // Write all 3 tables in one DQT segment
1761        // Length = 2 + 3 * (1 + 64) = 197 bytes
1762        output.push(0xFF);
1763        output.push(MARKER_DQT);
1764        output.push(0x00);
1765        output.push(0xC5); // Length: 197 bytes
1766
1767        // Table 0 (Red)
1768        output.push(0x00); // 8-bit precision, table 0
1769        for i in 0..DCT_BLOCK_SIZE {
1770            output.push(r_quant.values[JPEG_NATURAL_ORDER[i] as usize] as u8);
1771        }
1772
1773        // Table 1 (Green)
1774        output.push(0x01); // 8-bit precision, table 1
1775        for i in 0..DCT_BLOCK_SIZE {
1776            output.push(g_quant.values[JPEG_NATURAL_ORDER[i] as usize] as u8);
1777        }
1778
1779        // Table 2 (Blue)
1780        output.push(0x02); // 8-bit precision, table 2
1781        for i in 0..DCT_BLOCK_SIZE {
1782            output.push(b_quant.values[JPEG_NATURAL_ORDER[i] as usize] as u8);
1783        }
1784
1785        Ok(())
1786    }
1787
1788    /// Writes the frame header (SOF0).
1789    fn write_frame_header(&self, output: &mut Vec<u8>) -> Result<()> {
1790        let marker = if self.config.mode == JpegMode::Progressive {
1791            MARKER_SOF2
1792        } else {
1793            MARKER_SOF0
1794        };
1795
1796        output.push(0xFF);
1797        output.push(marker);
1798
1799        let num_components = if self.config.pixel_format == PixelFormat::Gray {
1800            1u8
1801        } else {
1802            3u8
1803        };
1804
1805        let length = 8u16 + num_components as u16 * 3;
1806        output.push((length >> 8) as u8);
1807        output.push(length as u8);
1808
1809        output.push(8); // Sample precision
1810        output.push((self.config.height >> 8) as u8);
1811        output.push(self.config.height as u8);
1812        output.push((self.config.width >> 8) as u8);
1813        output.push(self.config.width as u8);
1814        output.push(num_components);
1815
1816        if num_components == 1 {
1817            // Grayscale
1818            output.push(1); // Component ID
1819            output.push(0x11); // 1x1 sampling
1820            output.push(0); // Quant table 0
1821        } else {
1822            // Y component
1823            let (h_samp, v_samp) = match self.config.subsampling {
1824                Subsampling::S444 => (1, 1),
1825                Subsampling::S422 => (2, 1),
1826                Subsampling::S420 => (2, 2),
1827                Subsampling::S440 => (1, 2),
1828            };
1829
1830            output.push(1); // Component ID = 1 (Y)
1831            output.push((h_samp << 4) | v_samp);
1832            output.push(0); // Quant table 0
1833
1834            output.push(2); // Component ID = 2 (Cb)
1835            output.push(0x11); // 1x1 sampling
1836            output.push(1); // Quant table 1
1837
1838            output.push(3); // Component ID = 3 (Cr)
1839            output.push(0x11); // 1x1 sampling
1840            output.push(2); // Quant table 2 (separate Cr table like C++ cjpegli)
1841        }
1842
1843        Ok(())
1844    }
1845
1846    /// Writes the frame header for XYB mode (RGB with B subsampling).
1847    fn write_frame_header_xyb(&self, output: &mut Vec<u8>) -> Result<()> {
1848        output.push(0xFF);
1849        output.push(MARKER_SOF0); // Baseline DCT
1850
1851        // 3 components: R, G, B
1852        let length = 8u16 + 3 * 3; // 17 bytes
1853        output.push((length >> 8) as u8);
1854        output.push(length as u8);
1855
1856        output.push(8); // Sample precision
1857        output.push((self.config.height >> 8) as u8);
1858        output.push(self.config.height as u8);
1859        output.push((self.config.width >> 8) as u8);
1860        output.push(self.config.width as u8);
1861        output.push(3); // Number of components
1862
1863        // XYB sampling: R:2×2, G:2×2, B:1×1
1864        // This means R and G are full resolution, B is 1/4 resolution
1865        output.push(b'R'); // Component ID = 'R' (82)
1866        output.push(0x22); // 2x2 sampling
1867        output.push(0); // Quant table 0
1868
1869        output.push(b'G'); // Component ID = 'G' (71)
1870        output.push(0x22); // 2x2 sampling
1871        output.push(1); // Quant table 1
1872
1873        output.push(b'B'); // Component ID = 'B' (66)
1874        output.push(0x11); // 1x1 sampling (subsampled)
1875        output.push(2); // Quant table 2
1876
1877        Ok(())
1878    }
1879
1880    /// Writes standard Huffman tables in a single DHT segment.
1881    fn write_huffman_tables(&self, output: &mut Vec<u8>) -> Result<()> {
1882        use crate::huffman::{
1883            STD_AC_CHROMINANCE_BITS, STD_AC_CHROMINANCE_VALUES, STD_AC_LUMINANCE_BITS,
1884            STD_AC_LUMINANCE_VALUES, STD_DC_CHROMINANCE_BITS, STD_DC_CHROMINANCE_VALUES,
1885            STD_DC_LUMINANCE_BITS, STD_DC_LUMINANCE_VALUES,
1886        };
1887
1888        // Write all 4 Huffman tables in a single DHT segment (like C++ jpegli)
1889        output.push(0xFF);
1890        output.push(MARKER_DHT);
1891
1892        // Calculate total length
1893        let total_len = 2
1894            + (1 + 16 + STD_DC_LUMINANCE_VALUES.len())
1895            + (1 + 16 + STD_AC_LUMINANCE_VALUES.len())
1896            + (1 + 16 + STD_DC_CHROMINANCE_VALUES.len())
1897            + (1 + 16 + STD_AC_CHROMINANCE_VALUES.len());
1898
1899        output.push((total_len >> 8) as u8);
1900        output.push(total_len as u8);
1901
1902        // DC luminance (class 0, id 0)
1903        output.push(0x00);
1904        output.extend_from_slice(&STD_DC_LUMINANCE_BITS);
1905        output.extend_from_slice(&STD_DC_LUMINANCE_VALUES);
1906
1907        // AC luminance (class 1, id 0)
1908        output.push(0x10);
1909        output.extend_from_slice(&STD_AC_LUMINANCE_BITS);
1910        output.extend_from_slice(&STD_AC_LUMINANCE_VALUES);
1911
1912        // DC chrominance (class 0, id 1)
1913        output.push(0x01);
1914        output.extend_from_slice(&STD_DC_CHROMINANCE_BITS);
1915        output.extend_from_slice(&STD_DC_CHROMINANCE_VALUES);
1916
1917        // AC chrominance (class 1, id 1)
1918        output.push(0x11);
1919        output.extend_from_slice(&STD_AC_CHROMINANCE_BITS);
1920        output.extend_from_slice(&STD_AC_CHROMINANCE_VALUES);
1921
1922        Ok(())
1923    }
1924
1925    /// Writes optimized Huffman tables.
1926    ///
1927    /// This is used when `optimize_huffman` is enabled to write the
1928    /// image-specific optimized tables to the DHT markers.
1929    fn write_huffman_tables_optimized(
1930        &self,
1931        output: &mut Vec<u8>,
1932        tables: &OptimizedHuffmanTables,
1933    ) -> Result<()> {
1934        // Write all 4 Huffman tables in a single DHT segment (like C++ jpegli)
1935        // This saves 12 bytes compared to 4 separate segments
1936        output.push(0xFF);
1937        output.push(MARKER_DHT);
1938
1939        // Calculate total length: 2 (length field) + 4 tables × (1 + 16 + values.len())
1940        let total_len = 2
1941            + (1 + 16 + tables.dc_luma.values.len())
1942            + (1 + 16 + tables.ac_luma.values.len())
1943            + (1 + 16 + tables.dc_chroma.values.len())
1944            + (1 + 16 + tables.ac_chroma.values.len());
1945
1946        output.push((total_len >> 8) as u8);
1947        output.push(total_len as u8);
1948
1949        // DC luminance (class 0, id 0)
1950        output.push(0x00);
1951        output.extend_from_slice(&tables.dc_luma.bits);
1952        output.extend_from_slice(&tables.dc_luma.values);
1953
1954        // AC luminance (class 1, id 0)
1955        output.push(0x10);
1956        output.extend_from_slice(&tables.ac_luma.bits);
1957        output.extend_from_slice(&tables.ac_luma.values);
1958
1959        // DC chrominance (class 0, id 1)
1960        output.push(0x01);
1961        output.extend_from_slice(&tables.dc_chroma.bits);
1962        output.extend_from_slice(&tables.dc_chroma.values);
1963
1964        // AC chrominance (class 1, id 1)
1965        output.push(0x11);
1966        output.extend_from_slice(&tables.ac_chroma.bits);
1967        output.extend_from_slice(&tables.ac_chroma.values);
1968
1969        Ok(())
1970    }
1971
1972    /// Writes restart interval.
1973    fn write_restart_interval(&self, output: &mut Vec<u8>) -> Result<()> {
1974        output.push(0xFF);
1975        output.push(MARKER_DRI);
1976        output.push(0x00);
1977        output.push(0x04); // Length
1978        output.push((self.config.restart_interval >> 8) as u8);
1979        output.push(self.config.restart_interval as u8);
1980        Ok(())
1981    }
1982
1983    /// Writes scan header.
1984    fn write_scan_header(&self, output: &mut Vec<u8>) -> Result<()> {
1985        output.push(0xFF);
1986        output.push(MARKER_SOS);
1987
1988        let num_components = if self.config.pixel_format == PixelFormat::Gray {
1989            1u8
1990        } else {
1991            3u8
1992        };
1993
1994        let length = 6u16 + num_components as u16 * 2;
1995        output.push((length >> 8) as u8);
1996        output.push(length as u8);
1997
1998        output.push(num_components);
1999
2000        if num_components == 1 {
2001            output.push(1); // Component selector
2002            output.push(0x00); // DC/AC table selectors
2003        } else {
2004            output.push(1); // Y component
2005            output.push(0x00); // DC table 0, AC table 0
2006
2007            output.push(2); // Cb component
2008            output.push(0x11); // DC table 1, AC table 1
2009
2010            output.push(3); // Cr component
2011            output.push(0x11); // DC table 1, AC table 1
2012        }
2013
2014        output.push(0x00); // Ss (spectral selection start)
2015        output.push(0x3F); // Se (spectral selection end = 63)
2016        output.push(0x00); // Ah/Al (successive approximation)
2017
2018        Ok(())
2019    }
2020
2021    /// Writes scan header for XYB mode.
2022    fn write_scan_header_xyb(&self, output: &mut Vec<u8>) -> Result<()> {
2023        output.push(0xFF);
2024        output.push(MARKER_SOS);
2025
2026        // 3 components: R, G, B
2027        let length = 6u16 + 3 * 2; // 12 bytes
2028        output.push((length >> 8) as u8);
2029        output.push(length as u8);
2030
2031        output.push(3); // Number of components
2032
2033        // R component: DC table 0, AC table 0
2034        output.push(b'R');
2035        output.push(0x00);
2036
2037        // G component: DC table 0, AC table 0
2038        output.push(b'G');
2039        output.push(0x00);
2040
2041        // B component: DC table 0, AC table 0
2042        output.push(b'B');
2043        output.push(0x00);
2044
2045        output.push(0x00); // Ss (spectral selection start)
2046        output.push(0x3F); // Se (spectral selection end = 63)
2047        output.push(0x00); // Ah/Al (successive approximation)
2048
2049        Ok(())
2050    }
2051
2052    /// Encodes the scan data (u8 version - legacy).
2053    #[allow(dead_code)]
2054    fn encode_scan(
2055        &self,
2056        y_plane: &[u8],
2057        cb_plane: &[u8],
2058        cr_plane: &[u8],
2059        y_quant: &QuantTable,
2060        c_quant: &QuantTable,
2061    ) -> Result<Vec<u8>> {
2062        let mut encoder = EntropyEncoder::new();
2063
2064        // Set up Huffman tables
2065        encoder.set_dc_table(0, HuffmanEncodeTable::std_dc_luminance());
2066        encoder.set_ac_table(0, HuffmanEncodeTable::std_ac_luminance());
2067        encoder.set_dc_table(1, HuffmanEncodeTable::std_dc_chrominance());
2068        encoder.set_ac_table(1, HuffmanEncodeTable::std_ac_chrominance());
2069
2070        if self.config.restart_interval > 0 {
2071            encoder.set_restart_interval(self.config.restart_interval);
2072        }
2073
2074        let width = self.config.width as usize;
2075        let height = self.config.height as usize;
2076
2077        // For 4:2:0, process MCUs
2078        let _mcu_width = ((width + 15) / 16) * 16;
2079        let _mcu_height = ((height + 15) / 16) * 16;
2080
2081        // TODO: Implement full MCU processing with subsampling
2082        // For now, simplified 4:4:4 encoding
2083        let blocks_h = (width + 7) / 8;
2084        let blocks_v = (height + 7) / 8;
2085
2086        // Zero-bias parameters for each component
2087        // Use effective distance inferred from quant tables (like C++ QuantValsToDistance)
2088        // For YCbCr mode, Cb and Cr share the same quant table (c_quant)
2089        let _input_distance = self.config.quality.to_distance();
2090        let effective_distance = quant::quant_vals_to_distance(y_quant, c_quant, c_quant);
2091        let y_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 0);
2092        let cb_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 1);
2093        let cr_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 2);
2094
2095        // Convert Y plane to f32 for AQ computation
2096        let y_plane_f32: Vec<f32> = y_plane.iter().map(|&v| v as f32).collect();
2097
2098        // Compute per-block adaptive quantization strength from Y plane
2099        // C++ uses y_quant_01 = quant_table[1] for dampen calculation
2100        let y_quant_01 = y_quant.values[1];
2101        #[cfg(feature = "hybrid-trellis")]
2102        let aq_map = if let Some(ref custom) = self.config.custom_aq_map {
2103            custom.clone()
2104        } else {
2105            compute_aq_strength_map(&y_plane_f32, width, height, y_quant_01)
2106        };
2107        #[cfg(not(feature = "hybrid-trellis"))]
2108        let aq_map = compute_aq_strength_map(&y_plane_f32, width, height, y_quant_01);
2109
2110        // Create hybrid quantization context if enabled
2111        #[cfg(feature = "hybrid-trellis")]
2112        let hybrid_ctx = if self.config.hybrid_config.enabled {
2113            Some(HybridQuantContext::new(self.config.hybrid_config))
2114        } else {
2115            None
2116        };
2117
2118        for by in 0..blocks_v {
2119            for bx in 0..blocks_h {
2120                // Get per-block aq_strength (C++ AQ produces 0.0-0.2, mean ~0.08)
2121                let aq_strength = aq_map.get(bx, by);
2122
2123                // Extract and encode Y block
2124                let y_block = self.extract_block(y_plane, width, height, bx, by);
2125                let y_dct = forward_dct_8x8(&y_block);
2126
2127                #[cfg(feature = "hybrid-trellis")]
2128                let y_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2129                    ctx.quantize_block(&y_dct, &y_quant.values, aq_strength, 1.0, true)
2130                } else {
2131                    quant::quantize_block_with_zero_bias(
2132                        &y_dct,
2133                        &y_quant.values,
2134                        &y_zero_bias,
2135                        aq_strength,
2136                    )
2137                };
2138                #[cfg(not(feature = "hybrid-trellis"))]
2139                let y_quant_coeffs = quant::quantize_block_with_zero_bias(
2140                    &y_dct,
2141                    &y_quant.values,
2142                    &y_zero_bias,
2143                    aq_strength,
2144                );
2145
2146                let y_zigzag = natural_to_zigzag(&y_quant_coeffs);
2147                encoder.encode_block(&y_zigzag, 0, 0, 0)?;
2148
2149                if self.config.pixel_format != PixelFormat::Gray {
2150                    // Cb block
2151                    let cb_block = self.extract_block(cb_plane, width, height, bx, by);
2152                    let cb_dct = forward_dct_8x8(&cb_block);
2153
2154                    #[cfg(feature = "hybrid-trellis")]
2155                    let cb_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2156                        ctx.quantize_block(&cb_dct, &c_quant.values, aq_strength, 1.0, false)
2157                    } else {
2158                        quant::quantize_block_with_zero_bias(
2159                            &cb_dct,
2160                            &c_quant.values,
2161                            &cb_zero_bias,
2162                            aq_strength,
2163                        )
2164                    };
2165                    #[cfg(not(feature = "hybrid-trellis"))]
2166                    let cb_quant_coeffs = quant::quantize_block_with_zero_bias(
2167                        &cb_dct,
2168                        &c_quant.values,
2169                        &cb_zero_bias,
2170                        aq_strength,
2171                    );
2172
2173                    let cb_zigzag = natural_to_zigzag(&cb_quant_coeffs);
2174                    encoder.encode_block(&cb_zigzag, 1, 1, 1)?;
2175
2176                    // Cr block
2177                    let cr_block = self.extract_block(cr_plane, width, height, bx, by);
2178                    let cr_dct = forward_dct_8x8(&cr_block);
2179
2180                    #[cfg(feature = "hybrid-trellis")]
2181                    let cr_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2182                        ctx.quantize_block(&cr_dct, &c_quant.values, aq_strength, 1.0, false)
2183                    } else {
2184                        quant::quantize_block_with_zero_bias(
2185                            &cr_dct,
2186                            &c_quant.values,
2187                            &cr_zero_bias,
2188                            aq_strength,
2189                        )
2190                    };
2191                    #[cfg(not(feature = "hybrid-trellis"))]
2192                    let cr_quant_coeffs = quant::quantize_block_with_zero_bias(
2193                        &cr_dct,
2194                        &c_quant.values,
2195                        &cr_zero_bias,
2196                        aq_strength,
2197                    );
2198
2199                    let cr_zigzag = natural_to_zigzag(&cr_quant_coeffs);
2200                    encoder.encode_block(&cr_zigzag, 2, 1, 1)?;
2201                }
2202
2203                encoder.check_restart();
2204            }
2205        }
2206
2207        Ok(encoder.finish())
2208    }
2209
2210    /// Quantizes all blocks in the image.
2211    ///
2212    /// This is separated from encoding to allow Huffman optimization:
2213    /// 1. Quantize all blocks
2214    /// 2. Collect frequencies to build optimal tables
2215    /// 3. Encode with optimal tables
2216    fn quantize_all_blocks(
2217        &self,
2218        y_plane: &[f32],
2219        cb_plane: &[f32],
2220        cr_plane: &[f32],
2221        y_quant: &QuantTable,
2222        cb_quant: &QuantTable,
2223        cr_quant: &QuantTable,
2224    ) -> Result<(
2225        Vec<[i16; DCT_BLOCK_SIZE]>,
2226        Vec<[i16; DCT_BLOCK_SIZE]>,
2227        Vec<[i16; DCT_BLOCK_SIZE]>,
2228    )> {
2229        let width = self.config.width as usize;
2230        let height = self.config.height as usize;
2231        let blocks_h = (width + 7) / 8;
2232        let blocks_v = (height + 7) / 8;
2233        let is_color = self.config.pixel_format != PixelFormat::Gray;
2234
2235        // Zero-bias parameters for each component
2236        // Use effective distance inferred from quant tables (like C++ QuantValsToDistance)
2237        // This is important at Q100 where quant values are all 1s but input distance is 0.01
2238        let _input_distance = self.config.quality.to_distance();
2239        let effective_distance = quant::quant_vals_to_distance(y_quant, cb_quant, cr_quant);
2240        let y_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 0);
2241        let cb_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 1);
2242        let cr_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 2);
2243
2244        // Compute per-block adaptive quantization strength from Y plane
2245        // C++ uses y_quant_01 = quant_table[1] for dampen calculation
2246        let y_quant_01 = y_quant.values[1];
2247        #[cfg(feature = "hybrid-trellis")]
2248        let aq_map = if let Some(ref custom) = self.config.custom_aq_map {
2249            custom.clone()
2250        } else {
2251            compute_aq_strength_map(y_plane, width, height, y_quant_01)
2252        };
2253        #[cfg(not(feature = "hybrid-trellis"))]
2254        let aq_map = compute_aq_strength_map(y_plane, width, height, y_quant_01);
2255
2256        // Create hybrid quantization context if enabled
2257        #[cfg(feature = "hybrid-trellis")]
2258        let hybrid_ctx = if self.config.hybrid_config.enabled {
2259            Some(HybridQuantContext::new(self.config.hybrid_config))
2260        } else {
2261            None
2262        };
2263
2264        let mut y_blocks = Vec::with_capacity(blocks_h * blocks_v);
2265        let mut cb_blocks = Vec::with_capacity(if is_color { blocks_h * blocks_v } else { 0 });
2266        let mut cr_blocks = Vec::with_capacity(if is_color { blocks_h * blocks_v } else { 0 });
2267
2268        for by in 0..blocks_v {
2269            for bx in 0..blocks_h {
2270                // Get per-block aq_strength
2271                let aq_strength = aq_map.get(bx, by);
2272
2273                let y_block = self.extract_block_ycbcr_f32(y_plane, width, height, bx, by);
2274                let y_dct = forward_dct_8x8(&y_block);
2275
2276                #[cfg(feature = "hybrid-trellis")]
2277                let y_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2278                    ctx.quantize_block(&y_dct, &y_quant.values, aq_strength, 1.0, true)
2279                } else {
2280                    quant::quantize_block_with_zero_bias(
2281                        &y_dct,
2282                        &y_quant.values,
2283                        &y_zero_bias,
2284                        aq_strength,
2285                    )
2286                };
2287                #[cfg(not(feature = "hybrid-trellis"))]
2288                let y_quant_coeffs = quant::quantize_block_with_zero_bias(
2289                    &y_dct,
2290                    &y_quant.values,
2291                    &y_zero_bias,
2292                    aq_strength,
2293                );
2294
2295                y_blocks.push(natural_to_zigzag(&y_quant_coeffs));
2296
2297                if is_color {
2298                    let cb_block = self.extract_block_ycbcr_f32(cb_plane, width, height, bx, by);
2299                    let cb_dct = forward_dct_8x8(&cb_block);
2300
2301                    #[cfg(feature = "hybrid-trellis")]
2302                    let cb_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2303                        ctx.quantize_block(&cb_dct, &cb_quant.values, aq_strength, 1.0, false)
2304                    } else {
2305                        quant::quantize_block_with_zero_bias(
2306                            &cb_dct,
2307                            &cb_quant.values,
2308                            &cb_zero_bias,
2309                            aq_strength,
2310                        )
2311                    };
2312                    #[cfg(not(feature = "hybrid-trellis"))]
2313                    let cb_quant_coeffs = quant::quantize_block_with_zero_bias(
2314                        &cb_dct,
2315                        &cb_quant.values,
2316                        &cb_zero_bias,
2317                        aq_strength,
2318                    );
2319
2320                    cb_blocks.push(natural_to_zigzag(&cb_quant_coeffs));
2321
2322                    let cr_block = self.extract_block_ycbcr_f32(cr_plane, width, height, bx, by);
2323                    let cr_dct = forward_dct_8x8(&cr_block);
2324
2325                    #[cfg(feature = "hybrid-trellis")]
2326                    let cr_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2327                        ctx.quantize_block(&cr_dct, &cr_quant.values, aq_strength, 1.0, false)
2328                    } else {
2329                        quant::quantize_block_with_zero_bias(
2330                            &cr_dct,
2331                            &cr_quant.values,
2332                            &cr_zero_bias,
2333                            aq_strength,
2334                        )
2335                    };
2336                    #[cfg(not(feature = "hybrid-trellis"))]
2337                    let cr_quant_coeffs = quant::quantize_block_with_zero_bias(
2338                        &cr_dct,
2339                        &cr_quant.values,
2340                        &cr_zero_bias,
2341                        aq_strength,
2342                    );
2343
2344                    cr_blocks.push(natural_to_zigzag(&cr_quant_coeffs));
2345                }
2346            }
2347        }
2348
2349        Ok((y_blocks, cb_blocks, cr_blocks))
2350    }
2351
2352    /// Quantizes all blocks with subsampling support.
2353    ///
2354    /// Unlike `quantize_all_blocks`, this version handles different dimensions
2355    /// for Y and chroma planes (needed for 4:2:0, 4:2:2, 4:4:0 subsampling).
2356    #[allow(clippy::too_many_arguments)]
2357    fn quantize_all_blocks_subsampled(
2358        &self,
2359        y_plane: &[f32],
2360        y_width: usize,
2361        y_height: usize,
2362        cb_plane: &[f32],
2363        cr_plane: &[f32],
2364        c_width: usize,
2365        c_height: usize,
2366        y_quant: &QuantTable,
2367        cb_quant: &QuantTable,
2368        cr_quant: &QuantTable,
2369    ) -> Result<(
2370        Vec<[i16; DCT_BLOCK_SIZE]>,
2371        Vec<[i16; DCT_BLOCK_SIZE]>,
2372        Vec<[i16; DCT_BLOCK_SIZE]>,
2373    )> {
2374        let y_blocks_h = (y_width + 7) / 8;
2375        let y_blocks_v = (y_height + 7) / 8;
2376        let c_blocks_h = (c_width + 7) / 8;
2377        let c_blocks_v = (c_height + 7) / 8;
2378        let is_color = self.config.pixel_format != PixelFormat::Gray;
2379
2380        // Zero-bias parameters for each component
2381        let effective_distance = quant::quant_vals_to_distance(y_quant, cb_quant, cr_quant);
2382        let y_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 0);
2383        let cb_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 1);
2384        let cr_zero_bias = ZeroBiasParams::for_ycbcr(effective_distance, 2);
2385
2386        // Compute per-block adaptive quantization strength from Y plane
2387        let y_quant_01 = y_quant.values[1];
2388        #[cfg(feature = "hybrid-trellis")]
2389        let aq_map = if let Some(ref custom) = self.config.custom_aq_map {
2390            custom.clone()
2391        } else {
2392            compute_aq_strength_map(y_plane, y_width, y_height, y_quant_01)
2393        };
2394        #[cfg(not(feature = "hybrid-trellis"))]
2395        let aq_map = compute_aq_strength_map(y_plane, y_width, y_height, y_quant_01);
2396
2397        // Create hybrid quantization context if enabled
2398        #[cfg(feature = "hybrid-trellis")]
2399        let hybrid_ctx = if self.config.hybrid_config.enabled {
2400            Some(HybridQuantContext::new(self.config.hybrid_config))
2401        } else {
2402            None
2403        };
2404
2405        let mut y_blocks = Vec::with_capacity(y_blocks_h * y_blocks_v);
2406        let mut cb_blocks = Vec::with_capacity(if is_color { c_blocks_h * c_blocks_v } else { 0 });
2407        let mut cr_blocks = Vec::with_capacity(if is_color { c_blocks_h * c_blocks_v } else { 0 });
2408
2409        // Quantize Y blocks
2410        for by in 0..y_blocks_v {
2411            for bx in 0..y_blocks_h {
2412                let aq_strength = aq_map.get(bx, by);
2413                let y_block = self.extract_block_ycbcr_f32(y_plane, y_width, y_height, bx, by);
2414                let y_dct = forward_dct_8x8(&y_block);
2415
2416                #[cfg(feature = "hybrid-trellis")]
2417                let y_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2418                    ctx.quantize_block(&y_dct, &y_quant.values, aq_strength, 1.0, true)
2419                } else {
2420                    quant::quantize_block_with_zero_bias(
2421                        &y_dct,
2422                        &y_quant.values,
2423                        &y_zero_bias,
2424                        aq_strength,
2425                    )
2426                };
2427                #[cfg(not(feature = "hybrid-trellis"))]
2428                let y_quant_coeffs = quant::quantize_block_with_zero_bias(
2429                    &y_dct,
2430                    &y_quant.values,
2431                    &y_zero_bias,
2432                    aq_strength,
2433                );
2434
2435                y_blocks.push(natural_to_zigzag(&y_quant_coeffs));
2436            }
2437        }
2438
2439        // Quantize chroma blocks (from possibly downsampled planes)
2440        if is_color {
2441            for by in 0..c_blocks_v {
2442                for bx in 0..c_blocks_h {
2443                    // For chroma, use average AQ strength from corresponding Y region
2444                    // For 4:2:0, each chroma block corresponds to 2x2 Y blocks
2445                    let y_bx = (bx * y_blocks_h) / c_blocks_h;
2446                    let y_by = (by * y_blocks_v) / c_blocks_v;
2447                    let aq_strength =
2448                        aq_map.get(y_bx.min(y_blocks_h - 1), y_by.min(y_blocks_v - 1));
2449
2450                    let cb_block =
2451                        self.extract_block_ycbcr_f32(cb_plane, c_width, c_height, bx, by);
2452                    let cb_dct = forward_dct_8x8(&cb_block);
2453
2454                    #[cfg(feature = "hybrid-trellis")]
2455                    let cb_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2456                        ctx.quantize_block(&cb_dct, &cb_quant.values, aq_strength, 1.0, false)
2457                    } else {
2458                        quant::quantize_block_with_zero_bias(
2459                            &cb_dct,
2460                            &cb_quant.values,
2461                            &cb_zero_bias,
2462                            aq_strength,
2463                        )
2464                    };
2465                    #[cfg(not(feature = "hybrid-trellis"))]
2466                    let cb_quant_coeffs = quant::quantize_block_with_zero_bias(
2467                        &cb_dct,
2468                        &cb_quant.values,
2469                        &cb_zero_bias,
2470                        aq_strength,
2471                    );
2472
2473                    cb_blocks.push(natural_to_zigzag(&cb_quant_coeffs));
2474
2475                    let cr_block =
2476                        self.extract_block_ycbcr_f32(cr_plane, c_width, c_height, bx, by);
2477                    let cr_dct = forward_dct_8x8(&cr_block);
2478
2479                    #[cfg(feature = "hybrid-trellis")]
2480                    let cr_quant_coeffs = if let Some(ref ctx) = hybrid_ctx {
2481                        ctx.quantize_block(&cr_dct, &cr_quant.values, aq_strength, 1.0, false)
2482                    } else {
2483                        quant::quantize_block_with_zero_bias(
2484                            &cr_dct,
2485                            &cr_quant.values,
2486                            &cr_zero_bias,
2487                            aq_strength,
2488                        )
2489                    };
2490                    #[cfg(not(feature = "hybrid-trellis"))]
2491                    let cr_quant_coeffs = quant::quantize_block_with_zero_bias(
2492                        &cr_dct,
2493                        &cr_quant.values,
2494                        &cr_zero_bias,
2495                        aq_strength,
2496                    );
2497
2498                    cr_blocks.push(natural_to_zigzag(&cr_quant_coeffs));
2499                }
2500            }
2501        }
2502
2503        Ok((y_blocks, cb_blocks, cr_blocks))
2504    }
2505
2506    /// Builds optimized Huffman tables from quantized blocks.
2507    ///
2508    /// Collects symbol frequencies from all blocks and generates optimal
2509    /// Huffman tables with their DHT marker representations.
2510    ///
2511    /// For subsampled modes, this iterates blocks in MCU order to correctly
2512    /// account for padding blocks.
2513    fn build_optimized_tables(
2514        &self,
2515        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
2516        cb_blocks: &[[i16; DCT_BLOCK_SIZE]],
2517        cr_blocks: &[[i16; DCT_BLOCK_SIZE]],
2518        is_color: bool,
2519    ) -> Result<OptimizedHuffmanTables> {
2520        let mut dc_luma_freq = FrequencyCounter::new();
2521        let mut dc_chroma_freq = FrequencyCounter::new();
2522        let mut ac_luma_freq = FrequencyCounter::new();
2523        let mut ac_chroma_freq = FrequencyCounter::new();
2524
2525        let width = self.config.width as usize;
2526        let height = self.config.height as usize;
2527        let (h_samp, v_samp) = match self.config.subsampling {
2528            Subsampling::S444 => (1, 1),
2529            Subsampling::S422 => (2, 1),
2530            Subsampling::S420 => (2, 2),
2531            Subsampling::S440 => (1, 2),
2532        };
2533
2534        // Zero block for padding
2535        const ZERO_BLOCK: [i16; DCT_BLOCK_SIZE] = [0i16; DCT_BLOCK_SIZE];
2536
2537        if h_samp == 1 && v_samp == 1 {
2538            // 4:4:4 mode - simple iteration, no padding needed
2539            let mut prev_y_dc: i16 = 0;
2540            let mut prev_cb_dc: i16 = 0;
2541            let mut prev_cr_dc: i16 = 0;
2542
2543            for (i, y_block) in y_blocks.iter().enumerate() {
2544                Self::collect_block_frequencies(
2545                    y_block,
2546                    prev_y_dc,
2547                    &mut dc_luma_freq,
2548                    &mut ac_luma_freq,
2549                );
2550                prev_y_dc = y_block[0];
2551
2552                if is_color {
2553                    Self::collect_block_frequencies(
2554                        &cb_blocks[i],
2555                        prev_cb_dc,
2556                        &mut dc_chroma_freq,
2557                        &mut ac_chroma_freq,
2558                    );
2559                    prev_cb_dc = cb_blocks[i][0];
2560
2561                    Self::collect_block_frequencies(
2562                        &cr_blocks[i],
2563                        prev_cr_dc,
2564                        &mut dc_chroma_freq,
2565                        &mut ac_chroma_freq,
2566                    );
2567                    prev_cr_dc = cr_blocks[i][0];
2568                }
2569            }
2570        } else {
2571            // Subsampled mode - iterate in MCU order with padding
2572            let y_blocks_h = (width + 7) / 8;
2573            let y_blocks_v = (height + 7) / 8;
2574            // Use ceiling division for chroma dimensions: (n + d - 1) / d
2575            let c_width = (width + h_samp - 1) / h_samp;
2576            let c_height = (height + v_samp - 1) / v_samp;
2577            let c_blocks_h = (c_width + 7) / 8;
2578            let c_blocks_v = (c_height + 7) / 8;
2579            let mcu_h = (y_blocks_h + h_samp - 1) / h_samp;
2580            let mcu_v = (y_blocks_v + v_samp - 1) / v_samp;
2581
2582            let mut prev_y_dc: i16 = 0;
2583            let mut prev_cb_dc: i16 = 0;
2584            let mut prev_cr_dc: i16 = 0;
2585
2586            for mcu_y in 0..mcu_v {
2587                for mcu_x in 0..mcu_h {
2588                    // Y blocks in this MCU
2589                    for dy in 0..v_samp {
2590                        for dx in 0..h_samp {
2591                            let y_bx = mcu_x * h_samp + dx;
2592                            let y_by = mcu_y * v_samp + dy;
2593                            let block = if y_bx < y_blocks_h && y_by < y_blocks_v {
2594                                let y_idx = y_by * y_blocks_h + y_bx;
2595                                &y_blocks[y_idx]
2596                            } else {
2597                                &ZERO_BLOCK
2598                            };
2599                            Self::collect_block_frequencies(
2600                                block,
2601                                prev_y_dc,
2602                                &mut dc_luma_freq,
2603                                &mut ac_luma_freq,
2604                            );
2605                            prev_y_dc = block[0];
2606                        }
2607                    }
2608
2609                    // Chroma blocks
2610                    if is_color {
2611                        let (cb_block, cr_block) = if mcu_x < c_blocks_h && mcu_y < c_blocks_v {
2612                            let c_idx = mcu_y * c_blocks_h + mcu_x;
2613                            (&cb_blocks[c_idx], &cr_blocks[c_idx])
2614                        } else {
2615                            (&ZERO_BLOCK, &ZERO_BLOCK)
2616                        };
2617
2618                        Self::collect_block_frequencies(
2619                            cb_block,
2620                            prev_cb_dc,
2621                            &mut dc_chroma_freq,
2622                            &mut ac_chroma_freq,
2623                        );
2624                        prev_cb_dc = cb_block[0];
2625
2626                        Self::collect_block_frequencies(
2627                            cr_block,
2628                            prev_cr_dc,
2629                            &mut dc_chroma_freq,
2630                            &mut ac_chroma_freq,
2631                        );
2632                        prev_cr_dc = cr_block[0];
2633                    }
2634                }
2635            }
2636        }
2637
2638        // Build optimized tables with DHT data
2639        let dc_luma = dc_luma_freq.generate_table_with_dht()?;
2640        let ac_luma = ac_luma_freq.generate_table_with_dht()?;
2641
2642        let (dc_chroma, ac_chroma) = if is_color {
2643            (
2644                dc_chroma_freq.generate_table_with_dht()?,
2645                ac_chroma_freq.generate_table_with_dht()?,
2646            )
2647        } else {
2648            // Use standard tables for grayscale (won't be used but needed for structure)
2649            use crate::huffman::{
2650                STD_AC_CHROMINANCE_BITS, STD_AC_CHROMINANCE_VALUES, STD_DC_CHROMINANCE_BITS,
2651                STD_DC_CHROMINANCE_VALUES,
2652            };
2653            use crate::huffman_opt::OptimizedTable;
2654
2655            (
2656                OptimizedTable {
2657                    table: HuffmanEncodeTable::std_dc_chrominance(),
2658                    bits: STD_DC_CHROMINANCE_BITS,
2659                    values: STD_DC_CHROMINANCE_VALUES.to_vec(),
2660                },
2661                OptimizedTable {
2662                    table: HuffmanEncodeTable::std_ac_chrominance(),
2663                    bits: STD_AC_CHROMINANCE_BITS,
2664                    values: STD_AC_CHROMINANCE_VALUES.to_vec(),
2665                },
2666            )
2667        };
2668
2669        Ok(OptimizedHuffmanTables {
2670            dc_luma,
2671            ac_luma,
2672            dc_chroma,
2673            ac_chroma,
2674        })
2675    }
2676
2677    /// Encodes blocks using optimized Huffman tables.
2678    ///
2679    /// Handles MCU interleaving for subsampled modes (4:2:0, 4:2:2, 4:4:0).
2680    fn encode_with_tables(
2681        &self,
2682        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
2683        cb_blocks: &[[i16; DCT_BLOCK_SIZE]],
2684        cr_blocks: &[[i16; DCT_BLOCK_SIZE]],
2685        is_color: bool,
2686        tables: &OptimizedHuffmanTables,
2687    ) -> Result<Vec<u8>> {
2688        let mut encoder = EntropyEncoder::new();
2689
2690        encoder.set_dc_table(0, tables.dc_luma.table.clone());
2691        encoder.set_ac_table(0, tables.ac_luma.table.clone());
2692        encoder.set_dc_table(1, tables.dc_chroma.table.clone());
2693        encoder.set_ac_table(1, tables.ac_chroma.table.clone());
2694
2695        if self.config.restart_interval > 0 {
2696            encoder.set_restart_interval(self.config.restart_interval);
2697        }
2698
2699        let width = self.config.width as usize;
2700        let height = self.config.height as usize;
2701        let (h_samp, v_samp) = match self.config.subsampling {
2702            Subsampling::S444 => (1, 1),
2703            Subsampling::S422 => (2, 1),
2704            Subsampling::S420 => (2, 2),
2705            Subsampling::S440 => (1, 2),
2706        };
2707
2708        if h_samp == 1 && v_samp == 1 {
2709            // 4:4:4 mode - simple 1:1 interleaving
2710            for (i, y_block) in y_blocks.iter().enumerate() {
2711                encoder.encode_block(y_block, 0, 0, 0)?;
2712
2713                if is_color {
2714                    encoder.encode_block(&cb_blocks[i], 1, 1, 1)?;
2715                    encoder.encode_block(&cr_blocks[i], 2, 1, 1)?;
2716                }
2717
2718                encoder.check_restart();
2719            }
2720        } else {
2721            // Subsampled mode - MCU interleaving
2722            let y_blocks_h = (width + 7) / 8;
2723            let y_blocks_v = (height + 7) / 8;
2724            // Use ceiling division for chroma dimensions: (n + d - 1) / d
2725            let c_width = (width + h_samp - 1) / h_samp;
2726            let c_height = (height + v_samp - 1) / v_samp;
2727            let c_blocks_h = (c_width + 7) / 8;
2728            let c_blocks_v = (c_height + 7) / 8;
2729
2730            let mcu_h = (y_blocks_h + h_samp - 1) / h_samp;
2731            let mcu_v = (y_blocks_v + v_samp - 1) / v_samp;
2732
2733            // Zero block for padding out-of-bounds MCU positions
2734            const ZERO_BLOCK: [i16; DCT_BLOCK_SIZE] = [0i16; DCT_BLOCK_SIZE];
2735
2736            for mcu_y in 0..mcu_v {
2737                for mcu_x in 0..mcu_h {
2738                    // Encode Y blocks in this MCU (must encode all 4 even if out of bounds)
2739                    for dy in 0..v_samp {
2740                        for dx in 0..h_samp {
2741                            let y_bx = mcu_x * h_samp + dx;
2742                            let y_by = mcu_y * v_samp + dy;
2743                            if y_bx < y_blocks_h && y_by < y_blocks_v {
2744                                let y_idx = y_by * y_blocks_h + y_bx;
2745                                encoder.encode_block(&y_blocks[y_idx], 0, 0, 0)?;
2746                            } else {
2747                                // Out of bounds - encode zero block (padding)
2748                                encoder.encode_block(&ZERO_BLOCK, 0, 0, 0)?;
2749                            }
2750                        }
2751                    }
2752
2753                    // Encode Cb and Cr blocks (always, even if out of bounds)
2754                    if is_color {
2755                        if mcu_x < c_blocks_h && mcu_y < c_blocks_v {
2756                            let c_idx = mcu_y * c_blocks_h + mcu_x;
2757                            encoder.encode_block(&cb_blocks[c_idx], 1, 1, 1)?;
2758                            encoder.encode_block(&cr_blocks[c_idx], 2, 1, 1)?;
2759                        } else {
2760                            // Out of bounds - encode zero blocks (padding)
2761                            encoder.encode_block(&ZERO_BLOCK, 1, 1, 1)?;
2762                            encoder.encode_block(&ZERO_BLOCK, 2, 1, 1)?;
2763                        }
2764                    }
2765
2766                    encoder.check_restart();
2767                }
2768            }
2769        }
2770
2771        Ok(encoder.finish())
2772    }
2773
2774    /// Encodes blocks using standard (fixed) Huffman tables - single pass.
2775    ///
2776    /// Handles MCU interleaving for subsampled modes (4:2:0, 4:2:2, 4:4:0).
2777    fn encode_blocks_standard(
2778        &self,
2779        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
2780        cb_blocks: &[[i16; DCT_BLOCK_SIZE]],
2781        cr_blocks: &[[i16; DCT_BLOCK_SIZE]],
2782        is_color: bool,
2783    ) -> Result<Vec<u8>> {
2784        let mut encoder = EntropyEncoder::new();
2785
2786        encoder.set_dc_table(0, HuffmanEncodeTable::std_dc_luminance());
2787        encoder.set_ac_table(0, HuffmanEncodeTable::std_ac_luminance());
2788        encoder.set_dc_table(1, HuffmanEncodeTable::std_dc_chrominance());
2789        encoder.set_ac_table(1, HuffmanEncodeTable::std_ac_chrominance());
2790
2791        if self.config.restart_interval > 0 {
2792            encoder.set_restart_interval(self.config.restart_interval);
2793        }
2794
2795        let width = self.config.width as usize;
2796        let height = self.config.height as usize;
2797        let (h_samp, v_samp) = match self.config.subsampling {
2798            Subsampling::S444 => (1, 1),
2799            Subsampling::S422 => (2, 1),
2800            Subsampling::S420 => (2, 2),
2801            Subsampling::S440 => (1, 2),
2802        };
2803
2804        if h_samp == 1 && v_samp == 1 {
2805            // 4:4:4 mode - simple 1:1 interleaving
2806            for (i, y_block) in y_blocks.iter().enumerate() {
2807                encoder.encode_block(y_block, 0, 0, 0)?;
2808
2809                if is_color {
2810                    encoder.encode_block(&cb_blocks[i], 1, 1, 1)?;
2811                    encoder.encode_block(&cr_blocks[i], 2, 1, 1)?;
2812                }
2813
2814                encoder.check_restart();
2815            }
2816        } else {
2817            // Subsampled mode - MCU interleaving
2818            let y_blocks_h = (width + 7) / 8;
2819            let y_blocks_v = (height + 7) / 8;
2820            // Use ceiling division for chroma dimensions: (n + d - 1) / d
2821            let c_width = (width + h_samp - 1) / h_samp;
2822            let c_height = (height + v_samp - 1) / v_samp;
2823            let c_blocks_h = (c_width + 7) / 8;
2824            let c_blocks_v = (c_height + 7) / 8;
2825
2826            // MCU dimensions in terms of Y blocks
2827            let mcu_h = (y_blocks_h + h_samp - 1) / h_samp;
2828            let mcu_v = (y_blocks_v + v_samp - 1) / v_samp;
2829
2830            // Zero block for padding out-of-bounds MCU positions
2831            const ZERO_BLOCK: [i16; DCT_BLOCK_SIZE] = [0i16; DCT_BLOCK_SIZE];
2832
2833            for mcu_y in 0..mcu_v {
2834                for mcu_x in 0..mcu_h {
2835                    // Encode Y blocks in this MCU (must encode all even if out of bounds)
2836                    for dy in 0..v_samp {
2837                        for dx in 0..h_samp {
2838                            let y_bx = mcu_x * h_samp + dx;
2839                            let y_by = mcu_y * v_samp + dy;
2840                            if y_bx < y_blocks_h && y_by < y_blocks_v {
2841                                let y_idx = y_by * y_blocks_h + y_bx;
2842                                encoder.encode_block(&y_blocks[y_idx], 0, 0, 0)?;
2843                            } else {
2844                                // Out of bounds - encode zero block (padding)
2845                                encoder.encode_block(&ZERO_BLOCK, 0, 0, 0)?;
2846                            }
2847                        }
2848                    }
2849
2850                    // Encode Cb and Cr blocks (always, even if out of bounds)
2851                    if is_color {
2852                        if mcu_x < c_blocks_h && mcu_y < c_blocks_v {
2853                            let c_idx = mcu_y * c_blocks_h + mcu_x;
2854                            encoder.encode_block(&cb_blocks[c_idx], 1, 1, 1)?;
2855                            encoder.encode_block(&cr_blocks[c_idx], 2, 1, 1)?;
2856                        } else {
2857                            // Out of bounds - encode zero blocks (padding)
2858                            encoder.encode_block(&ZERO_BLOCK, 1, 1, 1)?;
2859                            encoder.encode_block(&ZERO_BLOCK, 2, 1, 1)?;
2860                        }
2861                    }
2862
2863                    encoder.check_restart();
2864                }
2865            }
2866        }
2867
2868        Ok(encoder.finish())
2869    }
2870
2871    /// Collects symbol frequencies from a block for Huffman optimization.
2872    fn collect_block_frequencies(
2873        coeffs: &[i16; DCT_BLOCK_SIZE],
2874        prev_dc: i16,
2875        dc_freq: &mut FrequencyCounter,
2876        ac_freq: &mut FrequencyCounter,
2877    ) {
2878        // DC coefficient - limit category to 11 for 8-bit JPEG compatibility
2879        let dc_diff = coeffs[0] - prev_dc;
2880        let dc_category = entropy::category(dc_diff).min(11);
2881        dc_freq.count(dc_category);
2882
2883        // AC coefficients
2884        let mut run = 0u8;
2885        for i in 1..DCT_BLOCK_SIZE {
2886            let ac = coeffs[i];
2887
2888            if ac == 0 {
2889                run += 1;
2890            } else {
2891                // Encode runs of 16 zeros (ZRL)
2892                while run >= 16 {
2893                    ac_freq.count(0xF0);
2894                    run -= 16;
2895                }
2896
2897                // Encode run/size symbol
2898                let ac_category = entropy::category(ac);
2899                let symbol = (run << 4) | ac_category;
2900                ac_freq.count(symbol);
2901                run = 0;
2902            }
2903        }
2904
2905        // EOB if trailing zeros
2906        if run > 0 {
2907            ac_freq.count(0x00);
2908        }
2909    }
2910
2911    /// Quantizes all XYB blocks for Huffman optimization.
2912    ///
2913    /// Returns quantized blocks for X, Y, and B components.
2914    /// B component is already downsampled (half resolution).
2915    #[allow(clippy::too_many_arguments)]
2916    #[allow(dead_code)] // Reserved for future XYB encoding improvements
2917    fn quantize_all_blocks_xyb(
2918        &self,
2919        x_plane: &[f32],
2920        y_plane: &[f32],
2921        b_plane: &[f32], // Already downsampled
2922        width: usize,
2923        height: usize,
2924        b_width: usize,
2925        b_height: usize,
2926        x_quant: &QuantTable,
2927        y_quant: &QuantTable,
2928        b_quant: &QuantTable,
2929    ) -> (
2930        Vec<[i16; DCT_BLOCK_SIZE]>,
2931        Vec<[i16; DCT_BLOCK_SIZE]>,
2932        Vec<[i16; DCT_BLOCK_SIZE]>,
2933    ) {
2934        // MCU size for 2×2, 2×2, 1×1 sampling: 16×16 pixels
2935        let mcu_cols = (width + 15) / 16;
2936        let mcu_rows = (height + 15) / 16;
2937        let num_xy_blocks = mcu_cols * mcu_rows * 4; // 4 blocks per MCU for X and Y
2938        let num_b_blocks = mcu_cols * mcu_rows; // 1 block per MCU for B
2939
2940        let mut x_blocks = Vec::with_capacity(num_xy_blocks);
2941        let mut y_blocks = Vec::with_capacity(num_xy_blocks);
2942        let mut b_blocks = Vec::with_capacity(num_b_blocks);
2943
2944        for mcu_y in 0..mcu_rows {
2945            for mcu_x in 0..mcu_cols {
2946                // Process 4 X blocks (2×2 arrangement within 16×16 MCU)
2947                for block_y in 0..2 {
2948                    for block_x in 0..2 {
2949                        let bx = mcu_x * 2 + block_x;
2950                        let by = mcu_y * 2 + block_y;
2951                        let x_block = self.extract_block_f32(x_plane, width, height, bx, by);
2952                        let x_dct = forward_dct_8x8(&x_block);
2953                        let x_quant_coeffs = quant::quantize_block(&x_dct, &x_quant.values);
2954                        x_blocks.push(natural_to_zigzag(&x_quant_coeffs));
2955                    }
2956                }
2957
2958                // Process 4 Y blocks (2×2 arrangement within 16×16 MCU)
2959                for block_y in 0..2 {
2960                    for block_x in 0..2 {
2961                        let bx = mcu_x * 2 + block_x;
2962                        let by = mcu_y * 2 + block_y;
2963                        let y_block = self.extract_block_f32(y_plane, width, height, bx, by);
2964                        let y_dct = forward_dct_8x8(&y_block);
2965                        let y_quant_coeffs = quant::quantize_block(&y_dct, &y_quant.values);
2966                        y_blocks.push(natural_to_zigzag(&y_quant_coeffs));
2967                    }
2968                }
2969
2970                // Process 1 B block (from downsampled plane)
2971                let b_block = self.extract_block_f32(b_plane, b_width, b_height, mcu_x, mcu_y);
2972                let b_dct = forward_dct_8x8(&b_block);
2973                let b_quant_coeffs = quant::quantize_block(&b_dct, &b_quant.values);
2974                b_blocks.push(natural_to_zigzag(&b_quant_coeffs));
2975            }
2976        }
2977
2978        (x_blocks, y_blocks, b_blocks)
2979    }
2980
2981    /// Quantizes all XYB blocks with jpegli-style adaptive quantization (no trellis).
2982    ///
2983    /// This version uses the AQ map for per-block modulation with zero-bias,
2984    /// matching jpegli's default AQ behavior without hybrid trellis.
2985    ///
2986    /// For XYB mode:
2987    /// - X and Y use luma tables (both are full-resolution "luma-like" channels)
2988    /// - B uses chroma tables (downsampled blue channel)
2989    #[allow(clippy::too_many_arguments)]
2990    fn quantize_all_blocks_xyb_with_aq_simple(
2991        &self,
2992        x_plane: &[f32],
2993        y_plane: &[f32],
2994        b_plane: &[f32], // Already downsampled
2995        width: usize,
2996        height: usize,
2997        b_width: usize,
2998        b_height: usize,
2999        x_quant: &QuantTable,
3000        y_quant: &QuantTable,
3001        b_quant: &QuantTable,
3002        aq_map: &crate::adaptive_quant::AQStrengthMap,
3003        x_zero_bias: &ZeroBiasParams,
3004        y_zero_bias: &ZeroBiasParams,
3005        b_zero_bias: &ZeroBiasParams,
3006    ) -> (
3007        Vec<[i16; DCT_BLOCK_SIZE]>,
3008        Vec<[i16; DCT_BLOCK_SIZE]>,
3009        Vec<[i16; DCT_BLOCK_SIZE]>,
3010    ) {
3011        // MCU size for 2×2, 2×2, 1×1 sampling: 16×16 pixels
3012        let mcu_cols = (width + 15) / 16;
3013        let mcu_rows = (height + 15) / 16;
3014        let num_xy_blocks = mcu_cols * mcu_rows * 4; // 4 blocks per MCU for X and Y
3015        let num_b_blocks = mcu_cols * mcu_rows; // 1 block per MCU for B
3016
3017        let mut x_blocks = Vec::with_capacity(num_xy_blocks);
3018        let mut y_blocks = Vec::with_capacity(num_xy_blocks);
3019        let mut b_blocks = Vec::with_capacity(num_b_blocks);
3020
3021        for mcu_y in 0..mcu_rows {
3022            for mcu_x in 0..mcu_cols {
3023                // Process 4 X blocks (2×2 arrangement within 16×16 MCU)
3024                for block_y in 0..2 {
3025                    for block_x in 0..2 {
3026                        let bx = mcu_x * 2 + block_x;
3027                        let by = mcu_y * 2 + block_y;
3028                        let aq_strength = aq_map.get(bx, by);
3029
3030                        let x_block = self.extract_block_f32(x_plane, width, height, bx, by);
3031                        let x_dct = forward_dct_8x8(&x_block);
3032                        let x_quant_coeffs = quant::quantize_block_with_zero_bias(
3033                            &x_dct,
3034                            &x_quant.values,
3035                            x_zero_bias,
3036                            aq_strength,
3037                        );
3038                        x_blocks.push(natural_to_zigzag(&x_quant_coeffs));
3039                    }
3040                }
3041
3042                // Process 4 Y blocks (2×2 arrangement within 16×16 MCU)
3043                for block_y in 0..2 {
3044                    for block_x in 0..2 {
3045                        let bx = mcu_x * 2 + block_x;
3046                        let by = mcu_y * 2 + block_y;
3047                        let aq_strength = aq_map.get(bx, by);
3048
3049                        let y_block = self.extract_block_f32(y_plane, width, height, bx, by);
3050                        let y_dct = forward_dct_8x8(&y_block);
3051                        let y_quant_coeffs = quant::quantize_block_with_zero_bias(
3052                            &y_dct,
3053                            &y_quant.values,
3054                            y_zero_bias,
3055                            aq_strength,
3056                        );
3057                        y_blocks.push(natural_to_zigzag(&y_quant_coeffs));
3058                    }
3059                }
3060
3061                // Process 1 B block (from downsampled plane)
3062                // For B channel: Average AQ from 4 parent full-res blocks
3063                let b_aq_strength = {
3064                    let mut sum = 0.0f32;
3065                    for dy in 0..2 {
3066                        for dx in 0..2 {
3067                            let bx = mcu_x * 2 + dx;
3068                            let by = mcu_y * 2 + dy;
3069                            sum += aq_map.get(bx, by);
3070                        }
3071                    }
3072                    sum / 4.0
3073                };
3074
3075                let b_block = self.extract_block_f32(b_plane, b_width, b_height, mcu_x, mcu_y);
3076                let b_dct = forward_dct_8x8(&b_block);
3077                let b_quant_coeffs = quant::quantize_block_with_zero_bias(
3078                    &b_dct,
3079                    &b_quant.values,
3080                    b_zero_bias,
3081                    b_aq_strength,
3082                );
3083                b_blocks.push(natural_to_zigzag(&b_quant_coeffs));
3084            }
3085        }
3086
3087        (x_blocks, y_blocks, b_blocks)
3088    }
3089
3090    /// Quantizes all XYB blocks with adaptive quantization support.
3091    ///
3092    /// This version uses the AQ map for per-block modulation and optionally
3093    /// applies hybrid trellis quantization when enabled.
3094    ///
3095    /// For XYB mode:
3096    /// - X and Y use luma tables (both are full-resolution "luma-like" channels)
3097    /// - B uses chroma tables (downsampled blue channel)
3098    #[cfg(feature = "hybrid-trellis")]
3099    #[allow(clippy::too_many_arguments)]
3100    fn quantize_all_blocks_xyb_with_aq(
3101        &self,
3102        x_plane: &[f32],
3103        y_plane: &[f32],
3104        b_plane: &[f32], // Already downsampled
3105        width: usize,
3106        height: usize,
3107        b_width: usize,
3108        b_height: usize,
3109        x_quant: &QuantTable,
3110        y_quant: &QuantTable,
3111        b_quant: &QuantTable,
3112        aq_map: &crate::adaptive_quant::AQStrengthMap,
3113        hybrid_ctx: Option<&HybridQuantContext>,
3114    ) -> (
3115        Vec<[i16; DCT_BLOCK_SIZE]>,
3116        Vec<[i16; DCT_BLOCK_SIZE]>,
3117        Vec<[i16; DCT_BLOCK_SIZE]>,
3118    ) {
3119        // MCU size for 2×2, 2×2, 1×1 sampling: 16×16 pixels
3120        let mcu_cols = (width + 15) / 16;
3121        let mcu_rows = (height + 15) / 16;
3122        let num_xy_blocks = mcu_cols * mcu_rows * 4; // 4 blocks per MCU for X and Y
3123        let num_b_blocks = mcu_cols * mcu_rows; // 1 block per MCU for B
3124
3125        let mut x_blocks = Vec::with_capacity(num_xy_blocks);
3126        let mut y_blocks = Vec::with_capacity(num_xy_blocks);
3127        let mut b_blocks = Vec::with_capacity(num_b_blocks);
3128
3129        for mcu_y in 0..mcu_rows {
3130            for mcu_x in 0..mcu_cols {
3131                // Process 4 X blocks (2×2 arrangement within 16×16 MCU)
3132                for block_y in 0..2 {
3133                    for block_x in 0..2 {
3134                        let bx = mcu_x * 2 + block_x;
3135                        let by = mcu_y * 2 + block_y;
3136                        let aq_strength = aq_map.get(bx, by);
3137
3138                        let x_block = self.extract_block_f32(x_plane, width, height, bx, by);
3139                        let x_dct = forward_dct_8x8(&x_block);
3140
3141                        // X is luma-like in XYB, dampen=1.0
3142                        let x_quant_coeffs = if let Some(ctx) = hybrid_ctx {
3143                            ctx.quantize_block(&x_dct, &x_quant.values, aq_strength, 1.0, true)
3144                        } else {
3145                            quant::quantize_block(&x_dct, &x_quant.values)
3146                        };
3147                        x_blocks.push(natural_to_zigzag(&x_quant_coeffs));
3148                    }
3149                }
3150
3151                // Process 4 Y blocks (2×2 arrangement within 16×16 MCU)
3152                for block_y in 0..2 {
3153                    for block_x in 0..2 {
3154                        let bx = mcu_x * 2 + block_x;
3155                        let by = mcu_y * 2 + block_y;
3156                        let aq_strength = aq_map.get(bx, by);
3157
3158                        let y_block = self.extract_block_f32(y_plane, width, height, bx, by);
3159                        let y_dct = forward_dct_8x8(&y_block);
3160
3161                        // Y is the primary luma channel in XYB, dampen=1.0
3162                        let y_quant_coeffs = if let Some(ctx) = hybrid_ctx {
3163                            ctx.quantize_block(&y_dct, &y_quant.values, aq_strength, 1.0, true)
3164                        } else {
3165                            quant::quantize_block(&y_dct, &y_quant.values)
3166                        };
3167                        y_blocks.push(natural_to_zigzag(&y_quant_coeffs));
3168                    }
3169                }
3170
3171                // Process 1 B block (from downsampled plane)
3172                // Average AQ from the 4 corresponding full-res blocks
3173                let b_aq_strength = {
3174                    let mut sum = 0.0f32;
3175                    for dy in 0..2 {
3176                        for dx in 0..2 {
3177                            let bx = mcu_x * 2 + dx;
3178                            let by = mcu_y * 2 + dy;
3179                            sum += aq_map.get(bx, by);
3180                        }
3181                    }
3182                    sum / 4.0
3183                };
3184
3185                let b_block = self.extract_block_f32(b_plane, b_width, b_height, mcu_x, mcu_y);
3186                let b_dct = forward_dct_8x8(&b_block);
3187
3188                // B is chroma-like (blue channel), is_luma=false
3189                let b_quant_coeffs = if let Some(ctx) = hybrid_ctx {
3190                    ctx.quantize_block(&b_dct, &b_quant.values, b_aq_strength, 1.0, false)
3191                } else {
3192                    quant::quantize_block(&b_dct, &b_quant.values)
3193                };
3194                b_blocks.push(natural_to_zigzag(&b_quant_coeffs));
3195            }
3196        }
3197
3198        (x_blocks, y_blocks, b_blocks)
3199    }
3200
3201    /// Builds optimized Huffman tables for XYB mode.
3202    ///
3203    /// XYB uses a single shared table for all components (luminance tables).
3204    /// Returns the optimized DC and AC tables.
3205    fn build_optimized_tables_xyb(
3206        &self,
3207        x_blocks: &[[i16; DCT_BLOCK_SIZE]],
3208        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
3209        b_blocks: &[[i16; DCT_BLOCK_SIZE]],
3210    ) -> Result<(
3211        crate::huffman_opt::OptimizedTable,
3212        crate::huffman_opt::OptimizedTable,
3213    )> {
3214        let mut dc_freq = FrequencyCounter::new();
3215        let mut ac_freq = FrequencyCounter::new();
3216
3217        // Collect frequencies from all components
3218        // Note: XYB MCU order is 4 X blocks, 4 Y blocks, 1 B block per MCU
3219        // But since all share the same table, we just iterate through them
3220
3221        // In XYB mode, we have interleaved blocks per MCU:
3222        // [X0, X1, X2, X3, Y0, Y1, Y2, Y3, B0] per MCU
3223        // DC prediction carries across MCUs for each component (standard JPEG behavior)
3224
3225        let mcu_count = b_blocks.len();
3226
3227        // Each component maintains its own DC prediction across all MCUs
3228        let mut prev_dc_x: i16 = 0;
3229        let mut prev_dc_y: i16 = 0;
3230        let mut prev_dc_b: i16 = 0;
3231
3232        for mcu_idx in 0..mcu_count {
3233            // X blocks (4 per MCU)
3234            let x_start = mcu_idx * 4;
3235            for i in 0..4 {
3236                let block = &x_blocks[x_start + i];
3237                Self::collect_block_frequencies(block, prev_dc_x, &mut dc_freq, &mut ac_freq);
3238                prev_dc_x = block[0];
3239            }
3240
3241            // Y blocks (4 per MCU)
3242            let y_start = mcu_idx * 4;
3243            for i in 0..4 {
3244                let block = &y_blocks[y_start + i];
3245                Self::collect_block_frequencies(block, prev_dc_y, &mut dc_freq, &mut ac_freq);
3246                prev_dc_y = block[0];
3247            }
3248
3249            // B block (1 per MCU)
3250            Self::collect_block_frequencies(
3251                &b_blocks[mcu_idx],
3252                prev_dc_b,
3253                &mut dc_freq,
3254                &mut ac_freq,
3255            );
3256            prev_dc_b = b_blocks[mcu_idx][0];
3257        }
3258
3259        // Generate optimized tables
3260        let dc_table = dc_freq.generate_table_with_dht()?;
3261        let ac_table = ac_freq.generate_table_with_dht()?;
3262
3263        Ok((dc_table, ac_table))
3264    }
3265
3266    /// Encodes XYB blocks using optimized Huffman tables.
3267    #[allow(clippy::too_many_arguments)]
3268    fn encode_with_tables_xyb(
3269        &self,
3270        x_blocks: &[[i16; DCT_BLOCK_SIZE]],
3271        y_blocks: &[[i16; DCT_BLOCK_SIZE]],
3272        b_blocks: &[[i16; DCT_BLOCK_SIZE]],
3273        dc_table: &crate::huffman_opt::OptimizedTable,
3274        ac_table: &crate::huffman_opt::OptimizedTable,
3275    ) -> Result<Vec<u8>> {
3276        let mut encoder = EntropyEncoder::new();
3277
3278        // Use the same optimized table for all components
3279        encoder.set_dc_table(0, dc_table.table.clone());
3280        encoder.set_ac_table(0, ac_table.table.clone());
3281
3282        if self.config.restart_interval > 0 {
3283            encoder.set_restart_interval(self.config.restart_interval);
3284        }
3285
3286        let mcu_count = b_blocks.len();
3287        for mcu_idx in 0..mcu_count {
3288            // X blocks (4 per MCU)
3289            let x_start = mcu_idx * 4;
3290            for i in 0..4 {
3291                encoder.encode_block(&x_blocks[x_start + i], 0, 0, 0)?;
3292            }
3293
3294            // Y blocks (4 per MCU)
3295            let y_start = mcu_idx * 4;
3296            for i in 0..4 {
3297                encoder.encode_block(&y_blocks[y_start + i], 1, 0, 0)?;
3298            }
3299
3300            // B block (1 per MCU)
3301            encoder.encode_block(&b_blocks[mcu_idx], 2, 0, 0)?;
3302
3303            encoder.check_restart();
3304        }
3305
3306        Ok(encoder.finish())
3307    }
3308
3309    /// Writes DHT markers for XYB optimized tables.
3310    fn write_huffman_tables_xyb_optimized(
3311        &self,
3312        output: &mut Vec<u8>,
3313        dc_table: &crate::huffman_opt::OptimizedTable,
3314        ac_table: &crate::huffman_opt::OptimizedTable,
3315    ) {
3316        let write_table = |out: &mut Vec<u8>, class: u8, id: u8, bits: &[u8; 16], values: &[u8]| {
3317            out.push(0xFF);
3318            out.push(MARKER_DHT);
3319            let length = 2 + 1 + 16 + values.len();
3320            out.push((length >> 8) as u8);
3321            out.push(length as u8);
3322            out.push((class << 4) | id);
3323            out.extend_from_slice(bits);
3324            out.extend_from_slice(values);
3325        };
3326
3327        // DC table (class=0, id=0)
3328        write_table(output, 0, 0, &dc_table.bits, &dc_table.values);
3329        // AC table (class=1, id=0)
3330        write_table(output, 1, 0, &ac_table.bits, &ac_table.values);
3331    }
3332
3333    /// Encodes scan data for XYB mode with float planes.
3334    ///
3335    /// Uses scaled XYB values (in [0, 1] range), converts to [0, 255],
3336    /// then level shifts by subtracting 128 before DCT.
3337    #[allow(clippy::too_many_arguments)]
3338    fn encode_scan_xyb_float(
3339        &self,
3340        x_plane: &[f32],
3341        y_plane: &[f32],
3342        b_plane: &[f32], // Already downsampled
3343        width: usize,
3344        height: usize,
3345        b_width: usize,
3346        b_height: usize,
3347        x_quant: &QuantTable,
3348        y_quant: &QuantTable,
3349        b_quant: &QuantTable,
3350    ) -> Result<Vec<u8>> {
3351        let mut encoder = EntropyEncoder::new();
3352
3353        // Set up Huffman tables - use luminance tables for all components in XYB mode
3354        encoder.set_dc_table(0, HuffmanEncodeTable::std_dc_luminance());
3355        encoder.set_ac_table(0, HuffmanEncodeTable::std_ac_luminance());
3356
3357        if self.config.restart_interval > 0 {
3358            encoder.set_restart_interval(self.config.restart_interval);
3359        }
3360
3361        // MCU size for 2×2, 2×2, 1×1 sampling: 16×16 pixels
3362        // Each MCU contains: 4 X blocks + 4 Y blocks + 1 B block = 9 blocks
3363        let mcu_cols = (width + 15) / 16;
3364        let mcu_rows = (height + 15) / 16;
3365
3366        for mcu_y in 0..mcu_rows {
3367            for mcu_x in 0..mcu_cols {
3368                // Process 4 X blocks (2×2 arrangement within 16×16 MCU)
3369                for block_y in 0..2 {
3370                    for block_x in 0..2 {
3371                        let bx = mcu_x * 2 + block_x;
3372                        let by = mcu_y * 2 + block_y;
3373                        let x_block = self.extract_block_f32(x_plane, width, height, bx, by);
3374                        let x_dct = forward_dct_8x8(&x_block);
3375                        let x_quant_coeffs = quant::quantize_block(&x_dct, &x_quant.values);
3376                        let x_zigzag = natural_to_zigzag(&x_quant_coeffs);
3377                        encoder.encode_block(&x_zigzag, 0, 0, 0)?;
3378                    }
3379                }
3380
3381                // Process 4 Y blocks (2×2 arrangement within 16×16 MCU)
3382                for block_y in 0..2 {
3383                    for block_x in 0..2 {
3384                        let bx = mcu_x * 2 + block_x;
3385                        let by = mcu_y * 2 + block_y;
3386                        let y_block = self.extract_block_f32(y_plane, width, height, bx, by);
3387                        let y_dct = forward_dct_8x8(&y_block);
3388                        let y_quant_coeffs = quant::quantize_block(&y_dct, &y_quant.values);
3389                        let y_zigzag = natural_to_zigzag(&y_quant_coeffs);
3390                        encoder.encode_block(&y_zigzag, 1, 0, 0)?;
3391                    }
3392                }
3393
3394                // Process 1 B block (from downsampled plane)
3395                let b_block = self.extract_block_f32(b_plane, b_width, b_height, mcu_x, mcu_y);
3396                let b_dct = forward_dct_8x8(&b_block);
3397                let b_quant_coeffs = quant::quantize_block(&b_dct, &b_quant.values);
3398                let b_zigzag = natural_to_zigzag(&b_quant_coeffs);
3399                encoder.encode_block(&b_zigzag, 2, 0, 0)?;
3400
3401                encoder.check_restart();
3402            }
3403        }
3404
3405        Ok(encoder.finish())
3406    }
3407
3408    /// Extracts an 8x8 block from a float plane (scaled XYB values).
3409    ///
3410    /// Scaled XYB values are in [0, 1] range. This method:
3411    /// 1. Multiplies by 255 to get to [0, 255] range
3412    /// 2. Subtracts 128 for level shifting (DCT input is [-128, 127])
3413    fn extract_block_f32(
3414        &self,
3415        plane: &[f32],
3416        width: usize,
3417        height: usize,
3418        bx: usize,
3419        by: usize,
3420    ) -> [f32; DCT_BLOCK_SIZE] {
3421        let mut block = [0.0f32; DCT_BLOCK_SIZE];
3422
3423        for y in 0..DCT_SIZE {
3424            for x in 0..DCT_SIZE {
3425                let px = (bx * DCT_SIZE + x).min(width - 1);
3426                let py = (by * DCT_SIZE + y).min(height - 1);
3427                let idx = py * width + px;
3428                // Scale from [0, 1] to [0, 255], then level shift by -128
3429                block[y * DCT_SIZE + x] = plane[idx] * 255.0 - 128.0;
3430            }
3431        }
3432
3433        block
3434    }
3435
3436    /// Extracts an 8x8 block from a u8 plane with level shift.
3437    #[allow(dead_code)]
3438    fn extract_block(
3439        &self,
3440        plane: &[u8],
3441        width: usize,
3442        height: usize,
3443        bx: usize,
3444        by: usize,
3445    ) -> [f32; DCT_BLOCK_SIZE] {
3446        let mut block = [0.0f32; DCT_BLOCK_SIZE];
3447
3448        for y in 0..DCT_SIZE {
3449            for x in 0..DCT_SIZE {
3450                let px = (bx * DCT_SIZE + x).min(width - 1);
3451                let py = (by * DCT_SIZE + y).min(height - 1);
3452                let idx = py * width + px;
3453                // Level shift: subtract 128
3454                block[y * DCT_SIZE + x] = plane[idx] as f32 - 128.0;
3455            }
3456        }
3457
3458        block
3459    }
3460
3461    /// Extracts an 8x8 block from a YCbCr f32 plane with level shift.
3462    /// Input values are in [0, 255] range, output is level-shifted by -128.
3463    fn extract_block_ycbcr_f32(
3464        &self,
3465        plane: &[f32],
3466        width: usize,
3467        height: usize,
3468        bx: usize,
3469        by: usize,
3470    ) -> [f32; DCT_BLOCK_SIZE] {
3471        let mut block = [0.0f32; DCT_BLOCK_SIZE];
3472
3473        for y in 0..DCT_SIZE {
3474            for x in 0..DCT_SIZE {
3475                let px = (bx * DCT_SIZE + x).min(width - 1);
3476                let py = (by * DCT_SIZE + y).min(height - 1);
3477                let idx = py * width + px;
3478                // Level shift: subtract 128 (values are already in [0, 255])
3479                block[y * DCT_SIZE + x] = plane[idx] - 128.0;
3480            }
3481        }
3482
3483        block
3484    }
3485}
3486
3487impl Default for Encoder {
3488    fn default() -> Self {
3489        Self::new()
3490    }
3491}
3492
3493/// Converts coefficients from natural order to zigzag order for JPEG encoding.
3494fn natural_to_zigzag(natural: &[i16; DCT_BLOCK_SIZE]) -> [i16; DCT_BLOCK_SIZE] {
3495    let mut zigzag = [0i16; DCT_BLOCK_SIZE];
3496    for i in 0..DCT_BLOCK_SIZE {
3497        zigzag[JPEG_ZIGZAG_ORDER[i] as usize] = natural[i];
3498    }
3499    zigzag
3500}
3501
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a tightly packed, row-major RGB buffer by evaluating
    /// `pixel(x, y)` for every coordinate of a `width` × `height` image.
    fn rgb_image(width: usize, height: usize, pixel: impl Fn(usize, usize) -> [u8; 3]) -> Vec<u8> {
        let mut data = Vec::with_capacity(width * height * 3);
        for y in 0..height {
            for x in 0..width {
                data.extend_from_slice(&pixel(x, y));
            }
        }
        data
    }

    /// Gradient pixel used by the 64x64 Huffman-optimization tests:
    /// R follows x, G follows y, B follows the diagonal.
    fn diagonal_gradient(x: usize, y: usize) -> [u8; 3] {
        [(x * 4) as u8, (y * 4) as u8, ((x + y) * 2) as u8]
    }

    /// Asserts that `jpeg` starts with an SOI marker and ends with an EOI marker.
    fn assert_soi_eoi(jpeg: &[u8]) {
        // Should start with SOI
        assert_eq!(jpeg[0], 0xFF);
        assert_eq!(jpeg[1], MARKER_SOI);
        // Should end with EOI
        assert_eq!(jpeg[jpeg.len() - 2], 0xFF);
        assert_eq!(jpeg[jpeg.len() - 1], MARKER_EOI);
    }

    /// Builder setters must be reflected in the stored configuration.
    #[test]
    fn test_encoder_creation() {
        let encoder = Encoder::new()
            .width(640)
            .height(480)
            .quality(Quality::from_quality(90.0));

        assert_eq!(encoder.config.width, 640);
        assert_eq!(encoder.config.height, 480);
    }

    /// A fresh encoder has no dimensions and must fail validation;
    /// one with width and height set must pass.
    #[test]
    fn test_encoder_validation() {
        let unconfigured = Encoder::new();
        assert!(unconfigured.validate().is_err());

        let configured = Encoder::new().width(100).height(100);
        assert!(configured.validate().is_ok());
    }

    /// A single flat 8x8 grayscale block encodes to a well-framed JPEG.
    #[test]
    fn test_encode_small_gray() {
        let encoder = Encoder::new()
            .width(8)
            .height(8)
            .pixel_format(PixelFormat::Gray)
            .quality(Quality::from_quality(90.0));

        let data = vec![128u8; 64];
        let result = encoder.encode(&data);
        assert!(result.is_ok());

        let jpeg = result.unwrap();
        assert_soi_eoi(&jpeg);
    }

    /// XYB mode on a 16x16 RGB image (exactly one MCU).
    #[test]
    fn test_encode_rgb_xyb_mode() {
        let encoder = Encoder::new()
            .width(16)
            .height(16)
            .pixel_format(PixelFormat::Rgb)
            .quality(Quality::from_quality(90.0))
            .use_xyb(true);

        // Red gradient along x, green gradient along y, constant blue.
        let data = rgb_image(16, 16, |x, y| [(x * 16) as u8, (y * 16) as u8, 128]);

        let result = encoder.encode(&data);
        assert!(result.is_ok(), "XYB encoding failed: {:?}", result.err());

        let jpeg = result.unwrap();
        assert_soi_eoi(&jpeg);

        // Should be a valid size (not too small)
        assert!(jpeg.len() > 100, "JPEG too small: {} bytes", jpeg.len());
        println!("XYB encoded JPEG size: {} bytes", jpeg.len());
    }

    /// XYB mode on a 32x32 image, i.e. a 2x2 grid of MCUs.
    #[test]
    fn test_encode_rgb_xyb_larger() {
        let encoder = Encoder::new()
            .width(32)
            .height(32)
            .pixel_format(PixelFormat::Rgb)
            .quality(Quality::from_quality(75.0))
            .use_xyb(true);

        // 4-pixel checkerboard alternating red and green, constant blue.
        let data = rgb_image(32, 32, |x, y| {
            let checker = ((x / 4) + (y / 4)) % 2 == 0;
            let red = if checker { 255 } else { 0 };
            let green = if checker { 0 } else { 255 };
            [red, green, 128]
        });

        let result = encoder.encode(&data);
        assert!(result.is_ok(), "XYB encoding failed: {:?}", result.err());

        let jpeg = result.unwrap();
        assert_soi_eoi(&jpeg);
        println!("XYB encoded 32x32 JPEG size: {} bytes", jpeg.len());
    }

    /// Output produced with optimized Huffman tables must be well-framed
    /// and accepted by an independent decoder.
    #[test]
    fn test_huffman_optimization_produces_valid_jpeg() {
        let width = 64u32;
        let height = 64u32;
        let data = rgb_image(width as usize, height as usize, diagonal_gradient);

        let encoder = Encoder::new()
            .width(width)
            .height(height)
            .quality(Quality::from_quality(75.0))
            .optimize_huffman(true);

        let result = encoder.encode(&data);
        assert!(
            result.is_ok(),
            "Optimized Huffman encoding failed: {:?}",
            result.err()
        );

        let jpeg = result.unwrap();
        assert_soi_eoi(&jpeg);

        // Verify it's decodable
        let decoded = jpeg_decoder::Decoder::new(&jpeg[..]).decode();
        assert!(
            decoded.is_ok(),
            "Optimized JPEG not decodable: {:?}",
            decoded.err()
        );
    }

    /// Optimized Huffman tables must never produce a larger file than the
    /// standard tables, and both outputs must decode.
    #[test]
    fn test_huffman_optimization_reduces_file_size() {
        let width = 128u32;
        let height = 128u32;

        // 16x16 tiles cycling through four content types so that symbol
        // frequencies are far from uniform.
        let data = rgb_image(width as usize, height as usize, |x, y| {
            match ((x / 16) + (y / 16)) % 4 {
                // Solid color
                0 => [180, 180, 180],
                // Gradient
                1 => [(x * 2) as u8, (y * 2) as u8, 100],
                // Checkerboard
                2 => {
                    let checker = ((x + y) % 2) as u8 * 255;
                    [checker, checker, checker]
                }
                // Texture
                _ => [
                    ((x * 5 + y * 3) % 256) as u8,
                    ((x * 3 + y * 7) % 256) as u8,
                    ((x * 2 + y * 2) % 256) as u8,
                ],
            }
        });

        // Same settings for both runs; only the Huffman optimization flag differs.
        let encode = |optimize: bool, failure: &str| {
            Encoder::new()
                .width(width)
                .height(height)
                .quality(Quality::from_quality(75.0))
                .optimize_huffman(optimize)
                .encode(&data)
                .expect(failure)
        };

        let jpeg_standard = encode(false, "Standard encoding failed");
        let jpeg_optimized = encode(true, "Optimized encoding failed");

        println!(
            "Standard size: {} bytes, Optimized size: {} bytes, Savings: {:.1}%",
            jpeg_standard.len(),
            jpeg_optimized.len(),
            (1.0 - jpeg_optimized.len() as f64 / jpeg_standard.len() as f64) * 100.0
        );

        // Optimized should be smaller or equal (never larger)
        assert!(
            jpeg_optimized.len() <= jpeg_standard.len(),
            "Optimized ({}) should not be larger than standard ({})",
            jpeg_optimized.len(),
            jpeg_standard.len()
        );

        // Verify both are decodable
        let decoded_std = jpeg_decoder::Decoder::new(&jpeg_standard[..]).decode();
        let decoded_opt = jpeg_decoder::Decoder::new(&jpeg_optimized[..]).decode();
        assert!(decoded_std.is_ok(), "Standard JPEG not decodable");
        assert!(decoded_opt.is_ok(), "Optimized JPEG not decodable");
    }

    /// In XYB mode, optimized Huffman tables must not increase file size.
    #[test]
    fn test_xyb_huffman_optimization() {
        let width = 64u32;
        let height = 64u32;
        let data = rgb_image(width as usize, height as usize, diagonal_gradient);

        // Same settings for both runs; only the Huffman optimization flag differs.
        let encode = |optimize: bool, failure: &str| {
            Encoder::new()
                .width(width)
                .height(height)
                .quality(Quality::from_quality(75.0))
                .use_xyb(true)
                .optimize_huffman(optimize)
                .encode(&data)
                .expect(failure)
        };

        let jpeg_standard = encode(false, "Standard XYB encoding failed");
        let jpeg_optimized = encode(true, "Optimized XYB encoding failed");

        println!(
            "XYB Standard: {} bytes, Optimized: {} bytes, Savings: {:.1}%",
            jpeg_standard.len(),
            jpeg_optimized.len(),
            (1.0 - jpeg_optimized.len() as f64 / jpeg_standard.len() as f64) * 100.0
        );

        // Verify both have valid JPEG structure
        assert_eq!(jpeg_standard[0], 0xFF);
        assert_eq!(jpeg_standard[1], MARKER_SOI);
        assert_eq!(jpeg_optimized[0], 0xFF);
        assert_eq!(jpeg_optimized[1], MARKER_SOI);

        // Optimized should be smaller or equal
        assert!(
            jpeg_optimized.len() <= jpeg_standard.len(),
            "XYB Optimized ({}) should not be larger than standard ({})",
            jpeg_optimized.len(),
            jpeg_standard.len()
        );
    }
}