// exr/compression/mod.rs

1
2//! Contains the compression attribute definition
3//! and methods to compress and decompress data.
4
5
6// private modules make non-breaking changes easier
7mod zip;
8mod rle;
9mod piz;
10mod pxr24;
11mod b44;
12
13
14use std::convert::TryInto;
15use std::mem::size_of;
16use half::f16;
17use crate::meta::attribute::{IntegerBounds, SampleType, ChannelList};
18use crate::error::{Result, Error, usize_to_i32};
19use crate::meta::header::Header;
20
21
/// A byte vector, used for owned raw pixel data and compressed file data.
pub type ByteVec = Vec<u8>;

/// A byte slice, used for borrowed raw data.
pub type Bytes<'s> = &'s [u8];
27
/// Specifies which compression method to use.
/// Use uncompressed data for fastest loading and writing speeds.
/// Use RLE compression for fast loading and writing with slight memory savings.
/// Use ZIP compression for slow processing with large memory savings.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Compression {

    /// Store uncompressed values.
    /// Produces large files that can be read and written very quickly.
    /// Consider using RLE instead, as it provides some compression with almost equivalent speed.
    Uncompressed,

    /// Produces slightly smaller files
    /// that can still be read and written rather quickly.
    /// The compressed file size is usually between 60 and 75 percent of the uncompressed size.
    /// Works best for images with large flat areas, such as masks and abstract graphics.
    /// This compression method is lossless.
    RLE,

    /// Uses ZIP compression to compress each line. Slowly produces small images
    /// which can be read with moderate speed. This compression method is lossless.
    /// Might be slightly faster but larger than `ZIP16`.
    ZIP1,  // TODO ZIP { individual_lines: bool, compression_level: Option<u8> }  // TODO specify zip compression level?

    /// Uses ZIP compression to compress blocks of 16 lines. Slowly produces small images
    /// which can be read with moderate speed. This compression method is lossless.
    /// Might be slightly slower but smaller than `ZIP1`.
    ZIP16, // TODO collapse with ZIP1

    /// PIZ compression works well for noisy and natural images. Works better with larger tiles.
    /// Only supported for flat images, but not for deep data.
    /// This compression method is lossless.
    // A wavelet transform is applied to the pixel data, and the result is Huffman-
    // encoded. This scheme tends to provide the best compression ratio for the types of
    // images that are typically processed at Industrial Light & Magic. Files are
    // compressed and decompressed at roughly the same speed. For photographic
    // images with film grain, the files are reduced to between 35 and 55 percent of their
    // uncompressed size.
    // PIZ compression works well for scan-line based files, and also for tiled files with
    // large tiles, but small tiles do not shrink much. (PIZ-compressed data start with a
    // relatively long header; if the input to the compressor is short, adding the header
    // tends to offset any size reduction of the input.)
    PIZ,

    /// Like `ZIP1`, but reduces precision of `f32` images to `f24`.
    /// Therefore, this is lossless compression for `f16` and `u32` data, lossy compression for `f32` data.
    /// This compression method works well for depth
    /// buffers and similar images, where the possible range of values is very large, but
    /// where full 32-bit floating-point accuracy is not necessary. Rounding improves
    /// compression significantly by eliminating the pixels' 8 least significant bits, which
    /// tend to be very noisy, and therefore difficult to compress.
    /// This produces really small image files. Only supported for flat images, not for deep data.
    // After reducing 32-bit floating-point data to 24 bits by rounding (while leaving 16-bit
    // floating-point data unchanged), differences between horizontally adjacent pixels
    // are compressed with zlib, similar to ZIP. PXR24 compression preserves image
    // channels of type HALF and UINT exactly, but the relative error of FLOAT data
    // increases to about 3e-5 (figure quoted by the OpenEXR documentation — TODO confirm).
    PXR24, // TODO specify zip compression level?

    /// This is a lossy compression method for f16 images.
    /// It's the predecessor of the `B44A` compression,
    /// which has improved compression rates for uniformly colored areas.
    /// You should probably use `B44A` instead of the plain `B44`.
    ///
    /// Only supported for flat images, not for deep data.
    // lossy 4-by-4 pixel block compression,
    // flat fields are compressed more
    // Channels of type HALF are split into blocks of four by four pixels or 32 bytes. Each
    // block is then packed into 14 bytes, reducing the data to 44 percent of their
    // uncompressed size. When B44 compression is applied to RGB images in
    // combination with luminance/chroma encoding (see below), the size of the
    // compressed pixels is about 22 percent of the size of the original RGB data.
    // Channels of type UINT or FLOAT are not compressed.
    // Decoding is fast enough to allow real-time playback of B44-compressed OpenEXR
    // image sequences on commodity hardware.
    // The size of a B44-compressed file depends on the number of pixels in the image,
    // but not on the data in the pixels. All images with the same resolution and the same
    // set of channels have the same size. This can be advantageous for systems that
    // support real-time playback of image sequences; the predictable file size makes it
    // easier to allocate space on storage media efficiently.
    // B44 compression is only supported for flat images.
    B44, // TODO B44 { optimize_uniform_areas: bool }

    /// This is a lossy compression method for f16 images.
    /// All f32 and u32 channels will be stored without compression.
    /// All the f16 pixels are divided into 4x4 blocks.
    /// Each block is then compressed as a whole.
    ///
    /// The 32 bytes of a block will require only ~14 bytes after compression,
    /// independent of the actual pixel contents. With chroma subsampling,
    /// a block will be compressed to ~7 bytes.
    /// Uniformly colored blocks will be compressed to ~3 bytes.
    ///
    /// The 512 bytes of an f32 block will not be compressed at all.
    ///
    /// Should be fast enough for realtime playback.
    /// Only supported for flat images, not for deep data.
    B44A, // TODO collapse with B44

    /// __This lossy compression is not yet supported by this implementation.__
    // lossy DCT based compression, in blocks
    // of 32 scanlines. More efficient for partial buffer access.
    DWAA(Option<f32>), // TODO does this have a default value? make this non optional? default Compression Level setting is 45.0

    /// __This lossy compression is not yet supported by this implementation.__
    // lossy DCT based compression, in blocks
    // of 256 scanlines. More efficient space
    // wise and faster to decode full frames
    // than DWAA_COMPRESSION.
    DWAB(Option<f32>), // TODO collapse with DWAA. default Compression Level setting is 45.0
}
139
140impl std::fmt::Display for Compression {
141    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142        write!(formatter, "{} compression", match self {
143            Compression::Uncompressed => "no",
144            Compression::RLE => "rle",
145            Compression::ZIP1 => "zip line",
146            Compression::ZIP16 => "zip block",
147            Compression::B44 => "b44",
148            Compression::B44A => "b44a",
149            Compression::DWAA(_) => "dwaa",
150            Compression::DWAB(_) => "dwab",
151            Compression::PIZ => "piz",
152            Compression::PXR24 => "pxr24",
153        })
154    }
155}
156
157
158
159impl Compression {
160
161    /// Compress the image section of bytes.
162    pub fn compress_image_section(self, header: &Header, uncompressed_native_endian: ByteVec, pixel_section: IntegerBounds) -> Result<ByteVec> {
163        let max_tile_size = header.max_block_pixel_size();
164
165        assert!(pixel_section.validate(Some(max_tile_size)).is_ok(), "decompress tile coordinate bug");
166        if header.deep { assert!(self.supports_deep_data()) }
167
168        use self::Compression::*;
169        let compressed_little_endian = match self {
170            Uncompressed => {
171                return Ok(convert_current_to_little_endian(
172                    uncompressed_native_endian, &header.channels, pixel_section
173                ))
174            },
175
176            // we need to clone here, because we might have to fallback to the uncompressed data later (when compressed data is larger than raw data)
177            ZIP16 => zip::compress_bytes(&header.channels, uncompressed_native_endian.clone(), pixel_section),
178            ZIP1 => zip::compress_bytes(&header.channels, uncompressed_native_endian.clone(), pixel_section),
179            RLE => rle::compress_bytes(&header.channels, uncompressed_native_endian.clone(), pixel_section),
180            PIZ => piz::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section),
181            PXR24 => pxr24::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section),
182            B44 => b44::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section, false),
183            B44A => b44::compress(&header.channels, uncompressed_native_endian.clone(), pixel_section, true),
184            _ => return Err(Error::unsupported(format!("yet unimplemented compression method: {}", self)))
185        };
186
187        let compressed_little_endian = compressed_little_endian.map_err(|_|
188            Error::invalid(format!("pixels cannot be compressed ({})", self))
189        )?;
190
191        if self == Uncompressed || compressed_little_endian.len() < uncompressed_native_endian.len() {
192            // only write compressed if it actually is smaller than raw
193            Ok(compressed_little_endian)
194        }
195        else {
196            // if we do not use compression, manually convert uncompressed data
197            Ok(convert_current_to_little_endian(uncompressed_native_endian, &header.channels, pixel_section))
198        }
199    }
200
201    /// Decompress the image section of bytes.
202    pub fn decompress_image_section(self, header: &Header, compressed: ByteVec, pixel_section: IntegerBounds, pedantic: bool) -> Result<ByteVec> {
203        let max_tile_size = header.max_block_pixel_size();
204
205        assert!(pixel_section.validate(Some(max_tile_size)).is_ok(), "decompress tile coordinate bug");
206        if header.deep { assert!(self.supports_deep_data()) }
207
208        let expected_byte_size = pixel_section.size.area() * header.channels.bytes_per_pixel; // FIXME this needs to account for subsampling anywhere
209
210        // note: always true where self == Uncompressed
211        if compressed.len() == expected_byte_size {
212            // the compressed data was larger than the raw data, so the small raw data has been written
213            Ok(convert_little_endian_to_current(compressed, &header.channels, pixel_section))
214        }
215        else {
216            use self::Compression::*;
217            let bytes = match self {
218                Uncompressed => Ok(convert_little_endian_to_current(compressed, &header.channels, pixel_section)),
219                ZIP16 => zip::decompress_bytes(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
220                ZIP1 => zip::decompress_bytes(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
221                RLE => rle::decompress_bytes(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
222                PIZ => piz::decompress(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
223                PXR24 => pxr24::decompress(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
224                B44 | B44A => b44::decompress(&header.channels, compressed, pixel_section, expected_byte_size, pedantic),
225                _ => return Err(Error::unsupported(format!("yet unimplemented compression method: {}", self)))
226            };
227
228            // map all errors to compression errors
229            let bytes = bytes
230                .map_err(|decompression_error| match decompression_error {
231                    Error::NotSupported(message) =>
232                        Error::unsupported(format!("yet unimplemented compression special case ({})", message)),
233
234                    error => Error::invalid(format!(
235                        "compressed {:?} data ({})",
236                        self, error.to_string()
237                    )),
238                })?;
239
240            if bytes.len() != expected_byte_size {
241                Err(Error::invalid("decompressed data"))
242            }
243
244            else { Ok(bytes) }
245        }
246    }
247
248    /// For scan line images and deep scan line images, one or more scan lines may be
249    /// stored together as a scan line block. The number of scan lines per block
250    /// depends on how the pixel data are compressed.
251    pub fn scan_lines_per_block(self) -> usize {
252        use self::Compression::*;
253        match self {
254            Uncompressed | RLE   | ZIP1    => 1,
255            ZIP16 | PXR24                  => 16,
256            PIZ   | B44   | B44A | DWAA(_) => 32,
257            DWAB(_)                        => 256,
258        }
259    }
260
261    /// Deep data can only be compressed using RLE or ZIP compression.
262    pub fn supports_deep_data(self) -> bool {
263        use self::Compression::*;
264        match self {
265            Uncompressed | RLE | ZIP1 => true,
266            _ => false,
267        }
268    }
269
270    /// Most compression methods will reconstruct the exact pixel bytes,
271    /// but some might throw away unimportant data for specific types of samples.
272    pub fn is_lossless_for(self, sample_type: SampleType) -> bool {
273        use self::Compression::*;
274        match self {
275            PXR24 => sample_type != SampleType::F32, // pxr reduces f32 to f24
276            B44 | B44A => sample_type != SampleType::F16, // b44 only compresses f16 values, others are left uncompressed
277            Uncompressed | RLE | ZIP1 | ZIP16 | PIZ => true,
278            DWAB(_) | DWAA(_) => false,
279        }
280    }
281
282    /// Most compression methods will reconstruct the exact pixel bytes,
283    /// but some might throw away unimportant data in some cases.
284    pub fn may_loose_data(self) -> bool {
285        use self::Compression::*;
286        match self {
287            Uncompressed | RLE | ZIP1 | ZIP16 | PIZ => false,
288            PXR24 | B44 | B44A | DWAB(_) | DWAA(_)  => true,
289        }
290    }
291
292    /// Most compression methods will reconstruct the exact pixel bytes,
293    /// but some might replace NaN with zeroes.
294    pub fn supports_nan(self) -> bool {
295        use self::Compression::*;
296        match self {
297            B44 | B44A | DWAB(_) | DWAA(_) => false, // TODO dwa might support it?
298            _ => true
299        }
300    }
301
302}
303
304// see https://github.com/AcademySoftwareFoundation/openexr/blob/6a9f8af6e89547bcd370ae3cec2b12849eee0b54/OpenEXR/IlmImf/ImfMisc.cpp#L1456-L1541
305
306#[allow(unused)] // allows the extra parameters to be unused
307fn convert_current_to_little_endian(mut bytes: ByteVec, channels: &ChannelList, rectangle: IntegerBounds) -> ByteVec {
308    #[cfg(target = "big_endian")]
309    reverse_block_endianness(&mut byte_vec, channels, rectangle);
310
311    bytes
312}
313
314#[allow(unused)] // allows the extra parameters to be unused
315fn convert_little_endian_to_current(mut bytes: ByteVec, channels: &ChannelList, rectangle: IntegerBounds) -> ByteVec {
316    #[cfg(target = "big_endian")]
317    reverse_block_endianness(&mut bytes, channels, rectangle);
318
319    bytes
320}
321
// Swaps the byte order of every sample in a pixel block, in place.
// Walks the block line by line and, within each line, channel by channel,
// consuming the byte slice in the same order in which the samples are stored.
#[allow(unused)] // unused when on little endian system
fn reverse_block_endianness(bytes: &mut [u8], channels: &ChannelList, rectangle: IntegerBounds){
    // the slice of bytes that has not been processed yet
    let mut remaining_bytes: &mut [u8] = bytes;

    for y in rectangle.position.y() .. rectangle.end().y() {
        for channel in &channels.list {
            // subsampled channels do not store samples for every line.
            // mod_p yields a non-negative remainder even for negative window coordinates.
            let line_is_subsampled = mod_p(y, usize_to_i32(channel.sampling.y())) != 0;
            if line_is_subsampled { continue; }

            // NOTE(review): assumes the section width divides evenly by the sampling factor — TODO confirm rounding for odd sizes
            let sample_count = rectangle.size.width() / channel.sampling.x();

            // each sample type knows its byte width, so chomp the correct number of bytes
            match channel.sample_type {
                SampleType::F16 => remaining_bytes = chomp_convert_n::<f16>(reverse_2_bytes, remaining_bytes, sample_count),
                SampleType::F32 => remaining_bytes = chomp_convert_n::<f32>(reverse_4_bytes, remaining_bytes, sample_count),
                SampleType::U32 => remaining_bytes = chomp_convert_n::<u32>(reverse_4_bytes, remaining_bytes, sample_count),
            }
        }
    }

    // Applies `convert_single_value` to the first `count` values of type `T`
    // at the start of `bytes`, returning the not-yet-processed rest of the slice.
    #[inline]
    fn chomp_convert_n<T>(convert_single_value: fn(&mut[u8]), mut bytes: &mut [u8], count: usize) -> &mut [u8] {
        let type_size = size_of::<T>();
        let (line_bytes, rest) = bytes.split_at_mut(count * type_size);
        let value_byte_chunks = line_bytes.chunks_exact_mut(type_size);

        for value_bytes in value_byte_chunks {
            convert_single_value(value_bytes);
        }

        rest
    }

    debug_assert!(remaining_bytes.is_empty(), "not all bytes were converted to little endian");
}
356
/// Swap the two bytes of a 16-bit value in place.
/// Panics if the slice does not hold exactly two bytes.
#[inline]
fn reverse_2_bytes(bytes: &mut [u8]){
    let [first, second]: [u8; 2] = bytes.try_into().expect("invalid byte count");
    bytes.copy_from_slice(&[second, first]);
}
363
/// Reverse the four bytes of a 32-bit value in place.
/// Panics if the slice does not hold exactly four bytes.
#[inline]
fn reverse_4_bytes(bytes: &mut [u8]){
    let mut reversed: [u8; 4] = bytes.try_into().expect("invalid byte count");
    reversed.reverse();
    bytes.copy_from_slice(&reversed);
}
369
/// Division that rounds such that the matching remainder (see `mod_p`)
/// is never negative, regardless of the signs of the operands.
#[inline]
fn div_p (x: i32, y: i32) -> i32 {
    // dispatch on the sign combination of both operands
    match (x >= 0, y >= 0) {
        (true,  true)  => x / y,
        (true,  false) => -(x / -y),
        (false, true)  => -((y - 1 - x) / y),
        (false, false) => (-y - 1 - x) / -y,
    }
}
381
382#[inline]
383fn mod_p(x: i32, y: i32) -> i32 {
384    x - y * div_p(x, y)
385}
386
/// A collection of functions used to prepare data for compression.
mod optimize_bytes {

    /// Integrate over all differences to the previous value in order to reconstruct sample values.
    pub fn differences_to_samples(buffer: &mut [u8]) {
        // The naive implementation is very simple:
        //
        // for index in 1..buffer.len() {
        //    buffer[index] = (buffer[index - 1] as i32 + buffer[index] as i32 - 128) as u8;
        // }
        //
        // But we process elements in pairs to take advantage of instruction-level parallelism.
        // When computations within a pair do not depend on each other, they can be processed in parallel.
        // Since this function is responsible for a very large chunk of execution time,
        // this tweak alone improves decoding performance of RLE images by 20%.
        if let Some(first) = buffer.get(0) {
            let mut previous = *first as i16;
            for chunk in &mut buffer[1..].chunks_exact_mut(2) {
                // no bounds checks here due to indices and chunk size being constant
                let diff0 = chunk[0] as i16;
                let diff1 = chunk[1] as i16;
                // these two computations do not depend on each other, unlike in the naive version,
                // so they can be executed by the CPU in parallel via instruction-level parallelism
                let sample0 = (previous + diff0 - 128) as u8;
                let sample1 = (previous + diff0 + diff1 - 128 * 2) as u8;
                chunk[0] = sample0;
                chunk[1] = sample1;
                previous = sample1 as i16;
            }
            // handle the remaining element at the end not processed by the loop over pairs, if present
            for elem in &mut buffer[1..].chunks_exact_mut(2).into_remainder().iter_mut() {
                let sample = (previous + *elem as i16 - 128) as u8;
                *elem = sample;
                previous = sample as i16;
            }
        }
    }

    /// Derive over all values in order to produce differences to the previous value.
    pub fn samples_to_differences(buffer: &mut [u8]){
        // naive version:
        // for index in (1..buffer.len()).rev() {
        //     buffer[index] = (buffer[index] as i32 - buffer[index - 1] as i32 + 128) as u8;
        // }
        //
        // But we process elements in batches to take advantage of autovectorization.
        // If the target platform has no vector instructions (e.g. 32-bit ARM without `-C target-cpu=native`)
        // this will instead take advantage of instruction-level parallelism.
        if let Some(first) = buffer.get(0) {
            let mut previous = *first as i16;
            // Chunk size is 16 because we process bytes (8 bits),
            // and 8*16 = 128 bits is the size of a typical SIMD register.
            // Even WASM has 128-bit SIMD registers.
            for chunk in &mut buffer[1..].chunks_exact_mut(16) {
                // no bounds checks here due to indices and chunk size being constant
                let sample0 = chunk[0] as i16;
                let sample1 = chunk[1] as i16;
                let sample2 = chunk[2] as i16;
                let sample3 = chunk[3] as i16;
                let sample4 = chunk[4] as i16;
                let sample5 = chunk[5] as i16;
                let sample6 = chunk[6] as i16;
                let sample7 = chunk[7] as i16;
                let sample8 = chunk[8] as i16;
                let sample9 = chunk[9] as i16;
                let sample10 = chunk[10] as i16;
                let sample11 = chunk[11] as i16;
                let sample12 = chunk[12] as i16;
                let sample13 = chunk[13] as i16;
                let sample14 = chunk[14] as i16;
                let sample15 = chunk[15] as i16;
                // Unlike in decoding, the computations here are truly independent of each other,
                // which enables the compiler to vectorize this loop.
                // Even if the target platform has no vector instructions,
                // the independent operations still benefit from instruction-level parallelism,
                // so we're not really limited in how wide we can go.
                chunk[0] = (sample0 - previous + 128) as u8;
                chunk[1] = (sample1 - sample0 + 128) as u8;
                chunk[2] = (sample2 - sample1 + 128) as u8;
                chunk[3] = (sample3 - sample2 + 128) as u8;
                chunk[4] = (sample4 - sample3 + 128) as u8;
                chunk[5] = (sample5 - sample4 + 128) as u8;
                chunk[6] = (sample6 - sample5 + 128) as u8;
                chunk[7] = (sample7 - sample6 + 128) as u8;
                chunk[8] = (sample8 - sample7 + 128) as u8;
                chunk[9] = (sample9 - sample8 + 128) as u8;
                chunk[10] = (sample10 - sample9 + 128) as u8;
                chunk[11] = (sample11 - sample10 + 128) as u8;
                chunk[12] = (sample12 - sample11 + 128) as u8;
                chunk[13] = (sample13 - sample12 + 128) as u8;
                chunk[14] = (sample14 - sample13 + 128) as u8;
                chunk[15] = (sample15 - sample14 + 128) as u8;
                previous = sample15;
            }
            // Handle the remaining element at the end not processed by the loop over batches, if present
            // This is what the iterator-based version of this function would look like without vectorization
            for elem in &mut buffer[1..].chunks_exact_mut(16).into_remainder().iter_mut() {
                let diff = (*elem as i16 - previous + 128) as u8;
                previous = *elem as i16;
                *elem = diff;
            }
        }
    }

    use std::cell::Cell;
    thread_local! {
        // A buffer for reusing between invocations of interleaving and deinterleaving.
        // Allocating memory is cheap, but zeroing or otherwise initializing it is not.
        // Doing it hundreds of times (once per block) would be expensive.
        // This optimization brings down the time spent in interleaving from 15% to 5%.
        static SCRATCH_SPACE: Cell<Vec<u8>> = Cell::new(Vec::new());
    }

    // Runs `func` on a scratch buffer of exactly `length` bytes,
    // reusing a thread-local allocation between calls.
    // Note that on reuse, the buffer may contain stale data from a previous invocation;
    // callers must overwrite every byte they read back.
    fn with_reused_buffer<F>(length: usize, mut func: F) where F: FnMut(&mut [u8]) {
        SCRATCH_SPACE.with(|scratch_space| {
            // reuse a buffer if we've already initialized one
            let mut buffer = scratch_space.take();
            if buffer.len() < length {
                // Efficiently create a zeroed Vec by requesting zeroed memory from the OS.
                // This is slightly faster than a `memcpy()` plus `memset()` that would happen otherwise,
                // but is not a big deal either way since it's not a hot codepath.
                buffer = vec![0u8; length];
            }

            // call the function
            func(&mut buffer[..length]);

            // save the internal buffer for reuse
            scratch_space.set(buffer);
        });
    }

    /// Interleave the bytes such that the second half of the array is every other byte.
    pub fn interleave_byte_blocks(separated: &mut [u8]) {
        with_reused_buffer(separated.len(), |interleaved| {

            // Split the two halves that we are going to interleave.
            let (first_half, second_half) = separated.split_at((separated.len() + 1) / 2);
            // The first half can be 1 byte longer than the second if the length of the input is odd,
            // but the loop below only processes numbers in pairs.
            // To handle it, preserve the last element of the first slice, to be handled after the loop.
            let first_half_last = first_half.last();
            // Truncate the first half to match the length of the second one; more optimizer-friendly
            let first_half_iter = &first_half[..second_half.len()];

            // Main loop that performs the interleaving
            for ((first, second), interleaved) in first_half_iter.iter().zip(second_half.iter())
                .zip(interleaved.chunks_exact_mut(2)) {
                    // The length of each chunk is known to be 2 at compile time,
                    // and each index is also a constant.
                    // This allows the compiler to remove the bounds checks.
                    interleaved[0] = *first;
                    interleaved[1] = *second;
            }

            // If the length of the slice was odd, restore the last element of the first half that we saved
            if interleaved.len() % 2 == 1 {
                if let Some(value) = first_half_last {
                    // we can unwrap() here because we just checked that the length is non-zero:
                    // `% 2 == 1` will fail for zero
                    *interleaved.last_mut().unwrap() = *value;
                }
            }

            // write out the results
            separated.copy_from_slice(&interleaved);
        });
    }

/// Separate the bytes such that the second half contains every other byte.
/// This performs deinterleaving - the inverse of interleaving.
pub fn separate_bytes_fragments(source: &mut [u8]) {
    with_reused_buffer(source.len(), |separated| {

        // Split the two halves that we are going to interleave.
        let (first_half, second_half) = separated.split_at_mut((source.len() + 1) / 2);
        // The first half can be 1 byte longer than the second if the length of the input is odd,
        // but the loop below only processes numbers in pairs.
        // To handle it, preserve the last element of the input, to be handled after the loop.
        let last = source.last();
        let first_half_iter = &mut first_half[..second_half.len()];

        // Main loop that performs the deinterleaving
        for ((first, second), interleaved) in first_half_iter.iter_mut().zip(second_half.iter_mut())
            .zip(source.chunks_exact(2)) {
                // The length of each chunk is known to be 2 at compile time,
                // and each index is also a constant.
                // This allows the compiler to remove the bounds checks.
                *first = interleaved[0];
                *second = interleaved[1];
        }

        // If the length of the slice was odd, restore the last element of the input that we saved
        if source.len() % 2 == 1 {
            if let Some(value) = last {
                // we can unwrap() here because we just checked that the length is non-zero:
                // `% 2 == 1` will fail for zero
                *first_half.last_mut().unwrap() = *value;
            }
        }

        // write out the results
        source.copy_from_slice(&separated);
    });
}


    #[cfg(test)]
    pub mod test {

        // deinterleaving then interleaving must restore the original bytes
        #[test]
        fn roundtrip_interleave(){
            let source = vec![ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ];
            let mut modified = source.clone();

            super::separate_bytes_fragments(&mut modified);
            super::interleave_byte_blocks(&mut modified);

            assert_eq!(source, modified);
        }

        // deriving then integrating must restore the original samples
        #[test]
        fn roundtrip_derive(){
            let source = vec![ 0, 1, 2, 7, 4, 5, 6, 7, 13, 9, 10 ];
            let mut modified = source.clone();

            super::samples_to_differences(&mut modified);
            super::differences_to_samples(&mut modified);

            assert_eq!(source, modified);
        }

    }
}
621
622
#[cfg(test)]
pub mod test {
    use super::*;
    use crate::meta::attribute::ChannelDescription;
    use crate::block::samples::IntoNativeSample;

    // round-trips a block with one f32 channel and one f16 channel,
    // exercising the per-channel byte widths of the endianness conversion
    #[test]
    fn roundtrip_endianness_mixed_channels(){
        let a32 = ChannelDescription::new("A", SampleType::F32, true);
        let y16 = ChannelDescription::new("Y", SampleType::F16, true);
        let channels = ChannelList::new(smallvec![ a32, y16 ]);

        // 2x2 block layout: per scan line, all "A" samples first, then all "Y" samples
        let data = vec![
            23582740683_f32.to_ne_bytes().as_slice(),
            35827420683_f32.to_ne_bytes().as_slice(),
            27406832358_f32.to_f16().to_ne_bytes().as_slice(),
            74062358283_f32.to_f16().to_ne_bytes().as_slice(),

            52582740683_f32.to_ne_bytes().as_slice(),
            45827420683_f32.to_ne_bytes().as_slice(),
            15406832358_f32.to_f16().to_ne_bytes().as_slice(),
            65062358283_f32.to_f16().to_ne_bytes().as_slice(),
        ].into_iter().flatten().map(|x| *x).collect();

        roundtrip_convert_endianness(
            data, &channels,
            IntegerBounds::from_dimensions((2, 2))
        );
    }

    // converts to little endian and back, then checks for byte equality
    fn roundtrip_convert_endianness(
        current_endian: ByteVec, channels: &ChannelList, rectangle: IntegerBounds
    ){
        let little_endian = convert_current_to_little_endian(
            current_endian.clone(), channels, rectangle
        );

        let current_endian_decoded = convert_little_endian_to_current(
            little_endian.clone(), channels, rectangle
        );

        assert_eq!(current_endian, current_endian_decoded, "endianness conversion failed");
    }
}