Skip to main content

dicom_toolkit_codec/
rle.rs

//! DICOM RLE (Lossless) codec — port of DCMTK's `dcmdata/libsrc/dcrledec.h`
//! and `dcmcrle`/`dcmdrle` tools.
//!
//! # DICOM RLE frame structure (PS3.5 §G)
//!
//! Each RLE-compressed frame starts with a 64-byte header:
//! ```text
//! Uint32 num_segments        // number of RLE segments (1..15)
//! Uint32 offsets[15]         // byte offsets to each segment from start of frame
//! ```
//!
//! For an 8-bit image with N channels there are N segments.
//! For a 16-bit image with N channels there are 2×N segments
//! (MSB segment first, then LSB segment for each channel).
//!
//! Each segment is PackBits-compressed (same algorithm as TIFF PackBits).
17
18use dicom_toolkit_core::error::{DcmError, DcmResult};
19
20// ── PackBits (RLE) decoder ────────────────────────────────────────────────────
21
22/// Decode a single PackBits-compressed RLE segment, stopping once `max_bytes`
23/// of output have been produced (to handle DICOM even-byte padding).
24fn decode_segment(data: &[u8], max_bytes: usize) -> DcmResult<Vec<u8>> {
25    let mut out = Vec::with_capacity(max_bytes.min(data.len() * 2));
26    let mut pos = 0;
27    while pos < data.len() && out.len() < max_bytes {
28        let ctrl = data[pos] as i8;
29        pos += 1;
30        if ctrl >= 0 {
31            // Literal run: copy next (ctrl + 1) bytes
32            let count = (ctrl as usize + 1).min(max_bytes - out.len());
33            if pos + count > data.len() {
34                // Tolerate truncated literal run at end of stream (may be due
35                // to DICOM even-byte padding appended after the last segment).
36                out.extend_from_slice(&data[pos..data.len().min(pos + count)]);
37                break;
38            }
39            out.extend_from_slice(&data[pos..pos + count]);
40            pos += ctrl as usize + 1; // advance by original count, not clamped
41        } else if ctrl != -128_i8 {
42            // Replicate run: repeat next byte (257 - ctrl_u8) times
43            let repeat = (257u16 - data[pos - 1] as u16) as usize;
44            let count = repeat.min(max_bytes - out.len());
45            if pos >= data.len() {
46                return Err(DcmError::Other(
47                    "RLE replicate run: missing value byte".to_string(),
48                ));
49            }
50            let value = data[pos];
51            pos += 1;
52            for _ in 0..count {
53                out.push(value);
54            }
55        }
56        // ctrl == -128 (0x80) → no-op
57    }
58    Ok(out)
59}
60
/// Compress a byte slice into one PackBits RLE segment.
///
/// Two or more equal consecutive bytes (at most 128) become a replicate run:
/// the control byte `257 - n` followed by the value. Everything else is
/// emitted as literal runs of at most 128 bytes, each prefixed by `n - 1`.
/// A literal run ends early when the next two bytes are equal, so that the
/// following iteration can start a replicate run there.
fn encode_segment(data: &[u8]) -> Vec<u8> {
    const MAX_RUN: usize = 128;

    let mut encoded = Vec::with_capacity(data.len() + data.len() / 128 + 1);
    let mut rest = data;

    while let Some(&first) = rest.first() {
        // Length of the leading run of bytes equal to `first`, capped at 128.
        let same = rest
            .iter()
            .take(MAX_RUN)
            .take_while(|&&byte| byte == first)
            .count();

        if same >= 2 {
            encoded.push((257 - same) as u8);
            encoded.push(first);
            rest = &rest[same..];
            continue;
        }

        // Literal run: stop just before the first position where a pair of
        // equal bytes begins, or at the 128-byte / end-of-input limit.
        let limit = rest.len().min(MAX_RUN);
        let literal_len = (1..limit)
            .find(|&k| k + 1 < rest.len() && rest[k] == rest[k + 1])
            .unwrap_or(limit);

        let (literal, tail) = rest.split_at(literal_len);
        encoded.push((literal_len - 1) as u8);
        encoded.extend_from_slice(literal);
        rest = tail;
    }

    encoded
}
93
94// ── DICOM RLE frame decoder ───────────────────────────────────────────────────
95
96/// Decode one DICOM RLE-compressed frame into raw interleaved pixel bytes.
97///
98/// # Parameters
99/// - `frame_data`: raw bytes of a single DICOM pixel item (including the 64-byte header)
100/// - `rows`, `columns`: pixel dimensions
101/// - `samples_per_pixel`: 1 (grayscale) or 3 (RGB / YCbCr)
102/// - `bits_allocated`: 8 or 16
103///
104/// # Returns
105/// Interleaved pixel bytes in row-major order.
106pub fn rle_decode_frame(
107    frame_data: &[u8],
108    rows: u16,
109    columns: u16,
110    samples_per_pixel: u8,
111    bits_allocated: u8,
112) -> DcmResult<Vec<u8>> {
113    if frame_data.len() < 64 {
114        return Err(DcmError::Other(format!(
115            "RLE frame too short: {} bytes (need at least 64 for header)",
116            frame_data.len()
117        )));
118    }
119
120    // Parse the 64-byte header (16 × u32 LE).
121    let num_segments = u32::from_le_bytes(frame_data[0..4].try_into().unwrap()) as usize;
122    if num_segments == 0 || num_segments > 15 {
123        return Err(DcmError::Other(format!(
124            "RLE header: invalid segment count {num_segments}"
125        )));
126    }
127
128    let expected_segments = samples_per_pixel as usize * (bits_allocated as usize / 8);
129    if num_segments != expected_segments {
130        return Err(DcmError::Other(format!(
131            "RLE header: expected {expected_segments} segments for {}spp/{}bpp, got {num_segments}",
132            samples_per_pixel, bits_allocated
133        )));
134    }
135
136    // Read segment byte offsets.
137    let mut offsets = [0u32; 15];
138    for (i, off) in offsets.iter_mut().enumerate() {
139        let start = 4 + i * 4;
140        *off = u32::from_le_bytes(frame_data[start..start + 4].try_into().unwrap());
141    }
142
143    let num_pixels = rows as usize * columns as usize;
144
145    // Decode each segment.
146    let mut segments: Vec<Vec<u8>> = Vec::with_capacity(num_segments);
147    for seg_idx in 0..num_segments {
148        let seg_start = offsets[seg_idx] as usize;
149        let seg_end = if seg_idx + 1 < num_segments {
150            offsets[seg_idx + 1] as usize
151        } else {
152            frame_data.len()
153        };
154        if seg_start > frame_data.len() || seg_end > frame_data.len() || seg_start > seg_end {
155            return Err(DcmError::Other(format!(
156                "RLE segment {seg_idx} offset out of bounds: {seg_start}..{seg_end} in {} bytes",
157                frame_data.len()
158            )));
159        }
160        let seg_data = decode_segment(&frame_data[seg_start..seg_end], num_pixels)?;
161        if seg_data.len() < num_pixels {
162            return Err(DcmError::Other(format!(
163                "RLE segment {seg_idx} decoded to {} bytes, expected at least {num_pixels}",
164                seg_data.len()
165            )));
166        }
167        segments.push(seg_data);
168    }
169
170    // Interleave segments back into pixel data.
171    //
172    // For 8-bit, N-channel:   seg[0]=ch0, seg[1]=ch1, ... → interleave by pixel
173    // For 16-bit, N-channel:  seg[0]=ch0_MSB, seg[1]=ch0_LSB, seg[2]=ch1_MSB, ...
174    //                         → each pixel is 2 bytes MSB first (big-endian per DICOM RLE spec)
175    let bytes_per_sample = bits_allocated as usize / 8;
176    let output_len = num_pixels * samples_per_pixel as usize * bytes_per_sample;
177    let mut output = vec![0u8; output_len];
178
179    #[allow(clippy::needless_range_loop)]
180    for px in 0..num_pixels {
181        for ch in 0..samples_per_pixel as usize {
182            for byte_plane in 0..bytes_per_sample {
183                let seg_idx = ch * bytes_per_sample + byte_plane;
184                let out_byte = byte_plane; // MSB first
185                let out_pos = px * (samples_per_pixel as usize * bytes_per_sample)
186                    + ch * bytes_per_sample
187                    + out_byte;
188                output[out_pos] = segments[seg_idx][px];
189            }
190        }
191    }
192
193    Ok(output)
194}
195
196// ── DICOM RLE frame encoder ───────────────────────────────────────────────────
197
198/// Encode raw interleaved pixel bytes into a DICOM RLE-compressed frame.
199///
200/// # Parameters
201/// - `pixels`: raw pixel bytes in row-major, interleaved order
202/// - `rows`, `columns`: pixel dimensions
203/// - `samples_per_pixel`: 1 (grayscale) or 3 (RGB / YCbCr)
204/// - `bits_allocated`: 8 or 16
205///
206/// # Returns
207/// Complete RLE frame bytes (64-byte header + compressed segments).
208pub fn rle_encode_frame(
209    pixels: &[u8],
210    rows: u16,
211    columns: u16,
212    samples_per_pixel: u8,
213    bits_allocated: u8,
214) -> DcmResult<Vec<u8>> {
215    let num_pixels = rows as usize * columns as usize;
216    let bytes_per_sample = bits_allocated as usize / 8;
217    let num_segments = samples_per_pixel as usize * bytes_per_sample;
218    let expected_len = num_pixels * samples_per_pixel as usize * bytes_per_sample;
219
220    if pixels.len() < expected_len {
221        return Err(DcmError::Other(format!(
222            "RLE encode: pixel buffer too small: {} bytes, expected {expected_len}",
223            pixels.len()
224        )));
225    }
226
227    // Extract one byte-plane per segment (de-interleave).
228    let mut planes: Vec<Vec<u8>> = vec![Vec::with_capacity(num_pixels); num_segments];
229    for px in 0..num_pixels {
230        for ch in 0..samples_per_pixel as usize {
231            for byte_plane in 0..bytes_per_sample {
232                let in_pos = px * (samples_per_pixel as usize * bytes_per_sample)
233                    + ch * bytes_per_sample
234                    + byte_plane;
235                let seg_idx = ch * bytes_per_sample + byte_plane;
236                planes[seg_idx].push(pixels[in_pos]);
237            }
238        }
239    }
240
241    // Compress each plane.
242    let compressed: Vec<Vec<u8>> = planes.iter().map(|p| encode_segment(p)).collect();
243
244    // Build the 64-byte header.
245    let mut header = [0u32; 16];
246    header[0] = num_segments as u32;
247    let mut offset = 64u32; // segments start immediately after the 64-byte header
248    for (i, seg) in compressed.iter().enumerate() {
249        header[i + 1] = offset;
250        offset += seg.len() as u32;
251    }
252
253    // Serialize to bytes (little-endian u32 values).
254    let mut out = Vec::with_capacity(64 + compressed.iter().map(|s| s.len()).sum::<usize>());
255    for word in &header {
256        out.extend_from_slice(&word.to_le_bytes());
257    }
258    for seg in &compressed {
259        out.extend_from_slice(seg);
260    }
261
262    // `offset` now points just past the last segment — record the true data end.
263    // The DICOM stream writer is responsible for even-byte alignment of the pixel
264    // item; we do NOT add padding inside the RLE frame, as that confuses the
265    // segment-boundary calculation during decoding.
266    //
267    // If the caller needs an even-length buffer (e.g. for direct pixel-item
268    // embedding), they should pad the returned Vec themselves.
269    let _data_end = offset; // == out.len() at this point
270
271    Ok(out)
272}
273
274// ── RleCodec ──────────────────────────────────────────────────────────────────
275
276/// DICOM RLE Lossless codec with DICOM-spec-compliant byte-plane ordering
277/// (PS3.5 §G).
278///
279/// For multi-byte samples the most-significant byte occupies the
280/// lower-numbered segment (segment 0 = MSBs, segment 1 = LSBs).
281/// This matches what DCMTK's `dcmdrle`/`dcmcrle` tools produce and
282/// what DICOM peers expect.
283///
284/// Supported configurations
285/// ─────────────────────────
286/// | `bits_allocated` | `samples` | segments |
287/// |------------------|-----------|---------|
288/// | 8                | 1         | 1       |
289/// | 16               | 1         | 2       |
290/// | 8                | 3         | 3       |
291/// | 16               | 3         | 6       |
292pub struct RleCodec;
293
294impl RleCodec {
295    /// Decode an RLE-compressed DICOM fragment into raw little-endian pixel bytes.
296    ///
297    /// `data` must be the complete fragment payload, starting with the 64-byte
298    /// RLE segment-offset header.
299    pub fn decode(
300        data: &[u8],
301        rows: u16,
302        cols: u16,
303        bits_allocated: u16,
304        samples: u16,
305    ) -> DcmResult<Vec<u8>> {
306        rle_codec_decode(data, rows, cols, bits_allocated, samples)
307    }
308
309    /// Encode raw little-endian pixel bytes into a DICOM RLE fragment.
310    ///
311    /// The returned buffer includes the 64-byte segment-offset header and all
312    /// compressed segments.  Its length is always even (DICOM requirement).
313    pub fn encode(
314        data: &[u8],
315        rows: u16,
316        cols: u16,
317        bits_allocated: u16,
318        samples: u16,
319    ) -> DcmResult<Vec<u8>> {
320        rle_codec_encode(data, rows, cols, bits_allocated, samples)
321    }
322}
323
324// ── Implementation ────────────────────────────────────────────────────────────
325
326/// Number of RLE segments for the given image parameters.
327fn rle_num_segments(bits_allocated: u16, samples: u16) -> DcmResult<usize> {
328    let bytes_per_sample: usize = match bits_allocated {
329        8 => 1,
330        16 => 2,
331        other => {
332            return Err(DcmError::DecompressionError {
333                reason: format!("RLE codec supports 8- or 16-bit samples, got {}", other),
334            })
335        }
336    };
337    Ok(samples as usize * bytes_per_sample)
338}
339
340fn rle_codec_decode(
341    data: &[u8],
342    rows: u16,
343    cols: u16,
344    bits_allocated: u16,
345    samples: u16,
346) -> DcmResult<Vec<u8>> {
347    const HDR: usize = 64;
348    const MAX_SEG: usize = 15;
349
350    if data.len() < HDR {
351        return Err(DcmError::DecompressionError {
352            reason: format!(
353                "RLE fragment too short: {} bytes (need ≥ {})",
354                data.len(),
355                HDR
356            ),
357        });
358    }
359
360    let num_segments = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
361    let expected = rle_num_segments(bits_allocated, samples)?;
362    if num_segments != expected {
363        return Err(DcmError::DecompressionError {
364            reason: format!(
365                "expected {} RLE segments for {}bpp × {} samples, got {}",
366                expected, bits_allocated, samples, num_segments
367            ),
368        });
369    }
370
371    let mut offsets = [0u32; MAX_SEG];
372    for (i, slot) in offsets.iter_mut().enumerate() {
373        let b = 4 + i * 4;
374        *slot = u32::from_le_bytes(data[b..b + 4].try_into().unwrap());
375    }
376
377    let num_pixels = rows as usize * cols as usize;
378    let bps = (bits_allocated as usize).div_ceil(8); // bytes per sample
379    let bpp = samples as usize * bps; // bytes per pixel
380    let mut out = vec![0u8; num_pixels * bpp];
381
382    for seg in 0..num_segments {
383        let start = offsets[seg] as usize;
384        let end = if seg + 1 < num_segments && offsets[seg + 1] != 0 {
385            offsets[seg + 1] as usize
386        } else {
387            data.len()
388        };
389        if start > data.len() || end > data.len() || start > end {
390            return Err(DcmError::DecompressionError {
391                reason: format!(
392                    "RLE segment {} has invalid byte range [{}, {})",
393                    seg, start, end
394                ),
395            });
396        }
397
398        let seg_bytes = decode_segment(&data[start..end], num_pixels)?;
399        if seg_bytes.len() < num_pixels {
400            return Err(DcmError::DecompressionError {
401                reason: format!(
402                    "RLE segment {} decoded to {} bytes, expected {}",
403                    seg,
404                    seg_bytes.len(),
405                    num_pixels
406                ),
407            });
408        }
409
410        // Segment index encodes the sample and byte-plane:
411        //   sample_idx = seg / bps
412        //   plane      = seg % bps   (0 = MSB plane)
413        // In LE layout the MSB lives at byte offset (bps - 1), so:
414        //   native byte offset within the sample = bps - 1 - plane
415        let sample_idx = seg / bps;
416        let plane = seg % bps;
417        let byte_off = bps - 1 - plane; // LE position of this plane
418
419        for (p, &byte) in seg_bytes.iter().enumerate().take(num_pixels) {
420            out[p * bpp + sample_idx * bps + byte_off] = byte;
421        }
422    }
423
424    Ok(out)
425}
426
427fn rle_codec_encode(
428    data: &[u8],
429    rows: u16,
430    cols: u16,
431    bits_allocated: u16,
432    samples: u16,
433) -> DcmResult<Vec<u8>> {
434    const HDR: usize = 64;
435    const MAX_SEG: usize = 15;
436
437    let num_segments = rle_num_segments(bits_allocated, samples)?;
438    let num_pixels = rows as usize * cols as usize;
439    let bps = (bits_allocated as usize).div_ceil(8);
440    let bpp = samples as usize * bps;
441    let expected_len = num_pixels * bpp;
442
443    if data.len() != expected_len {
444        return Err(DcmError::CompressionError {
445            reason: format!(
446                "input length {} ≠ expected {} ({}×{}×{}bpp×{} sample(s))",
447                data.len(),
448                expected_len,
449                rows,
450                cols,
451                bits_allocated,
452                samples
453            ),
454        });
455    }
456
457    // Extract and compress each byte-plane.
458    let mut compressed: Vec<Vec<u8>> = Vec::with_capacity(num_segments);
459    for seg in 0..num_segments {
460        let sample_idx = seg / bps;
461        let plane = seg % bps;
462        let byte_off = bps - 1 - plane; // LE position of MSB plane
463
464        let mut plane_bytes = Vec::with_capacity(num_pixels);
465        for p in 0..num_pixels {
466            plane_bytes.push(data[p * bpp + sample_idx * bps + byte_off]);
467        }
468        compressed.push(encode_segment(&plane_bytes));
469    }
470
471    // Write 64-byte header.
472    let mut out: Vec<u8> =
473        Vec::with_capacity(HDR + compressed.iter().map(|s| s.len()).sum::<usize>());
474    out.extend_from_slice(&(num_segments as u32).to_le_bytes());
475    let mut offset = HDR as u32;
476    for seg in &compressed {
477        out.extend_from_slice(&offset.to_le_bytes());
478        offset += seg.len() as u32;
479    }
480    for _ in num_segments..MAX_SEG {
481        out.extend_from_slice(&0u32.to_le_bytes());
482    }
483
484    for seg_bytes in &compressed {
485        out.extend_from_slice(seg_bytes);
486    }
487
488    // Even-length requirement.
489    if out.len() % 2 != 0 {
490        out.push(0x00);
491    }
492
493    Ok(out)
494}
495
496// ── Tests ─────────────────────────────────────────────────────────────────────
497
/// Unit tests for the PackBits helpers, the frame-level functions and the
/// `RleCodec` wrapper. Round-trip checks compare whole buffers (not just a
/// prefix) so that an over-long or over-short decode is caught as well.
#[cfg(test)]
mod tests {
    use super::*;

    // ── PackBits segment tests ────────────────────────────────────────────────

    #[test]
    fn encode_decode_segment_roundtrip_uniform() {
        // All-same bytes — should compress very well.
        let data = vec![42u8; 256];
        let compressed = encode_segment(&data);
        assert!(compressed.len() < data.len());
        let decoded = decode_segment(&compressed, 256).unwrap();
        assert_eq!(decoded, data);
    }

    #[test]
    fn encode_decode_segment_roundtrip_varied() {
        // Varied bytes — will result in literal runs.
        let data: Vec<u8> = (0u8..=255).collect();
        let compressed = encode_segment(&data);
        let decoded = decode_segment(&compressed, 256).unwrap();
        assert_eq!(decoded, data);
    }

    #[test]
    fn decode_segment_stops_at_max_bytes() {
        // Replicate run of three 7s followed by a stray padding byte.
        let decoded = decode_segment(&[0xFE, 7, 0x00], 3).unwrap();
        assert_eq!(decoded, [7, 7, 7]);
    }

    #[test]
    fn decode_segment_rejects_replicate_without_value() {
        let result = decode_segment(&[0xFE], 10);
        assert!(matches!(result, Err(DcmError::Other(_))));
    }

    // ── Frame-level function tests ────────────────────────────────────────────

    #[test]
    fn rle_frame_roundtrip_8bit_grayscale() {
        let rows = 4u16;
        let cols = 4u16;
        let samples = 1u8;
        let bits = 8u8;
        let pixels: Vec<u8> = (0..16).map(|i| (i * 17) as u8).collect();

        let encoded = rle_encode_frame(&pixels, rows, cols, samples, bits).unwrap();
        let decoded = rle_decode_frame(&encoded, rows, cols, samples, bits).unwrap();
        assert_eq!(decoded, pixels);
    }

    #[test]
    fn rle_frame_roundtrip_8bit_rgb() {
        let rows = 2u16;
        let cols = 2u16;
        let samples = 3u8;
        let bits = 8u8;
        let pixels: Vec<u8> = vec![
            255, 0, 0, // red
            0, 255, 0, // green
            0, 0, 255, // blue
            128, 128, 128, // grey
        ];

        let encoded = rle_encode_frame(&pixels, rows, cols, samples, bits).unwrap();
        let decoded = rle_decode_frame(&encoded, rows, cols, samples, bits).unwrap();
        assert_eq!(decoded, pixels);
    }

    #[test]
    fn rle_frame_roundtrip_16bit_grayscale() {
        let rows = 2u16;
        let cols = 2u16;
        let samples = 1u8;
        let bits = 16u8;
        // 4 pixels × 2 bytes each = 8 bytes (MSB first per segment order)
        let pixels: Vec<u8> = vec![0x01, 0x00, 0x02, 0x00, 0xFF, 0xFF, 0x80, 0x00];

        let encoded = rle_encode_frame(&pixels, rows, cols, samples, bits).unwrap();
        let decoded = rle_decode_frame(&encoded, rows, cols, samples, bits).unwrap();
        assert_eq!(decoded, pixels);
    }

    #[test]
    fn rle_header_too_short_returns_error() {
        let result = rle_decode_frame(&[0u8; 32], 4, 4, 1, 8);
        assert!(matches!(result, Err(DcmError::Other(_))));
    }

    // ── RleCodec tests ────────────────────────────────────────────────────────

    #[test]
    fn rle_encode_decode_roundtrip_8bit() {
        let data: Vec<u8> = (0u8..16).collect();
        let (rows, cols, bits, samples) = (4u16, 4u16, 8u16, 1u16);

        let enc = RleCodec::encode(&data, rows, cols, bits, samples).unwrap();
        let dec = RleCodec::decode(&enc, rows, cols, bits, samples).unwrap();
        assert_eq!(dec, data);
    }

    #[test]
    fn rle_encode_decode_roundtrip_16bit() {
        // 4×4 image, 16-bit grayscale, stored as LE u16 pairs.
        let data: Vec<u8> = (0u16..16).flat_map(|v| v.to_le_bytes()).collect();
        let (rows, cols, bits, samples) = (4u16, 4u16, 16u16, 1u16);

        let enc = RleCodec::encode(&data, rows, cols, bits, samples).unwrap();
        let dec = RleCodec::decode(&enc, rows, cols, bits, samples).unwrap();
        assert_eq!(dec, data);
    }

    #[test]
    fn rle_encode_decode_roundtrip_16bit_rgb() {
        // 2×2 image, three 16-bit samples per pixel → six segments.
        let data: Vec<u8> = (0u16..12).flat_map(|v| (v * 1000).to_le_bytes()).collect();
        let (rows, cols, bits, samples) = (2u16, 2u16, 16u16, 3u16);

        let enc = RleCodec::encode(&data, rows, cols, bits, samples).unwrap();
        let dec = RleCodec::decode(&enc, rows, cols, bits, samples).unwrap();
        assert_eq!(dec, data);
    }

    #[test]
    fn rle_decode_known_pattern() {
        // Manually build an RLE fragment: 1 segment, 4 literal bytes [0,1,2,3].
        let mut fragment = vec![0u8; 64]; // 64-byte header
        fragment[0] = 1; // num_segments = 1
        fragment[4] = 64; // segment_offsets[0] = 64

        // PackBits literal run: header byte 0x03 → copy 4 bytes.
        fragment.extend_from_slice(&[3, 0x00, 0x01, 0x02, 0x03]);

        let dec = RleCodec::decode(&fragment, 2, 2, 8, 1).unwrap();
        assert_eq!(dec, [0x00, 0x01, 0x02, 0x03]);
    }

    #[test]
    fn rle_decode_rejects_segment_count_mismatch() {
        // Header claims two segments, but 8-bit grayscale needs exactly one.
        let mut fragment = vec![0u8; 64];
        fragment[0] = 2;

        let result = RleCodec::decode(&fragment, 2, 2, 8, 1);
        assert!(matches!(result, Err(DcmError::DecompressionError { .. })));
    }

    #[test]
    fn rle_encode_rejects_wrong_input_length() {
        // A 2×2 8-bit image needs exactly four bytes.
        let result = RleCodec::encode(&[0u8; 3], 2, 2, 8, 1);
        assert!(matches!(result, Err(DcmError::CompressionError { .. })));
    }

    #[test]
    fn rle_encoded_output_is_even_length() {
        let data = vec![42u8; 3]; // 1×3 → odd pixel count
        let enc = RleCodec::encode(&data, 1, 3, 8, 1).unwrap();
        assert_eq!(enc.len() % 2, 0, "encoded fragment must be even-length");
    }
}