lerc-rs 0.1.0

Pure Rust LERC (Limited Error Raster Compression) encoder and decoder
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
use alloc::vec;
use alloc::vec::Vec;

use crate::error::{LercError, Result};

const HUFFMAN_RLE: u8 = 0x01;
const HUFFMAN_NO_ENCODING: u8 = 0x02;
const HUFFMAN_PACKBITS: u8 = 0x03;

/// Extract (decompress) a byte plane from the FPL compressed format.
pub(super) fn extract_buffer(compressed: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    if compressed.is_empty() {
        return Err(LercError::InvalidData("empty FPL compressed data".into()));
    }

    let mode = compressed[0];
    let data = &compressed[1..];

    match mode {
        HUFFMAN_RLE => decode_rle(data, expected_size),
        HUFFMAN_NO_ENCODING => {
            if data.len() < expected_size {
                return Err(LercError::BufferTooSmall {
                    needed: expected_size,
                    available: data.len(),
                });
            }
            Ok(data[..expected_size].to_vec())
        }
        HUFFMAN_PACKBITS => decode_packbits(data, expected_size),
        0x00 => decode_fpl_huffman(data, expected_size),
        _ => Err(LercError::InvalidData(alloc::format!(
            "unknown FPL compression mode: {mode:#x}"
        ))),
    }
}

fn decode_rle(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    if data.len() < 5 {
        return Err(LercError::InvalidData("FPL RLE too short".into()));
    }

    let byte_val = data[0];
    let count = u32::from_le_bytes(data[1..5].try_into().unwrap()) as usize;

    if count != expected_size {
        return Err(LercError::InvalidData("FPL RLE count mismatch".into()));
    }

    Ok(vec![byte_val; count])
}

fn decode_packbits(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    // Matches C++ fpl_EsriHuffman decodePackBits format:
    //   b <= 127: literal of (b + 1) bytes
    //   b > 127:  run of (b - 126) identical bytes
    let mut out = Vec::with_capacity(expected_size);
    let mut pos = 0;

    while pos < data.len() && out.len() < expected_size {
        let b = data[pos] as u32;
        pos += 1;

        if b <= 127 {
            // Literal: copy next (b + 1) bytes
            let count = (b + 1) as usize;
            if pos + count > data.len() {
                return Err(LercError::InvalidData("FPL PackBits overflow".into()));
            }
            out.extend_from_slice(&data[pos..pos + count]);
            pos += count;
        } else {
            // Run: repeat next byte (b - 126) times
            let count = (b - 126) as usize;
            if pos >= data.len() {
                return Err(LercError::InvalidData("FPL PackBits missing byte".into()));
            }
            let byte_val = data[pos];
            pos += 1;
            out.resize(out.len() + count, byte_val);
        }
    }

    if out.len() != expected_size {
        return Err(LercError::InvalidData("FPL PackBits size mismatch".into()));
    }

    Ok(out)
}

fn decode_fpl_huffman(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    // FPL Huffman: simplified Huffman for byte values
    // Read code table, then decode
    use crate::huffman::HuffmanCodec;

    let mut codec = HuffmanCodec::new();
    let mut pos = 0;
    codec.read_code_table(data, &mut pos, 6)?;
    let num_bits_lut = codec.build_tree_from_codes()?;

    let mut result = Vec::with_capacity(expected_size);
    let mut byte_pos = pos;
    let mut bit_pos = 0i32;

    for _ in 0..expected_size {
        let val = codec.decode_one_value(data, &mut byte_pos, &mut bit_pos, num_bits_lut)?;
        result.push(val as u8);
    }

    Ok(result)
}

// === Encoding functions ===

const COMPRESS_HUFFMAN: u8 = 0x00;
const COMPRESS_RLE: u8 = 0x01;
const COMPRESS_NO_ENCODING: u8 = 0x02;
const COMPRESS_PACKBITS: u8 = 0x03;

/// Compress a byte plane using the best available method.
/// Returns the compressed data (including the mode byte prefix).
pub(super) fn compress_buffer(data: &[u8]) -> Vec<u8> {
    compress_buffer_with_histo(data, None)
}

/// Compress a byte plane using the best available method, optionally reusing a
/// pre-computed histogram. Returns the compressed data (including the mode byte
/// prefix).
pub(super) fn compress_buffer_with_histo(
    data: &[u8],
    precomputed_histo: Option<&[i32; 256]>,
) -> Vec<u8> {
    if data.is_empty() {
        return encode_raw(data);
    }

    // Build histogram (or reuse the caller's).
    let mut local_histo = [0i32; 256];
    let histo: &[i32; 256] = if let Some(h) = precomputed_histo {
        h
    } else {
        for &b in data {
            local_histo[b as usize] += 1;
        }
        &local_histo
    };

    // Count distinct values and find max frequency
    let mut distinct = 0u32;
    let mut sole_value = 0u8;
    let mut max_count = 0i32;
    for (i, &count) in histo.iter().enumerate() {
        if count > 0 {
            distinct += 1;
            sole_value = i as u8;
            if count > max_count {
                max_count = count;
            }
        }
    }

    // RLE: all same byte
    if distinct == 1 {
        let mut buf = Vec::with_capacity(6);
        buf.push(COMPRESS_RLE);
        buf.push(sole_value);
        buf.extend_from_slice(&(data.len() as u32).to_le_bytes());
        return buf;
    }

    // Raw size is 1 + data.len(). Track best size without allocating raw eagerly.
    let raw_size = 1 + data.len();
    let mut best: Option<Vec<u8>> = None;
    let mut best_len = raw_size;

    // Compute entropy once, used for both PackBits and Huffman skip decisions.
    let entropy_bits_per_byte = compute_entropy_bits_per_byte(histo, data.len());

    // Try PackBits -- skip if data looks uniformly distributed (no runs expected).
    // PackBits helps when there are many runs of identical bytes. A rough heuristic:
    // if the most frequent value covers < 1% of the data and there are many distinct
    // values, or if entropy is high, PackBits is unlikely to help.
    let skip_packbits =
        (distinct > 128 && (max_count as usize) < data.len() / 100) || entropy_bits_per_byte > 6.0;
    if !skip_packbits {
        let packbits = encode_packbits(data);
        if packbits.len() < best_len {
            best_len = packbits.len();
            best = Some(packbits);
        }
    }

    // Skip Huffman if entropy is too high (near 8 bits/byte). The overhead of
    // the Huffman code table makes it a net loss when entropy > ~7.5 bits/byte.
    let skip_huffman = entropy_bits_per_byte > 7.5;

    // Try Huffman if we have enough data and at least 2 distinct values.
    if !skip_huffman
        && data.len() >= 4
        && distinct >= 2
        && let Some(huffman) = encode_fpl_huffman_with_histo_bounded(data, histo, best_len)
        && huffman.len() < best_len
    {
        best = Some(huffman);
    }

    best.unwrap_or_else(|| encode_raw(data))
}

/// Estimate the compressed size (in bytes) of a buffer without actually
/// compressing it. This matches the C++ `getEntropySize` function: it samples
/// every PRIME_MULT-th byte (stride 7), builds a histogram from those samples,
/// and computes the Shannon entropy to approximate the compressed size.
pub(super) fn estimate_compressed_size(data: &[u8]) -> usize {
    const PRIME_MULT: usize = 7;

    let mut table = [0u32; 256];
    let mut total_count = 0u32;

    let mut i = 0;
    while i < data.len() {
        table[data[i] as usize] += 1;
        total_count += 1;
        i += PRIME_MULT;
    }

    if total_count == 0 {
        return 0;
    }

    let mut total_bits = 0.0f64;
    for &count in &table {
        if count > 0 {
            let p = total_count as f64 / count as f64;
            let bits = p.log2();
            total_bits += bits * count as f64;
        }
    }

    ((total_bits + 7.0) / 8.0) as usize
}

/// Compute entropy in bits per byte from a histogram.
fn compute_entropy_bits_per_byte(histo: &[i32; 256], total: usize) -> f64 {
    if total == 0 {
        return 0.0;
    }
    let n = total as f64;
    let mut entropy = 0.0;
    for &count in histo.iter() {
        if count > 0 {
            let p = count as f64 / n;
            entropy -= p * p.log2();
        }
    }
    entropy
}

fn encode_raw(data: &[u8]) -> Vec<u8> {
    let mut buf = Vec::with_capacity(1 + data.len());
    buf.push(COMPRESS_NO_ENCODING);
    buf.extend_from_slice(data);
    buf
}

fn encode_rle(data: &[u8]) -> Option<Vec<u8>> {
    if data.is_empty() {
        return None;
    }

    let first = data[0];
    if data.iter().all(|&b| b == first) {
        let mut buf = Vec::with_capacity(6);
        buf.push(COMPRESS_RLE);
        buf.push(first);
        buf.extend_from_slice(&(data.len() as u32).to_le_bytes());
        Some(buf)
    } else {
        None
    }
}

fn encode_packbits(data: &[u8]) -> Vec<u8> {
    // Matches C++ fpl_EsriHuffman encodePackBits format:
    //   Literal header: (count - 1), range 0..=127, followed by count data bytes
    //   Run header: (127 + repeat_count), range 128..=255, followed by 1 data byte
    //     where repeat_count = total_run_length - 1 (1..=128)
    //     so total run length = 2..=129
    //
    // The C++ encoder detects runs of 2+ identical bytes (repeat_count >= 1).
    let mut buf = Vec::with_capacity(1 + data.len() * 2 + 1);
    buf.push(COMPRESS_PACKBITS);

    let len = data.len();
    let mut i = 0;
    let mut literal_start: Option<usize> = None;
    let mut literal_count: usize = 0;

    while i <= len {
        let b = if i == len { None } else { Some(data[i]) };

        let mut repeat_count: usize = 0;

        if let Some(bv) = b {
            while i + repeat_count < len - 1
                && data[i + repeat_count + 1] == bv
                && repeat_count < 128
            {
                repeat_count += 1;
            }
        }

        i += 1;

        if let (0, Some(byte_val)) = (repeat_count, b) {
            // Literal byte
            if literal_start.is_none() {
                literal_start = Some(buf.len());
                buf.push(0); // placeholder for header
            }

            buf.push(byte_val);
            literal_count += 1;

            if literal_count == 128 {
                buf[literal_start.unwrap()] = (literal_count - 1) as u8;
                literal_count = 0;
                literal_start = None;
            }
        } else {
            // Flush pending literals
            if literal_count > 0 {
                buf[literal_start.unwrap()] = (literal_count - 1) as u8;
                literal_start = None;
                literal_count = 0;
            }

            if repeat_count > 0 {
                buf.push((127 + repeat_count) as u8);
                buf.push(b.unwrap());
                i += repeat_count; // skip the repeated bytes (already counted by repeat_count)
            }
        }
    }

    buf
}

/// Huffman encode with a pre-built histogram (avoids rebuilding it).
/// `max_size` is an upper bound: if the estimated Huffman output
/// would exceed this, skip the expensive full encoding entirely.
fn encode_fpl_huffman_with_histo_bounded(
    data: &[u8],
    histo: &[i32; 256],
    max_size: usize,
) -> Option<Vec<u8>> {
    if data.len() < 4 {
        return None;
    }

    // Build Huffman codes.
    use crate::huffman::{HuffmanCodec, encode_huffman_with_codec_histo};
    let mut codec = HuffmanCodec::new();
    if !codec.compute_codes(histo) {
        return None;
    }

    // Estimate compressed size before doing the expensive full encode.
    // If the estimate already exceeds the current best, skip.
    if let Some((est_bytes, _)) = codec.compute_compressed_size(histo)
        && (est_bytes as usize + 1) >= max_size
    {
        return None;
    }

    // Encode using the codec we already built (avoids recomputing codes).
    // Pass the histogram so total_bits can be computed from it (O(256))
    // instead of scanning all data bytes (O(n)).
    let encoded = encode_huffman_with_codec_histo(&codec, data, Some(histo))?;

    let mut buf = Vec::with_capacity(1 + encoded.len());
    buf.push(COMPRESS_HUFFMAN);
    buf.extend_from_slice(&encoded);
    Some(buf)
}

fn encode_fpl_huffman(data: &[u8]) -> Option<Vec<u8>> {
    if data.len() < 4 {
        return None;
    }

    let mut histo = [0i32; 256];
    for &b in data {
        histo[b as usize] += 1;
    }

    // Use usize::MAX as bound so we never skip
    encode_fpl_huffman_with_histo_bounded(data, &histo, usize::MAX)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn round_trip_raw() {
        let data: Vec<u8> = (0..100).map(|i| (i * 7 % 256) as u8).collect();
        let compressed = encode_raw(&data);
        let decompressed = extract_buffer(&compressed, data.len()).unwrap();
        assert_eq!(decompressed, data);
    }

    #[test]
    fn round_trip_rle() {
        let data = vec![42u8; 200];
        let compressed = encode_rle(&data).unwrap();
        let decompressed = extract_buffer(&compressed, data.len()).unwrap();
        assert_eq!(decompressed, data);
    }

    #[test]
    fn round_trip_packbits() {
        let data: Vec<u8> = (0..100).map(|i| (i * 3 % 256) as u8).collect();
        let compressed = encode_packbits(&data);
        let decompressed = extract_buffer(&compressed, data.len()).unwrap();
        assert_eq!(decompressed, data);
    }

    #[test]
    fn round_trip_packbits_runs() {
        let mut data = Vec::new();
        data.extend_from_slice(&[42u8; 50]);
        data.extend_from_slice(&[99u8; 50]);
        data.extend_from_slice(&(0u8..50).collect::<Vec<_>>());
        let compressed = encode_packbits(&data);
        let decompressed = extract_buffer(&compressed, data.len()).unwrap();
        assert_eq!(decompressed, data);
    }

    #[test]
    fn round_trip_huffman() {
        let data: Vec<u8> = (0..500).map(|i| (i * 13 % 256) as u8).collect();
        if let Some(compressed) = encode_fpl_huffman(&data) {
            let decompressed = extract_buffer(&compressed, data.len()).unwrap();
            assert_eq!(decompressed, data);
        }
    }

    #[test]
    fn round_trip_compress_buffer() {
        let data: Vec<u8> = (0..500).map(|i| (i * 7 % 256) as u8).collect();
        let compressed = compress_buffer(&data);
        let decompressed = extract_buffer(&compressed, data.len()).unwrap();
        assert_eq!(decompressed, data);
    }
}