Skip to main content

oxihuman_core/
run_length.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Run-length encoding/decoding for byte sequences.
5
6#![allow(dead_code)]
7
8/// A single run: (value, count).
9#[allow(dead_code)]
10#[derive(Debug, Clone, Copy, PartialEq)]
11pub struct Run {
12    pub value: u8,
13    pub count: u32,
14}
15
16/// Run-length encode a byte slice.
17#[allow(dead_code)]
18pub fn rle_encode(data: &[u8]) -> Vec<Run> {
19    if data.is_empty() {
20        return Vec::new();
21    }
22    let mut out = Vec::new();
23    let mut current = data[0];
24    let mut count = 1u32;
25    for &b in &data[1..] {
26        if b == current {
27            count += 1;
28        } else {
29            out.push(Run {
30                value: current,
31                count,
32            });
33            current = b;
34            count = 1;
35        }
36    }
37    out.push(Run {
38        value: current,
39        count,
40    });
41    out
42}
43
44/// Run-length decode back to bytes.
45#[allow(dead_code)]
46pub fn rle_decode(runs: &[Run]) -> Vec<u8> {
47    let total: u32 = runs.iter().map(|r| r.count).sum();
48    let mut out = Vec::with_capacity(total as usize);
49    for r in runs {
50        for _ in 0..r.count {
51            out.push(r.value);
52        }
53    }
54    out
55}
56
57/// Return the number of runs.
58#[allow(dead_code)]
59pub fn rle_run_count(runs: &[Run]) -> usize {
60    runs.len()
61}
62
63/// Return the total number of decoded bytes.
64#[allow(dead_code)]
65pub fn rle_decoded_len(runs: &[Run]) -> usize {
66    runs.iter().map(|r| r.count as usize).sum()
67}
68
69/// Compression ratio: decoded_len / encoded_len. Returns 1.0 for empty.
70#[allow(dead_code)]
71pub fn rle_compression_ratio_v2(runs: &[Run]) -> f32 {
72    if runs.is_empty() {
73        return 1.0;
74    }
75    rle_decoded_len(runs) as f32 / runs.len() as f32
76}
77
78/// Find the most frequent byte value in the original sequence.
79#[allow(dead_code)]
80pub fn rle_most_frequent(runs: &[Run]) -> Option<u8> {
81    if runs.is_empty() {
82        return None;
83    }
84    runs.iter().max_by_key(|r| r.count).map(|r| r.value)
85}
86
87/// Check if the encoded sequence is uniform (single run).
88#[allow(dead_code)]
89pub fn rle_is_uniform(runs: &[Run]) -> bool {
90    runs.len() == 1
91}
92
93/// Merge adjacent runs with the same value (e.g., after concatenation).
94#[allow(dead_code)]
95pub fn rle_merge(runs: &[Run]) -> Vec<Run> {
96    if runs.is_empty() {
97        return Vec::new();
98    }
99    let mut out = Vec::new();
100    let mut cur = runs[0];
101    for &r in &runs[1..] {
102        if r.value == cur.value {
103            cur.count += r.count;
104        } else {
105            out.push(cur);
106            cur = r;
107        }
108    }
109    out.push(cur);
110    out
111}
112
113/// Encode then decode to verify roundtrip (returns true if data matches).
114#[allow(dead_code)]
115pub fn rle_verify_roundtrip(data: &[u8]) -> bool {
116    let runs = rle_encode(data);
117    let decoded = rle_decode(&runs);
118    decoded == data
119}
120
121#[cfg(test)]
122mod tests {
123    use super::*;
124
125    #[test]
126    fn encode_empty() {
127        assert!(rle_encode(&[]).is_empty());
128    }
129
130    #[test]
131    fn decode_empty() {
132        assert!(rle_decode(&[]).is_empty());
133    }
134
135    #[test]
136    fn encode_uniform() {
137        let runs = rle_encode(&[5u8; 10]);
138        assert_eq!(runs.len(), 1);
139        assert_eq!(runs[0].value, 5);
140        assert_eq!(runs[0].count, 10);
141    }
142
143    #[test]
144    fn roundtrip_basic() {
145        let data = vec![1u8, 1, 2, 3, 3, 3, 4];
146        assert!(rle_verify_roundtrip(&data));
147    }
148
149    #[test]
150    fn roundtrip_alternating() {
151        let data: Vec<u8> = (0..10).map(|i| i % 2).collect();
152        assert!(rle_verify_roundtrip(&data));
153    }
154
155    #[test]
156    fn decoded_len_correct() {
157        let data = vec![0u8, 0, 0, 1, 1];
158        let runs = rle_encode(&data);
159        assert_eq!(rle_decoded_len(&runs), 5);
160    }
161
162    #[test]
163    fn most_frequent() {
164        let data = vec![1u8, 2, 2, 2, 3];
165        let runs = rle_encode(&data);
166        assert_eq!(rle_most_frequent(&runs), Some(2));
167    }
168
169    #[test]
170    fn is_uniform_true() {
171        let runs = rle_encode(&[7u8; 5]);
172        assert!(rle_is_uniform(&runs));
173    }
174
175    #[test]
176    fn merge_adjacent() {
177        let runs = vec![
178            Run { value: 1, count: 3 },
179            Run { value: 1, count: 2 },
180            Run { value: 2, count: 1 },
181        ];
182        let merged = rle_merge(&runs);
183        assert_eq!(merged.len(), 2);
184        assert_eq!(merged[0].count, 5);
185    }
186
187    #[test]
188    fn compression_ratio_uniform() {
189        let data = vec![9u8; 100];
190        let runs = rle_encode(&data);
191        let ratio = rle_compression_ratio_v2(&runs);
192        assert!(ratio >= 100.0);
193    }
194}