1use std::io::{self, Read};
31
32#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct Run<T> {
35 pub value: T,
37 pub length: u64,
39}
40
41impl<T> Run<T> {
42 #[must_use]
44 pub fn new(value: T, length: u64) -> Self {
45 Self { value, length }
46 }
47}
48
49#[derive(Debug, Clone)]
54pub struct RunLengthEncoding {
55 runs: Vec<Run<u64>>,
57 total_count: usize,
59}
60
61impl<'a> IntoIterator for &'a RunLengthEncoding {
62 type Item = u64;
63 type IntoIter = RunLengthIterator<'a>;
64
65 fn into_iter(self) -> Self::IntoIter {
66 self.iter()
67 }
68}
69
70impl RunLengthEncoding {
71 #[must_use]
80 pub fn encode(values: &[u64]) -> Self {
81 if values.is_empty() {
82 return Self {
83 runs: Vec::new(),
84 total_count: 0,
85 };
86 }
87
88 let mut runs = Vec::new();
89 let mut current_value = values[0];
90 let mut current_length = 1u64;
91
92 for &value in &values[1..] {
93 if value == current_value {
94 current_length += 1;
95 } else {
96 runs.push(Run::new(current_value, current_length));
97 current_value = value;
98 current_length = 1;
99 }
100 }
101
102 runs.push(Run::new(current_value, current_length));
104
105 Self {
106 runs,
107 total_count: values.len(),
108 }
109 }
110
111 #[must_use]
113 pub fn from_runs(runs: Vec<Run<u64>>) -> Self {
114 let total_count = runs.iter().map(|r| r.length as usize).sum();
115 Self { runs, total_count }
116 }
117
118 #[must_use]
120 pub fn decode(&self) -> Vec<u64> {
121 let mut values = Vec::with_capacity(self.total_count);
122
123 for run in &self.runs {
124 for _ in 0..run.length {
125 values.push(run.value);
126 }
127 }
128
129 values
130 }
131
132 #[must_use]
134 pub fn run_count(&self) -> usize {
135 self.runs.len()
136 }
137
138 #[must_use]
140 pub fn total_count(&self) -> usize {
141 self.total_count
142 }
143
144 #[must_use]
146 pub fn runs(&self) -> &[Run<u64>] {
147 &self.runs
148 }
149
150 #[must_use]
152 pub fn is_empty(&self) -> bool {
153 self.total_count == 0
154 }
155
156 #[must_use]
160 pub fn compression_ratio(&self) -> f64 {
161 if self.runs.is_empty() {
162 return 1.0;
163 }
164
165 let original_size = self.total_count * 8;
168 let encoded_size = self.runs.len() * 16;
169
170 if encoded_size == 0 {
171 return 1.0;
172 }
173
174 original_size as f64 / encoded_size as f64
175 }
176
177 #[must_use]
181 pub fn is_beneficial(&self) -> bool {
182 self.compression_ratio() > 1.0
183 }
184
185 #[must_use]
187 pub fn encoded_size(&self) -> usize {
188 self.runs.len() * 16
190 }
191
192 pub fn to_bytes(&self) -> Vec<u8> {
194 let mut bytes = Vec::with_capacity(8 + self.runs.len() * 16);
195
196 bytes.extend_from_slice(&(self.runs.len() as u64).to_le_bytes());
198
199 for run in &self.runs {
201 bytes.extend_from_slice(&run.value.to_le_bytes());
202 bytes.extend_from_slice(&run.length.to_le_bytes());
203 }
204
205 bytes
206 }
207
208 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
210 let mut cursor = io::Cursor::new(bytes);
211
212 let mut buf = [0u8; 8];
214 cursor.read_exact(&mut buf)?;
215 let run_count = u64::from_le_bytes(buf) as usize;
216
217 let mut runs = Vec::with_capacity(run_count);
219 for _ in 0..run_count {
220 cursor.read_exact(&mut buf)?;
221 let value = u64::from_le_bytes(buf);
222
223 cursor.read_exact(&mut buf)?;
224 let length = u64::from_le_bytes(buf);
225
226 runs.push(Run::new(value, length));
227 }
228
229 Ok(Self::from_runs(runs))
230 }
231
232 #[must_use]
236 pub fn get(&self, index: usize) -> Option<u64> {
237 if index >= self.total_count {
238 return None;
239 }
240
241 let mut offset = 0usize;
242 for run in &self.runs {
243 let run_end = offset + run.length as usize;
244 if index < run_end {
245 return Some(run.value);
246 }
247 offset = run_end;
248 }
249
250 None
251 }
252
253 pub fn iter(&self) -> RunLengthIterator<'_> {
255 RunLengthIterator {
256 runs: &self.runs,
257 run_index: 0,
258 within_run: 0,
259 }
260 }
261}
262
263pub struct RunLengthIterator<'a> {
265 runs: &'a [Run<u64>],
266 run_index: usize,
267 within_run: u64,
268}
269
270impl Iterator for RunLengthIterator<'_> {
271 type Item = u64;
272
273 fn next(&mut self) -> Option<Self::Item> {
274 while self.run_index < self.runs.len() {
275 let run = &self.runs[self.run_index];
276 if self.within_run < run.length {
277 self.within_run += 1;
278 return Some(run.value);
279 }
280 self.run_index += 1;
281 self.within_run = 0;
282 }
283 None
284 }
285
286 fn size_hint(&self) -> (usize, Option<usize>) {
287 let remaining: u64 = self.runs[self.run_index..]
288 .iter()
289 .map(|r| r.length)
290 .sum::<u64>()
291 - self.within_run;
292 (remaining as usize, Some(remaining as usize))
293 }
294}
295
296impl ExactSizeIterator for RunLengthIterator<'_> {}
297
298#[derive(Debug, Clone)]
302pub struct SignedRunLengthEncoding {
303 inner: RunLengthEncoding,
304}
305
306impl SignedRunLengthEncoding {
307 #[must_use]
309 pub fn encode(values: &[i64]) -> Self {
310 let unsigned: Vec<u64> = values.iter().map(|&v| zigzag_encode(v)).collect();
311 Self {
312 inner: RunLengthEncoding::encode(&unsigned),
313 }
314 }
315
316 #[must_use]
318 pub fn decode(&self) -> Vec<i64> {
319 self.inner.decode().into_iter().map(zigzag_decode).collect()
320 }
321
322 #[must_use]
324 pub fn compression_ratio(&self) -> f64 {
325 self.inner.compression_ratio()
326 }
327
328 #[must_use]
330 pub fn run_count(&self) -> usize {
331 self.inner.run_count()
332 }
333
334 pub fn to_bytes(&self) -> Vec<u8> {
336 self.inner.to_bytes()
337 }
338
339 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
341 Ok(Self {
342 inner: RunLengthEncoding::from_bytes(bytes)?,
343 })
344 }
345}
346
347#[inline]
349#[must_use]
350pub fn zigzag_encode(n: i64) -> u64 {
351 ((n << 1) ^ (n >> 63)) as u64
352}
353
354#[inline]
356#[must_use]
357pub fn zigzag_decode(n: u64) -> i64 {
358 ((n >> 1) as i64) ^ -((n & 1) as i64)
359}
360
361pub struct RunLengthAnalyzer;
363
364impl RunLengthAnalyzer {
365 #[must_use]
369 pub fn estimate_ratio(values: &[u64]) -> f64 {
370 if values.is_empty() {
371 return 1.0;
372 }
373
374 let mut run_count = 1usize;
376 for i in 1..values.len() {
377 if values[i] != values[i - 1] {
378 run_count += 1;
379 }
380 }
381
382 let original = values.len() * 8;
385 let encoded = run_count * 16;
386
387 if encoded == 0 {
388 return 1.0;
389 }
390
391 original as f64 / encoded as f64
392 }
393
394 #[must_use]
396 pub fn is_beneficial(values: &[u64]) -> bool {
397 Self::estimate_ratio(values) > 1.0
398 }
399
400 #[must_use]
402 pub fn average_run_length(values: &[u64]) -> f64 {
403 if values.is_empty() {
404 return 0.0;
405 }
406
407 let mut run_count = 1usize;
408 for i in 1..values.len() {
409 if values[i] != values[i - 1] {
410 run_count += 1;
411 }
412 }
413
414 values.len() as f64 / run_count as f64
415 }
416}
417
418#[cfg(test)]
419mod tests {
420 use super::*;
421
422 #[test]
423 fn test_encode_decode_basic() {
424 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
425 let encoded = RunLengthEncoding::encode(&values);
426
427 assert_eq!(encoded.run_count(), 3);
428 assert_eq!(encoded.total_count(), 10);
429
430 let decoded = encoded.decode();
431 assert_eq!(values, decoded);
432 }
433
434 #[test]
435 fn test_encode_empty() {
436 let values: Vec<u64> = vec![];
437 let encoded = RunLengthEncoding::encode(&values);
438
439 assert_eq!(encoded.run_count(), 0);
440 assert_eq!(encoded.total_count(), 0);
441 assert!(encoded.is_empty());
442
443 let decoded = encoded.decode();
444 assert!(decoded.is_empty());
445 }
446
447 #[test]
448 fn test_encode_single() {
449 let values = vec![42];
450 let encoded = RunLengthEncoding::encode(&values);
451
452 assert_eq!(encoded.run_count(), 1);
453 assert_eq!(encoded.total_count(), 1);
454
455 let decoded = encoded.decode();
456 assert_eq!(values, decoded);
457 }
458
459 #[test]
460 fn test_encode_all_same() {
461 let values = vec![7u64; 1000];
462 let encoded = RunLengthEncoding::encode(&values);
463
464 assert_eq!(encoded.run_count(), 1);
465 assert_eq!(encoded.total_count(), 1000);
466
467 let ratio = encoded.compression_ratio();
469 assert!(ratio > 50.0, "Expected ratio > 50, got {}", ratio);
470
471 let decoded = encoded.decode();
472 assert_eq!(values, decoded);
473 }
474
475 #[test]
476 fn test_encode_all_different() {
477 let values: Vec<u64> = (0..100).collect();
478 let encoded = RunLengthEncoding::encode(&values);
479
480 assert_eq!(encoded.run_count(), 100);
481 assert_eq!(encoded.total_count(), 100);
482
483 let ratio = encoded.compression_ratio();
485 assert!(ratio < 1.0, "Expected ratio < 1, got {}", ratio);
486
487 let decoded = encoded.decode();
488 assert_eq!(values, decoded);
489 }
490
491 #[test]
492 fn test_compression_ratio() {
493 let all_same = vec![1u64; 100];
495 let encoded = RunLengthEncoding::encode(&all_same);
496 assert!(encoded.compression_ratio() > 1.0);
497 assert!(encoded.is_beneficial());
498
499 let all_diff: Vec<u64> = (0..100).collect();
501 let encoded = RunLengthEncoding::encode(&all_diff);
502 assert!(encoded.compression_ratio() < 1.0);
503 assert!(!encoded.is_beneficial());
504 }
505
506 #[test]
507 fn test_serialization() {
508 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
509 let encoded = RunLengthEncoding::encode(&values);
510
511 let bytes = encoded.to_bytes();
512 let decoded_encoding = RunLengthEncoding::from_bytes(&bytes).unwrap();
513
514 assert_eq!(encoded.run_count(), decoded_encoding.run_count());
515 assert_eq!(encoded.decode(), decoded_encoding.decode());
516 }
517
518 #[test]
519 fn test_get_index() {
520 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
521 let encoded = RunLengthEncoding::encode(&values);
522
523 assert_eq!(encoded.get(0), Some(1));
524 assert_eq!(encoded.get(2), Some(1));
525 assert_eq!(encoded.get(3), Some(2));
526 assert_eq!(encoded.get(4), Some(2));
527 assert_eq!(encoded.get(5), Some(3));
528 assert_eq!(encoded.get(9), Some(3));
529 assert_eq!(encoded.get(10), None);
530 }
531
532 #[test]
533 fn test_iterator() {
534 let values = vec![1, 1, 1, 2, 2, 3];
535 let encoded = RunLengthEncoding::encode(&values);
536
537 let iterated: Vec<u64> = encoded.iter().collect();
538 assert_eq!(values, iterated);
539 }
540
541 #[test]
542 fn test_signed_integers() {
543 let values = vec![-5, -5, -5, 0, 0, 10, 10, 10, 10];
544 let encoded = SignedRunLengthEncoding::encode(&values);
545
546 assert_eq!(encoded.run_count(), 3);
547
548 let decoded = encoded.decode();
549 assert_eq!(values, decoded);
550 }
551
552 #[test]
553 fn test_signed_serialization() {
554 let values = vec![-100, -100, 0, 0, 0, 100];
555 let encoded = SignedRunLengthEncoding::encode(&values);
556
557 let bytes = encoded.to_bytes();
558 let decoded_encoding = SignedRunLengthEncoding::from_bytes(&bytes).unwrap();
559
560 assert_eq!(encoded.decode(), decoded_encoding.decode());
561 }
562
563 #[test]
564 fn test_zigzag() {
565 assert_eq!(zigzag_encode(0), 0);
566 assert_eq!(zigzag_encode(-1), 1);
567 assert_eq!(zigzag_encode(1), 2);
568 assert_eq!(zigzag_encode(-2), 3);
569 assert_eq!(zigzag_encode(2), 4);
570
571 for i in -1000i64..1000 {
572 assert_eq!(zigzag_decode(zigzag_encode(i)), i);
573 }
574 }
575
576 #[test]
577 fn test_analyzer_estimate() {
578 let all_same = vec![1u64; 100];
579 let ratio = RunLengthAnalyzer::estimate_ratio(&all_same);
580 assert!(ratio > 1.0);
581 assert!(RunLengthAnalyzer::is_beneficial(&all_same));
582
583 let all_diff: Vec<u64> = (0..100).collect();
584 let ratio = RunLengthAnalyzer::estimate_ratio(&all_diff);
585 assert!(ratio < 1.0);
586 assert!(!RunLengthAnalyzer::is_beneficial(&all_diff));
587 }
588
589 #[test]
590 fn test_analyzer_average_run_length() {
591 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]; let avg = RunLengthAnalyzer::average_run_length(&values);
593 assert!((avg - 3.33).abs() < 0.1);
594
595 let all_same = vec![1u64; 100];
596 let avg = RunLengthAnalyzer::average_run_length(&all_same);
597 assert!((avg - 100.0).abs() < 0.1);
598 }
599
600 #[test]
601 fn test_from_runs() {
602 let runs = vec![Run::new(1, 3), Run::new(2, 2), Run::new(3, 5)];
603 let encoded = RunLengthEncoding::from_runs(runs);
604
605 assert_eq!(encoded.run_count(), 3);
606 assert_eq!(encoded.total_count(), 10);
607 assert_eq!(encoded.decode(), vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]);
608 }
609}