1use std::io::{self, Read};
31
32#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct Run<T> {
35 pub value: T,
37 pub length: u64,
39}
40
41impl<T> Run<T> {
42 #[must_use]
44 pub fn new(value: T, length: u64) -> Self {
45 Self { value, length }
46 }
47}
48
49#[derive(Debug, Clone)]
54pub struct RunLengthEncoding {
55 runs: Vec<Run<u64>>,
57 total_count: usize,
59}
60
61impl<'a> IntoIterator for &'a RunLengthEncoding {
62 type Item = u64;
63 type IntoIter = RunLengthIterator<'a>;
64
65 fn into_iter(self) -> Self::IntoIter {
66 self.iter()
67 }
68}
69
70impl RunLengthEncoding {
71 #[must_use]
81 pub fn encode(values: &[u64]) -> Self {
82 if values.is_empty() {
83 return Self {
84 runs: Vec::new(),
85 total_count: 0,
86 };
87 }
88
89 let mut runs = Vec::new();
90 let mut current_value = values[0];
91 let mut current_length = 1u64;
92
93 for &value in &values[1..] {
94 if value == current_value {
95 current_length += 1;
96 } else {
97 runs.push(Run::new(current_value, current_length));
98 current_value = value;
99 current_length = 1;
100 }
101 }
102
103 runs.push(Run::new(current_value, current_length));
105
106 Self {
107 runs,
108 total_count: values.len(),
109 }
110 }
111
112 #[must_use]
114 pub fn from_runs(runs: Vec<Run<u64>>) -> Self {
115 let total_count = runs
116 .iter()
117 .map(|r| {
118 #[allow(clippy::cast_possible_truncation)]
120 let len = r.length as usize;
121 len
122 })
123 .sum();
124 Self { runs, total_count }
125 }
126
127 #[must_use]
129 pub fn decode(&self) -> Vec<u64> {
130 let mut values = Vec::with_capacity(self.total_count);
131
132 for run in &self.runs {
133 for _ in 0..run.length {
134 values.push(run.value);
135 }
136 }
137
138 values
139 }
140
141 #[must_use]
143 pub fn run_count(&self) -> usize {
144 self.runs.len()
145 }
146
147 #[must_use]
149 pub fn total_count(&self) -> usize {
150 self.total_count
151 }
152
153 #[must_use]
155 pub fn runs(&self) -> &[Run<u64>] {
156 &self.runs
157 }
158
159 #[must_use]
161 pub fn is_empty(&self) -> bool {
162 self.total_count == 0
163 }
164
165 #[must_use]
169 pub fn compression_ratio(&self) -> f64 {
170 if self.runs.is_empty() {
171 return 1.0;
172 }
173
174 let original_size = self.total_count * 8;
177 let encoded_size = self.runs.len() * 16;
178
179 if encoded_size == 0 {
180 return 1.0;
181 }
182
183 original_size as f64 / encoded_size as f64
184 }
185
186 #[must_use]
190 pub fn is_beneficial(&self) -> bool {
191 self.compression_ratio() > 1.0
192 }
193
194 #[must_use]
196 pub fn encoded_size(&self) -> usize {
197 self.runs.len() * 16
199 }
200
201 pub fn to_bytes(&self) -> Vec<u8> {
203 let mut bytes = Vec::with_capacity(8 + self.runs.len() * 16);
204
205 bytes.extend_from_slice(&(self.runs.len() as u64).to_le_bytes());
207
208 for run in &self.runs {
210 bytes.extend_from_slice(&run.value.to_le_bytes());
211 bytes.extend_from_slice(&run.length.to_le_bytes());
212 }
213
214 bytes
215 }
216
217 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
223 let mut cursor = io::Cursor::new(bytes);
224
225 let mut buf = [0u8; 8];
227 cursor.read_exact(&mut buf)?;
228 #[allow(clippy::cast_possible_truncation)]
230 let run_count = u64::from_le_bytes(buf) as usize;
231
232 let mut runs = Vec::with_capacity(run_count);
234 for _ in 0..run_count {
235 cursor.read_exact(&mut buf)?;
236 let value = u64::from_le_bytes(buf);
237
238 cursor.read_exact(&mut buf)?;
239 let length = u64::from_le_bytes(buf);
240
241 runs.push(Run::new(value, length));
242 }
243
244 Ok(Self::from_runs(runs))
245 }
246
247 #[must_use]
251 pub fn get(&self, index: usize) -> Option<u64> {
252 if index >= self.total_count {
253 return None;
254 }
255
256 let mut offset = 0usize;
257 for run in &self.runs {
258 #[allow(clippy::cast_possible_truncation)]
260 let run_end = offset + run.length as usize;
261 if index < run_end {
262 return Some(run.value);
263 }
264 offset = run_end;
265 }
266
267 None
268 }
269
270 pub fn iter(&self) -> RunLengthIterator<'_> {
272 RunLengthIterator {
273 runs: &self.runs,
274 run_index: 0,
275 within_run: 0,
276 }
277 }
278}
279
280pub struct RunLengthIterator<'a> {
282 runs: &'a [Run<u64>],
283 run_index: usize,
284 within_run: u64,
285}
286
287impl Iterator for RunLengthIterator<'_> {
288 type Item = u64;
289
290 fn next(&mut self) -> Option<Self::Item> {
291 while self.run_index < self.runs.len() {
292 let run = &self.runs[self.run_index];
293 if self.within_run < run.length {
294 self.within_run += 1;
295 return Some(run.value);
296 }
297 self.run_index += 1;
298 self.within_run = 0;
299 }
300 None
301 }
302
303 fn size_hint(&self) -> (usize, Option<usize>) {
304 let remaining: u64 = self.runs[self.run_index..]
305 .iter()
306 .map(|r| r.length)
307 .sum::<u64>()
308 - self.within_run;
309 #[allow(clippy::cast_possible_truncation)]
311 (remaining as usize, Some(remaining as usize))
312 }
313}
314
315impl ExactSizeIterator for RunLengthIterator<'_> {}
316
317#[derive(Debug, Clone)]
321pub struct SignedRunLengthEncoding {
322 inner: RunLengthEncoding,
323}
324
325impl SignedRunLengthEncoding {
326 #[must_use]
328 pub fn encode(values: &[i64]) -> Self {
329 let unsigned: Vec<u64> = values.iter().map(|&v| zigzag_encode(v)).collect();
330 Self {
331 inner: RunLengthEncoding::encode(&unsigned),
332 }
333 }
334
335 #[must_use]
337 pub fn decode(&self) -> Vec<i64> {
338 self.inner.decode().into_iter().map(zigzag_decode).collect()
339 }
340
341 #[must_use]
343 pub fn compression_ratio(&self) -> f64 {
344 self.inner.compression_ratio()
345 }
346
347 #[must_use]
349 pub fn run_count(&self) -> usize {
350 self.inner.run_count()
351 }
352
353 pub fn to_bytes(&self) -> Vec<u8> {
355 self.inner.to_bytes()
356 }
357
358 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
364 Ok(Self {
365 inner: RunLengthEncoding::from_bytes(bytes)?,
366 })
367 }
368}
369
370#[inline]
372#[must_use]
373#[allow(clippy::cast_sign_loss)]
375pub fn zigzag_encode(n: i64) -> u64 {
376 ((n << 1) ^ (n >> 63)) as u64
377}
378
379#[inline]
381#[must_use]
382#[allow(clippy::cast_possible_wrap)]
384pub fn zigzag_decode(n: u64) -> i64 {
385 ((n >> 1) as i64) ^ -((n & 1) as i64)
386}
387
388pub struct RunLengthAnalyzer;
390
391impl RunLengthAnalyzer {
392 #[must_use]
396 pub fn estimate_ratio(values: &[u64]) -> f64 {
397 if values.is_empty() {
398 return 1.0;
399 }
400
401 let mut run_count = 1usize;
403 for i in 1..values.len() {
404 if values[i] != values[i - 1] {
405 run_count += 1;
406 }
407 }
408
409 let original = values.len() * 8;
412 let encoded = run_count * 16;
413
414 if encoded == 0 {
415 return 1.0;
416 }
417
418 original as f64 / encoded as f64
419 }
420
421 #[must_use]
423 pub fn is_beneficial(values: &[u64]) -> bool {
424 Self::estimate_ratio(values) > 1.0
425 }
426
427 #[must_use]
429 pub fn average_run_length(values: &[u64]) -> f64 {
430 if values.is_empty() {
431 return 0.0;
432 }
433
434 let mut run_count = 1usize;
435 for i in 1..values.len() {
436 if values[i] != values[i - 1] {
437 run_count += 1;
438 }
439 }
440
441 values.len() as f64 / run_count as f64
442 }
443}
444
445#[cfg(test)]
446mod tests {
447 use super::*;
448
449 #[test]
450 fn test_encode_decode_basic() {
451 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
452 let encoded = RunLengthEncoding::encode(&values);
453
454 assert_eq!(encoded.run_count(), 3);
455 assert_eq!(encoded.total_count(), 10);
456
457 let decoded = encoded.decode();
458 assert_eq!(values, decoded);
459 }
460
461 #[test]
462 fn test_encode_empty() {
463 let values: Vec<u64> = vec![];
464 let encoded = RunLengthEncoding::encode(&values);
465
466 assert_eq!(encoded.run_count(), 0);
467 assert_eq!(encoded.total_count(), 0);
468 assert!(encoded.is_empty());
469
470 let decoded = encoded.decode();
471 assert!(decoded.is_empty());
472 }
473
474 #[test]
475 fn test_encode_single() {
476 let values = vec![42];
477 let encoded = RunLengthEncoding::encode(&values);
478
479 assert_eq!(encoded.run_count(), 1);
480 assert_eq!(encoded.total_count(), 1);
481
482 let decoded = encoded.decode();
483 assert_eq!(values, decoded);
484 }
485
486 #[test]
487 fn test_encode_all_same() {
488 let values = vec![7u64; 1000];
489 let encoded = RunLengthEncoding::encode(&values);
490
491 assert_eq!(encoded.run_count(), 1);
492 assert_eq!(encoded.total_count(), 1000);
493
494 let ratio = encoded.compression_ratio();
496 assert!(ratio > 50.0, "Expected ratio > 50, got {}", ratio);
497
498 let decoded = encoded.decode();
499 assert_eq!(values, decoded);
500 }
501
502 #[test]
503 fn test_encode_all_different() {
504 let values: Vec<u64> = (0..100).collect();
505 let encoded = RunLengthEncoding::encode(&values);
506
507 assert_eq!(encoded.run_count(), 100);
508 assert_eq!(encoded.total_count(), 100);
509
510 let ratio = encoded.compression_ratio();
512 assert!(ratio < 1.0, "Expected ratio < 1, got {}", ratio);
513
514 let decoded = encoded.decode();
515 assert_eq!(values, decoded);
516 }
517
518 #[test]
519 fn test_compression_ratio() {
520 let all_same = vec![1u64; 100];
522 let encoded = RunLengthEncoding::encode(&all_same);
523 assert!(encoded.compression_ratio() > 1.0);
524 assert!(encoded.is_beneficial());
525
526 let all_diff: Vec<u64> = (0..100).collect();
528 let encoded = RunLengthEncoding::encode(&all_diff);
529 assert!(encoded.compression_ratio() < 1.0);
530 assert!(!encoded.is_beneficial());
531 }
532
533 #[test]
534 fn test_serialization() {
535 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
536 let encoded = RunLengthEncoding::encode(&values);
537
538 let bytes = encoded.to_bytes();
539 let decoded_encoding = RunLengthEncoding::from_bytes(&bytes).unwrap();
540
541 assert_eq!(encoded.run_count(), decoded_encoding.run_count());
542 assert_eq!(encoded.decode(), decoded_encoding.decode());
543 }
544
545 #[test]
546 fn test_get_index() {
547 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
548 let encoded = RunLengthEncoding::encode(&values);
549
550 assert_eq!(encoded.get(0), Some(1));
551 assert_eq!(encoded.get(2), Some(1));
552 assert_eq!(encoded.get(3), Some(2));
553 assert_eq!(encoded.get(4), Some(2));
554 assert_eq!(encoded.get(5), Some(3));
555 assert_eq!(encoded.get(9), Some(3));
556 assert_eq!(encoded.get(10), None);
557 }
558
559 #[test]
560 fn test_iterator() {
561 let values = vec![1, 1, 1, 2, 2, 3];
562 let encoded = RunLengthEncoding::encode(&values);
563
564 let iterated: Vec<u64> = encoded.iter().collect();
565 assert_eq!(values, iterated);
566 }
567
568 #[test]
569 fn test_signed_integers() {
570 let values = vec![-5, -5, -5, 0, 0, 10, 10, 10, 10];
571 let encoded = SignedRunLengthEncoding::encode(&values);
572
573 assert_eq!(encoded.run_count(), 3);
574
575 let decoded = encoded.decode();
576 assert_eq!(values, decoded);
577 }
578
579 #[test]
580 fn test_signed_serialization() {
581 let values = vec![-100, -100, 0, 0, 0, 100];
582 let encoded = SignedRunLengthEncoding::encode(&values);
583
584 let bytes = encoded.to_bytes();
585 let decoded_encoding = SignedRunLengthEncoding::from_bytes(&bytes).unwrap();
586
587 assert_eq!(encoded.decode(), decoded_encoding.decode());
588 }
589
590 #[test]
591 fn test_zigzag() {
592 assert_eq!(zigzag_encode(0), 0);
593 assert_eq!(zigzag_encode(-1), 1);
594 assert_eq!(zigzag_encode(1), 2);
595 assert_eq!(zigzag_encode(-2), 3);
596 assert_eq!(zigzag_encode(2), 4);
597
598 for i in -1000i64..1000 {
599 assert_eq!(zigzag_decode(zigzag_encode(i)), i);
600 }
601 }
602
603 #[test]
604 fn test_analyzer_estimate() {
605 let all_same = vec![1u64; 100];
606 let ratio = RunLengthAnalyzer::estimate_ratio(&all_same);
607 assert!(ratio > 1.0);
608 assert!(RunLengthAnalyzer::is_beneficial(&all_same));
609
610 let all_diff: Vec<u64> = (0..100).collect();
611 let ratio = RunLengthAnalyzer::estimate_ratio(&all_diff);
612 assert!(ratio < 1.0);
613 assert!(!RunLengthAnalyzer::is_beneficial(&all_diff));
614 }
615
616 #[test]
617 fn test_analyzer_average_run_length() {
618 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]; let avg = RunLengthAnalyzer::average_run_length(&values);
620 assert!((avg - 3.33).abs() < 0.1);
621
622 let all_same = vec![1u64; 100];
623 let avg = RunLengthAnalyzer::average_run_length(&all_same);
624 assert!((avg - 100.0).abs() < 0.1);
625 }
626
627 #[test]
628 fn test_from_runs() {
629 let runs = vec![Run::new(1, 3), Run::new(2, 2), Run::new(3, 5)];
630 let encoded = RunLengthEncoding::from_runs(runs);
631
632 assert_eq!(encoded.run_count(), 3);
633 assert_eq!(encoded.total_count(), 10);
634 assert_eq!(encoded.decode(), vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]);
635 }
636}