1use std::io::{self, Read};
31
32#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct Run<T> {
35 pub value: T,
37 pub length: u64,
39}
40
41impl<T> Run<T> {
42 #[must_use]
44 pub fn new(value: T, length: u64) -> Self {
45 Self { value, length }
46 }
47}
48
49#[derive(Debug, Clone)]
54pub struct RunLengthEncoding {
55 runs: Vec<Run<u64>>,
57 total_count: usize,
59}
60
61impl<'a> IntoIterator for &'a RunLengthEncoding {
62 type Item = u64;
63 type IntoIter = RunLengthIterator<'a>;
64
65 fn into_iter(self) -> Self::IntoIter {
66 self.iter()
67 }
68}
69
70impl RunLengthEncoding {
71 #[must_use]
81 pub fn encode(values: &[u64]) -> Self {
82 if values.is_empty() {
83 return Self {
84 runs: Vec::new(),
85 total_count: 0,
86 };
87 }
88
89 let mut runs = Vec::new();
90 let mut current_value = values[0];
91 let mut current_length = 1u64;
92
93 for &value in &values[1..] {
94 if value == current_value {
95 current_length += 1;
96 } else {
97 runs.push(Run::new(current_value, current_length));
98 current_value = value;
99 current_length = 1;
100 }
101 }
102
103 runs.push(Run::new(current_value, current_length));
105
106 Self {
107 runs,
108 total_count: values.len(),
109 }
110 }
111
112 #[must_use]
114 pub fn from_runs(runs: Vec<Run<u64>>) -> Self {
115 let total_count = runs.iter().map(|r| r.length as usize).sum();
116 Self { runs, total_count }
117 }
118
119 #[must_use]
121 pub fn decode(&self) -> Vec<u64> {
122 let mut values = Vec::with_capacity(self.total_count);
123
124 for run in &self.runs {
125 for _ in 0..run.length {
126 values.push(run.value);
127 }
128 }
129
130 values
131 }
132
133 #[must_use]
135 pub fn run_count(&self) -> usize {
136 self.runs.len()
137 }
138
139 #[must_use]
141 pub fn total_count(&self) -> usize {
142 self.total_count
143 }
144
145 #[must_use]
147 pub fn runs(&self) -> &[Run<u64>] {
148 &self.runs
149 }
150
151 #[must_use]
153 pub fn is_empty(&self) -> bool {
154 self.total_count == 0
155 }
156
157 #[must_use]
161 pub fn compression_ratio(&self) -> f64 {
162 if self.runs.is_empty() {
163 return 1.0;
164 }
165
166 let original_size = self.total_count * 8;
169 let encoded_size = self.runs.len() * 16;
170
171 if encoded_size == 0 {
172 return 1.0;
173 }
174
175 original_size as f64 / encoded_size as f64
176 }
177
178 #[must_use]
182 pub fn is_beneficial(&self) -> bool {
183 self.compression_ratio() > 1.0
184 }
185
186 #[must_use]
188 pub fn encoded_size(&self) -> usize {
189 self.runs.len() * 16
191 }
192
193 pub fn to_bytes(&self) -> Vec<u8> {
195 let mut bytes = Vec::with_capacity(8 + self.runs.len() * 16);
196
197 bytes.extend_from_slice(&(self.runs.len() as u64).to_le_bytes());
199
200 for run in &self.runs {
202 bytes.extend_from_slice(&run.value.to_le_bytes());
203 bytes.extend_from_slice(&run.length.to_le_bytes());
204 }
205
206 bytes
207 }
208
209 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
215 let mut cursor = io::Cursor::new(bytes);
216
217 let mut buf = [0u8; 8];
219 cursor.read_exact(&mut buf)?;
220 let run_count = u64::from_le_bytes(buf) as usize;
221
222 let mut runs = Vec::with_capacity(run_count);
224 for _ in 0..run_count {
225 cursor.read_exact(&mut buf)?;
226 let value = u64::from_le_bytes(buf);
227
228 cursor.read_exact(&mut buf)?;
229 let length = u64::from_le_bytes(buf);
230
231 runs.push(Run::new(value, length));
232 }
233
234 Ok(Self::from_runs(runs))
235 }
236
237 #[must_use]
241 pub fn get(&self, index: usize) -> Option<u64> {
242 if index >= self.total_count {
243 return None;
244 }
245
246 let mut offset = 0usize;
247 for run in &self.runs {
248 let run_end = offset + run.length as usize;
249 if index < run_end {
250 return Some(run.value);
251 }
252 offset = run_end;
253 }
254
255 None
256 }
257
258 pub fn iter(&self) -> RunLengthIterator<'_> {
260 RunLengthIterator {
261 runs: &self.runs,
262 run_index: 0,
263 within_run: 0,
264 }
265 }
266}
267
268pub struct RunLengthIterator<'a> {
270 runs: &'a [Run<u64>],
271 run_index: usize,
272 within_run: u64,
273}
274
275impl Iterator for RunLengthIterator<'_> {
276 type Item = u64;
277
278 fn next(&mut self) -> Option<Self::Item> {
279 while self.run_index < self.runs.len() {
280 let run = &self.runs[self.run_index];
281 if self.within_run < run.length {
282 self.within_run += 1;
283 return Some(run.value);
284 }
285 self.run_index += 1;
286 self.within_run = 0;
287 }
288 None
289 }
290
291 fn size_hint(&self) -> (usize, Option<usize>) {
292 let remaining: u64 = self.runs[self.run_index..]
293 .iter()
294 .map(|r| r.length)
295 .sum::<u64>()
296 - self.within_run;
297 (remaining as usize, Some(remaining as usize))
298 }
299}
300
301impl ExactSizeIterator for RunLengthIterator<'_> {}
302
303#[derive(Debug, Clone)]
307pub struct SignedRunLengthEncoding {
308 inner: RunLengthEncoding,
309}
310
311impl SignedRunLengthEncoding {
312 #[must_use]
314 pub fn encode(values: &[i64]) -> Self {
315 let unsigned: Vec<u64> = values.iter().map(|&v| zigzag_encode(v)).collect();
316 Self {
317 inner: RunLengthEncoding::encode(&unsigned),
318 }
319 }
320
321 #[must_use]
323 pub fn decode(&self) -> Vec<i64> {
324 self.inner.decode().into_iter().map(zigzag_decode).collect()
325 }
326
327 #[must_use]
329 pub fn compression_ratio(&self) -> f64 {
330 self.inner.compression_ratio()
331 }
332
333 #[must_use]
335 pub fn run_count(&self) -> usize {
336 self.inner.run_count()
337 }
338
339 pub fn to_bytes(&self) -> Vec<u8> {
341 self.inner.to_bytes()
342 }
343
344 pub fn from_bytes(bytes: &[u8]) -> io::Result<Self> {
350 Ok(Self {
351 inner: RunLengthEncoding::from_bytes(bytes)?,
352 })
353 }
354}
355
356#[inline]
358#[must_use]
359pub fn zigzag_encode(n: i64) -> u64 {
360 ((n << 1) ^ (n >> 63)) as u64
361}
362
363#[inline]
365#[must_use]
366pub fn zigzag_decode(n: u64) -> i64 {
367 ((n >> 1) as i64) ^ -((n & 1) as i64)
368}
369
370pub struct RunLengthAnalyzer;
372
373impl RunLengthAnalyzer {
374 #[must_use]
378 pub fn estimate_ratio(values: &[u64]) -> f64 {
379 if values.is_empty() {
380 return 1.0;
381 }
382
383 let mut run_count = 1usize;
385 for i in 1..values.len() {
386 if values[i] != values[i - 1] {
387 run_count += 1;
388 }
389 }
390
391 let original = values.len() * 8;
394 let encoded = run_count * 16;
395
396 if encoded == 0 {
397 return 1.0;
398 }
399
400 original as f64 / encoded as f64
401 }
402
403 #[must_use]
405 pub fn is_beneficial(values: &[u64]) -> bool {
406 Self::estimate_ratio(values) > 1.0
407 }
408
409 #[must_use]
411 pub fn average_run_length(values: &[u64]) -> f64 {
412 if values.is_empty() {
413 return 0.0;
414 }
415
416 let mut run_count = 1usize;
417 for i in 1..values.len() {
418 if values[i] != values[i - 1] {
419 run_count += 1;
420 }
421 }
422
423 values.len() as f64 / run_count as f64
424 }
425}
426
427#[cfg(test)]
428mod tests {
429 use super::*;
430
431 #[test]
432 fn test_encode_decode_basic() {
433 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
434 let encoded = RunLengthEncoding::encode(&values);
435
436 assert_eq!(encoded.run_count(), 3);
437 assert_eq!(encoded.total_count(), 10);
438
439 let decoded = encoded.decode();
440 assert_eq!(values, decoded);
441 }
442
443 #[test]
444 fn test_encode_empty() {
445 let values: Vec<u64> = vec![];
446 let encoded = RunLengthEncoding::encode(&values);
447
448 assert_eq!(encoded.run_count(), 0);
449 assert_eq!(encoded.total_count(), 0);
450 assert!(encoded.is_empty());
451
452 let decoded = encoded.decode();
453 assert!(decoded.is_empty());
454 }
455
456 #[test]
457 fn test_encode_single() {
458 let values = vec![42];
459 let encoded = RunLengthEncoding::encode(&values);
460
461 assert_eq!(encoded.run_count(), 1);
462 assert_eq!(encoded.total_count(), 1);
463
464 let decoded = encoded.decode();
465 assert_eq!(values, decoded);
466 }
467
468 #[test]
469 fn test_encode_all_same() {
470 let values = vec![7u64; 1000];
471 let encoded = RunLengthEncoding::encode(&values);
472
473 assert_eq!(encoded.run_count(), 1);
474 assert_eq!(encoded.total_count(), 1000);
475
476 let ratio = encoded.compression_ratio();
478 assert!(ratio > 50.0, "Expected ratio > 50, got {}", ratio);
479
480 let decoded = encoded.decode();
481 assert_eq!(values, decoded);
482 }
483
484 #[test]
485 fn test_encode_all_different() {
486 let values: Vec<u64> = (0..100).collect();
487 let encoded = RunLengthEncoding::encode(&values);
488
489 assert_eq!(encoded.run_count(), 100);
490 assert_eq!(encoded.total_count(), 100);
491
492 let ratio = encoded.compression_ratio();
494 assert!(ratio < 1.0, "Expected ratio < 1, got {}", ratio);
495
496 let decoded = encoded.decode();
497 assert_eq!(values, decoded);
498 }
499
500 #[test]
501 fn test_compression_ratio() {
502 let all_same = vec![1u64; 100];
504 let encoded = RunLengthEncoding::encode(&all_same);
505 assert!(encoded.compression_ratio() > 1.0);
506 assert!(encoded.is_beneficial());
507
508 let all_diff: Vec<u64> = (0..100).collect();
510 let encoded = RunLengthEncoding::encode(&all_diff);
511 assert!(encoded.compression_ratio() < 1.0);
512 assert!(!encoded.is_beneficial());
513 }
514
515 #[test]
516 fn test_serialization() {
517 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
518 let encoded = RunLengthEncoding::encode(&values);
519
520 let bytes = encoded.to_bytes();
521 let decoded_encoding = RunLengthEncoding::from_bytes(&bytes).unwrap();
522
523 assert_eq!(encoded.run_count(), decoded_encoding.run_count());
524 assert_eq!(encoded.decode(), decoded_encoding.decode());
525 }
526
527 #[test]
528 fn test_get_index() {
529 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3];
530 let encoded = RunLengthEncoding::encode(&values);
531
532 assert_eq!(encoded.get(0), Some(1));
533 assert_eq!(encoded.get(2), Some(1));
534 assert_eq!(encoded.get(3), Some(2));
535 assert_eq!(encoded.get(4), Some(2));
536 assert_eq!(encoded.get(5), Some(3));
537 assert_eq!(encoded.get(9), Some(3));
538 assert_eq!(encoded.get(10), None);
539 }
540
541 #[test]
542 fn test_iterator() {
543 let values = vec![1, 1, 1, 2, 2, 3];
544 let encoded = RunLengthEncoding::encode(&values);
545
546 let iterated: Vec<u64> = encoded.iter().collect();
547 assert_eq!(values, iterated);
548 }
549
550 #[test]
551 fn test_signed_integers() {
552 let values = vec![-5, -5, -5, 0, 0, 10, 10, 10, 10];
553 let encoded = SignedRunLengthEncoding::encode(&values);
554
555 assert_eq!(encoded.run_count(), 3);
556
557 let decoded = encoded.decode();
558 assert_eq!(values, decoded);
559 }
560
561 #[test]
562 fn test_signed_serialization() {
563 let values = vec![-100, -100, 0, 0, 0, 100];
564 let encoded = SignedRunLengthEncoding::encode(&values);
565
566 let bytes = encoded.to_bytes();
567 let decoded_encoding = SignedRunLengthEncoding::from_bytes(&bytes).unwrap();
568
569 assert_eq!(encoded.decode(), decoded_encoding.decode());
570 }
571
572 #[test]
573 fn test_zigzag() {
574 assert_eq!(zigzag_encode(0), 0);
575 assert_eq!(zigzag_encode(-1), 1);
576 assert_eq!(zigzag_encode(1), 2);
577 assert_eq!(zigzag_encode(-2), 3);
578 assert_eq!(zigzag_encode(2), 4);
579
580 for i in -1000i64..1000 {
581 assert_eq!(zigzag_decode(zigzag_encode(i)), i);
582 }
583 }
584
585 #[test]
586 fn test_analyzer_estimate() {
587 let all_same = vec![1u64; 100];
588 let ratio = RunLengthAnalyzer::estimate_ratio(&all_same);
589 assert!(ratio > 1.0);
590 assert!(RunLengthAnalyzer::is_beneficial(&all_same));
591
592 let all_diff: Vec<u64> = (0..100).collect();
593 let ratio = RunLengthAnalyzer::estimate_ratio(&all_diff);
594 assert!(ratio < 1.0);
595 assert!(!RunLengthAnalyzer::is_beneficial(&all_diff));
596 }
597
598 #[test]
599 fn test_analyzer_average_run_length() {
600 let values = vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]; let avg = RunLengthAnalyzer::average_run_length(&values);
602 assert!((avg - 3.33).abs() < 0.1);
603
604 let all_same = vec![1u64; 100];
605 let avg = RunLengthAnalyzer::average_run_length(&all_same);
606 assert!((avg - 100.0).abs() < 0.1);
607 }
608
609 #[test]
610 fn test_from_runs() {
611 let runs = vec![Run::new(1, 3), Run::new(2, 2), Run::new(3, 5)];
612 let encoded = RunLengthEncoding::from_runs(runs);
613
614 assert_eq!(encoded.run_count(), 3);
615 assert_eq!(encoded.total_count(), 10);
616 assert_eq!(encoded.decode(), vec![1, 1, 1, 2, 2, 3, 3, 3, 3, 3]);
617 }
618}