Skip to main content

timeseries_table_core/
coverage.rs

1//! In-memory coverage and gap analysis over a discrete bucket domain.
2//!
3//! This module is intentionally small and generic:
4//!
5//! - It wraps `roaring::RoaringBitmap` in a `Coverage` struct.
6//! - It does not know about timestamps, tables, or storage.
7//! - Callers are expected to map their own domain (for example, time buckets)
8//!   into `u32` bucket ids.
9//!
10//! Typical usage:
11//!
12//! ```
13//! use timeseries_table_core::coverage::{Coverage, RoaringBitmap};
14//!
15//! // "expected" domain: buckets 0..10
16//! let expected: RoaringBitmap = (0u32..10).collect();
17//!
18//! // "present" coverage: everything except bucket 5
19//! let mut present = RoaringBitmap::new();
20//! for b in 0u32..10 {
21//!     if b != 5 {
22//!         present.insert(b);
23//!     }
24//! }
25//!
26//! let cov = Coverage::from_bitmap(present);
27//!
28//! let missing = cov.missing_points(&expected);
29//! assert!(missing.contains(5));
30//!
31//! let ratio = cov.coverage_ratio(&expected);
32//! assert!((ratio - 0.9).abs() < 1e-9);
33//! ```
34pub mod bucket;
35pub mod io;
36pub mod layout;
37pub mod serde;
38
39use std::ops::RangeInclusive;
40
41// Re-export to let callers use `RoaringBitmap` without declaring `roaring` explicitly.
42pub use roaring::RoaringBitmap;
43
44/// Type alias for bucket ids used by Coverage.
45///
46/// For v0.1 we use `u32`, which is enough for common time-bucket domains
47/// (for example, minutes since epoch).
48pub type Bucket = u32;
49
50/// In-memory coverage over a discrete set of bucket ids.
51///
52/// This is a thin wrapper over `RoaringBitmap` that adds convenience
53/// methods for gap analysis.
54#[derive(Debug, Clone, Default)]
55pub struct Coverage {
56    bitmap: RoaringBitmap,
57}
58
59impl Coverage {
60    /// Construct an empty coverage set (no bucket present).
61    pub fn empty() -> Self {
62        Self {
63            bitmap: RoaringBitmap::new(),
64        }
65    }
66
67    /// Wrap an existing RoaringBitmap as Coverage.
68    pub fn from_bitmap(bitmap: RoaringBitmap) -> Self {
69        Self { bitmap }
70    }
71
72    /// Borrow the underlying bitmap of present buckets.
73    pub fn present(&self) -> &RoaringBitmap {
74        &self.bitmap
75    }
76
77    /// Consume the Coverage and return the underlying bitmap.
78    pub fn into_bitmap(self) -> RoaringBitmap {
79        self.bitmap
80    }
81
82    /// Return the union of `self` and `other`.
83    pub fn union(&self, other: &Coverage) -> Coverage {
84        let bitmap = &self.bitmap | &other.bitmap;
85        Coverage { bitmap }
86    }
87
88    /// Return the intersection of `self` and `other`.
89    pub fn intersect(&self, other: &Coverage) -> Coverage {
90        let bitmap = &self.bitmap & &other.bitmap;
91        Coverage { bitmap }
92    }
93
94    /// Number of buckets present in this coverage.
95    pub fn cardinality(&self) -> u64 {
96        self.bitmap.len()
97    }
98
99    /// Return bucket ids that are expected but not present in this coverage.
100    ///
101    /// This is `expected - present`.
102    pub fn missing_points(&self, expected: &RoaringBitmap) -> RoaringBitmap {
103        let mut missing = expected.clone();
104        missing -= &self.bitmap;
105        missing
106    }
107
108    /// Group missing buckets into contiguous runs, optionally splitting
109    /// long runs into chunks of at most `max_run_len`.
110    ///
111    /// - `expected` defines the domain we care about.
112    /// - Missing buckets are `expected - present`.
113    /// - Each returned range is inclusive in bucket space.
114    pub fn missing_runs(
115        &self,
116        expected: &RoaringBitmap,
117        max_run_len: Option<u64>,
118    ) -> Vec<RangeInclusive<Bucket>> {
119        let missing = self.missing_points(expected);
120        let base_runs = runs_from_bitmap(&missing);
121
122        if let Some(max_len) = max_run_len {
123            split_runs_by_len(base_runs, max_len)
124        } else {
125            base_runs
126        }
127    }
128
129    /// Return the last (highest) contiguous run of coverage of length
130    /// at least `min_len`, relative to `expected`.
131    ///
132    /// "Coverage" here means buckets that are both in `expected` and in
133    /// `self.present()`.
134    pub fn last_run_with_min_len(
135        &self,
136        expected: &RoaringBitmap,
137        min_len: u64,
138    ) -> Option<RangeInclusive<Bucket>> {
139        if min_len == 0 {
140            // Return None for min_len == 0 as it's a degenerate case.
141            // Callers can avoid this case if they prefer a different meaning.
142            return None;
143        }
144
145        let covered = &self.bitmap & expected;
146        let runs = runs_from_bitmap(&covered);
147
148        for range in runs.into_iter().rev() {
149            let (start, end) = (*range.start() as u64, *range.end() as u64);
150            let len = end.saturating_sub(start) + 1;
151            if len >= min_len {
152                return Some(start as Bucket..=end as Bucket);
153            }
154        }
155
156        None
157    }
158
159    /// Coverage ratio in `[0.0, 1.0]` relative to `expected`.
160    ///
161    /// Defined as:
162    ///
163    /// `(|present ∩ expected| as f64) / (|expected| as f64)`
164    ///
165    /// For `expected.is_empty()`, this returns `1.0` by convention
166    /// (vacuous full coverage).
167    pub fn coverage_ratio(&self, expected: &RoaringBitmap) -> f64 {
168        let expected_count = expected.len();
169        if expected_count == 0 {
170            return 1.0;
171        }
172
173        let covered = &self.bitmap & expected;
174        let covered_count = covered.len();
175        covered_count as f64 / expected_count as f64
176    }
177
178    /// Maximum gap length (in buckets) relative to `expected`.
179    ///
180    /// This is the length (number of buckets) of the longest missing run.
181    /// If there are no missing buckets, returns 0.
182    pub fn max_gap_len(&self, expected: &RoaringBitmap) -> u64 {
183        let missing = self.missing_points(expected);
184        let runs = runs_from_bitmap(&missing);
185
186        runs.into_iter()
187            .map(|r| {
188                let (start, end) = (*r.start(), *r.end());
189                (end as u64).saturating_sub(start as u64) + 1
190            })
191            .max()
192            .unwrap_or(0)
193    }
194
195    /// Merge another coverage bitmap into this one, accumulating all buckets.
196    ///
197    /// This performs an in-place union, so after the call `self` contains every
198    /// bucket that was set in either `self` or `other`.
199    pub fn union_inplace(&mut self, other: &Coverage) {
200        self.bitmap |= other.present();
201    }
202
203    /// Return the last fully-covered contiguous window of length `len`
204    /// ending at or before `end_bucket`, based on *present buckets only*.
205    pub fn last_window_at_or_before(
206        &self,
207        end_bucket: Bucket,
208        len: u64,
209    ) -> Option<RangeInclusive<Bucket>> {
210        if len == 0 {
211            return None;
212        }
213
214        let it = self.bitmap.iter().rev();
215
216        let mut started = false;
217        let mut run_end: Bucket = 0;
218        let mut prev_seen: Option<Bucket> = None;
219        let mut run_len: u64 = 0;
220
221        for b in it {
222            if b > end_bucket {
223                continue;
224            }
225
226            if !started {
227                started = true;
228                run_end = b;
229                run_len = 1;
230            } else if prev_seen == Some(b + 1) {
231                run_len += 1;
232            } else {
233                // break in coverage -> start a new run
234                run_end = b;
235                run_len = 1;
236            }
237
238            prev_seen = Some(b);
239
240            if run_len >= len {
241                let end_u64 = run_end as u64;
242                let start_u64 = end_u64.checked_add(1)?.checked_sub(len)?;
243
244                if start_u64 > u32::MAX as u64 {
245                    return None;
246                }
247                return Some(start_u64 as Bucket..=run_end);
248            }
249        }
250
251        None
252    }
253}
254
255impl FromIterator<Bucket> for Coverage {
256    fn from_iter<I>(iter: I) -> Self
257    where
258        I: IntoIterator<Item = Bucket>,
259    {
260        let bitmap: RoaringBitmap = iter.into_iter().collect();
261        Self { bitmap }
262    }
263}
264
265/// Convert a bitmap into contiguous runs of bucket ids.
266///
267/// Each run is returned as a `RangeInclusive<Bucket>`.
268fn runs_from_bitmap(bitmap: &RoaringBitmap) -> Vec<RangeInclusive<Bucket>> {
269    let mut out = Vec::new();
270    let mut iter = bitmap.iter();
271
272    let Some(mut start) = iter.next() else {
273        return out;
274    };
275    let mut prev = start;
276
277    for v in iter {
278        if prev.checked_add(1) == Some(v) {
279            // still in the same contiguous run
280            prev = v;
281        } else {
282            // close previous run and start a new one
283            out.push(start..=prev);
284            start = v;
285            prev = v;
286        }
287    }
288
289    // finalize last run
290    out.push(start..=prev);
291    out
292}
293
294/// Split runs into smaller runs of at most `max_len` buckets.
295fn split_runs_by_len(
296    runs: Vec<RangeInclusive<Bucket>>,
297    max_len: u64,
298) -> Vec<RangeInclusive<Bucket>> {
299    if max_len == 0 {
300        return Vec::new();
301    }
302
303    let mut out = Vec::new();
304    let step = max_len - 1;
305
306    for range in runs {
307        let (start, end) = (*range.start() as u64, *range.end() as u64);
308        let mut cur = start;
309        while cur <= end {
310            let chunk_end = match cur.checked_add(step) {
311                Some(v) => v.min(end),
312                None => break, // overflow would only happen at u64::MAX; stop splitting
313            };
314            out.push(cur as Bucket..=chunk_end as Bucket);
315
316            if chunk_end == end {
317                break;
318            }
319
320            cur = chunk_end + 1;
321        }
322    }
323
324    out
325}
326
327#[cfg(test)]
328mod tests {
329    use super::*;
330    use roaring::RoaringBitmap;
331
332    fn bm_from_range(start: u32, end_exclusive: u32) -> RoaringBitmap {
333        (start..end_exclusive).collect()
334    }
335
336    #[test]
337    fn from_iterator_builds_expected_bitmap() {
338        let cov: Coverage = (10u32..15u32).collect();
339        assert_eq!(cov.cardinality(), 5);
340        for b in 10u32..15u32 {
341            assert!(cov.present().contains(b));
342        }
343    }
344
345    #[test]
346    fn basic_api_cardinality_union_intersect_into_bitmap() {
347        let cov_a = Coverage::from_bitmap(bm_from_range(0, 5)); // {0,1,2,3,4}
348        let cov_b = Coverage::from_bitmap(bm_from_range(3, 8)); // {3,4,5,6,7}
349
350        // cardinality
351        assert_eq!(cov_a.cardinality(), 5);
352
353        // union: {0..8} without 8
354        let union = cov_a.union(&cov_b);
355        assert_eq!(union.cardinality(), 8);
356        assert!(union.present().contains(0));
357        assert!(union.present().contains(7));
358
359        // intersection: {3,4}
360        let inter = cov_a.intersect(&cov_b);
361        assert_eq!(inter.cardinality(), 2);
362        assert!(inter.present().contains(3));
363        assert!(inter.present().contains(4));
364        assert!(!inter.present().contains(2));
365
366        // into_bitmap returns underlying set
367        let bitmap = inter.into_bitmap();
368        assert!(bitmap.contains(3));
369        assert!(bitmap.contains(4));
370        assert_eq!(bitmap.len(), 2);
371    }
372
373    #[test]
374    fn full_coverage_continuous() {
375        // expected = {0..10}, present = {0..10}
376        let expected = bm_from_range(0, 10);
377        let present = bm_from_range(0, 10);
378
379        let cov = Coverage::from_bitmap(present);
380
381        let missing = cov.missing_points(&expected);
382        assert!(missing.is_empty());
383
384        let runs = cov.missing_runs(&expected, None);
385        assert!(runs.is_empty());
386
387        for min_len in 1..=10 {
388            let run = cov.last_run_with_min_len(&expected, min_len).unwrap();
389            assert_eq!(*run.start(), 0);
390            assert_eq!(*run.end(), 9);
391        }
392
393        // Past the run length, there should be no run.
394        assert!(cov.last_run_with_min_len(&expected, 11).is_none());
395
396        assert!((cov.coverage_ratio(&expected) - 1.0).abs() < 1e-12);
397        assert_eq!(cov.max_gap_len(&expected), 0);
398    }
399
400    #[test]
401    fn single_gap_in_middle() {
402        // expected = {0..10}, present = {0..10} \ {5}
403        let expected = bm_from_range(0, 10);
404
405        let mut present = bm_from_range(0, 10);
406        present.remove(5);
407
408        let cov = Coverage::from_bitmap(present);
409
410        let missing = cov.missing_points(&expected);
411        assert_eq!(missing.len(), 1);
412        assert!(missing.contains(5));
413
414        let runs = cov.missing_runs(&expected, None);
415        assert_eq!(runs.len(), 1);
416        let r = &runs[0];
417        assert_eq!((*r.start(), *r.end()), (5, 5));
418
419        assert_eq!(cov.max_gap_len(&expected), 1);
420    }
421
422    #[test]
423    fn multiple_gaps_and_run_splitting() {
424        // expected = {0..20}
425        // present = {0..20} \ {3,4,10,11,12,18}
426        let expected = bm_from_range(0, 20);
427
428        let mut present = bm_from_range(0, 20);
429        for b in [3, 4, 10, 11, 12, 18] {
430            present.remove(b);
431        }
432
433        let cov = Coverage::from_bitmap(present);
434
435        let missing = cov.missing_points(&expected);
436        let mut missing_vec: Vec<_> = missing.iter().collect();
437        missing_vec.sort_unstable();
438        assert_eq!(missing_vec, vec![3, 4, 10, 11, 12, 18]);
439
440        // Without max_run_len, we get contiguous runs.
441        let runs = cov.missing_runs(&expected, None);
442        assert_eq!(runs.len(), 3);
443        assert_eq!((*runs[0].start(), *runs[0].end()), (3, 4)); // len 2
444        assert_eq!((*runs[1].start(), *runs[1].end()), (10, 12)); // len 3
445        assert_eq!((*runs[2].start(), *runs[2].end()), (18, 18)); // len 1
446
447        // With max_run_len = 2, the {10..12} run should be split.
448        let runs_split = cov.missing_runs(&expected, Some(2));
449        // Gaps: [3,4], [10,11], [12,12], [18,18]
450        assert_eq!(runs_split.len(), 4);
451        assert_eq!((*runs_split[0].start(), *runs_split[0].end()), (3, 4));
452        assert_eq!((*runs_split[1].start(), *runs_split[1].end()), (10, 11));
453        assert_eq!((*runs_split[2].start(), *runs_split[2].end()), (12, 12));
454        assert_eq!((*runs_split[3].start(), *runs_split[3].end()), (18, 18));
455
456        // With max_run_len = 1, every gap is split into singletons.
457        let runs_split_single = cov.missing_runs(&expected, Some(1));
458        let expected_singletons = vec![3, 4, 10, 11, 12, 18];
459        assert_eq!(runs_split_single.len(), expected_singletons.len());
460        for (range, expected_bucket) in runs_split_single.iter().zip(expected_singletons.iter()) {
461            assert_eq!(
462                (*range.start(), *range.end()),
463                (*expected_bucket, *expected_bucket)
464            );
465        }
466
467        // With max_run_len = 0, nothing is returned per contract.
468        let runs_zero = cov.missing_runs(&expected, Some(0));
469        assert!(runs_zero.is_empty());
470    }
471
472    #[test]
473    fn edge_cases_empty_expected() {
474        let expected = RoaringBitmap::new();
475
476        // Nonempty present, but expected is empty.
477        let present = bm_from_range(0, 10);
478        let cov = Coverage::from_bitmap(present);
479
480        let missing = cov.missing_points(&expected);
481        assert!(missing.is_empty());
482
483        let runs = cov.missing_runs(&expected, None);
484        assert!(runs.is_empty());
485
486        let ratio = cov.coverage_ratio(&expected);
487        assert!((ratio - 1.0).abs() < 1e-12);
488
489        assert_eq!(cov.max_gap_len(&expected), 0);
490        assert!(cov.last_run_with_min_len(&expected, 1).is_none());
491    }
492
493    #[test]
494    fn edge_cases_empty_present() {
495        // Nonempty expected, empty present.
496        let expected = bm_from_range(0, 5);
497        let cov = Coverage::empty();
498
499        let missing = cov.missing_points(&expected);
500        assert_eq!(missing.len(), expected.len());
501
502        let runs = cov.missing_runs(&expected, None);
503        assert_eq!(runs.len(), 1);
504        let r = &runs[0];
505        assert_eq!((*r.start(), *r.end()), (0, 4));
506
507        assert_eq!(cov.coverage_ratio(&expected), 0.0);
508        assert_eq!(cov.max_gap_len(&expected), 5);
509
510        assert!(cov.last_run_with_min_len(&expected, 6).is_none());
511        assert!(cov.last_run_with_min_len(&expected, 3).is_none());
512    }
513
514    #[test]
515    fn single_point_cases() {
516        let mut expected = RoaringBitmap::new();
517        expected.insert(42);
518
519        // Present covers the single point.
520        let mut present = RoaringBitmap::new();
521        present.insert(42);
522        let cov = Coverage::from_bitmap(present);
523
524        assert!(cov.missing_points(&expected).is_empty());
525        assert!(cov.missing_runs(&expected, None).is_empty());
526        assert_eq!(cov.coverage_ratio(&expected), 1.0);
527        assert_eq!(cov.max_gap_len(&expected), 0);
528
529        let run = cov.last_run_with_min_len(&expected, 1).unwrap();
530        assert_eq!((*run.start(), *run.end()), (42, 42));
531
532        // Present is empty.
533        let cov_empty = Coverage::empty();
534        let missing = cov_empty.missing_points(&expected);
535        assert!(missing.contains(42));
536    }
537
538    #[test]
539    fn last_window_contiguous_runs() {
540        let cov = Coverage::from_bitmap(bm_from_range(0, 10)); // 0..=9
541
542        assert_eq!(cov.last_window_at_or_before(9, 3), Some(7u32..=9u32));
543
544        // Move the end back by one; window should slide accordingly.
545        assert_eq!(cov.last_window_at_or_before(8, 3), Some(6u32..=8u32));
546
547        // Request longer than available -> None.
548        assert!(cov.last_window_at_or_before(9, 11).is_none());
549    }
550
551    #[test]
552    fn last_window_skips_over_gaps() {
553        // Coverage with a gap between 4 and 7.
554        let mut bm = bm_from_range(0, 5); // 0..=4
555        bm.extend((7u32..=10u32).collect::<RoaringBitmap>()); // 7..=10
556        let cov = Coverage::from_bitmap(bm);
557
558        // Highest contiguous run ends at 10.
559        assert_eq!(cov.last_window_at_or_before(10, 3), Some(8u32..=10u32));
560        assert_eq!(cov.last_window_at_or_before(10, 4), Some(7u32..=10u32));
561
562        // If the end is inside the gap, we fall back to the previous run.
563        assert_eq!(cov.last_window_at_or_before(6, 2), Some(3u32..=4u32));
564
565        // End inside the run but request longer than that run; should fall back
566        // to the earlier run if it satisfies the length.
567        assert_eq!(cov.last_window_at_or_before(9, 5), Some(0u32..=4u32));
568    }
569
570    #[test]
571    fn last_window_handles_len_zero_and_empty() {
572        let cov = Coverage::empty();
573        assert!(cov.last_window_at_or_before(100, 1).is_none());
574        assert!(cov.last_window_at_or_before(100, 0).is_none());
575    }
576}