Skip to main content

int_interval_set/
u8.rs

1#[cfg(test)]
2mod tests;
3
4use std::sync::Arc;
5
6use int_interval::U8CO;
7
8/// Read-only canonical interval set for `U8CO`.
9///
10/// A `U8COSet` stores a normalized collection of half-open `U8CO` intervals:
11///
12/// - intervals are sorted by `start`;
13/// - intervals are non-overlapping;
14/// - adjacent intervals are merged;
15/// - all queries are performed against the sealed immutable array.
16///
17/// Construction is intentionally restricted. Use `U8COSetBuilder` for normal
18/// construction.
19pub mod set {
20    use super::*;
21
22    /// Immutable canonical interval set.
23    ///
24    /// Internally this is an `Arc<[U8CO]>`, so cloning a `U8COSet` is cheap.
25    ///
26    /// Canonical invariant:
27    ///
28    /// ```text
29    /// for every adjacent pair a, b:
30    ///     a.end_excl() < b.start()
31    /// ```
32    ///
33    /// The strict `<` means both overlap and adjacency have already been merged.
34    #[repr(transparent)]
35    #[derive(Clone, Debug, Eq, PartialEq)]
36    pub struct U8COSet {
37        intervals: Arc<[U8CO]>,
38    }
39
40    // ------------------------------------------------------------
41    // basic api: construction / accessors
42    // ------------------------------------------------------------
43
44    mod basic {
45        use super::*;
46
47        impl U8COSet {
48            /// Builds a set from an already canonical interval vector.
49            ///
50            /// # Safety
51            ///
52            /// The caller must guarantee that `intervals` is canonical:
53            ///
54            /// - intervals are sorted by ascending `start`;
55            /// - intervals are non-overlapping;
56            /// - contiguous intervals have already been merged;
57            /// - therefore, for every adjacent pair `a, b`,
58            ///   `a.end_excl() < b.start()` holds.
59            ///
60            /// Violating this invariant can make binary-search based queries
61            /// return incorrect results.
62            #[inline]
63            pub(in crate::u8) unsafe fn new_unchecked(intervals: Vec<U8CO>) -> Self {
64                debug_assert!(Self::is_canonical(&intervals));
65
66                Self {
67                    intervals: Arc::from(intervals.into_boxed_slice()),
68                }
69            }
70
71            /// Checks the canonical invariant used by binary-search queries.
72            ///
73            /// `U8CO` itself already guarantees single-interval validity. This
74            /// function only checks the relationship between adjacent intervals.
75            #[inline]
76            fn is_canonical(intervals: &[U8CO]) -> bool {
77                intervals.windows(2).all(|w| w[0].end_excl() < w[1].start())
78            }
79        }
80
81        impl U8COSet {
82            /// Returns the number of canonical intervals.
83            ///
84            /// For the `U8CO` domain, the maximum canonical interval count is
85            /// 128, e.g. `[0, 1), [2, 3), ..., [254, 255)`.
86            #[inline]
87            pub fn interval_count(&self) -> u8 {
88                self.intervals.len() as u8
89            }
90
91            /// Returns whether the set contains no intervals.
92            #[inline]
93            pub fn is_empty(&self) -> bool {
94                self.intervals.is_empty()
95            }
96
97            /// Returns the canonical interval slice.
98            ///
99            /// The returned slice is sorted, non-overlapping, and contains no
100            /// adjacent intervals.
101            #[inline]
102            pub fn as_slice(&self) -> &[U8CO] {
103                &self.intervals
104            }
105
106            /// Iterates over canonical intervals by value.
107            #[inline]
108            pub fn iter_intervals(&self) -> impl Iterator<Item = U8CO> {
109                self.intervals.iter().copied()
110            }
111        }
112    }
113
114    // ------------------------------------------------------------
115    // predicate api: yes/no queries
116    // ------------------------------------------------------------
117
118    mod predicates {
119        use super::*;
120
121        impl U8COSet {
122            /// Returns whether `x` is covered by any interval in the set.
123            ///
124            /// Complexity: `O(log n)`.
125            #[inline]
126            pub fn contains_point(&self, x: u8) -> bool {
127                let i = self.intervals.partition_point(|iv| iv.start() <= x);
128                i != 0 && self.intervals[i - 1].contains(x)
129            }
130
131            /// Returns whether `query` is fully contained by one interval.
132            ///
133            /// Since the set is canonical, a contained query interval can only
134            /// be contained by the interval immediately preceding or starting
135            /// at `query.start()`.
136            ///
137            /// Complexity: `O(log n)`.
138            #[inline]
139            pub fn contains_interval(&self, query: U8CO) -> bool {
140                let i = self
141                    .intervals
142                    .partition_point(|iv| iv.start() <= query.start());
143
144                i != 0 && self.intervals[i - 1].contains_interval(query)
145            }
146
147            /// Returns whether `query` intersects any interval in the set.
148            ///
149            /// Complexity: `O(log n)`.
150            #[inline]
151            pub fn intersects_interval(&self, query: U8CO) -> bool {
152                let i = self
153                    .intervals
154                    .partition_point(|iv| iv.end_excl() <= query.start());
155
156                self.intervals.get(i).is_some_and(|iv| iv.intersects(query))
157            }
158        }
159    }
160
161    // ------------------------------------------------------------
162    // search api: returning matched intervals
163    // ------------------------------------------------------------
164
165    mod search {
166        use super::*;
167
168        /// Point-based search APIs.
169        mod point {
170            use super::*;
171
172            impl U8COSet {
173                /// Returns the unique interval containing `x`, if any.
174                ///
175                /// Because the set is canonical, at most one interval can
176                /// contain a single point.
177                ///
178                /// Complexity: `O(log n)`.
179                #[inline]
180                pub fn interval_containing_point(&self, x: u8) -> Option<U8CO> {
181                    let i = self.intervals.partition_point(|iv| iv.start() <= x);
182
183                    if i == 0 {
184                        return None;
185                    }
186
187                    let iv = self.intervals[i - 1];
188                    iv.contains(x).then_some(iv)
189                }
190            }
191        }
192
193        /// Interval-based search APIs.
194        mod interval {
195            use super::*;
196
197            impl U8COSet {
198                /// Iterates over all canonical intervals intersecting `query`.
199                ///
200                /// The iterator yields original intervals stored in the set,
201                /// not clipped intersection segments.
202                ///
203                /// Complexity: `O(log n + k)`, where `k` is the number of
204                /// returned intervals.
205                #[inline]
206                pub fn intervals_intersecting(&self, query: U8CO) -> impl Iterator<Item = U8CO> {
207                    let i = self
208                        .intervals
209                        .partition_point(|iv| iv.end_excl() <= query.start());
210
211                    self.intervals[i..]
212                        .iter()
213                        .copied()
214                        .take_while(move |iv| iv.start() < query.end_excl())
215                }
216
217                /// Iterates over clipped intersection segments with `query`.
218                ///
219                /// Example:
220                ///
221                /// ```text
222                /// set:   [10, 20), [30, 40)
223                /// query: [15, 35)
224                /// out:   [15, 20), [30, 35)
225                /// ```
226                ///
227                /// Complexity: `O(log n + k)`, where `k` is the number of
228                /// intersecting intervals.
229                #[inline]
230                pub fn intersections(&self, query: U8CO) -> impl Iterator<Item = U8CO> {
231                    self.intervals_intersecting(query)
232                        .filter_map(move |iv| iv.intersection(query))
233                }
234            }
235        }
236    }
237
238    // ------------------------------------------------------------
239    // coverage api: covered length / uncovered length / ratio
240    // ------------------------------------------------------------
241
242    mod coverage {
243        use super::*;
244
245        impl U8COSet {
246            /// Returns the covered length inside `query`.
247            ///
248            /// Since `U8COSet` is canonical, all intersection segments are
249            /// disjoint, so summing their lengths is valid.
250            ///
251            /// The result is always `<= query.len()`.
252            #[inline]
253            pub fn covered_len(&self, query: U8CO) -> u8 {
254                self.intersections(query).map(|iv| iv.len()).sum()
255            }
256
257            /// Returns the uncovered length inside `query`.
258            #[inline]
259            pub fn uncovered_len(&self, query: U8CO) -> u8 {
260                query.len() - self.covered_len(query)
261            }
262
263            /// Returns `covered_len(query) / query.len()` as `f32`.
264            ///
265            /// `query.len()` is non-zero because `U8CO` cannot represent an
266            /// empty interval.
267            #[inline]
268            pub fn coverage_ratio(&self, query: U8CO) -> f32 {
269                self.covered_len(query) as f32 / query.len() as f32
270            }
271        }
272    }
273}
274
275/// Concurrent builder for `U8COSet`.
276///
277/// The builder accepts concurrent inserts into a skip list. During `seal`,
278/// raw intervals are scanned in sorted order and merged into a canonical
279/// immutable `U8COSet`.
280pub mod builder {
281    use std::cmp::Ordering;
282
283    use crossbeam_skiplist::SkipSet;
284
285    use super::set::U8COSet;
286    use super::*;
287
288    /// Private ordering key for storing `U8CO` inside `SkipSet`.
289    ///
290    /// `U8CO` does not need to implement `Ord` in its own crate. This wrapper
291    /// defines the ordering locally:
292    ///
293    /// ```text
294    /// (start, end_excl)
295    /// ```
296    ///
297    /// Because this is a set key, duplicate identical intervals are naturally
298    /// deduplicated during the build phase. That is correct for interval-set
299    /// semantics.
300    #[repr(transparent)]
301    #[derive(Copy, Clone, Debug, Eq, PartialEq)]
302    struct Key(U8CO);
303
304    impl Ord for Key {
305        #[inline]
306        fn cmp(&self, other: &Self) -> Ordering {
307            self.0
308                .start()
309                .cmp(&other.0.start())
310                .then_with(|| self.0.end_excl().cmp(&other.0.end_excl()))
311        }
312    }
313
314    impl PartialOrd for Key {
315        #[inline]
316        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
317            Some(self.cmp(other))
318        }
319    }
320
321    /// Concurrent write-side builder for `U8COSet`.
322    ///
323    /// Insertions are expected `O(log n)` through `crossbeam_skiplist`.
324    /// No merging is performed during insertion; normalization happens once
325    /// in `seal`.
326    #[repr(transparent)]
327    #[derive(Debug, Default)]
328    pub struct U8COSetBuilder {
329        raw: SkipSet<Key>,
330    }
331
332    impl U8COSetBuilder {
333        /// Creates an empty builder.
334        #[inline]
335        pub fn new() -> Self {
336            Self::default()
337        }
338
339        /// Inserts one interval into the builder.
340        ///
341        /// This method is safe to call concurrently through shared references.
342        /// Identical intervals are deduplicated by the underlying `SkipSet`.
343        #[inline]
344        pub fn insert(&self, iv: U8CO) {
345            self.raw.insert(Key(iv));
346        }
347
348        /// Consumes the builder and returns a canonical immutable set.
349        ///
350        /// The merge process is linear over the sorted skip-list iterator:
351        ///
352        /// - maintain one pending interval `cur`;
353        /// - if the next interval overlaps or is adjacent, replace `cur` with
354        ///   its convex hull;
355        /// - otherwise, push `cur` and start a new pending interval;
356        /// - finally, push the last pending interval.
357        pub fn seal(self) -> U8COSet {
358            let mut iter = self.raw.iter().map(|entry| entry.value().0);
359
360            let Some(mut cur) = iter.next() else {
361                // SAFETY:
362                // The empty interval list is canonical.
363                return unsafe { U8COSet::new_unchecked(Vec::new()) };
364            };
365
366            let mut out = Vec::new();
367
368            for iv in iter {
369                if cur.is_contiguous_with(iv) {
370                    cur = cur.convex_hull(iv);
371                } else {
372                    out.push(cur);
373                    cur = iv;
374                }
375            }
376
377            out.push(cur);
378
379            // SAFETY:
380            // `self.raw` iterates keys in ascending `(start, end_excl)` order.
381            // The loop maintains one pending merged interval `cur`.
382            //
383            // If `iv` is contiguous with or overlaps `cur`, both are replaced
384            // by their convex hull, so no overlap or adjacency is emitted.
385            //
386            // If `iv` is disjoint from `cur`, then sorted order plus the failed
387            // contiguity test imply `cur.end_excl() < iv.start()`. Pushing
388            // `cur` therefore preserves the canonical invariant.
389            //
390            // After the loop, the final pending interval is pushed. Therefore
391            // `out` is sorted, non-overlapping, and contains no adjacent
392            // intervals.
393            unsafe { U8COSet::new_unchecked(out) }
394        }
395    }
396}