Skip to main content

provenant/license_detection/
position_set.rs

1use bit_set::BitSet;
2
3/// A set of usize positions stored as a BitSet.
4/// Provides O(1) membership testing and efficient set operations.
5/// Caches bounds for cheap overlap pre-checks.
6#[derive(Clone, Debug)]
7pub struct PositionSet {
8    bitset: BitSet,
9    min_pos: usize,
10    max_pos: usize,
11}
12
13impl PositionSet {
14    /// Create a PositionSet from an iterator of usize positions.
15    pub fn from_usize_iter<I: IntoIterator<Item = usize>>(iter: I) -> Self {
16        let mut bitset = BitSet::new();
17        let mut min_pos = usize::MAX;
18        let mut max_pos = 0;
19
20        for pos in iter {
21            bitset.insert(pos);
22            min_pos = min_pos.min(pos);
23            max_pos = max_pos.max(pos);
24        }
25
26        Self {
27            bitset,
28            min_pos,
29            max_pos,
30        }
31    }
32
33    /// Create an empty PositionSet.
34    pub fn new() -> Self {
35        Self {
36            bitset: BitSet::new(),
37            min_pos: usize::MAX,
38            max_pos: 0,
39        }
40    }
41
42    /// Number of positions in the set.
43    pub fn len(&self) -> usize {
44        self.bitset.count()
45    }
46
47    /// Is the set empty?
48    pub fn is_empty(&self) -> bool {
49        self.min_pos == usize::MAX
50    }
51
52    /// Insert a position.
53    pub fn insert(&mut self, pos: usize) -> bool {
54        let inserted = self.bitset.insert(pos);
55        if inserted {
56            self.min_pos = self.min_pos.min(pos);
57            self.max_pos = self.max_pos.max(pos);
58        }
59        inserted
60    }
61
62    /// Check if position is in the set.
63    pub fn contains(&self, pos: usize) -> bool {
64        self.bitset.contains(pos)
65    }
66
67    /// Quick check if a range [range_start, range_end) might overlap with this set.
68    /// Returns true if the bounding boxes overlap, false if they definitely don't.
69    /// This is O(1) and used as a pre-filter before the expensive BitSet check.
70    #[inline]
71    pub fn may_overlap_range(&self, range_start: usize, range_end: usize) -> bool {
72        // min_pos == usize::MAX means empty set (see new())
73        if self.min_pos == usize::MAX {
74            return false;
75        }
76        range_end > self.min_pos && range_start <= self.max_pos
77    }
78
79    /// Return the difference (elements in self but not in other).
80    pub fn difference(&self, other: &PositionSet) -> PositionSet {
81        let mut result = PositionSet::new();
82        for pos in self.bitset.iter() {
83            if !other.bitset.contains(pos) {
84                result.insert(pos);
85            }
86        }
87        result
88    }
89
90    /// Iterate over positions.
91    pub fn iter(&self) -> impl Iterator<Item = usize> + '_ {
92        self.bitset.iter()
93    }
94}
95
96impl Default for PositionSet {
97    fn default() -> Self {
98        Self::new()
99    }
100}
101
102impl std::iter::FromIterator<usize> for PositionSet {
103    fn from_iter<T: IntoIterator<Item = usize>>(iter: T) -> Self {
104        Self::from_usize_iter(iter)
105    }
106}
107
108#[cfg(test)]
109mod tests {
110    use super::*;
111
112    #[test]
113    fn test_new_empty() {
114        let set = PositionSet::new();
115        assert!(set.is_empty());
116        assert_eq!(set.len(), 0);
117    }
118
119    #[test]
120    fn test_from_usize_iter_sorted() {
121        let set = PositionSet::from_usize_iter(vec![1, 2, 3]);
122        assert_eq!(set.len(), 3);
123        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
124    }
125
126    #[test]
127    fn test_from_usize_iter_unsorted() {
128        let set = PositionSet::from_usize_iter(vec![3, 1, 2]);
129        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
130    }
131
132    #[test]
133    fn test_from_usize_iter_dedup() {
134        let set = PositionSet::from_usize_iter(vec![1, 2, 2, 3, 3, 3]);
135        assert_eq!(set.len(), 3);
136        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
137    }
138
139    #[test]
140    fn test_insert() {
141        let mut set = PositionSet::new();
142        assert!(set.insert(2));
143        assert!(set.insert(1));
144        assert!(set.insert(3));
145        assert!(!set.insert(2)); // Already present
146        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
147    }
148
149    #[test]
150    fn test_difference() {
151        let a = PositionSet::from_usize_iter(vec![1, 2, 3, 4]);
152        let b = PositionSet::from_usize_iter(vec![2, 4, 6]);
153        let diff = a.difference(&b);
154        assert_eq!(diff.iter().collect::<Vec<_>>(), vec![1, 3]);
155    }
156
157    #[test]
158    fn test_difference_empty() {
159        let a = PositionSet::from_usize_iter(vec![1, 2, 3]);
160        let b = PositionSet::new();
161        let diff = a.difference(&b);
162        assert_eq!(diff.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
163    }
164
165    #[test]
166    fn test_difference_all_overlap() {
167        let a = PositionSet::from_usize_iter(vec![1, 2, 3]);
168        let b = PositionSet::from_usize_iter(vec![1, 2, 3]);
169        let diff = a.difference(&b);
170        assert!(diff.is_empty());
171    }
172
173    #[test]
174    fn test_contains() {
175        let set = PositionSet::from_usize_iter(vec![1, 3, 5]);
176        assert!(set.contains(1));
177        assert!(set.contains(3));
178        assert!(set.contains(5));
179        assert!(!set.contains(0));
180        assert!(!set.contains(2));
181        assert!(!set.contains(4));
182    }
183
184    #[test]
185    fn test_collect() {
186        let set: PositionSet = vec![3, 1, 2].into_iter().collect();
187        assert_eq!(set.iter().collect::<Vec<_>>(), vec![1, 2, 3]);
188    }
189}