1use std::collections::BTreeMap;
20
21use roaring::RoaringBitmap;
22use serde::{Deserialize, Serialize};
23
24#[derive(Debug, Default, Clone, Serialize, Deserialize)]
26pub struct Postings {
27 map: BTreeMap<u64, RoaringBitmap>,
28}
29
30impl Postings {
31 #[must_use]
33 pub fn new() -> Self {
34 Self {
35 map: BTreeMap::new(),
36 }
37 }
38
39 #[must_use]
41 pub fn len(&self) -> usize {
42 self.map.len()
43 }
44
45 #[must_use]
47 pub fn is_empty(&self) -> bool {
48 self.map.is_empty()
49 }
50
51 pub fn insert(&mut self, trigram: u64, doc_id: u32) {
56 self.map.entry(trigram).or_default().insert(doc_id);
57 }
58
59 pub fn remove(&mut self, trigram: u64, doc_id: u32) {
64 let drop = match self.map.get_mut(&trigram) {
65 Some(bm) => {
66 bm.remove(doc_id);
67 bm.is_empty()
68 }
69 None => false,
70 };
71 if drop {
72 self.map.remove(&trigram);
73 }
74 }
75
76 #[must_use]
78 pub fn lookup(&self, trigram: u64) -> Option<&RoaringBitmap> {
79 self.map.get(&trigram)
80 }
81
82 #[must_use]
97 pub fn intersect(&self, trigrams: &[u64]) -> RoaringBitmap {
98 if trigrams.is_empty() {
99 return RoaringBitmap::new();
100 }
101 let mut order: Vec<u64> = trigrams.to_vec();
102 order.sort_unstable();
103 order.dedup();
104 order.sort_by_key(|t| self.map.get(t).map_or(0, RoaringBitmap::len));
105
106 let mut iter = order.into_iter();
107 let Some(first_trigram) = iter.next() else {
108 return RoaringBitmap::new();
109 };
110 let mut acc = match self.map.get(&first_trigram) {
111 Some(b) => b.clone(),
112 None => return RoaringBitmap::new(),
113 };
114 for t in iter {
115 if acc.is_empty() {
116 return acc;
117 }
118 match self.map.get(&t) {
119 Some(b) => {
120 acc &= b;
121 }
122 None => return RoaringBitmap::new(),
123 }
124 }
125 acc
126 }
127
128 #[must_use]
134 pub fn union(&self, trigrams: &[u64]) -> RoaringBitmap {
135 let mut acc = RoaringBitmap::new();
136 let mut seen: Vec<u64> = trigrams.to_vec();
137 seen.sort_unstable();
138 seen.dedup();
139 for t in seen {
140 if let Some(b) = self.map.get(&t) {
141 acc |= b;
142 }
143 }
144 acc
145 }
146}
147
148#[cfg(test)]
149mod tests {
150 use super::*;
151
152 #[test]
153 fn postings_insert_and_lookup() {
154 let mut p = Postings::new();
155 p.insert(7, 1);
156 p.insert(7, 2);
157 p.insert(8, 2);
158 let bm = p.lookup(7).expect("trigram 7 present");
159 assert!(bm.contains(1));
160 assert!(bm.contains(2));
161 let bm = p.lookup(8).expect("trigram 8 present");
162 assert!(bm.contains(2));
163 assert!(!bm.contains(1));
164 assert_eq!(p.len(), 2);
165 }
166
167 #[test]
168 fn postings_lookup_missing_returns_none() {
169 let p = Postings::new();
170 assert!(p.lookup(99).is_none());
171 }
172
173 #[test]
174 fn postings_intersection_of_two_trigrams_yields_intersection() {
175 let mut p = Postings::new();
176 p.insert(1, 10);
177 p.insert(1, 20);
178 p.insert(1, 30);
179 p.insert(2, 20);
180 p.insert(2, 30);
181 p.insert(2, 40);
182 let r = p.intersect(&[1, 2]);
183 assert!(r.contains(20));
184 assert!(r.contains(30));
185 assert!(!r.contains(10));
186 assert!(!r.contains(40));
187 assert_eq!(r.len(), 2);
188 }
189
190 #[test]
191 fn postings_intersection_of_disjoint_trigrams_is_empty() {
192 let mut p = Postings::new();
193 p.insert(1, 10);
194 p.insert(1, 20);
195 p.insert(2, 30);
196 p.insert(2, 40);
197 let r = p.intersect(&[1, 2]);
198 assert!(r.is_empty());
199 }
200
201 #[test]
202 fn postings_intersection_with_missing_trigram_is_empty() {
203 let mut p = Postings::new();
204 p.insert(1, 10);
205 let r = p.intersect(&[1, 999]);
206 assert!(r.is_empty());
207 }
208
209 #[test]
210 fn postings_intersection_empty_input_is_empty() {
211 let p = Postings::new();
212 assert!(p.intersect(&[]).is_empty());
213 }
214
215 #[test]
216 fn postings_intersection_single_trigram_returns_that_list() {
217 let mut p = Postings::new();
218 p.insert(1, 10);
219 p.insert(1, 20);
220 let r = p.intersect(&[1]);
221 assert_eq!(r.len(), 2);
222 }
223
224 #[test]
225 fn postings_union_basic() {
226 let mut p = Postings::new();
227 p.insert(1, 10);
228 p.insert(2, 20);
229 p.insert(3, 30);
230 let r = p.union(&[1, 2]);
231 assert!(r.contains(10));
232 assert!(r.contains(20));
233 assert!(!r.contains(30));
234 }
235
236 #[test]
237 fn postings_remove_clears_doc_id_from_one_trigram() {
238 let mut p = Postings::new();
239 p.insert(1, 10);
240 p.insert(1, 20);
241 p.remove(1, 10);
242 let bm = p.lookup(1).expect("trigram 1 still present");
243 assert!(!bm.contains(10));
244 assert!(bm.contains(20));
245 }
246
247 #[test]
248 fn postings_dropping_a_trigram_when_no_docs_left_garbage_collects() {
249 let mut p = Postings::new();
250 p.insert(1, 10);
251 assert_eq!(p.len(), 1);
252 p.remove(1, 10);
253 assert_eq!(p.len(), 0);
254 assert!(p.lookup(1).is_none());
255 }
256
257 #[test]
258 fn postings_remove_missing_is_a_noop() {
259 let mut p = Postings::new();
260 p.insert(1, 10);
261 p.remove(1, 99);
262 p.remove(2, 10);
263 let bm = p.lookup(1).expect("trigram 1 still present");
264 assert!(bm.contains(10));
265 }
266}