1use std::collections::HashMap;
10
11use crate::core::{DocId, NO_MORE_DOCS, Scorer, SegmentId, TwoPhaseIterator};
12use roaring::RoaringBitmap;
13
14#[derive(Clone, Debug, Default)]
16pub struct DeletionMap {
17 bitmaps: HashMap<SegmentId, RoaringBitmap>,
18}
19
20impl DeletionMap {
21 pub fn new() -> Self {
22 Self::default()
23 }
24
25 pub fn mark_deleted(&mut self, segment_id: SegmentId, doc_id: DocId) {
27 self.bitmaps
28 .entry(segment_id)
29 .or_default()
30 .insert(doc_id.as_u32());
31 }
32
33 pub fn is_deleted(&self, segment_id: SegmentId, doc_id: DocId) -> bool {
35 self.bitmaps
36 .get(&segment_id)
37 .is_some_and(|bm| bm.contains(doc_id.as_u32()))
38 }
39
40 pub fn bitmap(&self, segment_id: SegmentId) -> Option<&RoaringBitmap> {
42 self.bitmaps.get(&segment_id)
43 }
44
45 pub fn deleted_count(&self, segment_id: SegmentId) -> u64 {
47 self.bitmaps.get(&segment_id).map_or(0, |bm| bm.len())
48 }
49
50 pub fn to_bytes(&self) -> Vec<u8> {
52 let mut buf = Vec::new();
53 let count = self.bitmaps.len() as u32;
54 buf.extend_from_slice(&count.to_le_bytes());
55
56 for (&segment_id, bitmap) in &self.bitmaps {
57 buf.extend_from_slice(&segment_id.as_u64().to_le_bytes());
58 let mut bm_buf = Vec::new();
59 bitmap.serialize_into(&mut bm_buf).unwrap();
60 buf.extend_from_slice(&(bm_buf.len() as u32).to_le_bytes());
61 buf.extend_from_slice(&bm_buf);
62 }
63
64 buf
65 }
66
67 pub fn from_bytes(data: &[u8]) -> crate::core::Result<Self> {
69 if data.len() < 4 {
70 return Ok(Self::new());
71 }
72
73 let count = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize;
74 let mut pos = 4;
75 let mut bitmaps = HashMap::with_capacity(count);
76
77 for _ in 0..count {
78 let seg_id = SegmentId::new(u64::from_le_bytes(data[pos..pos + 8].try_into().unwrap()));
79 pos += 8;
80 let bm_len = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
81 pos += 4;
82 let bitmap =
83 RoaringBitmap::deserialize_from(&data[pos..pos + bm_len]).map_err(|e| {
84 crate::core::LuciError::IndexCorrupted(format!(
85 "failed to deserialize deletion bitmap: {e}"
86 ))
87 })?;
88 pos += bm_len;
89 bitmaps.insert(seg_id, bitmap);
90 }
91
92 Ok(Self { bitmaps })
93 }
94}
95
96pub struct FilteredScorer<S: Scorer> {
98 inner: S,
99 deleted: RoaringBitmap,
100}
101
102impl<S: Scorer> FilteredScorer<S> {
103 pub fn new(inner: S, deleted: RoaringBitmap) -> Self {
104 let mut s = Self { inner, deleted };
106 s.skip_deleted();
107 s
108 }
109
110 fn skip_deleted(&mut self) {
111 while self.inner.doc_id() != NO_MORE_DOCS
112 && self.deleted.contains(self.inner.doc_id().as_u32())
113 {
114 self.inner.next();
115 }
116 }
117}
118
119impl<S: Scorer> Scorer for FilteredScorer<S> {
120 fn doc_id(&self) -> DocId {
121 self.inner.doc_id()
122 }
123
124 fn next(&mut self) -> DocId {
125 self.inner.next();
126 self.skip_deleted();
127 self.inner.doc_id()
128 }
129
130 fn advance(&mut self, target: DocId) -> DocId {
131 self.inner.advance(target);
132 self.skip_deleted();
133 self.inner.doc_id()
134 }
135
136 fn score(&mut self) -> f32 {
137 self.inner.score()
138 }
139
140 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
141 self.inner.two_phase()
142 }
143
144 fn max_score(&self) -> f32 {
145 self.inner.max_score()
146 }
147}
148
149#[cfg(test)]
150mod tests {
151 use super::*;
152
153 #[test]
154 fn mark_and_check_deleted() {
155 let mut dm = DeletionMap::new();
156 let seg = SegmentId::new(1);
157
158 assert!(!dm.is_deleted(seg, DocId::new(0)));
159 dm.mark_deleted(seg, DocId::new(0));
160 assert!(dm.is_deleted(seg, DocId::new(0)));
161 assert!(!dm.is_deleted(seg, DocId::new(1)));
162 }
163
164 #[test]
165 fn deleted_count() {
166 let mut dm = DeletionMap::new();
167 let seg = SegmentId::new(1);
168
169 assert_eq!(dm.deleted_count(seg), 0);
170 dm.mark_deleted(seg, DocId::new(0));
171 dm.mark_deleted(seg, DocId::new(5));
172 dm.mark_deleted(seg, DocId::new(10));
173 assert_eq!(dm.deleted_count(seg), 3);
174 }
175
176 #[test]
177 fn multiple_segments() {
178 let mut dm = DeletionMap::new();
179 dm.mark_deleted(SegmentId::new(1), DocId::new(0));
180 dm.mark_deleted(SegmentId::new(2), DocId::new(5));
181
182 assert!(dm.is_deleted(SegmentId::new(1), DocId::new(0)));
183 assert!(!dm.is_deleted(SegmentId::new(1), DocId::new(5)));
184 assert!(!dm.is_deleted(SegmentId::new(2), DocId::new(0)));
185 assert!(dm.is_deleted(SegmentId::new(2), DocId::new(5)));
186 }
187
188 #[test]
189 fn serialization_round_trip() {
190 let mut dm = DeletionMap::new();
191 dm.mark_deleted(SegmentId::new(1), DocId::new(0));
192 dm.mark_deleted(SegmentId::new(1), DocId::new(100));
193 dm.mark_deleted(SegmentId::new(2), DocId::new(50));
194
195 let bytes = dm.to_bytes();
196 let dm2 = DeletionMap::from_bytes(&bytes).unwrap();
197
198 assert!(dm2.is_deleted(SegmentId::new(1), DocId::new(0)));
199 assert!(dm2.is_deleted(SegmentId::new(1), DocId::new(100)));
200 assert!(!dm2.is_deleted(SegmentId::new(1), DocId::new(50)));
201 assert!(dm2.is_deleted(SegmentId::new(2), DocId::new(50)));
202 }
203
204 #[test]
205 fn empty_serialization() {
206 let dm = DeletionMap::new();
207 let bytes = dm.to_bytes();
208 let dm2 = DeletionMap::from_bytes(&bytes).unwrap();
209 assert_eq!(dm2.deleted_count(SegmentId::new(1)), 0);
210 }
211
212 struct VecScorer {
216 docs: Vec<(DocId, f32)>,
217 pos: usize,
218 }
219
220 impl VecScorer {
221 fn new(docs: Vec<(DocId, f32)>) -> Self {
222 Self { docs, pos: 0 }
223 }
224
225 fn current(&self) -> (DocId, f32) {
226 if self.pos < self.docs.len() {
227 self.docs[self.pos]
228 } else {
229 (NO_MORE_DOCS, 0.0)
230 }
231 }
232 }
233
234 impl Scorer for VecScorer {
235 fn doc_id(&self) -> DocId {
236 self.current().0
237 }
238
239 fn next(&mut self) -> DocId {
240 if self.pos < self.docs.len() {
241 self.pos += 1;
242 }
243 self.current().0
244 }
245
246 fn advance(&mut self, target: DocId) -> DocId {
247 while self.pos < self.docs.len() && self.docs[self.pos].0 < target {
248 self.pos += 1;
249 }
250 self.current().0
251 }
252
253 fn score(&mut self) -> f32 {
254 self.current().1
255 }
256
257 fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
258 None
259 }
260 }
261
262 #[test]
263 fn filtered_scorer_skips_deleted() {
264 let scorer = VecScorer::new(vec![
265 (DocId::new(0), 1.0),
266 (DocId::new(1), 2.0),
267 (DocId::new(2), 3.0),
268 (DocId::new(3), 4.0),
269 ]);
270 let mut deleted = RoaringBitmap::new();
271 deleted.insert(0);
272 deleted.insert(2);
273
274 let mut filtered = FilteredScorer::new(scorer, deleted);
275
276 assert_eq!(filtered.doc_id(), DocId::new(1));
278 assert_eq!(filtered.score(), 2.0);
279
280 assert_eq!(filtered.next(), DocId::new(3));
282 assert_eq!(filtered.score(), 4.0);
283
284 assert_eq!(filtered.next(), NO_MORE_DOCS);
285 }
286
287 #[test]
288 fn filtered_scorer_advance_past_deleted() {
289 let scorer = VecScorer::new(vec![
290 (DocId::new(0), 1.0),
291 (DocId::new(5), 2.0),
292 (DocId::new(10), 3.0),
293 (DocId::new(15), 4.0),
294 ]);
295 let mut deleted = RoaringBitmap::new();
296 deleted.insert(5);
297 deleted.insert(10);
298
299 let mut filtered = FilteredScorer::new(scorer, deleted);
300
301 assert_eq!(filtered.doc_id(), DocId::new(0));
302 assert_eq!(filtered.advance(DocId::new(5)), DocId::new(15));
304 }
305
306 #[test]
307 fn filtered_scorer_all_deleted() {
308 let scorer = VecScorer::new(vec![(DocId::new(0), 1.0), (DocId::new(1), 2.0)]);
309 let mut deleted = RoaringBitmap::new();
310 deleted.insert(0);
311 deleted.insert(1);
312
313 let filtered = FilteredScorer::new(scorer, deleted);
314 assert_eq!(filtered.doc_id(), NO_MORE_DOCS);
315 }
316
317 #[test]
318 fn filtered_scorer_none_deleted() {
319 let scorer = VecScorer::new(vec![(DocId::new(0), 1.0), (DocId::new(1), 2.0)]);
320 let deleted = RoaringBitmap::new();
321
322 let mut filtered = FilteredScorer::new(scorer, deleted);
323 assert_eq!(filtered.doc_id(), DocId::new(0));
324 assert_eq!(filtered.next(), DocId::new(1));
325 assert_eq!(filtered.next(), NO_MORE_DOCS);
326 }
327}