Skip to main content

hermes_core/structures/postings/
positions.rs

1//! Position-aware posting list for phrase queries and multi-field element tracking
2//!
3//! Positions are encoded as: (element_ordinal << 20) | token_position
4//! This allows up to 4096 elements per field and ~1M tokens per element.
5//!
6//! ## Block Format
7//!
8//! Uses a block-based format with skip list for efficient binary search by doc_id:
9//! - Skip list enables O(log n) lookup by doc_id
10//! - Blocks of up to 128 documents for cache efficiency
11//! - Delta encoding within blocks for compression
12//! - Stackable for fast merge (just adjust doc_id_base per block)
13//!
14//! Format:
15//! ```text
16//! Header:
17//!   - doc_count: u32
18//!   - num_blocks: u32
19//!   - skip_list: [(base_doc_id, last_doc_id, byte_offset)] per block
20//!   - data_len: u32
21//! Data:
22//!   - blocks: each block contains delta-encoded postings with positions
23//! ```
24
25use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
26use std::io::{self, Read, Write};
27
28use crate::DocId;
29
30/// Block size for position posting list (same as regular posting list)
31pub const POSITION_BLOCK_SIZE: usize = 128;
32
33/// Maximum token position within an element (20 bits = 1,048,575)
34pub const MAX_TOKEN_POSITION: u32 = (1 << 20) - 1;
35
36/// Maximum element ordinal (12 bits = 4095)
37pub const MAX_ELEMENT_ORDINAL: u32 = (1 << 12) - 1;
38
39/// Encode element ordinal and token position into a single u32
40#[inline]
41pub fn encode_position(element_ordinal: u32, token_position: u32) -> u32 {
42    debug_assert!(
43        element_ordinal <= MAX_ELEMENT_ORDINAL,
44        "Element ordinal {} exceeds maximum {}",
45        element_ordinal,
46        MAX_ELEMENT_ORDINAL
47    );
48    debug_assert!(
49        token_position <= MAX_TOKEN_POSITION,
50        "Token position {} exceeds maximum {}",
51        token_position,
52        MAX_TOKEN_POSITION
53    );
54    (element_ordinal << 20) | (token_position & MAX_TOKEN_POSITION)
55}
56
57/// Decode element ordinal from encoded position
58#[inline]
59pub fn decode_element_ordinal(position: u32) -> u32 {
60    position >> 20
61}
62
63/// Decode token position from encoded position
64#[inline]
65pub fn decode_token_position(position: u32) -> u32 {
66    position & MAX_TOKEN_POSITION
67}
68
69/// A posting entry with positions (used during building)
70#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct PostingWithPositions {
72    pub doc_id: DocId,
73    pub term_freq: u32,
74    /// Encoded positions: (element_ordinal << 20) | token_position
75    pub positions: Vec<u32>,
76}
77
78/// Block-based position posting list with skip list for O(log n) doc_id lookup
79///
80/// Similar to BlockPostingList but stores positions per document.
81/// Uses binary search on skip list to find the right block, then linear scan within block.
82#[derive(Debug, Clone)]
83pub struct PositionPostingList {
84    /// Skip list: (base_doc_id, last_doc_id, byte_offset)
85    /// Enables binary search to find the right block
86    skip_list: Vec<(DocId, DocId, u32)>,
87    /// Compressed block data
88    data: Vec<u8>,
89    /// Total document count
90    doc_count: u32,
91}
92
93impl Default for PositionPostingList {
94    fn default() -> Self {
95        Self::new()
96    }
97}
98
99impl PositionPostingList {
100    pub fn new() -> Self {
101        Self {
102            skip_list: Vec::new(),
103            data: Vec::new(),
104            doc_count: 0,
105        }
106    }
107
108    pub fn with_capacity(capacity: usize) -> Self {
109        Self {
110            skip_list: Vec::with_capacity(capacity / POSITION_BLOCK_SIZE + 1),
111            data: Vec::with_capacity(capacity * 8), // rough estimate
112            doc_count: 0,
113        }
114    }
115
116    /// Build from a list of postings with positions
117    pub fn from_postings(postings: &[PostingWithPositions]) -> io::Result<Self> {
118        if postings.is_empty() {
119            return Ok(Self::new());
120        }
121
122        let mut skip_list = Vec::new();
123        let mut data = Vec::new();
124        let mut i = 0;
125
126        while i < postings.len() {
127            let block_start = data.len() as u32;
128            let block_end = (i + POSITION_BLOCK_SIZE).min(postings.len());
129            let block = &postings[i..block_end];
130
131            // Record skip entry
132            let base_doc_id = block.first().unwrap().doc_id;
133            let last_doc_id = block.last().unwrap().doc_id;
134            skip_list.push((base_doc_id, last_doc_id, block_start));
135
136            // Write block
137            write_vint(&mut data, block.len() as u64)?;
138
139            let mut prev_doc_id = base_doc_id;
140            for (j, posting) in block.iter().enumerate() {
141                if j == 0 {
142                    // First doc in block: store absolute
143                    write_vint(&mut data, posting.doc_id as u64)?;
144                } else {
145                    // Rest: store delta
146                    let delta = posting.doc_id - prev_doc_id;
147                    write_vint(&mut data, delta as u64)?;
148                }
149                prev_doc_id = posting.doc_id;
150
151                // Write positions count and positions (absolute - delta bad for ordinal<<20)
152                write_vint(&mut data, posting.positions.len() as u64)?;
153                for &pos in &posting.positions {
154                    write_vint(&mut data, pos as u64)?;
155                }
156            }
157
158            i = block_end;
159        }
160
161        Ok(Self {
162            skip_list,
163            data,
164            doc_count: postings.len() as u32,
165        })
166    }
167
168    /// Add a posting with positions (for building - converts to block format on serialize)
169    pub fn push(&mut self, doc_id: DocId, positions: Vec<u32>) {
170        // For compatibility: build in-memory, convert to blocks on serialize
171        // This is a simplified approach - we rebuild skip_list on serialize
172        let posting = PostingWithPositions {
173            doc_id,
174            term_freq: positions.len() as u32,
175            positions,
176        };
177
178        // Serialize this posting to data buffer
179        let block_start = self.data.len() as u32;
180
181        // If this is first posting or we need a new block
182        let need_new_block =
183            self.skip_list.is_empty() || self.doc_count.is_multiple_of(POSITION_BLOCK_SIZE as u32);
184
185        if need_new_block {
186            // Start new block
187            self.skip_list.push((doc_id, doc_id, block_start));
188            // Write count placeholder (will be 1 initially)
189            write_vint(&mut self.data, 1u64).unwrap();
190            // Write absolute doc_id for first in block
191            write_vint(&mut self.data, doc_id as u64).unwrap();
192        } else {
193            // Add to existing block - need to rewrite block count and add delta
194            let last_block = self.skip_list.last_mut().unwrap();
195            let prev_doc_id = last_block.1;
196            last_block.1 = doc_id; // Update last_doc_id
197
198            // Write delta doc_id
199            let delta = doc_id - prev_doc_id;
200            write_vint(&mut self.data, delta as u64).unwrap();
201        }
202
203        // Write positions (absolute - delta encoding bad for ordinal<<20)
204        write_vint(&mut self.data, posting.positions.len() as u64).unwrap();
205        for &pos in &posting.positions {
206            write_vint(&mut self.data, pos as u64).unwrap();
207        }
208
209        self.doc_count += 1;
210    }
211
212    pub fn doc_count(&self) -> u32 {
213        self.doc_count
214    }
215
216    pub fn len(&self) -> usize {
217        self.doc_count as usize
218    }
219
220    pub fn is_empty(&self) -> bool {
221        self.doc_count == 0
222    }
223
224    /// Get positions for a specific document using binary search on skip list
225    pub fn get_positions(&self, target_doc_id: DocId) -> Option<Vec<u32>> {
226        if self.skip_list.is_empty() {
227            return None;
228        }
229
230        // Binary search on skip list to find the right block
231        let block_idx = match self.skip_list.binary_search_by(|&(base, last, _)| {
232            if target_doc_id < base {
233                std::cmp::Ordering::Greater
234            } else if target_doc_id > last {
235                std::cmp::Ordering::Less
236            } else {
237                std::cmp::Ordering::Equal
238            }
239        }) {
240            Ok(idx) => idx,
241            Err(_) => return None, // doc_id not in any block range
242        };
243
244        // Decode block and search for doc_id
245        let offset = self.skip_list[block_idx].2 as usize;
246        let mut reader = &self.data[offset..];
247
248        let count = read_vint(&mut reader).ok()? as usize;
249        let mut prev_doc_id = 0u32;
250
251        for i in 0..count {
252            let doc_id = if i == 0 {
253                read_vint(&mut reader).ok()? as u32
254            } else {
255                let delta = read_vint(&mut reader).ok()? as u32;
256                prev_doc_id + delta
257            };
258            prev_doc_id = doc_id;
259
260            let num_positions = read_vint(&mut reader).ok()? as usize;
261
262            if doc_id == target_doc_id {
263                // Found it! Read positions (stored absolute)
264                let mut positions = Vec::with_capacity(num_positions);
265                for _ in 0..num_positions {
266                    let pos = read_vint(&mut reader).ok()? as u32;
267                    positions.push(pos);
268                }
269                return Some(positions);
270            } else {
271                // Skip positions
272                for _ in 0..num_positions {
273                    let _ = read_vint(&mut reader);
274                }
275            }
276        }
277
278        None
279    }
280
281    /// Serialize to bytes with block format
282    pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
283        // Write doc count
284        writer.write_u32::<LittleEndian>(self.doc_count)?;
285
286        // Write skip list
287        writer.write_u32::<LittleEndian>(self.skip_list.len() as u32)?;
288        for (base_doc_id, last_doc_id, offset) in &self.skip_list {
289            writer.write_u32::<LittleEndian>(*base_doc_id)?;
290            writer.write_u32::<LittleEndian>(*last_doc_id)?;
291            writer.write_u32::<LittleEndian>(*offset)?;
292        }
293
294        // Write data
295        writer.write_u32::<LittleEndian>(self.data.len() as u32)?;
296        writer.write_all(&self.data)?;
297
298        Ok(())
299    }
300
301    /// Deserialize from bytes
302    pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
303        let doc_count = reader.read_u32::<LittleEndian>()?;
304
305        let skip_count = reader.read_u32::<LittleEndian>()? as usize;
306        let mut skip_list = Vec::with_capacity(skip_count);
307        for _ in 0..skip_count {
308            let base_doc_id = reader.read_u32::<LittleEndian>()?;
309            let last_doc_id = reader.read_u32::<LittleEndian>()?;
310            let offset = reader.read_u32::<LittleEndian>()?;
311            skip_list.push((base_doc_id, last_doc_id, offset));
312        }
313
314        let data_len = reader.read_u32::<LittleEndian>()? as usize;
315        let mut data = vec![0u8; data_len];
316        reader.read_exact(&mut data)?;
317
318        Ok(Self {
319            skip_list,
320            data,
321            doc_count,
322        })
323    }
324
325    /// Concatenate blocks from multiple position lists with doc_id remapping (for merge)
326    pub fn concatenate_blocks(sources: &[(PositionPostingList, u32)]) -> io::Result<Self> {
327        let mut skip_list = Vec::new();
328        let mut data = Vec::new();
329        let mut total_docs = 0u32;
330
331        for (source, doc_offset) in sources {
332            for block_idx in 0..source.skip_list.len() {
333                let (base, last, src_offset) = source.skip_list[block_idx];
334                let next_offset = if block_idx + 1 < source.skip_list.len() {
335                    source.skip_list[block_idx + 1].2 as usize
336                } else {
337                    source.data.len()
338                };
339
340                let new_base = base + doc_offset;
341                let new_last = last + doc_offset;
342                let new_offset = data.len() as u32;
343
344                // Copy and adjust block data
345                let block_bytes = &source.data[src_offset as usize..next_offset];
346                let mut reader = block_bytes;
347
348                let count = read_vint(&mut reader)? as usize;
349                write_vint(&mut data, count as u64)?;
350
351                // Adjust first doc_id
352                let first_doc = read_vint(&mut reader)? as u32;
353                write_vint(&mut data, (first_doc + doc_offset) as u64)?;
354
355                // Copy remaining data unchanged
356                data.extend_from_slice(reader);
357
358                skip_list.push((new_base, new_last, new_offset));
359                total_docs += count as u32;
360            }
361        }
362
363        Ok(Self {
364            skip_list,
365            data,
366            doc_count: total_docs,
367        })
368    }
369
370    /// Get iterator over all postings (for phrase queries)
371    pub fn iter(&self) -> PositionPostingIterator<'_> {
372        PositionPostingIterator::new(self)
373    }
374}
375
376/// Write variable-length integer (same as posting.rs)
377fn write_vint<W: Write>(writer: &mut W, mut value: u64) -> io::Result<()> {
378    loop {
379        let byte = (value & 0x7F) as u8;
380        value >>= 7;
381        if value == 0 {
382            writer.write_u8(byte)?;
383            break;
384        } else {
385            writer.write_u8(byte | 0x80)?;
386        }
387    }
388    Ok(())
389}
390
391/// Read variable-length integer (same as posting.rs)
392fn read_vint<R: Read>(reader: &mut R) -> io::Result<u64> {
393    let mut result = 0u64;
394    let mut shift = 0;
395    loop {
396        let byte = reader.read_u8()?;
397        result |= ((byte & 0x7F) as u64) << shift;
398        if byte & 0x80 == 0 {
399            break;
400        }
401        shift += 7;
402    }
403    Ok(result)
404}
405
406/// Iterator over block-based position posting list
407pub struct PositionPostingIterator<'a> {
408    list: &'a PositionPostingList,
409    current_block: usize,
410    position_in_block: usize,
411    block_postings: Vec<PostingWithPositions>,
412    exhausted: bool,
413}
414
415impl<'a> PositionPostingIterator<'a> {
416    pub fn new(list: &'a PositionPostingList) -> Self {
417        let exhausted = list.skip_list.is_empty();
418        let mut iter = Self {
419            list,
420            current_block: 0,
421            position_in_block: 0,
422            block_postings: Vec::new(),
423            exhausted,
424        };
425        if !iter.exhausted {
426            iter.load_block(0);
427        }
428        iter
429    }
430
431    fn load_block(&mut self, block_idx: usize) {
432        if block_idx >= self.list.skip_list.len() {
433            self.exhausted = true;
434            return;
435        }
436
437        self.current_block = block_idx;
438        self.position_in_block = 0;
439
440        let offset = self.list.skip_list[block_idx].2 as usize;
441        let mut reader = &self.list.data[offset..];
442
443        let count = read_vint(&mut reader).unwrap_or(0) as usize;
444        self.block_postings.clear();
445        self.block_postings.reserve(count);
446
447        let mut prev_doc_id = 0u32;
448
449        for i in 0..count {
450            let doc_id = if i == 0 {
451                read_vint(&mut reader).unwrap_or(0) as u32
452            } else {
453                let delta = read_vint(&mut reader).unwrap_or(0) as u32;
454                prev_doc_id + delta
455            };
456            prev_doc_id = doc_id;
457
458            let num_positions = read_vint(&mut reader).unwrap_or(0) as usize;
459            let mut positions = Vec::with_capacity(num_positions);
460            for _ in 0..num_positions {
461                let pos = read_vint(&mut reader).unwrap_or(0) as u32;
462                positions.push(pos);
463            }
464
465            self.block_postings.push(PostingWithPositions {
466                doc_id,
467                term_freq: num_positions as u32,
468                positions,
469            });
470        }
471    }
472
473    pub fn doc(&self) -> DocId {
474        if self.exhausted || self.position_in_block >= self.block_postings.len() {
475            u32::MAX
476        } else {
477            self.block_postings[self.position_in_block].doc_id
478        }
479    }
480
481    pub fn term_freq(&self) -> u32 {
482        if self.exhausted || self.position_in_block >= self.block_postings.len() {
483            0
484        } else {
485            self.block_postings[self.position_in_block].term_freq
486        }
487    }
488
489    pub fn positions(&self) -> &[u32] {
490        if self.exhausted || self.position_in_block >= self.block_postings.len() {
491            &[]
492        } else {
493            &self.block_postings[self.position_in_block].positions
494        }
495    }
496
497    pub fn advance(&mut self) {
498        if self.exhausted {
499            return;
500        }
501
502        self.position_in_block += 1;
503        if self.position_in_block >= self.block_postings.len() {
504            self.load_block(self.current_block + 1);
505        }
506    }
507
508    pub fn seek(&mut self, target: DocId) {
509        if self.exhausted {
510            return;
511        }
512
513        // Check if target is in current block
514        if let Some((_, last, _)) = self.list.skip_list.get(self.current_block)
515            && target <= *last
516        {
517            // Target might be in current block, scan forward
518            while self.position_in_block < self.block_postings.len()
519                && self.block_postings[self.position_in_block].doc_id < target
520            {
521                self.position_in_block += 1;
522            }
523            if self.position_in_block >= self.block_postings.len() {
524                self.load_block(self.current_block + 1);
525                self.seek(target); // Continue seeking in next block
526            }
527            return;
528        }
529
530        // Binary search on skip list to find the right block
531        let block_idx = match self.list.skip_list.binary_search_by(|&(base, last, _)| {
532            if target < base {
533                std::cmp::Ordering::Greater
534            } else if target > last {
535                std::cmp::Ordering::Less
536            } else {
537                std::cmp::Ordering::Equal
538            }
539        }) {
540            Ok(idx) => idx,
541            Err(idx) => idx, // Use the next block if not found exactly
542        };
543
544        if block_idx >= self.list.skip_list.len() {
545            self.exhausted = true;
546            return;
547        }
548
549        self.load_block(block_idx);
550
551        // Linear scan within block
552        while self.position_in_block < self.block_postings.len()
553            && self.block_postings[self.position_in_block].doc_id < target
554        {
555            self.position_in_block += 1;
556        }
557
558        if self.position_in_block >= self.block_postings.len() {
559            self.load_block(self.current_block + 1);
560        }
561    }
562}
563
564#[cfg(test)]
565mod tests {
566    use super::*;
567
568    #[test]
569    fn test_position_encoding() {
570        // Element 0, position 5
571        let pos = encode_position(0, 5);
572        assert_eq!(decode_element_ordinal(pos), 0);
573        assert_eq!(decode_token_position(pos), 5);
574
575        // Element 3, position 100
576        let pos = encode_position(3, 100);
577        assert_eq!(decode_element_ordinal(pos), 3);
578        assert_eq!(decode_token_position(pos), 100);
579
580        // Max values
581        let pos = encode_position(MAX_ELEMENT_ORDINAL, MAX_TOKEN_POSITION);
582        assert_eq!(decode_element_ordinal(pos), MAX_ELEMENT_ORDINAL);
583        assert_eq!(decode_token_position(pos), MAX_TOKEN_POSITION);
584    }
585
586    #[test]
587    fn test_position_posting_list_build() {
588        // Build from postings
589        let postings = vec![
590            PostingWithPositions {
591                doc_id: 1,
592                term_freq: 2,
593                positions: vec![encode_position(0, 0), encode_position(0, 2)],
594            },
595            PostingWithPositions {
596                doc_id: 3,
597                term_freq: 1,
598                positions: vec![encode_position(1, 0)],
599            },
600        ];
601
602        let list = PositionPostingList::from_postings(&postings).unwrap();
603        assert_eq!(list.doc_count(), 2);
604
605        // Test binary search
606        let pos = list.get_positions(1).unwrap();
607        assert_eq!(pos.len(), 2);
608
609        let pos = list.get_positions(3).unwrap();
610        assert_eq!(pos.len(), 1);
611
612        // Not found
613        assert!(list.get_positions(2).is_none());
614        assert!(list.get_positions(99).is_none());
615    }
616
617    #[test]
618    fn test_serialization_roundtrip() {
619        let postings = vec![
620            PostingWithPositions {
621                doc_id: 1,
622                term_freq: 2,
623                positions: vec![encode_position(0, 0), encode_position(0, 5)],
624            },
625            PostingWithPositions {
626                doc_id: 3,
627                term_freq: 1,
628                positions: vec![encode_position(1, 0)],
629            },
630            PostingWithPositions {
631                doc_id: 5,
632                term_freq: 1,
633                positions: vec![encode_position(0, 10)],
634            },
635        ];
636
637        let list = PositionPostingList::from_postings(&postings).unwrap();
638
639        let mut bytes = Vec::new();
640        list.serialize(&mut bytes).unwrap();
641
642        let mut cursor = std::io::Cursor::new(&bytes);
643        let deserialized = PositionPostingList::deserialize(&mut cursor).unwrap();
644
645        assert_eq!(list.doc_count(), deserialized.doc_count());
646
647        // Verify binary search works on deserialized
648        let pos = deserialized.get_positions(1).unwrap();
649        assert_eq!(pos, vec![encode_position(0, 0), encode_position(0, 5)]);
650
651        let pos = deserialized.get_positions(3).unwrap();
652        assert_eq!(pos, vec![encode_position(1, 0)]);
653    }
654
655    #[test]
656    fn test_binary_search_many_blocks() {
657        // Create enough postings to span multiple blocks (128 per block)
658        let mut postings = Vec::new();
659        for i in 0..300 {
660            postings.push(PostingWithPositions {
661                doc_id: i * 2, // doc_ids: 0, 2, 4, 6, ...
662                term_freq: 1,
663                positions: vec![encode_position(0, i)],
664            });
665        }
666
667        let list = PositionPostingList::from_postings(&postings).unwrap();
668        assert_eq!(list.doc_count(), 300);
669
670        // Should have 3 blocks (128 + 128 + 44)
671        assert_eq!(list.skip_list.len(), 3);
672
673        // Test binary search across blocks
674        let pos = list.get_positions(0).unwrap();
675        assert_eq!(pos, vec![encode_position(0, 0)]);
676
677        let pos = list.get_positions(256).unwrap(); // doc 128 in block 1
678        assert_eq!(pos, vec![encode_position(0, 128)]);
679
680        let pos = list.get_positions(598).unwrap(); // last doc
681        assert_eq!(pos, vec![encode_position(0, 299)]);
682
683        // Not found (odd numbers don't exist)
684        assert!(list.get_positions(1).is_none());
685        assert!(list.get_positions(257).is_none());
686    }
687
688    #[test]
689    fn test_concatenate_blocks_merge() {
690        // Test efficient merge by concatenating blocks
691        let postings1 = vec![
692            PostingWithPositions {
693                doc_id: 0,
694                term_freq: 1,
695                positions: vec![0],
696            },
697            PostingWithPositions {
698                doc_id: 1,
699                term_freq: 1,
700                positions: vec![5],
701            },
702            PostingWithPositions {
703                doc_id: 2,
704                term_freq: 1,
705                positions: vec![10],
706            },
707        ];
708        let list1 = PositionPostingList::from_postings(&postings1).unwrap();
709
710        let postings2 = vec![
711            PostingWithPositions {
712                doc_id: 0,
713                term_freq: 1,
714                positions: vec![100],
715            },
716            PostingWithPositions {
717                doc_id: 1,
718                term_freq: 1,
719                positions: vec![105],
720            },
721        ];
722        let list2 = PositionPostingList::from_postings(&postings2).unwrap();
723
724        // Merge with doc_id offset
725        let combined = PositionPostingList::concatenate_blocks(&[
726            (list1, 0), // no offset
727            (list2, 3), // offset by 3 (list1 has 3 docs)
728        ])
729        .unwrap();
730
731        assert_eq!(combined.doc_count(), 5);
732
733        // Check doc_ids are correctly remapped
734        assert!(combined.get_positions(0).is_some());
735        assert!(combined.get_positions(1).is_some());
736        assert!(combined.get_positions(2).is_some());
737        assert!(combined.get_positions(3).is_some()); // was 0 in list2
738        assert!(combined.get_positions(4).is_some()); // was 1 in list2
739    }
740
741    #[test]
742    fn test_iterator() {
743        let postings = vec![
744            PostingWithPositions {
745                doc_id: 1,
746                term_freq: 2,
747                positions: vec![0, 5],
748            },
749            PostingWithPositions {
750                doc_id: 3,
751                term_freq: 1,
752                positions: vec![10],
753            },
754            PostingWithPositions {
755                doc_id: 5,
756                term_freq: 1,
757                positions: vec![15],
758            },
759        ];
760
761        let list = PositionPostingList::from_postings(&postings).unwrap();
762        let mut iter = list.iter();
763
764        assert_eq!(iter.doc(), 1);
765        assert_eq!(iter.positions(), &[0, 5]);
766
767        iter.advance();
768        assert_eq!(iter.doc(), 3);
769
770        iter.seek(5);
771        assert_eq!(iter.doc(), 5);
772        assert_eq!(iter.positions(), &[15]);
773
774        iter.advance();
775        assert_eq!(iter.doc(), u32::MAX); // exhausted
776    }
777}