hermes_core/structures/postings/
posting_format.rs

1//! Unified posting list format with automatic compression selection
2//!
3//! Automatically selects the best compression method based on posting list characteristics:
4//! - **Inline**: 1-3 postings stored directly in TermInfo (no separate I/O)
5//! - **HorizontalBP128**: Horizontal layout with SIMD (fastest encoding + decoding)
6//! - **Partitioned EF**: Best compression for large lists (>20K)
7//! - **Roaring**: Bitmap-based for very frequent terms (>1% of corpus), fastest iteration
8//!
9//! Supports three optimization modes:
10//! - **Adaptive**: Balanced compression/speed (default, zstd level 7)
11//! - **SizeOptimized**: Best compression ratio (OptP4D + zstd level 22)
12//! - **PerformanceOptimized**: Fastest decoding (Roaring + zstd level 3)
13
14use byteorder::{ReadBytesExt, WriteBytesExt};
15use std::io::{self, Read, Write};
16
17use super::elias_fano::EliasFanoPostingList;
18use super::horizontal_bp128::HorizontalBP128PostingList;
19use super::opt_p4d::OptP4DPostingList;
20use super::partitioned_ef::PartitionedEFPostingList;
21use super::roaring::RoaringPostingList;
22use super::vertical_bp128::VerticalBP128PostingList;
23
24/// Thresholds for format selection (based on benchmarks)
25pub const INLINE_THRESHOLD: usize = 3;
26pub const ROARING_THRESHOLD_RATIO: f32 = 0.01; // 1% of corpus
27/// Threshold for using Partitioned EF (best compression)
28pub const PARTITIONED_EF_THRESHOLD: usize = 20_000;
29
30/// Index optimization mode for balancing compression ratio vs speed
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
32pub enum IndexOptimization {
33    /// Balanced compression and speed (zstd level 7)
34    /// Uses adaptive format selection based on posting list characteristics
35    #[default]
36    Adaptive,
37    /// Optimize for smallest index size (zstd level 22)
38    /// Prefers OptP4D for best compression ratio (optimal bit-width + patched exceptions)
39    SizeOptimized,
40    /// Optimize for fastest query performance (zstd level 3)
41    /// Prefers Roaring bitmaps for fastest iteration
42    PerformanceOptimized,
43}
44
45impl IndexOptimization {
46    /// Get the zstd compression level for this optimization mode
47    pub fn zstd_level(&self) -> i32 {
48        match self {
49            IndexOptimization::Adaptive => 7,
50            IndexOptimization::SizeOptimized => 22,
51            IndexOptimization::PerformanceOptimized => 3,
52        }
53    }
54
55    /// Parse from string (for CLI)
56    pub fn parse(s: &str) -> Option<Self> {
57        match s.to_lowercase().as_str() {
58            "adaptive" | "balanced" | "default" => Some(IndexOptimization::Adaptive),
59            "size" | "size-optimized" | "small" | "compact" => {
60                Some(IndexOptimization::SizeOptimized)
61            }
62            "performance" | "perf" | "fast" | "speed" => {
63                Some(IndexOptimization::PerformanceOptimized)
64            }
65            _ => None,
66        }
67    }
68}
69
70/// Format tag bytes for serialization
71const FORMAT_HORIZONTAL_BP128: u8 = 0;
72const FORMAT_ELIAS_FANO: u8 = 1;
73const FORMAT_ROARING: u8 = 2;
74const FORMAT_VERTICAL_BP128: u8 = 3;
75const FORMAT_PARTITIONED_EF: u8 = 4;
76const FORMAT_OPT_P4D: u8 = 5;
77
78/// Posting list format selector
79#[derive(Debug, Clone, Copy, PartialEq, Eq)]
80pub enum PostingFormat {
81    /// Horizontal BP128 layout (fastest encoding + decoding with SIMD)
82    HorizontalBP128,
83    /// Vertical BP128 with bit-interleaved layout (optimal for SIMD scatter)
84    VerticalBP128,
85    /// Elias-Fano for long posting lists
86    EliasFano,
87    /// Partitioned Elias-Fano (very long lists, better compression)
88    PartitionedEF,
89    /// Roaring bitmap for very frequent terms
90    Roaring,
91    /// OptP4D (Optimized Patched Frame-of-Reference Delta) - optimal bit-width selection with exceptions
92    OptP4D,
93}
94
95impl PostingFormat {
96    /// Select optimal format based on posting list characteristics and optimization mode
97    ///
98    /// Format selection varies by optimization mode:
99    /// - **Adaptive**: Balanced selection based on list size and frequency
100    /// - **SizeOptimized**: Prefer PartitionedEF for best compression
101    /// - **PerformanceOptimized**: Prefer Roaring for fastest iteration
102    pub fn select(doc_count: usize, total_docs: usize) -> Self {
103        Self::select_with_optimization(doc_count, total_docs, IndexOptimization::Adaptive)
104    }
105
106    /// Select format with explicit optimization mode
107    pub fn select_with_optimization(
108        doc_count: usize,
109        total_docs: usize,
110        optimization: IndexOptimization,
111    ) -> Self {
112        let frequency_ratio = doc_count as f32 / total_docs.max(1) as f32;
113
114        match optimization {
115            IndexOptimization::Adaptive => {
116                // Balanced: use best format for each size range
117                if frequency_ratio >= ROARING_THRESHOLD_RATIO
118                    && doc_count >= PARTITIONED_EF_THRESHOLD
119                {
120                    PostingFormat::Roaring
121                } else if doc_count >= PARTITIONED_EF_THRESHOLD {
122                    PostingFormat::PartitionedEF
123                } else {
124                    PostingFormat::HorizontalBP128
125                }
126            }
127            IndexOptimization::SizeOptimized => {
128                // Size: prefer OptP4D for best compression (optimal bit-width + patched exceptions)
129                if doc_count >= 128 {
130                    PostingFormat::OptP4D
131                } else {
132                    PostingFormat::HorizontalBP128
133                }
134            }
135            IndexOptimization::PerformanceOptimized => {
136                // Performance: prefer Roaring for fastest iteration
137                if doc_count >= 64 {
138                    PostingFormat::Roaring
139                } else {
140                    PostingFormat::HorizontalBP128
141                }
142            }
143        }
144    }
145}
146
147/// Unified posting list that can use any compression format
148#[derive(Debug, Clone)]
149pub enum CompressedPostingList {
150    HorizontalBP128(HorizontalBP128PostingList),
151    VerticalBP128(VerticalBP128PostingList),
152    EliasFano(EliasFanoPostingList),
153    PartitionedEF(PartitionedEFPostingList),
154    Roaring(RoaringPostingList),
155    OptP4D(OptP4DPostingList),
156}
157
158impl CompressedPostingList {
159    /// Create from raw postings with automatic format selection
160    pub fn from_postings(doc_ids: &[u32], term_freqs: &[u32], total_docs: usize, idf: f32) -> Self {
161        let format = PostingFormat::select(doc_ids.len(), total_docs);
162
163        match format {
164            PostingFormat::HorizontalBP128 => CompressedPostingList::HorizontalBP128(
165                HorizontalBP128PostingList::from_postings(doc_ids, term_freqs, idf),
166            ),
167            PostingFormat::VerticalBP128 => CompressedPostingList::VerticalBP128(
168                VerticalBP128PostingList::from_postings(doc_ids, term_freqs, idf),
169            ),
170            PostingFormat::EliasFano => CompressedPostingList::EliasFano(
171                EliasFanoPostingList::from_postings(doc_ids, term_freqs),
172            ),
173            PostingFormat::PartitionedEF => CompressedPostingList::PartitionedEF(
174                PartitionedEFPostingList::from_postings_with_idf(doc_ids, term_freqs, idf),
175            ),
176            PostingFormat::Roaring => CompressedPostingList::Roaring(
177                RoaringPostingList::from_postings(doc_ids, term_freqs),
178            ),
179            PostingFormat::OptP4D => CompressedPostingList::OptP4D(
180                OptP4DPostingList::from_postings(doc_ids, term_freqs, idf),
181            ),
182        }
183    }
184
185    /// Create with explicit format selection
186    pub fn from_postings_with_format(
187        doc_ids: &[u32],
188        term_freqs: &[u32],
189        format: PostingFormat,
190        idf: f32,
191    ) -> Self {
192        match format {
193            PostingFormat::HorizontalBP128 => CompressedPostingList::HorizontalBP128(
194                HorizontalBP128PostingList::from_postings(doc_ids, term_freqs, idf),
195            ),
196            PostingFormat::VerticalBP128 => CompressedPostingList::VerticalBP128(
197                VerticalBP128PostingList::from_postings(doc_ids, term_freqs, idf),
198            ),
199            PostingFormat::EliasFano => CompressedPostingList::EliasFano(
200                EliasFanoPostingList::from_postings(doc_ids, term_freqs),
201            ),
202            PostingFormat::PartitionedEF => CompressedPostingList::PartitionedEF(
203                PartitionedEFPostingList::from_postings_with_idf(doc_ids, term_freqs, idf),
204            ),
205            PostingFormat::Roaring => CompressedPostingList::Roaring(
206                RoaringPostingList::from_postings(doc_ids, term_freqs),
207            ),
208            PostingFormat::OptP4D => CompressedPostingList::OptP4D(
209                OptP4DPostingList::from_postings(doc_ids, term_freqs, idf),
210            ),
211        }
212    }
213
214    /// Get document count
215    pub fn doc_count(&self) -> u32 {
216        match self {
217            CompressedPostingList::HorizontalBP128(p) => p.doc_count,
218            CompressedPostingList::VerticalBP128(p) => p.doc_count,
219            CompressedPostingList::EliasFano(p) => p.len(),
220            CompressedPostingList::PartitionedEF(p) => p.len(),
221            CompressedPostingList::Roaring(p) => p.len(),
222            CompressedPostingList::OptP4D(p) => p.len(),
223        }
224    }
225
226    /// Get maximum term frequency
227    pub fn max_tf(&self) -> u32 {
228        match self {
229            CompressedPostingList::HorizontalBP128(p) => p.max_score as u32, // Approximation
230            CompressedPostingList::VerticalBP128(p) => {
231                p.blocks.iter().map(|b| b.max_tf).max().unwrap_or(0)
232            }
233            CompressedPostingList::EliasFano(p) => p.max_tf,
234            CompressedPostingList::PartitionedEF(p) => p.max_tf,
235            CompressedPostingList::Roaring(p) => p.max_tf,
236            CompressedPostingList::OptP4D(p) => {
237                p.blocks.iter().map(|b| b.max_tf).max().unwrap_or(0)
238            }
239        }
240    }
241
242    /// Get format type
243    pub fn format(&self) -> PostingFormat {
244        match self {
245            CompressedPostingList::HorizontalBP128(_) => PostingFormat::HorizontalBP128,
246            CompressedPostingList::VerticalBP128(_) => PostingFormat::VerticalBP128,
247            CompressedPostingList::EliasFano(_) => PostingFormat::EliasFano,
248            CompressedPostingList::PartitionedEF(_) => PostingFormat::PartitionedEF,
249            CompressedPostingList::Roaring(_) => PostingFormat::Roaring,
250            CompressedPostingList::OptP4D(_) => PostingFormat::OptP4D,
251        }
252    }
253
254    /// Serialize
255    pub fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
256        match self {
257            CompressedPostingList::HorizontalBP128(p) => {
258                writer.write_u8(FORMAT_HORIZONTAL_BP128)?;
259                p.serialize(writer)
260            }
261            CompressedPostingList::VerticalBP128(p) => {
262                writer.write_u8(FORMAT_VERTICAL_BP128)?;
263                p.serialize(writer)
264            }
265            CompressedPostingList::EliasFano(p) => {
266                writer.write_u8(FORMAT_ELIAS_FANO)?;
267                p.serialize(writer)
268            }
269            CompressedPostingList::PartitionedEF(p) => {
270                writer.write_u8(FORMAT_PARTITIONED_EF)?;
271                p.serialize(writer)
272            }
273            CompressedPostingList::Roaring(p) => {
274                writer.write_u8(FORMAT_ROARING)?;
275                p.serialize(writer)
276            }
277            CompressedPostingList::OptP4D(p) => {
278                writer.write_u8(FORMAT_OPT_P4D)?;
279                p.serialize(writer)
280            }
281        }
282    }
283
284    /// Deserialize
285    pub fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
286        let format = reader.read_u8()?;
287        match format {
288            FORMAT_HORIZONTAL_BP128 => Ok(CompressedPostingList::HorizontalBP128(
289                HorizontalBP128PostingList::deserialize(reader)?,
290            )),
291            FORMAT_VERTICAL_BP128 => Ok(CompressedPostingList::VerticalBP128(
292                VerticalBP128PostingList::deserialize(reader)?,
293            )),
294            FORMAT_ELIAS_FANO => Ok(CompressedPostingList::EliasFano(
295                EliasFanoPostingList::deserialize(reader)?,
296            )),
297            FORMAT_PARTITIONED_EF => Ok(CompressedPostingList::PartitionedEF(
298                PartitionedEFPostingList::deserialize(reader)?,
299            )),
300            FORMAT_ROARING => Ok(CompressedPostingList::Roaring(
301                RoaringPostingList::deserialize(reader)?,
302            )),
303            FORMAT_OPT_P4D => Ok(CompressedPostingList::OptP4D(
304                OptP4DPostingList::deserialize(reader)?,
305            )),
306            _ => Err(io::Error::new(
307                io::ErrorKind::InvalidData,
308                format!("Unknown posting list format: {}", format),
309            )),
310        }
311    }
312
313    /// Create an iterator
314    pub fn iterator(&self) -> CompressedPostingIterator<'_> {
315        match self {
316            CompressedPostingList::HorizontalBP128(p) => {
317                CompressedPostingIterator::HorizontalBP128(p.iterator())
318            }
319            CompressedPostingList::VerticalBP128(p) => {
320                CompressedPostingIterator::VerticalBP128(p.iterator())
321            }
322            CompressedPostingList::EliasFano(p) => {
323                CompressedPostingIterator::EliasFano(p.iterator())
324            }
325            CompressedPostingList::PartitionedEF(p) => {
326                CompressedPostingIterator::PartitionedEF(p.iterator())
327            }
328            CompressedPostingList::Roaring(p) => {
329                let mut iter = p.iterator();
330                iter.init();
331                CompressedPostingIterator::Roaring(iter)
332            }
333            CompressedPostingList::OptP4D(p) => CompressedPostingIterator::OptP4D(p.iterator()),
334        }
335    }
336}
337
338/// Unified iterator over any posting list format
339pub enum CompressedPostingIterator<'a> {
340    HorizontalBP128(super::horizontal_bp128::HorizontalBP128Iterator<'a>),
341    VerticalBP128(super::vertical_bp128::VerticalBP128Iterator<'a>),
342    EliasFano(super::elias_fano::EliasFanoPostingIterator<'a>),
343    PartitionedEF(super::partitioned_ef::PartitionedEFPostingIterator<'a>),
344    Roaring(super::roaring::RoaringPostingIterator<'a>),
345    OptP4D(super::opt_p4d::OptP4DIterator<'a>),
346}
347
348impl<'a> CompressedPostingIterator<'a> {
349    /// Current document ID
350    pub fn doc(&self) -> u32 {
351        match self {
352            CompressedPostingIterator::HorizontalBP128(i) => i.doc(),
353            CompressedPostingIterator::VerticalBP128(i) => i.doc(),
354            CompressedPostingIterator::EliasFano(i) => i.doc(),
355            CompressedPostingIterator::PartitionedEF(i) => i.doc(),
356            CompressedPostingIterator::Roaring(i) => i.doc(),
357            CompressedPostingIterator::OptP4D(i) => i.doc(),
358        }
359    }
360
361    /// Current term frequency
362    pub fn term_freq(&self) -> u32 {
363        match self {
364            CompressedPostingIterator::HorizontalBP128(i) => i.term_freq(),
365            CompressedPostingIterator::VerticalBP128(i) => i.term_freq(),
366            CompressedPostingIterator::EliasFano(i) => i.term_freq(),
367            CompressedPostingIterator::PartitionedEF(i) => i.term_freq(),
368            CompressedPostingIterator::Roaring(i) => i.term_freq(),
369            CompressedPostingIterator::OptP4D(i) => i.term_freq(),
370        }
371    }
372
373    /// Advance to next document
374    pub fn advance(&mut self) -> u32 {
375        match self {
376            CompressedPostingIterator::HorizontalBP128(i) => i.advance(),
377            CompressedPostingIterator::VerticalBP128(i) => i.advance(),
378            CompressedPostingIterator::EliasFano(i) => i.advance(),
379            CompressedPostingIterator::PartitionedEF(i) => i.advance(),
380            CompressedPostingIterator::Roaring(i) => i.advance(),
381            CompressedPostingIterator::OptP4D(i) => i.advance(),
382        }
383    }
384
385    /// Seek to first doc >= target
386    pub fn seek(&mut self, target: u32) -> u32 {
387        match self {
388            CompressedPostingIterator::HorizontalBP128(i) => i.seek(target),
389            CompressedPostingIterator::VerticalBP128(i) => i.seek(target),
390            CompressedPostingIterator::EliasFano(i) => i.seek(target),
391            CompressedPostingIterator::PartitionedEF(i) => i.seek(target),
392            CompressedPostingIterator::Roaring(i) => i.seek(target),
393            CompressedPostingIterator::OptP4D(i) => i.seek(target),
394        }
395    }
396
397    /// Check if exhausted
398    pub fn is_exhausted(&self) -> bool {
399        self.doc() == u32::MAX
400    }
401}
402
403/// Statistics about compression format usage
404#[derive(Debug, Default, Clone)]
405pub struct CompressionStats {
406    pub bitpacked_count: u32,
407    pub bitpacked_docs: u64,
408    pub simd_bp128_count: u32,
409    pub simd_bp128_docs: u64,
410    pub elias_fano_count: u32,
411    pub elias_fano_docs: u64,
412    pub partitioned_ef_count: u32,
413    pub partitioned_ef_docs: u64,
414    pub roaring_count: u32,
415    pub roaring_docs: u64,
416    pub inline_count: u32,
417    pub inline_docs: u64,
418}
419
420impl CompressionStats {
421    pub fn record(&mut self, format: PostingFormat, doc_count: u32) {
422        match format {
423            PostingFormat::HorizontalBP128 => {
424                self.bitpacked_count += 1;
425                self.bitpacked_docs += doc_count as u64;
426            }
427            PostingFormat::VerticalBP128 => {
428                self.simd_bp128_count += 1;
429                self.simd_bp128_docs += doc_count as u64;
430            }
431            PostingFormat::EliasFano => {
432                self.elias_fano_count += 1;
433                self.elias_fano_docs += doc_count as u64;
434            }
435            PostingFormat::PartitionedEF => {
436                self.partitioned_ef_count += 1;
437                self.partitioned_ef_docs += doc_count as u64;
438            }
439            PostingFormat::Roaring => {
440                self.roaring_count += 1;
441                self.roaring_docs += doc_count as u64;
442            }
443            PostingFormat::OptP4D => {
444                // Track OptP4D under bitpacked for now (similar compression family)
445                self.bitpacked_count += 1;
446                self.bitpacked_docs += doc_count as u64;
447            }
448        }
449    }
450
451    pub fn record_inline(&mut self, doc_count: u32) {
452        self.inline_count += 1;
453        self.inline_docs += doc_count as u64;
454    }
455
456    pub fn total_terms(&self) -> u32 {
457        self.bitpacked_count
458            + self.simd_bp128_count
459            + self.elias_fano_count
460            + self.partitioned_ef_count
461            + self.roaring_count
462            + self.inline_count
463    }
464
465    pub fn total_postings(&self) -> u64 {
466        self.bitpacked_docs
467            + self.simd_bp128_docs
468            + self.elias_fano_docs
469            + self.partitioned_ef_docs
470            + self.roaring_docs
471            + self.inline_docs
472    }
473}
474
475#[cfg(test)]
476mod tests {
477    use super::*;
478
479    #[test]
480    fn test_format_selection() {
481        // Small list -> HorizontalBP128 (fastest encoding + decoding with SIMD)
482        assert_eq!(
483            PostingFormat::select(100, 1_000_000),
484            PostingFormat::HorizontalBP128
485        );
486
487        // Medium list -> HorizontalBP128
488        assert_eq!(
489            PostingFormat::select(500, 1_000_000),
490            PostingFormat::HorizontalBP128
491        );
492
493        assert_eq!(
494            PostingFormat::select(5_000, 1_000_000),
495            PostingFormat::HorizontalBP128
496        );
497
498        assert_eq!(
499            PostingFormat::select(15_000, 10_000_000),
500            PostingFormat::HorizontalBP128
501        );
502
503        // Large list (>=20K) -> Partitioned EF
504        assert_eq!(
505            PostingFormat::select(25_000, 10_000_000),
506            PostingFormat::PartitionedEF
507        );
508
509        // Very frequent term (>1% of corpus AND >=20K docs) -> Roaring
510        assert_eq!(
511            PostingFormat::select(50_000, 1_000_000),
512            PostingFormat::Roaring
513        );
514    }
515
516    #[test]
517    fn test_compressed_posting_list_small() {
518        let doc_ids: Vec<u32> = (0..100).map(|i| i * 2).collect();
519        let term_freqs: Vec<u32> = vec![1; 100];
520
521        let list = CompressedPostingList::from_postings(&doc_ids, &term_freqs, 1_000_000, 1.0);
522
523        // Small lists use HorizontalBP128 (fastest encoding + decoding with SIMD)
524        assert_eq!(list.format(), PostingFormat::HorizontalBP128);
525        assert_eq!(list.doc_count(), 100);
526
527        let mut iter = list.iterator();
528        for (i, &expected) in doc_ids.iter().enumerate() {
529            assert_eq!(iter.doc(), expected, "Mismatch at {}", i);
530            iter.advance();
531        }
532    }
533
534    #[test]
535    fn test_compressed_posting_list_bitpacked() {
536        let doc_ids: Vec<u32> = (0..15_000).map(|i| i * 2).collect();
537        let term_freqs: Vec<u32> = vec![1; 15_000];
538
539        // 15K docs with large corpus -> HorizontalBP128 (under 20K threshold)
540        let list = CompressedPostingList::from_postings(&doc_ids, &term_freqs, 10_000_000, 1.0);
541
542        assert_eq!(list.format(), PostingFormat::HorizontalBP128);
543        assert_eq!(list.doc_count(), 15_000);
544    }
545
546    #[test]
547    fn test_compressed_posting_list_serialization() {
548        let doc_ids: Vec<u32> = (0..500).map(|i| i * 3).collect();
549        let term_freqs: Vec<u32> = (0..500).map(|i| (i % 5) + 1).collect();
550
551        let list = CompressedPostingList::from_postings(&doc_ids, &term_freqs, 1_000_000, 1.0);
552
553        let mut buffer = Vec::new();
554        list.serialize(&mut buffer).unwrap();
555
556        let restored = CompressedPostingList::deserialize(&mut &buffer[..]).unwrap();
557
558        assert_eq!(restored.format(), list.format());
559        assert_eq!(restored.doc_count(), list.doc_count());
560
561        // Verify iteration
562        let mut iter1 = list.iterator();
563        let mut iter2 = restored.iterator();
564
565        while iter1.doc() != u32::MAX {
566            assert_eq!(iter1.doc(), iter2.doc());
567            assert_eq!(iter1.term_freq(), iter2.term_freq());
568            iter1.advance();
569            iter2.advance();
570        }
571    }
572
573    #[test]
574    fn test_iterator_seek() {
575        let doc_ids: Vec<u32> = vec![10, 20, 30, 100, 200, 300, 1000, 2000];
576        let term_freqs: Vec<u32> = vec![1; 8];
577
578        let list = CompressedPostingList::from_postings(&doc_ids, &term_freqs, 1_000_000, 1.0);
579        let mut iter = list.iterator();
580
581        assert_eq!(iter.seek(25), 30);
582        assert_eq!(iter.seek(100), 100);
583        assert_eq!(iter.seek(500), 1000);
584        assert_eq!(iter.seek(3000), u32::MAX);
585    }
586
587    #[test]
588    fn test_opt_p4d_via_unified_interface() {
589        let doc_ids: Vec<u32> = (0..500).map(|i| i * 3).collect();
590        let term_freqs: Vec<u32> = (0..500).map(|i| (i % 5) + 1).collect();
591
592        // Create OptP4D explicitly via the unified interface
593        let list = CompressedPostingList::from_postings_with_format(
594            &doc_ids,
595            &term_freqs,
596            PostingFormat::OptP4D,
597            1.0,
598        );
599
600        assert_eq!(list.format(), PostingFormat::OptP4D);
601        assert_eq!(list.doc_count(), 500);
602
603        // Test serialization roundtrip
604        let mut buffer = Vec::new();
605        list.serialize(&mut buffer).unwrap();
606        let restored = CompressedPostingList::deserialize(&mut &buffer[..]).unwrap();
607
608        assert_eq!(restored.format(), PostingFormat::OptP4D);
609        assert_eq!(restored.doc_count(), 500);
610
611        // Verify iteration
612        let mut iter1 = list.iterator();
613        let mut iter2 = restored.iterator();
614
615        while iter1.doc() != u32::MAX {
616            assert_eq!(iter1.doc(), iter2.doc());
617            assert_eq!(iter1.term_freq(), iter2.term_freq());
618            iter1.advance();
619            iter2.advance();
620        }
621
622        // Test seek
623        let mut iter = list.iterator();
624        assert_eq!(iter.seek(100), 102); // 34 * 3 = 102
625        assert_eq!(iter.seek(500), 501); // 167 * 3 = 501
626    }
627}