1use crate::error::{Error, Result};
7use crate::varint;
8
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub struct PostingList {
11 pub entries: Vec<PostingEntry>,
12}
13
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct PostingEntry {
16 pub file_id: u32,
17 pub offsets: Vec<u32>,
18}
19
20impl PostingList {
21 const ZSTD_COMPRESSION_LEVEL: i32 = 3;
22
23 pub fn encode(&self) -> Vec<u8> {
27 let mut buf = Vec::new();
28 varint::encode(self.entries.len() as u64, &mut buf);
29
30 let mut last_file_id = 0u32;
31 for entry in &self.entries {
32 let file_id_delta = entry.file_id - last_file_id;
33 varint::encode(file_id_delta as u64, &mut buf);
34 last_file_id = entry.file_id;
35
36 varint::encode(entry.offsets.len() as u64, &mut buf);
37 let mut last_offset = 0u32;
38 for &offset in &entry.offsets {
39 let offset_delta = offset - last_offset;
40 varint::encode(offset_delta as u64, &mut buf);
41 last_offset = offset;
42 }
43 }
44
45 zstd::encode_all(&buf[..], Self::ZSTD_COMPRESSION_LEVEL).unwrap_or(buf)
46 }
47
48 pub fn decode(data: &[u8]) -> Result<Self> {
51 let payload = zstd::decode_all(data).map_err(|_| Error::PostingCorrupted)?;
52
53 let mut pos = 0;
54 let num_files = varint::decode(&payload, &mut pos)? as usize;
55 let mut entries = Vec::with_capacity(num_files);
56
57 let mut last_file_id = 0u32;
58 for _ in 0..num_files {
59 let file_id_delta = varint::decode(&payload, &mut pos)? as u32;
60 let file_id = last_file_id + file_id_delta;
61 last_file_id = file_id;
62
63 let num_offsets = varint::decode(&payload, &mut pos)? as usize;
64 let mut offsets = Vec::with_capacity(num_offsets);
65 let mut last_offset = 0u32;
66 for _ in 0..num_offsets {
67 let offset_delta = varint::decode(&payload, &mut pos)? as u32;
68 let offset = last_offset + offset_delta;
69 last_offset = offset;
70 offsets.push(offset);
71 }
72 entries.push(PostingEntry { file_id, offsets });
73 }
74
75 Ok(PostingList { entries })
76 }
77}
78
79#[cfg(test)]
80mod tests {
81 use super::*;
82
83 #[test]
84 fn roundtrip() {
85 let list = PostingList {
86 entries: vec![
87 PostingEntry {
88 file_id: 5,
89 offsets: vec![100, 340, 342],
90 },
91 PostingEntry {
92 file_id: 12,
93 offsets: vec![44],
94 },
95 PostingEntry {
96 file_id: 15,
97 offsets: vec![200, 880],
98 },
99 ],
100 };
101
102 let encoded = list.encode();
103 let decoded = PostingList::decode(&encoded).unwrap();
104 assert_eq!(list, decoded);
105 }
106
107 #[test]
108 fn test_corruption_detection() {
109 let list = PostingList {
110 entries: vec![PostingEntry {
111 file_id: 1,
112 offsets: vec![10, 20],
113 }],
114 };
115 let mut encoded = list.encode();
116
117 encoded[0] ^= 0xFF;
119
120 let result = PostingList::decode(&encoded);
121 assert!(result.is_err(), "Decoding corrupted ZSTD data should fail");
122 }
123
124 #[test]
125 fn empty() {
126 let list = PostingList { entries: vec![] };
127 let encoded = list.encode();
128 let decoded = PostingList::decode(&encoded).unwrap();
129 assert_eq!(list, decoded);
130 }
131
132 #[test]
133 fn compression_ratio() {
134 let mut entries = Vec::new();
135 for i in 0..1000 {
136 entries.push(PostingEntry {
137 file_id: i,
138 offsets: (0..100).map(|j| i * 100 + j).collect(),
139 });
140 }
141 let list = PostingList { entries };
142 let encoded = list.encode();
143
144 assert!(
146 encoded.len() < 50000,
147 "Expected compression, got {} bytes",
148 encoded.len()
149 );
150 }
151}