1use crate::error::{Error, Result};
7use crate::varint;
8
9#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct PostingList {
12 pub entries: Vec<PostingEntry>,
14}
15
16#[derive(Debug, Clone, PartialEq, Eq)]
18pub struct PostingEntry {
19 pub file_id: u32,
21 pub offsets: Vec<u32>,
23}
24
25impl PostingList {
26 pub const ZSTD_COMPRESSION_LEVEL: i32 = 3;
28
29 pub fn encode(&self) -> Result<Vec<u8>> {
37 let mut buf = Vec::new();
38 varint::encode(u64::try_from(self.entries.len()).unwrap_or(0), &mut buf);
39
40 let mut last_file_id = 0u32;
41 for entry in &self.entries {
42 let file_id_delta = entry.file_id - last_file_id;
43 varint::encode(u64::from(file_id_delta), &mut buf);
44 last_file_id = entry.file_id;
45
46 varint::encode(u64::try_from(entry.offsets.len()).unwrap_or(0), &mut buf);
47 let mut last_offset = 0u32;
48 for &offset in &entry.offsets {
49 let offset_delta = offset - last_offset;
50 varint::encode(u64::from(offset_delta), &mut buf);
51 last_offset = offset;
52 }
53 }
54
55 zstd::encode_all(&buf[..], Self::ZSTD_COMPRESSION_LEVEL)
56 .map_err(|e| Error::Config(format!("posting zstd encode: {e}")))
57 }
58
59 pub fn decode(data: &[u8]) -> Result<Self> {
67 let payload = zstd::decode_all(data).map_err(|_| Error::PostingCorrupted)?;
68
69 let mut pos = 0;
70 let num_files = usize::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
71 let mut entries = Vec::with_capacity(num_files);
72
73 let mut last_file_id = 0u32;
74 for _ in 0..num_files {
75 let file_id_delta = u32::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
76 let file_id = last_file_id + file_id_delta;
77 last_file_id = file_id;
78
79 let num_offsets = usize::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
80 let mut offsets = Vec::with_capacity(num_offsets);
81 let mut last_offset = 0u32;
82 for _ in 0..num_offsets {
83 let offset_delta = u32::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
84 let offset = last_offset + offset_delta;
85 last_offset = offset;
86 offsets.push(offset);
87 }
88 entries.push(PostingEntry { file_id, offsets });
89 }
90
91 Ok(Self { entries })
92 }
93}
94
95#[cfg(test)]
96#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
97mod tests {
98 use super::*;
99
100 #[test]
101 fn roundtrip() {
102 let list = PostingList {
103 entries: vec![
104 PostingEntry {
105 file_id: 5,
106 offsets: vec![100, 340, 342],
107 },
108 PostingEntry {
109 file_id: 12,
110 offsets: vec![44],
111 },
112 PostingEntry {
113 file_id: 15,
114 offsets: vec![200, 880],
115 },
116 ],
117 };
118
119 let encoded = list.encode().unwrap();
120 let decoded = PostingList::decode(&encoded).unwrap();
121 assert_eq!(list, decoded);
122}
123
124#[test]
125fn test_corruption_detection() {
126 let list = PostingList {
127 entries: vec![PostingEntry {
128 file_id: 1,
129 offsets: vec![10, 20],
130 }],
131 };
132 let mut encoded = list.encode().unwrap();
133
134 encoded[0] ^= 0xFF;
136
137 let result = PostingList::decode(&encoded);
138 assert!(result.is_err(), "Decoding corrupted ZSTD data should fail");
139 }
140
141 #[test]
142 fn empty() {
143 let list = PostingList { entries: vec![] };
144 let encoded = list.encode().unwrap();
145 let decoded = PostingList::decode(&encoded).unwrap();
146 assert_eq!(list, decoded);
147}
148
149#[test]
150fn compression_ratio() {
151 let mut entries = Vec::new();
152 for i in 0..1000 {
153 entries.push(PostingEntry {
154 file_id: i,
155 offsets: (0..100).map(|j| i * 100 + j).collect(),
156 });
157 }
158 let list = PostingList { entries };
159 let encoded = list.encode().unwrap();
160
161 assert!(
162 encoded.len() < 50000,
163 "Expected compression, got {} bytes",
164 encoded.len()
165 );
166 }
167}