1use crate::error::{Error, Result};
7use crate::varint;
8
9#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct PostingList {
12 pub entries: Vec<PostingEntry>,
14}
15
16#[derive(Debug, Clone, PartialEq, Eq)]
18pub struct PostingEntry {
19 pub file_id: u32,
21 pub offsets: Vec<u32>,
23}
24
25impl PostingList {
26 pub const ZSTD_COMPRESSION_LEVEL: i32 = 3;
28
29 #[must_use]
33 pub fn encode(&self) -> Vec<u8> {
34 let mut buf = Vec::new();
35 varint::encode(u64::try_from(self.entries.len()).unwrap_or(0), &mut buf);
36
37 let mut last_file_id = 0u32;
38 for entry in &self.entries {
39 let file_id_delta = entry.file_id - last_file_id;
40 varint::encode(u64::from(file_id_delta), &mut buf);
41 last_file_id = entry.file_id;
42
43 varint::encode(u64::try_from(entry.offsets.len()).unwrap_or(0), &mut buf);
44 let mut last_offset = 0u32;
45 for &offset in &entry.offsets {
46 let offset_delta = offset - last_offset;
47 varint::encode(u64::from(offset_delta), &mut buf);
48 last_offset = offset;
49 }
50 }
51
52 zstd::encode_all(&buf[..], Self::ZSTD_COMPRESSION_LEVEL).unwrap_or(buf)
53 }
54
55 pub fn decode(data: &[u8]) -> Result<Self> {
63 let payload = zstd::decode_all(data).map_err(|_| Error::PostingCorrupted)?;
64
65 let mut pos = 0;
66 let num_files = usize::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
67 let mut entries = Vec::with_capacity(num_files);
68
69 let mut last_file_id = 0u32;
70 for _ in 0..num_files {
71 let file_id_delta = u32::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
72 let file_id = last_file_id + file_id_delta;
73 last_file_id = file_id;
74
75 let num_offsets = usize::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
76 let mut offsets = Vec::with_capacity(num_offsets);
77 let mut last_offset = 0u32;
78 for _ in 0..num_offsets {
79 let offset_delta = u32::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
80 let offset = last_offset + offset_delta;
81 last_offset = offset;
82 offsets.push(offset);
83 }
84 entries.push(PostingEntry { file_id, offsets });
85 }
86
87 Ok(Self { entries })
88 }
89}
90
91#[cfg(test)]
92#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
93mod tests {
94 use super::*;
95
96 #[test]
97 fn roundtrip() {
98 let list = PostingList {
99 entries: vec![
100 PostingEntry {
101 file_id: 5,
102 offsets: vec![100, 340, 342],
103 },
104 PostingEntry {
105 file_id: 12,
106 offsets: vec![44],
107 },
108 PostingEntry {
109 file_id: 15,
110 offsets: vec![200, 880],
111 },
112 ],
113 };
114
115 let encoded = list.encode();
116 let decoded = PostingList::decode(&encoded).unwrap();
117 assert_eq!(list, decoded);
118 }
119
120 #[test]
121 fn test_corruption_detection() {
122 let list = PostingList {
123 entries: vec![PostingEntry {
124 file_id: 1,
125 offsets: vec![10, 20],
126 }],
127 };
128 let mut encoded = list.encode();
129
130 encoded[0] ^= 0xFF;
132
133 let result = PostingList::decode(&encoded);
134 assert!(result.is_err(), "Decoding corrupted ZSTD data should fail");
135 }
136
137 #[test]
138 fn empty() {
139 let list = PostingList { entries: vec![] };
140 let encoded = list.encode();
141 let decoded = PostingList::decode(&encoded).unwrap();
142 assert_eq!(list, decoded);
143 }
144
145 #[test]
146 fn compression_ratio() {
147 let mut entries = Vec::new();
148 for i in 0..1000 {
149 entries.push(PostingEntry {
150 file_id: i,
151 offsets: (0..100).map(|j| i * 100 + j).collect(),
152 });
153 }
154 let list = PostingList { entries };
155 let encoded = list.encode();
156
157 assert!(
159 encoded.len() < 50000,
160 "Expected compression, got {} bytes",
161 encoded.len()
162 );
163 }
164}