1use crate::error::{Error, Result};
7use crate::varint;
8
9#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct PostingList {
12 pub entries: Vec<PostingEntry>,
14}
15
16#[derive(Debug, Clone, PartialEq, Eq)]
18pub struct PostingEntry {
19 pub file_id: u32,
21 pub offsets: Vec<u32>,
23}
24
25impl PostingList {
26 const ZSTD_COMPRESSION_LEVEL: i32 = 3;
27
28 #[must_use]
32 pub fn encode(&self) -> Vec<u8> {
33 let mut buf = Vec::new();
34 varint::encode(u64::try_from(self.entries.len()).unwrap_or(0), &mut buf);
35
36 let mut last_file_id = 0u32;
37 for entry in &self.entries {
38 let file_id_delta = entry.file_id - last_file_id;
39 varint::encode(u64::from(file_id_delta), &mut buf);
40 last_file_id = entry.file_id;
41
42 varint::encode(u64::try_from(entry.offsets.len()).unwrap_or(0), &mut buf);
43 let mut last_offset = 0u32;
44 for &offset in &entry.offsets {
45 let offset_delta = offset - last_offset;
46 varint::encode(u64::from(offset_delta), &mut buf);
47 last_offset = offset;
48 }
49 }
50
51 zstd::encode_all(&buf[..], Self::ZSTD_COMPRESSION_LEVEL).unwrap_or(buf)
52 }
53
54 pub fn decode(data: &[u8]) -> Result<Self> {
62 let payload = zstd::decode_all(data).map_err(|_| Error::PostingCorrupted)?;
63
64 let mut pos = 0;
65 let num_files = usize::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
66 let mut entries = Vec::with_capacity(num_files);
67
68 let mut last_file_id = 0u32;
69 for _ in 0..num_files {
70 let file_id_delta = u32::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
71 let file_id = last_file_id + file_id_delta;
72 last_file_id = file_id;
73
74 let num_offsets = usize::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
75 let mut offsets = Vec::with_capacity(num_offsets);
76 let mut last_offset = 0u32;
77 for _ in 0..num_offsets {
78 let offset_delta = u32::try_from(varint::decode(&payload, &mut pos)?).unwrap_or(0);
79 let offset = last_offset + offset_delta;
80 last_offset = offset;
81 offsets.push(offset);
82 }
83 entries.push(PostingEntry { file_id, offsets });
84 }
85
86 Ok(Self { entries })
87 }
88}
89
90#[cfg(test)]
91#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
92mod tests {
93 use super::*;
94
95 #[test]
96 fn roundtrip() {
97 let list = PostingList {
98 entries: vec![
99 PostingEntry {
100 file_id: 5,
101 offsets: vec![100, 340, 342],
102 },
103 PostingEntry {
104 file_id: 12,
105 offsets: vec![44],
106 },
107 PostingEntry {
108 file_id: 15,
109 offsets: vec![200, 880],
110 },
111 ],
112 };
113
114 let encoded = list.encode();
115 let decoded = PostingList::decode(&encoded).unwrap();
116 assert_eq!(list, decoded);
117 }
118
119 #[test]
120 fn test_corruption_detection() {
121 let list = PostingList {
122 entries: vec![PostingEntry {
123 file_id: 1,
124 offsets: vec![10, 20],
125 }],
126 };
127 let mut encoded = list.encode();
128
129 encoded[0] ^= 0xFF;
131
132 let result = PostingList::decode(&encoded);
133 assert!(result.is_err(), "Decoding corrupted ZSTD data should fail");
134 }
135
136 #[test]
137 fn empty() {
138 let list = PostingList { entries: vec![] };
139 let encoded = list.encode();
140 let decoded = PostingList::decode(&encoded).unwrap();
141 assert_eq!(list, decoded);
142 }
143
144 #[test]
145 fn compression_ratio() {
146 let mut entries = Vec::new();
147 for i in 0..1000 {
148 entries.push(PostingEntry {
149 file_id: i,
150 offsets: (0..100).map(|j| i * 100 + j).collect(),
151 });
152 }
153 let list = PostingList { entries };
154 let encoded = list.encode();
155
156 assert!(
158 encoded.len() < 50000,
159 "Expected compression, got {} bytes",
160 encoded.len()
161 );
162 }
163}