1use crate::error::{Error, Result};
6use crate::varint;
7
8#[derive(Debug, Clone, PartialEq, Eq)]
9pub struct PostingList {
10 pub entries: Vec<PostingEntry>,
11}
12
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub struct PostingEntry {
15 pub file_id: u32,
16 pub offsets: Vec<u32>,
17}
18
19impl PostingList {
20 pub fn encode(&self) -> Vec<u8> {
22 let mut buf = Vec::new();
23 varint::encode(self.entries.len() as u64, &mut buf);
24
25 let mut last_file_id = 0u32;
26 for entry in &self.entries {
27 let file_id_delta = entry.file_id - last_file_id;
28 varint::encode(file_id_delta as u64, &mut buf);
29 last_file_id = entry.file_id;
30
31 varint::encode(entry.offsets.len() as u64, &mut buf);
32 let mut last_offset = 0u32;
33 for &offset in &entry.offsets {
34 let offset_delta = offset - last_offset;
35 varint::encode(offset_delta as u64, &mut buf);
36 last_offset = offset;
37 }
38 }
39
40 let crc = crc32c::crc32c(&buf);
42 buf.extend_from_slice(&crc.to_le_bytes());
43 buf
44 }
45
46 pub fn decode(data: &[u8]) -> Result<Self> {
48 if data.len() < 4 {
49 return Err(Error::PostingCorrupted);
50 }
51
52 let (payload, crc_bytes) = data.split_at(data.len() - 4);
53 let expected_crc = u32::from_le_bytes(crc_bytes.try_into().unwrap());
54 let actual_crc = crc32c::crc32c(payload);
55
56 if expected_crc != actual_crc {
57 return Err(Error::PostingCorrupted);
58 }
59
60 let mut pos = 0;
61 let num_files = varint::decode(payload, &mut pos)? as usize;
62 let mut entries = Vec::with_capacity(num_files);
63
64 let mut last_file_id = 0u32;
65 for _ in 0..num_files {
66 let file_id_delta = varint::decode(payload, &mut pos)? as u32;
67 let file_id = last_file_id + file_id_delta;
68 last_file_id = file_id;
69
70 let num_offsets = varint::decode(payload, &mut pos)? as usize;
71 let mut offsets = Vec::with_capacity(num_offsets);
72 let mut last_offset = 0u32;
73 for _ in 0..num_offsets {
74 let offset_delta = varint::decode(payload, &mut pos)? as u32;
75 let offset = last_offset + offset_delta;
76 last_offset = offset;
77 offsets.push(offset);
78 }
79 entries.push(PostingEntry { file_id, offsets });
80 }
81
82 Ok(PostingList { entries })
83 }
84}
85
86#[cfg(test)]
87mod tests {
88 use super::*;
89
90 #[test]
91 fn roundtrip() {
92 let list = PostingList {
93 entries: vec![
94 PostingEntry {
95 file_id: 5,
96 offsets: vec![100, 340, 342],
97 },
98 PostingEntry {
99 file_id: 12,
100 offsets: vec![44],
101 },
102 PostingEntry {
103 file_id: 15,
104 offsets: vec![200, 880],
105 },
106 ],
107 };
108
109 let encoded = list.encode();
110 let decoded = PostingList::decode(&encoded).unwrap();
111 assert_eq!(list, decoded);
112 }
113
114 #[test]
115 fn test_corruption_detection() {
116 let list = PostingList {
117 entries: vec![PostingEntry {
118 file_id: 1,
119 offsets: vec![10, 20],
120 }],
121 };
122 let mut encoded = list.encode();
123
124 encoded[0] ^= 0xFF;
126
127 let result = PostingList::decode(&encoded);
128 assert!(result.is_err(), "Decoding corrupted payload should fail");
129
130 encoded[0] ^= 0xFF;
132 let last_idx = encoded.len() - 1;
133 encoded[last_idx] ^= 0xFF;
134
135 let result = PostingList::decode(&encoded);
136 assert!(result.is_err(), "Decoding with corrupted CRC should fail");
137 }
138
139 #[test]
140 fn empty() {
141 let list = PostingList { entries: vec![] };
142 let encoded = list.encode();
143 let decoded = PostingList::decode(&encoded).unwrap();
144 assert_eq!(list, decoded);
145 }
146}