Skip to main content

ix/
posting.rs

1//! Posting list encode/decode (delta + varint).
2//!
3//! Compact representation of (file_id, [offsets]) for a single trigram.
4
5use crate::error::{Error, Result};
6use crate::varint;
7
8#[derive(Debug, Clone, PartialEq, Eq)]
9pub struct PostingList {
10    pub entries: Vec<PostingEntry>,
11}
12
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub struct PostingEntry {
15    pub file_id: u32,
16    pub offsets: Vec<u32>,
17}
18
19impl PostingList {
20    /// Encode the posting list into a byte buffer with a CRC32C footer.
21    pub fn encode(&self) -> Vec<u8> {
22        let mut buf = Vec::new();
23        varint::encode(self.entries.len() as u64, &mut buf);
24
25        let mut last_file_id = 0u32;
26        for entry in &self.entries {
27            let file_id_delta = entry.file_id - last_file_id;
28            varint::encode(file_id_delta as u64, &mut buf);
29            last_file_id = entry.file_id;
30
31            varint::encode(entry.offsets.len() as u64, &mut buf);
32            let mut last_offset = 0u32;
33            for &offset in &entry.offsets {
34                let offset_delta = offset - last_offset;
35                varint::encode(offset_delta as u64, &mut buf);
36                last_offset = offset;
37            }
38        }
39
40        // Add CRC32C checksum
41        let crc = crc32c::crc32c(&buf);
42        buf.extend_from_slice(&crc.to_le_bytes());
43        buf
44    }
45
46    /// Decode the posting list from a byte slice, verifying the CRC32C footer.
47    pub fn decode(data: &[u8]) -> Result<Self> {
48        if data.len() < 4 {
49            return Err(Error::PostingCorrupted);
50        }
51
52        let (payload, crc_bytes) = data.split_at(data.len() - 4);
53        let expected_crc = u32::from_le_bytes(crc_bytes.try_into().unwrap());
54        let actual_crc = crc32c::crc32c(payload);
55
56        if expected_crc != actual_crc {
57            return Err(Error::PostingCorrupted);
58        }
59
60        let mut pos = 0;
61        let num_files = varint::decode(payload, &mut pos)? as usize;
62        let mut entries = Vec::with_capacity(num_files);
63
64        let mut last_file_id = 0u32;
65        for _ in 0..num_files {
66            let file_id_delta = varint::decode(payload, &mut pos)? as u32;
67            let file_id = last_file_id + file_id_delta;
68            last_file_id = file_id;
69
70            let num_offsets = varint::decode(payload, &mut pos)? as usize;
71            let mut offsets = Vec::with_capacity(num_offsets);
72            let mut last_offset = 0u32;
73            for _ in 0..num_offsets {
74                let offset_delta = varint::decode(payload, &mut pos)? as u32;
75                let offset = last_offset + offset_delta;
76                last_offset = offset;
77                offsets.push(offset);
78            }
79            entries.push(PostingEntry { file_id, offsets });
80        }
81
82        Ok(PostingList { entries })
83    }
84}
85
86#[cfg(test)]
87mod tests {
88    use super::*;
89
90    #[test]
91    fn roundtrip() {
92        let list = PostingList {
93            entries: vec![
94                PostingEntry {
95                    file_id: 5,
96                    offsets: vec![100, 340, 342],
97                },
98                PostingEntry {
99                    file_id: 12,
100                    offsets: vec![44],
101                },
102                PostingEntry {
103                    file_id: 15,
104                    offsets: vec![200, 880],
105                },
106            ],
107        };
108
109        let encoded = list.encode();
110        let decoded = PostingList::decode(&encoded).unwrap();
111        assert_eq!(list, decoded);
112    }
113
114    #[test]
115    fn test_corruption_detection() {
116        let list = PostingList {
117            entries: vec![PostingEntry {
118                file_id: 1,
119                offsets: vec![10, 20],
120            }],
121        };
122        let mut encoded = list.encode();
123        
124        // Flip a bit in the payload (not the CRC)
125        encoded[0] ^= 0xFF;
126        
127        let result = PostingList::decode(&encoded);
128        assert!(result.is_err(), "Decoding corrupted payload should fail");
129        
130        // Restore payload, flip a bit in CRC
131        encoded[0] ^= 0xFF;
132        let last_idx = encoded.len() - 1;
133        encoded[last_idx] ^= 0xFF;
134        
135        let result = PostingList::decode(&encoded);
136        assert!(result.is_err(), "Decoding with corrupted CRC should fail");
137    }
138
139    #[test]
140    fn empty() {
141        let list = PostingList { entries: vec![] };
142        let encoded = list.encode();
143        let decoded = PostingList::decode(&encoded).unwrap();
144        assert_eq!(list, decoded);
145    }
146}