Skip to main content

sift_core/storage/
postings.rs

1//! Contiguous `u32` LE file-id payloads referenced by the lexicon.
2
3use std::fs::File;
4use std::io::{BufWriter, Write};
5use std::path::Path;
6
7use memmap2::Mmap;
8
9use crate::storage::format::{write_magic, POSTINGS_MAGIC};
10use crate::storage::mmap::open_mmap;
11
12/// Write postings blob to `out_path`.
13///
14/// # Errors
15///
16/// Propagates IO errors from writing `out_path`.
17pub fn write_postings(out_path: &Path, payload: &[u8]) -> std::io::Result<()> {
18    let f = File::create(out_path)?;
19    let mut w = BufWriter::new(f);
20    write_magic(&mut w, POSTINGS_MAGIC)?;
21    let plen: u32 = payload
22        .len()
23        .try_into()
24        .map_err(|_| std::io::Error::new(std::io::ErrorKind::InvalidInput, "postings too large"))?;
25    w.write_all(&plen.to_le_bytes())?;
26    w.write_all(payload)?;
27    w.flush()?;
28    Ok(())
29}
30
31#[derive(Debug)]
32pub struct MappedPostings {
33    backing: Backing,
34    payload_len: usize,
35}
36
37#[derive(Debug)]
38enum Backing {
39    Mmap(Mmap),
40    Owned(Vec<u8>),
41}
42
43impl MappedPostings {
44    fn bytes(&self) -> &[u8] {
45        match &self.backing {
46            Backing::Mmap(m) => m.as_ref(),
47            Backing::Owned(v) => v.as_slice(),
48        }
49    }
50
51    #[must_use]
52    pub fn from_bytes(payload: &[u8]) -> Self {
53        let mut data = Vec::with_capacity(POSTINGS_MAGIC.len() + 4 + payload.len());
54        data.extend_from_slice(&POSTINGS_MAGIC);
55        let plen = u32::try_from(payload.len()).unwrap_or(u32::MAX);
56        data.extend_from_slice(&plen.to_le_bytes());
57        data.extend_from_slice(payload);
58        Self {
59            backing: Backing::Owned(data),
60            payload_len: payload.len(),
61        }
62    }
63
64    /// Open postings from a memory-mapped file.
65    ///
66    /// # Errors
67    ///
68    /// Returns an error if the file is malformed.
69    pub fn open(path: &Path) -> std::io::Result<Self> {
70        let mmap = open_mmap(path)?;
71        let bytes = mmap.as_ref();
72        let payload_len = Self::validate(bytes)?;
73        Ok(Self {
74            backing: Backing::Mmap(mmap),
75            payload_len,
76        })
77    }
78
79    fn validate(bytes: &[u8]) -> std::io::Result<usize> {
80        let magic_len = POSTINGS_MAGIC.len();
81        if bytes.len() < magic_len + 4 {
82            return Err(std::io::Error::new(
83                std::io::ErrorKind::InvalidData,
84                "postings too short for magic+len",
85            ));
86        }
87        if bytes[..magic_len] != POSTINGS_MAGIC {
88            return Err(std::io::Error::new(
89                std::io::ErrorKind::InvalidData,
90                "unexpected postings magic",
91            ));
92        }
93        let plen = u32::from_le_bytes(bytes[magic_len..magic_len + 4].try_into().unwrap()) as usize;
94        if bytes.len() < magic_len + 4 + plen {
95            return Err(std::io::Error::new(
96                std::io::ErrorKind::InvalidData,
97                "postings payload shorter than declared length",
98            ));
99        }
100        Ok(plen)
101    }
102
103    #[must_use]
104    pub fn slice(&self, start: usize, len: usize) -> &[u8] {
105        let payload_start = POSTINGS_MAGIC.len() + 4;
106        let start = payload_start + start;
107        self.bytes().get(start..start + len).unwrap_or(&[])
108    }
109
110    #[must_use]
111    pub fn as_bytes(&self) -> &[u8] {
112        let payload_start = POSTINGS_MAGIC.len() + 4;
113        &self.bytes()[payload_start..payload_start + self.payload_len]
114    }
115
116    #[must_use]
117    pub fn backing_slice(&self) -> &[u8] {
118        self.bytes()
119    }
120}