mf_file/
record.rs

1use crc32fast::Hasher as Crc32;
2use memmap2::{Mmap, MmapOptions};
3use std::fs::{File, OpenOptions};
4use std::io::{BufWriter, Read, Seek, SeekFrom, Write};
5use std::path::Path;
6
7use crate::error::{FileError, Result};
8
/// Magic bytes stored at offset 0 of every file and verified when a file is opened.
pub const MAGIC: &[u8; 8] = b"MFFILE01";
/// File header size in bytes: the 8-byte magic followed by an 8-byte reserved area.
pub const HEADER_LEN: usize = MAGIC.len() + 8;
/// Record header size in bytes: a `u32` payload length followed by a `u32` CRC32.
pub const REC_HDR: usize = 2 * std::mem::size_of::<u32>();
12
13#[inline]
14pub fn crc32(data: &[u8]) -> u32 {
15    let mut h = Crc32::new();
16    h.update(data);
17    h.finalize()
18}
/// Decodes a little-endian `u32` from `buf`.
///
/// # Panics
///
/// Panics if `buf` is not exactly 4 bytes long.
#[inline]
pub fn read_u32_le(buf: &[u8]) -> u32 {
    let mut raw = [0u8; 4];
    // `copy_from_slice` enforces the exact-length requirement.
    raw.copy_from_slice(buf);
    u32::from_le_bytes(raw)
}
/// Encodes `v` as little-endian bytes into `out`.
///
/// # Panics
///
/// Panics if `out` is not exactly 4 bytes long.
#[inline]
pub fn write_u32_le(
    out: &mut [u8],
    v: u32,
) {
    let encoded = v.to_le_bytes();
    out.copy_from_slice(&encoded);
}
30
31// 写入文件头(包含魔数)
32fn write_header(file: &mut File) -> Result<()> {
33    file.seek(SeekFrom::Start(0))?;
34    let mut buf = [0u8; HEADER_LEN];
35    buf[..8].copy_from_slice(MAGIC);
36    file.write_all(&buf)?;
37    Ok(())
38}
39
40// 校验文件头(校验魔数)
41fn check_header(file: &mut File) -> Result<()> {
42    file.seek(SeekFrom::Start(0))?;
43    let mut hdr = [0u8; HEADER_LEN];
44    file.read_exact(&mut hdr)?;
45    if &hdr[..8] != MAGIC {
46        return Err(FileError::BadHeader);
47    }
48    Ok(())
49}
50
51#[derive(Debug)]
52pub struct Writer {
53    pub(crate) file: File,
54    buf: BufWriter<File>,
55    pub(crate) logical_end: u64,
56    prealloc_until: u64,
57    prealloc_chunk: u64,
58}
59
60impl Writer {
61    // 创建写入器; prealloc_chunk 为预分配块大小(0 表示不预分配)
62    pub fn create<P: AsRef<Path>>(
63        path: P,
64        prealloc_chunk: u64,
65    ) -> Result<Self> {
66        let mut file = OpenOptions::new()
67            .create(true)
68            .truncate(false)
69            .read(true)
70            .write(true)
71            .open(&path)?;
72
73        let meta_len = file.metadata()?.len();
74        if meta_len == 0 {
75            write_header(&mut file)?;
76        } else {
77            check_header(&mut file)?;
78        }
79
80        // 通过 mmap 扫描逻辑结尾(容忍尾部不完整记录)
81        let (logical_end, file_len) = {
82            let mmap = unsafe { MmapOptions::new().map(&file)? };
83            let l = scan_logical_end(&mmap)?;
84            (l, mmap.len() as u64)
85        };
86
87        let mut prealloc_until = file_len.max(logical_end);
88        if prealloc_chunk > 0 && prealloc_until < logical_end + prealloc_chunk {
89            prealloc_until =
90                (logical_end + prealloc_chunk).max(HEADER_LEN as u64);
91            file.set_len(prealloc_until)?;
92        }
93
94        file.seek(SeekFrom::Start(logical_end))?;
95        let buf = BufWriter::with_capacity(8 * 1024 * 1024, file.try_clone()?);
96
97        Ok(Self { file, buf, logical_end, prealloc_until, prealloc_chunk })
98    }
99
100    // 追加一条记录,返回该记录的起始偏移
101    pub fn append(
102        &mut self,
103        payload: &[u8],
104    ) -> Result<u64> {
105        if payload.is_empty() {
106            return Err(FileError::EmptyRecord);
107        }
108        if payload.len() > (u32::MAX as usize) {
109            return Err(FileError::RecordTooLarge(payload.len()));
110        }
111        let need = REC_HDR as u64 + payload.len() as u64;
112        self.ensure_capacity(need)?;
113
114        let offset = self.logical_end;
115        let mut hdr = [0u8; REC_HDR];
116        write_u32_le(&mut hdr[0..4], payload.len() as u32);
117        write_u32_le(&mut hdr[4..8], crc32(payload));
118        self.buf.write_all(&hdr)?;
119        self.buf.write_all(payload)?;
120        self.logical_end += need;
121        Ok(offset)
122    }
123
124    // 刷新缓冲区并同步到磁盘
125    pub fn flush(&mut self) -> Result<()> {
126        self.buf.flush()?;
127        self.file.sync_data()?;
128        Ok(())
129    }
130    // 当前逻辑长度
131    pub fn len(&self) -> u64 {
132        self.logical_end
133    }
134
135    // 检查是否为空
136    pub fn is_empty(&self) -> bool {
137        self.logical_end == HEADER_LEN as u64
138    }
139
140    // 确保物理空间足够; 按块扩容
141    fn ensure_capacity(
142        &mut self,
143        need: u64,
144    ) -> Result<()> {
145        if self.prealloc_chunk == 0 {
146            return Ok(());
147        }
148        let want = self.logical_end + need;
149        if want <= self.prealloc_until {
150            return Ok(());
151        }
152        let mut new_size = self.prealloc_until;
153        while new_size < want {
154            new_size += self.prealloc_chunk;
155        }
156        self.buf.flush()?;
157        self.file.set_len(new_size)?;
158        self.prealloc_until = new_size;
159        Ok(())
160    }
161}
162
163#[derive(Debug)]
164pub struct Reader {
165    pub(crate) _file: File, // 保持文件句柄存活以维持 mmap 有效性
166    pub(crate) mmap: Mmap,
167    pub(crate) logical_end: u64,
168}
169
170impl Reader {
171    // 打开只读映射
172    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
173        let mut file = OpenOptions::new().read(true).open(path)?;
174        check_header(&mut file)?;
175        let mmap = unsafe { MmapOptions::new().map(&file)? };
176        let logical_end = scan_logical_end(&mmap)?;
177        Ok(Self { _file: file, mmap, logical_end })
178    }
179    // 逻辑结尾
180    pub fn logical_len(&self) -> u64 {
181        self.logical_end
182    }
183    // 读取指定偏移的记录负载
184    pub fn get_at(
185        &self,
186        offset: u64,
187    ) -> Result<&[u8]> {
188        let end = usize::try_from(self.logical_end)
189            .map_err(|_| FileError::BadHeader)?;
190        let p = usize::try_from(offset).map_err(|_| FileError::BadHeader)?;
191        if p + REC_HDR > end {
192            return Err(FileError::BadHeader);
193        }
194        let len: usize = read_u32_le(&self.mmap[p..p + 4]) as usize;
195        let stored_crc = read_u32_le(&self.mmap[p + 4..p + 8]);
196        if len == 0 {
197            return Err(FileError::BadHeader);
198        }
199        let s = p + REC_HDR;
200        let e = s + len;
201        if e > end {
202            return Err(FileError::BadHeader);
203        }
204        let payload = &self.mmap[s..e];
205        if crc32(payload) != stored_crc {
206            return Err(FileError::CrcMismatch(offset));
207        }
208        Ok(payload)
209    }
210    // 迭代所有记录(校验 CRC,遇到损坏或不完整即停止)
211    pub fn iter(&self) -> Iter<'_> {
212        Iter { mmap: &self.mmap, p: HEADER_LEN, end: self.logical_end as usize }
213    }
214}
215
216pub struct Iter<'a> {
217    mmap: &'a Mmap,
218    p: usize,
219    end: usize,
220}
221impl<'a> Iterator for Iter<'a> {
222    type Item = &'a [u8];
223    fn next(&mut self) -> Option<Self::Item> {
224        if self.p + REC_HDR > self.end {
225            return None;
226        }
227        let len = read_u32_le(&self.mmap[self.p..self.p + 4]) as usize;
228        let stored_crc = read_u32_le(&self.mmap[self.p + 4..self.p + 8]);
229        if len == 0 {
230            return None;
231        }
232        let s = self.p + REC_HDR;
233        let e = s + len;
234        if e > self.end {
235            return None;
236        }
237        let payload = &self.mmap[s..e];
238        if crc32(payload) != stored_crc {
239            return None;
240        }
241        self.p = e;
242        Some(payload)
243    }
244}
245
246// 扫描逻辑结尾:从文件头开始按记录推进,直到遇到越界/校验失败/零长度
247pub fn scan_logical_end(mmap: &Mmap) -> Result<u64> {
248    if mmap.len() < HEADER_LEN {
249        return Err(FileError::BadHeader);
250    }
251    if &mmap[..8] != MAGIC {
252        return Err(FileError::BadHeader);
253    }
254    let mut p = HEADER_LEN;
255    let n = mmap.len();
256    while p + REC_HDR <= n {
257        let len = read_u32_le(&mmap[p..p + 4]) as usize;
258        if len == 0 {
259            break;
260        }
261        let s = p + REC_HDR;
262        let e = s + len;
263        if e > n {
264            break;
265        }
266        let stored_crc = read_u32_le(&mmap[p + 4..p + 8]);
267        if crc32(&mmap[s..e]) != stored_crc {
268            break;
269        }
270        p = e;
271    }
272    Ok(p as u64)
273}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Appending an empty payload must fail and leave the file holding only its header.
    #[test]
    fn reject_zero_length_records() {
        let tmp = tempdir().unwrap();
        let file_path = tmp.path().join("zero.mff");

        {
            let mut w = Writer::create(&file_path, 0).unwrap();
            let outcome = w.append(&[]);
            assert!(matches!(outcome, Err(FileError::EmptyRecord)));
            w.flush().unwrap();
            // `w` is dropped here, before the file is reopened for reading.
        }

        let r = Reader::open(&file_path).unwrap();
        assert_eq!(r.logical_len(), HEADER_LEN as u64);
        assert_eq!(r.iter().count(), 0);
    }
}
294}