mf_file/
record.rs

1use crc32fast::Hasher as Crc32;
2use memmap2::{Mmap, MmapOptions};
3use std::fs::{File, OpenOptions};
4use std::io::{BufWriter, Read, Seek, SeekFrom, Write};
5use std::path::Path;
6
7use crate::error::{FileError, Result};
8
/// Magic bytes stored in the first 8 bytes of the file header.
pub const MAGIC: &[u8; 8] = b"MFFILE01";
/// File header length in bytes: 8-byte magic + 8-byte reserved area.
pub const HEADER_LEN: usize = 16;
/// Record header length in bytes: u32 payload length + u32 CRC32.
pub const REC_HDR: usize = 8;
12
13#[inline]
14pub fn crc32(data: &[u8]) -> u32 {
15    let mut h = Crc32::new();
16    h.update(data);
17    h.finalize()
18}
/// Decodes a little-endian `u32` from `buf`.
///
/// # Panics
///
/// Panics if `buf` is not exactly 4 bytes long.
#[inline]
pub fn read_u32_le(buf: &[u8]) -> u32 {
    let word: [u8; 4] = buf.try_into().unwrap();
    u32::from_le_bytes(word)
}
/// Encodes `v` as little-endian bytes into `out`.
///
/// # Panics
///
/// Panics if `out` is not exactly 4 bytes long.
#[inline]
pub fn write_u32_le(out: &mut [u8], v: u32) {
    let encoded = v.to_le_bytes();
    out.copy_from_slice(&encoded);
}
30
31// 写入文件头(包含魔数)
32fn write_header(file: &mut File) -> Result<()> {
33    file.seek(SeekFrom::Start(0))?;
34    let mut buf = [0u8; HEADER_LEN];
35    buf[..8].copy_from_slice(MAGIC);
36    file.write_all(&buf)?;
37    Ok(())
38}
39
40// 校验文件头(校验魔数)
41fn check_header(file: &mut File) -> Result<()> {
42    file.seek(SeekFrom::Start(0))?;
43    let mut hdr = [0u8; HEADER_LEN];
44    file.read_exact(&mut hdr)?;
45    if &hdr[..8] != MAGIC {
46        return Err(FileError::BadHeader);
47    }
48    Ok(())
49}
50
51#[derive(Debug)]
52pub struct Writer {
53    pub(crate) file: File,
54    buf: BufWriter<File>,
55    pub(crate) logical_end: u64,
56    prealloc_until: u64,
57    prealloc_chunk: u64,
58}
59
60impl Writer {
61    // 创建写入器; prealloc_chunk 为预分配块大小(0 表示不预分配)
62    pub fn create<P: AsRef<Path>>(
63        path: P,
64        prealloc_chunk: u64,
65    ) -> Result<Self> {
66        let mut file = OpenOptions::new()
67            .create(true)
68            .truncate(false)
69            .read(true)
70            .write(true)
71            .open(&path)?;
72
73        let meta_len = file.metadata()?.len();
74        if meta_len == 0 {
75            write_header(&mut file)?;
76        } else {
77            check_header(&mut file)?;
78        }
79
80        // 通过 mmap 扫描逻辑结尾(容忍尾部不完整记录)
81        let (logical_end, file_len) = {
82            let mmap = unsafe { MmapOptions::new().map(&file)? };
83            let l = scan_logical_end(&mmap)?;
84            (l, mmap.len() as u64)
85        };
86
87        let mut prealloc_until = file_len.max(logical_end);
88        if prealloc_chunk > 0 && prealloc_until < logical_end + prealloc_chunk {
89            prealloc_until =
90                (logical_end + prealloc_chunk).max(HEADER_LEN as u64);
91            file.set_len(prealloc_until)?;
92        }
93
94        file.seek(SeekFrom::Start(logical_end))?;
95        let buf = BufWriter::with_capacity(8 * 1024 * 1024, file.try_clone()?);
96
97        Ok(Self { file, buf, logical_end, prealloc_until, prealloc_chunk })
98    }
99
100    // 追加一条记录,返回该记录的起始偏移
101    pub fn append(
102        &mut self,
103        payload: &[u8],
104    ) -> Result<u64> {
105        if payload.len() > (u32::MAX as usize) {
106            return Err(FileError::RecordTooLarge(payload.len()));
107        }
108        let need = REC_HDR as u64 + payload.len() as u64;
109        self.ensure_capacity(need)?;
110
111        let offset = self.logical_end;
112        let mut hdr = [0u8; REC_HDR];
113        write_u32_le(&mut hdr[0..4], payload.len() as u32);
114        write_u32_le(&mut hdr[4..8], crc32(payload));
115        self.buf.write_all(&hdr)?;
116        self.buf.write_all(payload)?;
117        self.logical_end += need;
118        Ok(offset)
119    }
120
121    // 刷新缓冲区并同步到磁盘
122    pub fn flush(&mut self) -> Result<()> {
123        self.buf.flush()?;
124        self.file.sync_data()?;
125        Ok(())
126    }
127    // 当前逻辑长度
128    pub fn len(&self) -> u64 {
129        self.logical_end
130    }
131
132    // 检查是否为空
133    pub fn is_empty(&self) -> bool {
134        self.logical_end == HEADER_LEN as u64
135    }
136
137    // 确保物理空间足够; 按块扩容
138    fn ensure_capacity(
139        &mut self,
140        need: u64,
141    ) -> Result<()> {
142        if self.prealloc_chunk == 0 {
143            return Ok(());
144        }
145        let want = self.logical_end + need;
146        if want <= self.prealloc_until {
147            return Ok(());
148        }
149        let mut new_size = self.prealloc_until;
150        while new_size < want {
151            new_size += self.prealloc_chunk;
152        }
153        self.buf.flush()?;
154        self.file.set_len(new_size)?;
155        self.prealloc_until = new_size;
156        Ok(())
157    }
158}
159
160#[derive(Debug)]
161pub struct Reader {
162    pub(crate) _file: File, // 保持文件句柄存活以维持 mmap 有效性
163    pub(crate) mmap: Mmap,
164    pub(crate) logical_end: u64,
165}
166
167impl Reader {
168    // 打开只读映射
169    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
170        let mut file = OpenOptions::new().read(true).open(path)?;
171        check_header(&mut file)?;
172        let mmap = unsafe { MmapOptions::new().map(&file)? };
173        let logical_end = scan_logical_end(&mmap)?;
174        Ok(Self { _file: file, mmap, logical_end })
175    }
176    // 逻辑结尾
177    pub fn logical_len(&self) -> u64 {
178        self.logical_end
179    }
180    // 读取指定偏移的记录负载
181    pub fn get_at(
182        &self,
183        offset: u64,
184    ) -> Result<&[u8]> {
185        let end = self.logical_end as usize;
186        let p = offset as usize;
187        if p + REC_HDR > end {
188            return Err(FileError::BadHeader);
189        }
190        let len: usize = read_u32_le(&self.mmap[p..p + 4]) as usize;
191        let stored_crc = read_u32_le(&self.mmap[p + 4..p + 8]);
192        if len == 0 {
193            return Err(FileError::BadHeader);
194        }
195        let s = p + REC_HDR;
196        let e = s + len;
197        if e > end {
198            return Err(FileError::BadHeader);
199        }
200        let payload = &self.mmap[s..e];
201        if crc32(payload) != stored_crc {
202            return Err(FileError::CrcMismatch(offset));
203        }
204        Ok(payload)
205    }
206    // 迭代所有记录(校验 CRC,遇到损坏或不完整即停止)
207    pub fn iter(&self) -> Iter<'_> {
208        Iter { mmap: &self.mmap, p: HEADER_LEN, end: self.logical_end as usize }
209    }
210}
211
212pub struct Iter<'a> {
213    mmap: &'a Mmap,
214    p: usize,
215    end: usize,
216}
217impl<'a> Iterator for Iter<'a> {
218    type Item = &'a [u8];
219    fn next(&mut self) -> Option<Self::Item> {
220        if self.p + REC_HDR > self.end {
221            return None;
222        }
223        let len = read_u32_le(&self.mmap[self.p..self.p + 4]) as usize;
224        let stored_crc = read_u32_le(&self.mmap[self.p + 4..self.p + 8]);
225        if len == 0 {
226            return None;
227        }
228        let s = self.p + REC_HDR;
229        let e = s + len;
230        if e > self.end {
231            return None;
232        }
233        let payload = &self.mmap[s..e];
234        if crc32(payload) != stored_crc {
235            return None;
236        }
237        self.p = e;
238        Some(payload)
239    }
240}
241
242// 扫描逻辑结尾:从文件头开始按记录推进,直到遇到越界/校验失败/零长度
243pub fn scan_logical_end(mmap: &Mmap) -> Result<u64> {
244    if mmap.len() < HEADER_LEN {
245        return Err(FileError::BadHeader);
246    }
247    if &mmap[..8] != MAGIC {
248        return Err(FileError::BadHeader);
249    }
250    let mut p = HEADER_LEN;
251    let n = mmap.len();
252    while p + REC_HDR <= n {
253        let len = read_u32_le(&mmap[p..p + 4]) as usize;
254        if len == 0 {
255            break;
256        }
257        let s = p + REC_HDR;
258        let e = s + len;
259        if e > n {
260            break;
261        }
262        let stored_crc = read_u32_le(&mmap[p + 4..p + 8]);
263        if crc32(&mmap[s..e]) != stored_crc {
264            break;
265        }
266        p = e;
267    }
268    Ok(p as u64)
269}