1use std::io;
2use std::io::{Seek, SeekFrom, Write};
3use std::path::{Path, PathBuf};
4
5use blake3::Hasher as Blake3;
6use serde::{Deserialize, Serialize};
7
8use crate::error::{FileError, Result};
9use crate::record::{crc32, read_u32_le, Reader, Writer, HEADER_LEN, REC_HDR};
10
11const TAIL_MAGIC: &[u8; 8] = b"MFFTAIL1"; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
16pub enum SegmentType {
17 Meta,
18 Schema,
19 Snapshot,
20 Assets,
21 History,
22 Index,
23 Directory,
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct SegmentEntry {
29 pub kind: SegmentType,
30 pub offset: u64,
31 pub length: u64,
32 pub crc32: u32,
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct Directory {
38 pub entries: Vec<SegmentEntry>,
39 pub flags: u32,
40 pub file_hash: [u8; 32],
41}
42
43pub struct DocumentWriter {
45 w: Writer,
46 segments: Vec<SegmentEntry>,
47 path: PathBuf,
48}
49impl DocumentWriter {
50 pub fn begin<P: AsRef<Path>>(path: P) -> Result<Self> {
52 let p = path.as_ref().to_path_buf();
53 Ok(Self { w: Writer::create(&p, 0)?, segments: Vec::new(), path: p })
54 }
55 pub fn add_segment(
57 &mut self,
58 kind: SegmentType,
59 payload: &[u8],
60 ) -> Result<()> {
61 let off = self.w.len();
62 let _ = self.w.append(payload)?;
63 let crc = crc32(payload);
64 self.segments.push(SegmentEntry {
65 kind,
66 offset: off,
67 length: (REC_HDR as u64) + payload.len() as u64,
68 crc32: crc,
69 });
70 Ok(())
71 }
72 pub fn finalize(mut self) -> Result<()> {
74 self.w.flush()?;
76 let mut hasher = Blake3::new();
77 let r = Reader::open(&self.path)?;
78 for bytes in r.iter() {
79 hasher.update(bytes);
80 }
81 let hash = *hasher.finalize().as_bytes();
82 let dir =
84 Directory { entries: self.segments, flags: 0, file_hash: hash };
85 let bytes =
86 bincode::serde::encode_to_vec(&dir, bincode::config::standard())
87 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))
88 .map_err(FileError::Io)?;
89 let dir_off = self.w.append(&bytes)?;
90 self.w.flush()?;
91
92 {
95 let file = &mut self.w.file;
97 file.seek(SeekFrom::Start(self.w.logical_end))?;
98 file.write_all(TAIL_MAGIC)?;
99 file.write_all(&dir_off.to_le_bytes())?;
100 file.sync_data()?;
101 }
102 Ok(())
103 }
104}
105
106pub struct DocumentReader {
108 r: Reader,
109 dir: Directory,
110}
111impl DocumentReader {
112 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
114 let r = Reader::open(path)?;
115 let mut last_off = HEADER_LEN as u64;
117 let phys_len = r.mmap.len();
118 if phys_len >= 16 {
119 let tail = &r.mmap[phys_len - 16..phys_len];
120 if &tail[..8] == TAIL_MAGIC {
121 let mut off_bytes = [0u8; 8];
122 off_bytes.copy_from_slice(&tail[8..16]);
123 let off = u64::from_le_bytes(off_bytes);
124 if (off as usize) + REC_HDR <= r.logical_end as usize {
126 let len =
127 read_u32_le(&r.mmap[off as usize..off as usize + 4])
128 as usize;
129 let s = off as usize + REC_HDR;
130 let e = s + len;
131 if e <= r.logical_end as usize {
132 let stored_crc = read_u32_le(
133 &r.mmap[off as usize + 4..off as usize + 8],
134 );
135 if crc32(&r.mmap[s..e]) == stored_crc {
136 last_off = off;
137 }
138 }
139 }
140 }
141 }
142 if last_off == (HEADER_LEN as u64) {
144 let mut p = HEADER_LEN;
145 let end = r.logical_end as usize;
146 let mut fallback_last = HEADER_LEN as u64;
147 while p + REC_HDR <= end {
148 let len = read_u32_le(&r.mmap[p..p + 4]) as usize;
149 if len == 0 {
150 break;
151 }
152 let s = p + REC_HDR;
153 let e = s + len;
154 if e > end {
155 break;
156 }
157 let stored_crc = read_u32_le(&r.mmap[p + 4..p + 8]);
158 if crc32(&r.mmap[s..e]) != stored_crc {
159 break;
160 }
161 fallback_last = p as u64;
162 p = e;
163 }
164 last_off = fallback_last;
165 }
166 let dir_bytes = r.get_at(last_off)?;
167 let (dir, _) = bincode::serde::decode_from_slice::<Directory, _>(
168 dir_bytes,
169 bincode::config::standard(),
170 )
171 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))
172 .map_err(FileError::Io)?;
173 let mut hasher = Blake3::new();
175 let mut q = HEADER_LEN;
176 let end2 = last_off as usize;
177 while q + REC_HDR <= end2 {
178 let len = read_u32_le(&r.mmap[q..q + 4]) as usize;
179 if len == 0 {
180 break;
181 }
182 let s = q + REC_HDR;
183 let e = s + len;
184 if e > end2 {
185 break;
186 }
187 let stored_crc = read_u32_le(&r.mmap[q + 4..q + 8]);
188 if crc32(&r.mmap[s..e]) != stored_crc {
189 break;
190 }
191 hasher.update(&r.mmap[s..e]);
192 q = e;
193 }
194 let calc = *hasher.finalize().as_bytes();
195 if calc != dir.file_hash {
196 return Err(FileError::BadHeader);
197 }
198 Ok(Self { r, dir })
199 }
200 pub fn read_segment(
202 &self,
203 kind: SegmentType,
204 ) -> Result<Option<&[u8]>> {
205 if let Some(entry) =
206 self.dir.entries.iter().rev().find(|e| e.kind == kind)
207 {
208 let bytes = self.r.get_at(entry.offset)?;
209 if crc32(bytes) != entry.crc32 {
210 return Err(FileError::CrcMismatch(entry.offset));
211 }
212 return Ok(Some(bytes));
213 }
214 Ok(None)
215 }
216}