realm_db_reader/
realm.rs

1use std::sync::Arc;
2use std::{fmt::Debug, path::Path};
3
4use anyhow::bail;
5use byteorder::{ByteOrder, LittleEndian};
6use memmap2::Mmap;
7use tracing::instrument;
8
9use crate::Group;
10use crate::array::{Array, RealmRef};
11use crate::traits::Node;
12
/// On-disk header found at the very start of a Realm file.
#[derive(Clone, Copy)]
pub(crate) struct Header {
    /// Two candidate top references, each pointing to a [`crate::group::Group`].
    /// Bit 0 of `flags` selects which of the two is currently active.
    top_ref: [u64; 2],
    /// File signature bytes.
    magic: [u8; 4],
    /// The two file-format version bytes, in file order.
    fmt_ver: [u8; 2],
    /// Reserved byte; stored but not interpreted here.
    _reserved: u8,
    /// Flag bits; the least significant bit picks the active top ref.
    flags: u8,
}

impl Debug for Header {
    /// Formats the header, showing only `top_ref`, `fmt_ver` and `flags`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Header {
            top_ref,
            fmt_ver,
            flags,
            ..
        } = self;

        let mut out = f.debug_struct("Header");
        out.field("top_ref", top_ref);
        out.field("fmt_ver", fmt_ver);
        out.field("flags", flags);
        out.finish()
    }
}
34
35impl Header {
36    const SIZE: usize = 24;
37    const MAGIC: [u8; 4] = *b"T-DB";
38
39    fn parse(buf: &[u8]) -> anyhow::Result<Self> {
40        if buf.len() < Self::SIZE {
41            bail!("file too small for Realm header");
42        }
43
44        let h = Header {
45            top_ref: [
46                LittleEndian::read_u64(&buf[0..8]),
47                LittleEndian::read_u64(&buf[8..16]),
48            ],
49            magic: buf[16..20].try_into().unwrap(),
50            fmt_ver: buf[20..22].try_into().unwrap(),
51            _reserved: buf[22],
52            flags: buf[23],
53        };
54        if h.magic != Self::MAGIC {
55            bail!("not a Realm file (magic mismatch)");
56        }
57
58        Ok(h)
59    }
60
61    /// Choose the active top ref using the switch bit (bit 0 of `flags`).
62    pub(crate) fn current_top_ref(&self) -> RealmRef {
63        let idx = (self.flags & 1) as usize;
64        RealmRef::new(self.top_ref[idx] as usize)
65    }
66
67    fn is_encrypted(&self) -> bool {
68        self.flags & 0x80 != 0
69    }
70
71    fn file_format_version(&self) -> (u8, u8) {
72        (self.fmt_ver[0], self.fmt_ver[1])
73    }
74}
75
76/// The header for a single node in a Realm file.
77#[derive(Clone, Copy)]
78pub(crate) struct NodeHeader {
79    pub(crate) flags: u8,
80    pub(crate) size: u32, // 24-bit little-endian count
81}
82
83impl Debug for NodeHeader {
84    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85        f.debug_struct("NodeHeader")
86            .field("is_inner_btree", &self.is_inner_bptree())
87            .field("has_refs", &self.has_refs())
88            .field("context_flag", &self.context_flag())
89            .field("width", &self.width())
90            .field("size", &self.size)
91            .finish()
92    }
93}
94
95impl NodeHeader {
96    pub(crate) const SIZE: usize = 8;
97    pub(crate) const DUMMY_CHECKSUM: u32 = 0x4141_4141;
98
99    /// Parse a node header from a buffer.
100    ///
101    /// Returns an error if the buffer is too small.
102    ///
103    /// Panics if the checksum is invalid.
104    pub(crate) fn parse(buf: &[u8]) -> anyhow::Result<Self> {
105        if buf.len() < Self::SIZE {
106            bail!("node too small");
107        }
108
109        let checksum = LittleEndian::read_u32(&buf[0..4]);
110        let flags = buf[4];
111        let size = ((buf[5] as u32) << 16) | ((buf[6] as u32) << 8) | (buf[7] as u32);
112
113        assert_eq!(checksum, Self::DUMMY_CHECKSUM, "invalid checksum");
114
115        Ok(Self { flags, size })
116    }
117
118    /// Returns true if the node is an inner B+Tree node.
119    pub(crate) fn is_inner_bptree(&self) -> bool {
120        self.flags & 0x80 != 0
121    }
122
123    /// Returns true if the node has references.
124    pub(crate) fn has_refs(&self) -> bool {
125        self.flags & 0x40 != 0
126    }
127
128    /// Returns true if the context flag is set.
129    ///
130    /// The meaning of this value varies by context.
131    pub(crate) fn context_flag(&self) -> bool {
132        self.flags & 0x20 != 0
133    }
134
135    #[inline]
136    fn width_scheme(&self) -> u8 {
137        (self.flags & 0x18) >> 3
138    }
139
140    #[inline]
141    pub(crate) fn width(&self) -> u8 {
142        (1 << (self.flags & 0x07)) >> 1
143    }
144
145    pub(crate) fn payload_len(&self) -> usize {
146        let width = self.width() as u32;
147        let num_bytes = match self.width_scheme() {
148            0 => {
149                // Current assumption is that size is at most 2^24 and that width is at most 64.
150                // In that case the following will never overflow. (Assuming that size_t is at least 32 bits)
151                assert!(self.size < 0x1000000);
152                let num_bits = self.size * width;
153                (num_bits + 7) >> 3
154            }
155            1 => self.size * width,
156            2 => self.size,
157            _ => {
158                unreachable!("invalid width scheme");
159            }
160        };
161
162        // Ensure 8-byte alignment
163        // ((num_bytes + 7) & !7) as usize
164        num_bytes as usize
165    }
166}
167
168/// A reference to a Realm database.
169pub struct Realm {
170    mmap: Mmap,
171    pub(crate) hdr: Header,
172}
173
174impl Debug for Realm {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        f.debug_struct("Realm").field("hdr", &self.hdr).finish()
177    }
178}
179
180impl Realm {
181    /// Open a Realm database.
182    ///
183    /// Returns an error if:
184    /// - The header is invalid.
185    /// - The file is encrypted.
186    /// - The file format version is not supported.
187    #[instrument(target = "Realm", level = "debug")]
188    pub fn open(path: impl AsRef<Path> + Debug) -> anyhow::Result<Self> {
189        let file = std::fs::File::open(path)?;
190        let mmap = unsafe { Mmap::map(&file)? };
191        let hdr = Header::parse(&mmap[..Header::SIZE])?;
192
193        if hdr.is_encrypted() {
194            bail!("Encrypted Realm files are not supported");
195        }
196
197        if hdr.file_format_version() != (9, 9) {
198            bail!(
199                "Unsupported Realm format version ({}.{}, supported: 9.9)",
200                hdr.file_format_version().0,
201                hdr.file_format_version().1,
202            );
203        }
204
205        Ok(Realm { mmap, hdr })
206    }
207
208    pub(crate) fn slice(&self, ref_: RealmRef, len: usize) -> &[u8] {
209        let o = ref_.to_offset();
210        if o + len > self.mmap.len() {
211            panic!("offset 0x{o:X} outside file");
212        }
213        &self.mmap[o..o + len]
214    }
215
216    pub(crate) fn payload(&self, ref_: RealmRef, payload_len: usize) -> &[u8] {
217        let payload_offset = ref_ + NodeHeader::SIZE;
218        self.slice(payload_offset, payload_len)
219    }
220
221    pub(crate) fn header(&self, ref_: RealmRef) -> anyhow::Result<NodeHeader> {
222        let bytes = self.slice(ref_, NodeHeader::SIZE);
223        NodeHeader::parse(bytes)
224    }
225
226    pub(crate) fn top_ref(&self) -> RealmRef {
227        self.hdr.current_top_ref()
228    }
229
230    /// Create a reference to the [`Group`] in this Realm database. The
231    /// [`Group`] is the main entrypoint for interacting with the tables.
232    pub fn into_group(self) -> anyhow::Result<Group> {
233        let ref_ = self.top_ref();
234        let realm = Arc::new(self);
235        let array = Array::from_ref(Arc::clone(&realm), ref_)?;
236
237        Group::build(array)
238    }
239}
240
241#[derive(Clone)]
242pub(crate) struct RealmNode {
243    pub(crate) realm: Arc<Realm>,
244    pub(crate) ref_: RealmRef,
245    pub(crate) header: NodeHeader,
246    cached_payload_len: usize,
247}
248
249impl Debug for RealmNode {
250    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
251        let payload = self.payload();
252
253        f.debug_struct("RealmNode")
254            .field("ref_", &self.ref_)
255            .field("header", &self.header)
256            .field(
257                "payload",
258                &format!(
259                    "<{} byte{}>",
260                    payload.len(),
261                    if payload.len() == 1 { "" } else { "s" }
262                ),
263            )
264            .finish()
265    }
266}
267
268impl Node for RealmNode {
269    fn from_ref(realm: Arc<Realm>, ref_: RealmRef) -> anyhow::Result<Self> {
270        let header = realm.header(ref_)?;
271        let cached_payload_len = header.payload_len();
272
273        Ok(Self {
274            realm,
275            ref_,
276            header,
277            cached_payload_len,
278        })
279    }
280}
281
282impl RealmNode {
283    pub(crate) fn payload(&self) -> &[u8] {
284        self.realm.payload(self.ref_, self.cached_payload_len)
285    }
286}
287
#[cfg(test)]
mod tests {
    use crate::realm::NodeHeader;

    /// Parses two hand-built node headers and checks every decoded field,
    /// including the (unaligned) payload length.
    #[test]
    fn test_node_header() {
        // Checksum "AAAA", flags 0b0000_0010 (width index 2 -> 2-bit
        // elements, width scheme 0), size bytes 00 00 02 -> 2 elements.
        let bytes = [0x41, 0x41, 0x41, 0x41, 0b10, 0x00, 0x00, 0x02];
        let header = NodeHeader::parse(&bytes).unwrap();

        assert!(!header.is_inner_bptree());
        assert!(!header.has_refs());
        assert!(!header.context_flag());
        assert_eq!(header.width_scheme(), 0);
        assert_eq!(header.width(), 2);
        assert_eq!(header.size, 2);
        // 2 elements * 2 bits = 4 bits -> rounded up to 1 byte; no 8-byte
        // alignment is applied by `payload_len`.
        assert_eq!(header.payload_len(), 1);

        // Checksum "AAAA", flags 0b0100_0110 (has_refs, width index 6 ->
        // 32-bit elements, width scheme 0), size bytes 00 00 0A -> 10 elements.
        let bytes = [0x41, 0x41, 0x41, 0x41, 0b01000110, 0x00, 0x00, 0x0A];
        let header = NodeHeader::parse(&bytes).unwrap();

        assert!(!header.is_inner_bptree());
        assert!(header.has_refs());
        assert!(!header.context_flag());
        assert_eq!(header.width_scheme(), 0);
        assert_eq!(header.width(), 32);
        assert_eq!(header.size, 10);
        // 10 elements * 32 bits = 320 bits -> 40 bytes.
        assert_eq!(header.payload_len(), 40);
    }
}
337}