realm_db_reader/
realm.rs

1use std::sync::Arc;
2use std::{fmt::Debug, path::Path};
3
4use byteorder::{ByteOrder, LittleEndian};
5use memmap2::Mmap;
6use tracing::instrument;
7
8use crate::array::{Array, RealmRef};
9use crate::traits::Node;
10use crate::{Group, RealmFileError};
11
/// Fixed-size header stored at the very beginning of a Realm file.
#[derive(Clone, Copy)]
pub(crate) struct Header {
    /// The two top-ref slots. Each one points to a [`crate::group::Group`];
    /// bit 0 of `flags` selects which of the two slots is currently active.
    top_ref: [u64; 2],
    /// Magic bytes identifying the file type; checked against `Header::MAGIC` while parsing.
    magic: [u8; 4],
    /// The two file format version bytes. `Realm::open` only accepts `[9, 9]`.
    fmt_ver: [u8; 2],
    /// Reserved byte; read from the file but never interpreted.
    _reserved: u8,
    /// Flag bits: bit 0 picks the active top ref, bit 7 (`0x80`) marks an encrypted file.
    flags: u8,
}
23
24impl Debug for Header {
25    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26        f.debug_struct("Header")
27            .field("top_ref", &self.top_ref)
28            .field("fmt_ver", &self.fmt_ver)
29            .field("flags", &self.flags)
30            .finish()
31    }
32}
33
34impl Header {
35    const SIZE: usize = 24;
36    const MAGIC: [u8; 4] = *b"T-DB";
37
38    fn parse(buf: &[u8]) -> crate::RealmResult<Self> {
39        if buf.len() < Self::SIZE {
40            return Err(RealmFileError::InvalidRealmFile {
41                reason: "file too small for Realm header".to_string(),
42            });
43        }
44
45        let h = Header {
46            top_ref: [
47                LittleEndian::read_u64(&buf[0..8]),
48                LittleEndian::read_u64(&buf[8..16]),
49            ],
50            magic: buf[16..20].try_into().unwrap(),
51            fmt_ver: buf[20..22].try_into().unwrap(),
52            _reserved: buf[22],
53            flags: buf[23],
54        };
55        if h.magic != Self::MAGIC {
56            return Err(RealmFileError::InvalidRealmFile {
57                reason: "not a Realm file (magic mismatch)".to_string(),
58            });
59        }
60
61        Ok(h)
62    }
63
64    /// Choose the active top ref using the switch bit (bit 0 of `flags`).
65    pub(crate) fn current_top_ref(&self) -> RealmRef {
66        let idx = (self.flags & 1) as usize;
67        RealmRef::new(self.top_ref[idx] as usize)
68    }
69
70    fn is_encrypted(&self) -> bool {
71        self.flags & 0x80 != 0
72    }
73
74    fn file_format_version(&self) -> (u8, u8) {
75        (self.fmt_ver[0], self.fmt_ver[1])
76    }
77}
78
/// The header for a single node in a Realm file.
#[derive(Clone, Copy)]
pub(crate) struct NodeHeader {
    /// Raw flag byte. From the most significant bit down: inner-B+tree bit
    /// (`0x80`), has-refs bit (`0x40`), context bit (`0x20`), two width-scheme
    /// bits (`0x18`) and three encoded-width bits (`0x07`).
    pub(crate) flags: u8,
    /// Element count. Stored in the file as a 24-bit integer with the most
    /// significant byte first (big-endian), so parsed values fit in 24 bits.
    pub(crate) size: u32,
}
85
86impl Debug for NodeHeader {
87    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88        f.debug_struct("NodeHeader")
89            .field("is_inner_btree", &self.is_inner_bptree())
90            .field("has_refs", &self.has_refs())
91            .field("context_flag", &self.context_flag())
92            .field("width", &self.width())
93            .field("size", &self.size)
94            .finish()
95    }
96}
97
98impl NodeHeader {
99    pub(crate) const SIZE: usize = 8;
100    pub(crate) const DUMMY_CHECKSUM: u32 = 0x4141_4141;
101
102    /// Parse a node header from a buffer.
103    ///
104    /// Returns an error if the buffer is too small.
105    ///
106    /// Panics if the checksum is invalid.
107    pub(crate) fn parse(buf: &[u8]) -> crate::RealmResult<Self> {
108        if buf.len() < Self::SIZE {
109            return Err(RealmFileError::InvalidRealmFile {
110                reason: format!("buffer is too small to contain a node ({})", buf.len()),
111            });
112        }
113
114        let checksum = LittleEndian::read_u32(&buf[0..4]);
115        let flags = buf[4];
116        let size = ((buf[5] as u32) << 16) | ((buf[6] as u32) << 8) | (buf[7] as u32);
117
118        if checksum != Self::DUMMY_CHECKSUM {
119            return Err(RealmFileError::InvalidRealmFile {
120                reason: "invalid checksum".to_string(),
121            });
122        }
123
124        Ok(Self { flags, size })
125    }
126
127    /// Returns true if the node is an inner B+Tree node.
128    pub(crate) fn is_inner_bptree(&self) -> bool {
129        self.flags & 0x80 != 0
130    }
131
132    /// Returns true if the node has references.
133    pub(crate) fn has_refs(&self) -> bool {
134        self.flags & 0x40 != 0
135    }
136
137    /// Returns true if the context flag is set.
138    ///
139    /// The meaning of this value varies by context.
140    pub(crate) fn context_flag(&self) -> bool {
141        self.flags & 0x20 != 0
142    }
143
144    #[inline]
145    fn width_scheme(&self) -> u8 {
146        (self.flags & 0x18) >> 3
147    }
148
149    #[inline]
150    pub(crate) fn width(&self) -> u8 {
151        (1 << (self.flags & 0x07)) >> 1
152    }
153
154    pub(crate) fn payload_len(&self) -> usize {
155        let width = self.width() as u32;
156        let num_bytes = match self.width_scheme() {
157            0 => {
158                // Current assumption is that size is at most 2^24 and that width is at most 64.
159                // In that case the following will never overflow. (Assuming that size_t is at least 32 bits)
160                assert!(self.size < 0x1000000);
161                let num_bits = self.size * width;
162                (num_bits + 7) >> 3
163            }
164            1 => self.size * width,
165            2 => self.size,
166            _ => {
167                unreachable!("invalid width scheme");
168            }
169        };
170
171        // Ensure 8-byte alignment
172        // ((num_bytes + 7) & !7) as usize
173        num_bytes as usize
174    }
175}
176
177/// A reference to a Realm database.
178pub struct Realm {
179    mmap: Mmap,
180    pub(crate) hdr: Header,
181}
182
183impl Debug for Realm {
184    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
185        f.debug_struct("Realm").field("hdr", &self.hdr).finish()
186    }
187}
188
189impl Realm {
190    /// Open a Realm database.
191    ///
192    /// Returns an error if:
193    /// - The header is invalid.
194    /// - The file is encrypted.
195    /// - The file format version is not supported.
196    #[instrument(level = "debug")]
197    pub fn open(path: impl AsRef<Path> + Debug) -> crate::RealmResult<Self> {
198        let file = std::fs::File::open(path)?;
199        let mmap = unsafe { Mmap::map(&file)? };
200        let hdr = Header::parse(&mmap[..Header::SIZE])?;
201
202        if hdr.is_encrypted() {
203            return Err(RealmFileError::Unsupported {
204                reason: "Encrypted Realm files are not supported".to_string(),
205            });
206        }
207
208        if hdr.file_format_version() != (9, 9) {
209            return Err(RealmFileError::Unsupported {
210                reason: format!(
211                    "Unsupported Realm format version ({}.{}, supported: 9.9)",
212                    hdr.file_format_version().0,
213                    hdr.file_format_version().1,
214                ),
215            });
216        }
217
218        Ok(Realm { mmap, hdr })
219    }
220
221    pub(crate) fn slice(&self, ref_: RealmRef, len: usize) -> &[u8] {
222        let o = ref_.to_offset();
223        if o + len > self.mmap.len() {
224            panic!("offset 0x{o:X} outside file");
225        }
226        &self.mmap[o..o + len]
227    }
228
229    pub(crate) fn payload(&self, ref_: RealmRef, payload_len: usize) -> &[u8] {
230        let payload_offset = ref_ + NodeHeader::SIZE;
231        self.slice(payload_offset, payload_len)
232    }
233
234    pub(crate) fn header(&self, ref_: RealmRef) -> crate::RealmResult<NodeHeader> {
235        let bytes = self.slice(ref_, NodeHeader::SIZE);
236        NodeHeader::parse(bytes)
237    }
238
239    pub(crate) fn top_ref(&self) -> RealmRef {
240        self.hdr.current_top_ref()
241    }
242
243    /// Create a reference to the [`Group`] in this Realm database. The
244    /// [`Group`] is the main entrypoint for interacting with the tables.
245    pub fn into_group(self) -> crate::RealmResult<Group> {
246        let ref_ = self.top_ref();
247        let realm = Arc::new(self);
248        let array = Array::from_ref(Arc::clone(&realm), ref_)?;
249
250        Group::build(array)
251    }
252}
253
254#[derive(Clone)]
255pub(crate) struct RealmNode {
256    pub(crate) realm: Arc<Realm>,
257    pub(crate) ref_: RealmRef,
258    pub(crate) header: NodeHeader,
259    cached_payload_len: usize,
260}
261
262impl Debug for RealmNode {
263    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
264        let payload = self.payload();
265
266        f.debug_struct("RealmNode")
267            .field("ref_", &self.ref_)
268            .field("header", &self.header)
269            .field(
270                "payload",
271                &format!(
272                    "<{} byte{}>",
273                    payload.len(),
274                    if payload.len() == 1 { "" } else { "s" }
275                ),
276            )
277            .finish()
278    }
279}
280
281impl Node for RealmNode {
282    fn from_ref(realm: Arc<Realm>, ref_: RealmRef) -> crate::RealmResult<Self> {
283        let header = realm.header(ref_)?;
284        let cached_payload_len = header.payload_len();
285
286        Ok(Self {
287            realm,
288            ref_,
289            header,
290            cached_payload_len,
291        })
292    }
293}
294
295impl RealmNode {
296    pub(crate) fn payload(&self) -> &[u8] {
297        self.realm.payload(self.ref_, self.cached_payload_len)
298    }
299}
300
#[cfg(test)]
mod tests {
    use crate::realm::NodeHeader;

    /// Two 2-bit elements, bit-packed (width scheme 0), no flag bits set.
    #[test]
    fn test_node_header() {
        let bytes = [0x41, 0x41, 0x41, 0x41, 0b10, 0x00, 0x00, 0x02];
        let header = NodeHeader::parse(&bytes).unwrap();

        assert!(!header.is_inner_bptree());
        assert!(!header.has_refs());
        assert!(!header.context_flag());
        assert_eq!(header.width_scheme(), 0);
        assert_eq!(header.width(), 2);
        assert_eq!(header.size, 2);
        // 2 elements * 2 bits = 4 bits, rounded up to one whole byte.
        assert_eq!(header.payload_len(), 1);
    }

    /// Ten 32-bit elements with the has-refs bit set.
    #[test]
    fn test_node_header_with_refs() {
        let bytes = [0x41, 0x41, 0x41, 0x41, 0b0100_0110, 0x00, 0x00, 0x0A];
        let header = NodeHeader::parse(&bytes).unwrap();

        assert!(!header.is_inner_bptree());
        assert!(header.has_refs());
        assert!(!header.context_flag());
        assert_eq!(header.width_scheme(), 0);
        assert_eq!(header.width(), 32);
        assert_eq!(header.size, 10);
        // 10 elements * 32 bits = 320 bits = 40 bytes.
        assert_eq!(header.payload_len(), 40);
    }

    /// The element count is stored most significant byte first.
    #[test]
    fn test_node_header_size_byte_order() {
        let bytes = [0x41, 0x41, 0x41, 0x41, 0x00, 0x01, 0x02, 0x03];
        let header = NodeHeader::parse(&bytes).unwrap();

        assert_eq!(header.size, 0x01_02_03);
    }

    /// A buffer shorter than the header is an error, not a panic.
    #[test]
    fn test_node_header_rejects_short_buffer() {
        assert!(NodeHeader::parse(&[0x41; 7]).is_err());
    }

    /// A header whose checksum is not the dummy value is an error.
    #[test]
    fn test_node_header_rejects_bad_checksum() {
        let bytes = [0x41, 0x41, 0x41, 0x42, 0b10, 0x00, 0x00, 0x02];
        assert!(NodeHeader::parse(&bytes).is_err());
    }
}