synadb/
mmap.rs

1// Copyright (c) 2025 SynaDB Contributors
2// Licensed under the SynaDB License. See LICENSE file for details.
3
4//! Memory-mapped file access for zero-copy reads.
5//!
6//! This module provides memory-mapped access to database files, enabling
7//! zero-copy reads for tensor data. This is particularly useful for large
8//! tensors where copying data would be expensive.
9//!
10//! # Features
11//!
12//! - Zero-copy access to tensor data via memory mapping
13//! - Direct slice access for f32 and f64 arrays
14//! - Safe bounds checking with clear error messages
15//!
//! # Safety
//!
//! The `as_f32_slice` and `as_f64_slice` methods use unsafe code to
//! reinterpret byte slices as typed slices. This is safe when:
//! - The offset and count are within bounds
//! - The start of the requested range is aligned for the element type
//!   (a multiple of `align_of::<f32>()` / `align_of::<f64>()`)
//! - The data was originally written as the requested type
//! - The platform uses little-endian byte order (most common)
23//!
24//! # Examples
25//!
26//! ```rust,no_run
27//! use synadb::mmap::MmapReader;
28//!
29//! // Open a database file for memory-mapped reading
30//! let reader = MmapReader::open("data.db").unwrap();
31//!
32//! // Read raw bytes at an offset
33//! let bytes = reader.slice(0, 100);
34//!
35//! // Read f32 tensor data (zero-copy)
36//! let floats = reader.as_f32_slice(1024, 256);
37//! ```
38//!
39//! _Requirements: 2.4, 9.3_
40
41use memmap2::{Mmap, MmapOptions};
42use std::fs::File;
43use std::path::Path;
44
45use crate::error::{Result, SynaError};
46
/// Memory-mapped database file for zero-copy reads.
///
/// This struct owns a single read-only [`memmap2::Mmap`] and exposes
/// bounds-checked views into it, either as raw bytes or reinterpreted
/// as numeric slices. Every slice handed out borrows from the reader,
/// so it cannot outlive the mapping.
///
/// # Examples
///
/// ```rust,no_run
/// use synadb::mmap::MmapReader;
///
/// let reader = MmapReader::open("data.db").unwrap();
/// let data = reader.slice(0, 1024);
/// println!("Read {} bytes", data.len());
/// ```
pub struct MmapReader {
    /// The mapped file contents; all accessors slice into this mapping.
    mmap: Mmap,
}
65
66impl MmapReader {
67    /// Open a file for memory-mapped reading.
68    ///
69    /// # Arguments
70    ///
71    /// * `path` - Path to the file to memory-map
72    ///
73    /// # Returns
74    ///
75    /// A new `MmapReader` instance.
76    ///
77    /// # Errors
78    ///
79    /// Returns `SynaError::Io` if the file cannot be opened or mapped.
80    ///
81    /// # Examples
82    ///
83    /// ```rust,no_run
84    /// use synadb::mmap::MmapReader;
85    ///
86    /// let reader = MmapReader::open("data.db").unwrap();
87    /// ```
88    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
89        let file = File::open(path)?;
90        // Safety: We're only reading from the file, and the file handle
91        // is kept alive by the Mmap struct internally.
92        let mmap = unsafe { MmapOptions::new().map(&file)? };
93        Ok(Self { mmap })
94    }
95
96    /// Get the total length of the memory-mapped file.
97    ///
98    /// # Returns
99    ///
100    /// The size of the file in bytes.
101    ///
102    /// # Examples
103    ///
104    /// ```rust,no_run
105    /// use synadb::mmap::MmapReader;
106    ///
107    /// let reader = MmapReader::open("data.db").unwrap();
108    /// println!("File size: {} bytes", reader.len());
109    /// ```
110    #[inline]
111    pub fn len(&self) -> usize {
112        self.mmap.len()
113    }
114
115    /// Check if the memory-mapped file is empty.
116    ///
117    /// # Returns
118    ///
119    /// `true` if the file has zero length, `false` otherwise.
120    #[inline]
121    pub fn is_empty(&self) -> bool {
122        self.mmap.is_empty()
123    }
124
125    /// Get a slice of bytes at the specified offset.
126    ///
127    /// # Arguments
128    ///
129    /// * `offset` - Starting byte offset
130    /// * `len` - Number of bytes to read
131    ///
132    /// # Returns
133    ///
134    /// A byte slice referencing the memory-mapped data.
135    ///
136    /// # Panics
137    ///
138    /// Panics if `offset + len` exceeds the file size.
139    /// Use [`try_slice`](Self::try_slice) for a non-panicking version.
140    ///
141    /// # Examples
142    ///
143    /// ```rust,no_run
144    /// use synadb::mmap::MmapReader;
145    ///
146    /// let reader = MmapReader::open("data.db").unwrap();
147    /// let header = reader.slice(0, 15); // Read 15-byte header
148    /// ```
149    #[inline]
150    pub fn slice(&self, offset: usize, len: usize) -> &[u8] {
151        &self.mmap[offset..offset + len]
152    }
153
154    /// Try to get a slice of bytes at the specified offset.
155    ///
156    /// This is a non-panicking version of [`slice`](Self::slice).
157    ///
158    /// # Arguments
159    ///
160    /// * `offset` - Starting byte offset
161    /// * `len` - Number of bytes to read
162    ///
163    /// # Returns
164    ///
165    /// `Some(&[u8])` if the range is valid, `None` otherwise.
166    ///
167    /// # Examples
168    ///
169    /// ```rust,no_run
170    /// use synadb::mmap::MmapReader;
171    ///
172    /// let reader = MmapReader::open("data.db").unwrap();
173    /// if let Some(data) = reader.try_slice(0, 100) {
174    ///     println!("Read {} bytes", data.len());
175    /// }
176    /// ```
177    #[inline]
178    pub fn try_slice(&self, offset: usize, len: usize) -> Option<&[u8]> {
179        let end = offset.checked_add(len)?;
180        if end <= self.mmap.len() {
181            Some(&self.mmap[offset..end])
182        } else {
183            None
184        }
185    }
186
187    /// Get tensor data as f32 slice (zero-copy).
188    ///
189    /// This method reinterprets the raw bytes as a slice of f32 values
190    /// without copying the data. The data must have been written as
191    /// little-endian f32 values.
192    ///
193    /// # Arguments
194    ///
195    /// * `offset` - Starting byte offset (must be 4-byte aligned for best performance)
196    /// * `count` - Number of f32 elements to read
197    ///
198    /// # Returns
199    ///
200    /// A slice of f32 values referencing the memory-mapped data.
201    ///
202    /// # Panics
203    ///
204    /// Panics if the requested range exceeds the file size.
205    /// Use [`try_as_f32_slice`](Self::try_as_f32_slice) for a non-panicking version.
206    ///
207    /// # Safety
208    ///
209    /// This method uses unsafe code to reinterpret bytes as f32.
210    /// It is safe when:
211    /// - The data was originally written as f32 values
212    /// - The platform uses little-endian byte order
213    ///
214    /// # Examples
215    ///
216    /// ```rust,no_run
217    /// use synadb::mmap::MmapReader;
218    ///
219    /// let reader = MmapReader::open("vectors.db").unwrap();
220    /// let floats = reader.as_f32_slice(1024, 768); // Read 768-dim vector
221    /// println!("First value: {}", floats[0]);
222    /// ```
223    ///
224    /// _Requirements: 2.4_
225    #[inline]
226    pub fn as_f32_slice(&self, offset: usize, count: usize) -> &[f32] {
227        let byte_len = count * std::mem::size_of::<f32>();
228        let bytes = &self.mmap[offset..offset + byte_len];
229        // Safety: We ensure bounds are valid above. The caller is responsible
230        // for ensuring the data was written as f32 values.
231        unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f32, count) }
232    }
233
234    /// Try to get tensor data as f32 slice (zero-copy).
235    ///
236    /// This is a non-panicking version of [`as_f32_slice`](Self::as_f32_slice).
237    ///
238    /// # Arguments
239    ///
240    /// * `offset` - Starting byte offset
241    /// * `count` - Number of f32 elements to read
242    ///
243    /// # Returns
244    ///
245    /// `Ok(&[f32])` if the range is valid, `Err` otherwise.
246    ///
247    /// # Examples
248    ///
249    /// ```rust,no_run
250    /// use synadb::mmap::MmapReader;
251    ///
252    /// let reader = MmapReader::open("vectors.db").unwrap();
253    /// match reader.try_as_f32_slice(1024, 768) {
254    ///     Ok(floats) => println!("Read {} floats", floats.len()),
255    ///     Err(e) => println!("Error: {}", e),
256    /// }
257    /// ```
258    pub fn try_as_f32_slice(&self, offset: usize, count: usize) -> Result<&[f32]> {
259        let byte_len =
260            count
261                .checked_mul(std::mem::size_of::<f32>())
262                .ok_or(SynaError::ShapeMismatch {
263                    data_size: usize::MAX,
264                    expected_size: 0,
265                })?;
266
267        let end = offset
268            .checked_add(byte_len)
269            .ok_or_else(|| SynaError::ShapeMismatch {
270                data_size: usize::MAX,
271                expected_size: self.mmap.len(),
272            })?;
273
274        if end > self.mmap.len() {
275            return Err(SynaError::ShapeMismatch {
276                data_size: end,
277                expected_size: self.mmap.len(),
278            });
279        }
280
281        let bytes = &self.mmap[offset..end];
282        // Safety: We've verified bounds above
283        Ok(unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f32, count) })
284    }
285
286    /// Get tensor data as f64 slice (zero-copy).
287    ///
288    /// This method reinterprets the raw bytes as a slice of f64 values
289    /// without copying the data. The data must have been written as
290    /// little-endian f64 values.
291    ///
292    /// # Arguments
293    ///
294    /// * `offset` - Starting byte offset (must be 8-byte aligned for best performance)
295    /// * `count` - Number of f64 elements to read
296    ///
297    /// # Returns
298    ///
299    /// A slice of f64 values referencing the memory-mapped data.
300    ///
301    /// # Panics
302    ///
303    /// Panics if the requested range exceeds the file size.
304    /// Use [`try_as_f64_slice`](Self::try_as_f64_slice) for a non-panicking version.
305    ///
306    /// # Safety
307    ///
308    /// This method uses unsafe code to reinterpret bytes as f64.
309    /// It is safe when:
310    /// - The data was originally written as f64 values
311    /// - The platform uses little-endian byte order
312    ///
313    /// # Examples
314    ///
315    /// ```rust,no_run
316    /// use synadb::mmap::MmapReader;
317    ///
318    /// let reader = MmapReader::open("data.db").unwrap();
319    /// let doubles = reader.as_f64_slice(0, 100);
320    /// println!("Sum: {}", doubles.iter().sum::<f64>());
321    /// ```
322    ///
323    /// _Requirements: 2.4_
324    #[inline]
325    pub fn as_f64_slice(&self, offset: usize, count: usize) -> &[f64] {
326        let byte_len = count * std::mem::size_of::<f64>();
327        let bytes = &self.mmap[offset..offset + byte_len];
328        // Safety: We ensure bounds are valid above. The caller is responsible
329        // for ensuring the data was written as f64 values.
330        unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f64, count) }
331    }
332
333    /// Try to get tensor data as f64 slice (zero-copy).
334    ///
335    /// This is a non-panicking version of [`as_f64_slice`](Self::as_f64_slice).
336    ///
337    /// # Arguments
338    ///
339    /// * `offset` - Starting byte offset
340    /// * `count` - Number of f64 elements to read
341    ///
342    /// # Returns
343    ///
344    /// `Ok(&[f64])` if the range is valid, `Err` otherwise.
345    ///
346    /// # Examples
347    ///
348    /// ```rust,no_run
349    /// use synadb::mmap::MmapReader;
350    ///
351    /// let reader = MmapReader::open("data.db").unwrap();
352    /// match reader.try_as_f64_slice(0, 100) {
353    ///     Ok(doubles) => println!("Read {} doubles", doubles.len()),
354    ///     Err(e) => println!("Error: {}", e),
355    /// }
356    /// ```
357    pub fn try_as_f64_slice(&self, offset: usize, count: usize) -> Result<&[f64]> {
358        let byte_len =
359            count
360                .checked_mul(std::mem::size_of::<f64>())
361                .ok_or(SynaError::ShapeMismatch {
362                    data_size: usize::MAX,
363                    expected_size: 0,
364                })?;
365
366        let end = offset
367            .checked_add(byte_len)
368            .ok_or_else(|| SynaError::ShapeMismatch {
369                data_size: usize::MAX,
370                expected_size: self.mmap.len(),
371            })?;
372
373        if end > self.mmap.len() {
374            return Err(SynaError::ShapeMismatch {
375                data_size: end,
376                expected_size: self.mmap.len(),
377            });
378        }
379
380        let bytes = &self.mmap[offset..end];
381        // Safety: We've verified bounds above
382        Ok(unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f64, count) })
383    }
384
385    /// Get tensor data as i32 slice (zero-copy).
386    ///
387    /// # Arguments
388    ///
389    /// * `offset` - Starting byte offset
390    /// * `count` - Number of i32 elements to read
391    ///
392    /// # Returns
393    ///
394    /// A slice of i32 values referencing the memory-mapped data.
395    ///
396    /// # Panics
397    ///
398    /// Panics if the requested range exceeds the file size.
399    #[inline]
400    pub fn as_i32_slice(&self, offset: usize, count: usize) -> &[i32] {
401        let byte_len = count * std::mem::size_of::<i32>();
402        let bytes = &self.mmap[offset..offset + byte_len];
403        unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const i32, count) }
404    }
405
406    /// Get tensor data as i64 slice (zero-copy).
407    ///
408    /// # Arguments
409    ///
410    /// * `offset` - Starting byte offset
411    /// * `count` - Number of i64 elements to read
412    ///
413    /// # Returns
414    ///
415    /// A slice of i64 values referencing the memory-mapped data.
416    ///
417    /// # Panics
418    ///
419    /// Panics if the requested range exceeds the file size.
420    #[inline]
421    pub fn as_i64_slice(&self, offset: usize, count: usize) -> &[i64] {
422        let byte_len = count * std::mem::size_of::<i64>();
423        let bytes = &self.mmap[offset..offset + byte_len];
424        unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const i64, count) }
425    }
426
427    /// Get the raw pointer to the memory-mapped data.
428    ///
429    /// This is useful for advanced use cases where direct pointer access
430    /// is needed, such as GPU memory transfers.
431    ///
432    /// # Safety
433    ///
434    /// The returned pointer is valid only as long as this `MmapReader`
435    /// instance exists. Do not use the pointer after dropping the reader.
436    ///
437    /// # Returns
438    ///
439    /// A raw pointer to the start of the memory-mapped region.
440    #[inline]
441    pub fn as_ptr(&self) -> *const u8 {
442        self.mmap.as_ptr()
443    }
444}
445
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Create a temporary file holding exactly `contents`, flushed to disk.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file.flush().unwrap();
        file
    }

    /// Create a temporary file from `contents` and map it.
    ///
    /// The temp file is returned alongside the reader so that it is not
    /// deleted while the test is still reading from the mapping.
    fn mapped(contents: &[u8]) -> (NamedTempFile, MmapReader) {
        let file = temp_file_with(contents);
        let reader = MmapReader::open(file.path()).unwrap();
        (file, reader)
    }

    /// Little-endian byte image of a sequence of f32 values.
    fn f32_bytes(values: &[f32]) -> Vec<u8> {
        let mut buf = Vec::new();
        for v in values {
            buf.extend_from_slice(&v.to_le_bytes());
        }
        buf
    }

    /// Little-endian byte image of a sequence of f64 values.
    fn f64_bytes(values: &[f64]) -> Vec<u8> {
        let mut buf = Vec::new();
        for v in values {
            buf.extend_from_slice(&v.to_le_bytes());
        }
        buf
    }

    #[test]
    fn test_mmap_reader_open() {
        let (_file, reader) = mapped(b"Hello, World!");

        assert_eq!(reader.len(), 13);
        assert!(!reader.is_empty());
    }

    #[test]
    fn test_mmap_reader_slice() {
        let (_file, reader) = mapped(b"Hello, World!");

        let hello = reader.slice(0, 5);
        assert_eq!(hello, b"Hello");

        let world = reader.slice(7, 5);
        assert_eq!(world, b"World");
    }

    #[test]
    fn test_mmap_reader_try_slice() {
        let (_file, reader) = mapped(b"Hello");

        // The whole file is a valid range.
        assert!(reader.try_slice(0, 5).is_some());

        // Ranges ending or starting past the end are rejected.
        assert!(reader.try_slice(0, 100).is_none());
        assert!(reader.try_slice(10, 1).is_none());
    }

    #[test]
    fn test_mmap_reader_f32_slice() {
        let (_file, reader) = mapped(&f32_bytes(&[1.0, 2.0, 3.0, 4.0]));

        let floats = reader.as_f32_slice(0, 4);

        assert_eq!(floats.len(), 4);
        assert_eq!(floats[0], 1.0);
        assert_eq!(floats[1], 2.0);
        assert_eq!(floats[2], 3.0);
        assert_eq!(floats[3], 4.0);
    }

    #[test]
    fn test_mmap_reader_f64_slice() {
        let (_file, reader) = mapped(&f64_bytes(&[1.5, 2.5, 3.5]));

        let doubles = reader.as_f64_slice(0, 3);

        assert_eq!(doubles.len(), 3);
        assert_eq!(doubles[0], 1.5);
        assert_eq!(doubles[1], 2.5);
        assert_eq!(doubles[2], 3.5);
    }

    #[test]
    fn test_mmap_reader_try_f32_slice_bounds() {
        let (_file, reader) = mapped(&f32_bytes(&[1.0, 2.0]));

        // Exactly the stored element count is accepted.
        assert!(reader.try_as_f32_slice(0, 2).is_ok());

        // Asking for more elements than the file holds is an error.
        assert!(reader.try_as_f32_slice(0, 100).is_err());
    }

    #[test]
    fn test_mmap_reader_try_f64_slice_bounds() {
        let (_file, reader) = mapped(&f64_bytes(&[1.0, 2.0]));

        // Exactly the stored element count is accepted.
        assert!(reader.try_as_f64_slice(0, 2).is_ok());

        // Asking for more elements than the file holds is an error.
        assert!(reader.try_as_f64_slice(0, 100).is_err());
    }

    #[test]
    fn test_mmap_reader_i32_slice() {
        let mut image = Vec::new();
        for v in &[10i32, 20, 30] {
            image.extend_from_slice(&v.to_le_bytes());
        }
        let (_file, reader) = mapped(&image);

        let ints = reader.as_i32_slice(0, 3);

        assert_eq!(ints, &[10, 20, 30]);
    }

    #[test]
    fn test_mmap_reader_i64_slice() {
        let mut image = Vec::new();
        for v in &[100i64, 200, 300] {
            image.extend_from_slice(&v.to_le_bytes());
        }
        let (_file, reader) = mapped(&image);

        let ints = reader.as_i64_slice(0, 3);

        assert_eq!(ints, &[100, 200, 300]);
    }

    #[test]
    fn test_mmap_reader_offset_access() {
        // File layout: an 8-byte zeroed header followed by three f32 values.
        let mut image = vec![0u8; 8];
        image.extend_from_slice(&f32_bytes(&[1.0, 2.0, 3.0]));
        let (_file, reader) = mapped(&image);

        // The payload begins right after the header, at byte 8.
        let floats = reader.as_f32_slice(8, 3);
        assert_eq!(floats, &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_mmap_reader_empty_file() {
        // Nothing is written, so the mapped file has zero length.
        let file = NamedTempFile::new().unwrap();
        let reader = MmapReader::open(file.path()).unwrap();

        assert_eq!(reader.len(), 0);
        assert!(reader.is_empty());
    }

    #[test]
    fn test_mmap_reader_as_ptr() {
        let (_file, reader) = mapped(b"test");

        let base = reader.as_ptr();

        // Reading the first byte through the pointer proves it is usable.
        unsafe {
            assert_eq!(*base, b't');
        }
    }
}