synadb/mmap.rs
1// Copyright (c) 2025 SynaDB Contributors
2// Licensed under the SynaDB License. See LICENSE file for details.
3
4//! Memory-mapped file access for zero-copy reads.
5//!
6//! This module provides memory-mapped access to database files, enabling
7//! zero-copy reads for tensor data. This is particularly useful for large
8//! tensors where copying data would be expensive.
9//!
10//! # Features
11//!
12//! - Zero-copy access to tensor data via memory mapping
13//! - Direct slice access for f32 and f64 arrays
14//! - Safe bounds checking with clear error messages
15//!
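//! # Safety
//!
//! The typed accessors (`as_f32_slice`, `as_f64_slice`, ...) use unsafe code to
//! reinterpret mapped bytes as typed slices. The reinterpretation is sound only when:
//! - The offset and count describe a byte range that lies within the mapping
//! - The address of the first byte is aligned for the element type
//!   (`align_of::<f32>()`, `align_of::<f64>()`, ...)
//! - The data was originally written as the requested type
//!
//! Values are read in the platform's native byte order, so data stored as
//! little-endian is decoded correctly only on little-endian targets (most common).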
23//!
24//! # Examples
25//!
26//! ```rust,no_run
27//! use synadb::mmap::MmapReader;
28//!
29//! // Open a database file for memory-mapped reading
30//! let reader = MmapReader::open("data.db").unwrap();
31//!
32//! // Read raw bytes at an offset
33//! let bytes = reader.slice(0, 100);
34//!
35//! // Read f32 tensor data (zero-copy)
36//! let floats = reader.as_f32_slice(1024, 256);
37//! ```
38//!
39//! _Requirements: 2.4, 9.3_
40
41use memmap2::{Mmap, MmapOptions};
42use std::fs::File;
43use std::path::Path;
44
45use crate::error::{Result, SynaError};
46
47/// Memory-mapped database file for zero-copy reads.
48///
49/// This struct wraps a memory-mapped file and provides safe access
50/// to the underlying data. It's particularly useful for reading
51/// large tensor data without copying.
52///
53/// # Examples
54///
55/// ```rust,no_run
56/// use synadb::mmap::MmapReader;
57///
58/// let reader = MmapReader::open("data.db").unwrap();
59/// let data = reader.slice(0, 1024);
60/// println!("Read {} bytes", data.len());
61/// ```
62pub struct MmapReader {
63 mmap: Mmap,
64}
65
66impl MmapReader {
67 /// Open a file for memory-mapped reading.
68 ///
69 /// # Arguments
70 ///
71 /// * `path` - Path to the file to memory-map
72 ///
73 /// # Returns
74 ///
75 /// A new `MmapReader` instance.
76 ///
77 /// # Errors
78 ///
79 /// Returns `SynaError::Io` if the file cannot be opened or mapped.
80 ///
81 /// # Examples
82 ///
83 /// ```rust,no_run
84 /// use synadb::mmap::MmapReader;
85 ///
86 /// let reader = MmapReader::open("data.db").unwrap();
87 /// ```
88 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
89 let file = File::open(path)?;
90 // Safety: We're only reading from the file, and the file handle
91 // is kept alive by the Mmap struct internally.
92 let mmap = unsafe { MmapOptions::new().map(&file)? };
93 Ok(Self { mmap })
94 }
95
96 /// Get the total length of the memory-mapped file.
97 ///
98 /// # Returns
99 ///
100 /// The size of the file in bytes.
101 ///
102 /// # Examples
103 ///
104 /// ```rust,no_run
105 /// use synadb::mmap::MmapReader;
106 ///
107 /// let reader = MmapReader::open("data.db").unwrap();
108 /// println!("File size: {} bytes", reader.len());
109 /// ```
110 #[inline]
111 pub fn len(&self) -> usize {
112 self.mmap.len()
113 }
114
115 /// Check if the memory-mapped file is empty.
116 ///
117 /// # Returns
118 ///
119 /// `true` if the file has zero length, `false` otherwise.
120 #[inline]
121 pub fn is_empty(&self) -> bool {
122 self.mmap.is_empty()
123 }
124
125 /// Get a slice of bytes at the specified offset.
126 ///
127 /// # Arguments
128 ///
129 /// * `offset` - Starting byte offset
130 /// * `len` - Number of bytes to read
131 ///
132 /// # Returns
133 ///
134 /// A byte slice referencing the memory-mapped data.
135 ///
136 /// # Panics
137 ///
138 /// Panics if `offset + len` exceeds the file size.
139 /// Use [`try_slice`](Self::try_slice) for a non-panicking version.
140 ///
141 /// # Examples
142 ///
143 /// ```rust,no_run
144 /// use synadb::mmap::MmapReader;
145 ///
146 /// let reader = MmapReader::open("data.db").unwrap();
147 /// let header = reader.slice(0, 15); // Read 15-byte header
148 /// ```
149 #[inline]
150 pub fn slice(&self, offset: usize, len: usize) -> &[u8] {
151 &self.mmap[offset..offset + len]
152 }
153
154 /// Try to get a slice of bytes at the specified offset.
155 ///
156 /// This is a non-panicking version of [`slice`](Self::slice).
157 ///
158 /// # Arguments
159 ///
160 /// * `offset` - Starting byte offset
161 /// * `len` - Number of bytes to read
162 ///
163 /// # Returns
164 ///
165 /// `Some(&[u8])` if the range is valid, `None` otherwise.
166 ///
167 /// # Examples
168 ///
169 /// ```rust,no_run
170 /// use synadb::mmap::MmapReader;
171 ///
172 /// let reader = MmapReader::open("data.db").unwrap();
173 /// if let Some(data) = reader.try_slice(0, 100) {
174 /// println!("Read {} bytes", data.len());
175 /// }
176 /// ```
177 #[inline]
178 pub fn try_slice(&self, offset: usize, len: usize) -> Option<&[u8]> {
179 let end = offset.checked_add(len)?;
180 if end <= self.mmap.len() {
181 Some(&self.mmap[offset..end])
182 } else {
183 None
184 }
185 }
186
187 /// Get tensor data as f32 slice (zero-copy).
188 ///
189 /// This method reinterprets the raw bytes as a slice of f32 values
190 /// without copying the data. The data must have been written as
191 /// little-endian f32 values.
192 ///
193 /// # Arguments
194 ///
195 /// * `offset` - Starting byte offset (must be 4-byte aligned for best performance)
196 /// * `count` - Number of f32 elements to read
197 ///
198 /// # Returns
199 ///
200 /// A slice of f32 values referencing the memory-mapped data.
201 ///
202 /// # Panics
203 ///
204 /// Panics if the requested range exceeds the file size.
205 /// Use [`try_as_f32_slice`](Self::try_as_f32_slice) for a non-panicking version.
206 ///
207 /// # Safety
208 ///
209 /// This method uses unsafe code to reinterpret bytes as f32.
210 /// It is safe when:
211 /// - The data was originally written as f32 values
212 /// - The platform uses little-endian byte order
213 ///
214 /// # Examples
215 ///
216 /// ```rust,no_run
217 /// use synadb::mmap::MmapReader;
218 ///
219 /// let reader = MmapReader::open("vectors.db").unwrap();
220 /// let floats = reader.as_f32_slice(1024, 768); // Read 768-dim vector
221 /// println!("First value: {}", floats[0]);
222 /// ```
223 ///
224 /// _Requirements: 2.4_
225 #[inline]
226 pub fn as_f32_slice(&self, offset: usize, count: usize) -> &[f32] {
227 let byte_len = count * std::mem::size_of::<f32>();
228 let bytes = &self.mmap[offset..offset + byte_len];
229 // Safety: We ensure bounds are valid above. The caller is responsible
230 // for ensuring the data was written as f32 values.
231 unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f32, count) }
232 }
233
234 /// Try to get tensor data as f32 slice (zero-copy).
235 ///
236 /// This is a non-panicking version of [`as_f32_slice`](Self::as_f32_slice).
237 ///
238 /// # Arguments
239 ///
240 /// * `offset` - Starting byte offset
241 /// * `count` - Number of f32 elements to read
242 ///
243 /// # Returns
244 ///
245 /// `Ok(&[f32])` if the range is valid, `Err` otherwise.
246 ///
247 /// # Examples
248 ///
249 /// ```rust,no_run
250 /// use synadb::mmap::MmapReader;
251 ///
252 /// let reader = MmapReader::open("vectors.db").unwrap();
253 /// match reader.try_as_f32_slice(1024, 768) {
254 /// Ok(floats) => println!("Read {} floats", floats.len()),
255 /// Err(e) => println!("Error: {}", e),
256 /// }
257 /// ```
258 pub fn try_as_f32_slice(&self, offset: usize, count: usize) -> Result<&[f32]> {
259 let byte_len =
260 count
261 .checked_mul(std::mem::size_of::<f32>())
262 .ok_or(SynaError::ShapeMismatch {
263 data_size: usize::MAX,
264 expected_size: 0,
265 })?;
266
267 let end = offset
268 .checked_add(byte_len)
269 .ok_or_else(|| SynaError::ShapeMismatch {
270 data_size: usize::MAX,
271 expected_size: self.mmap.len(),
272 })?;
273
274 if end > self.mmap.len() {
275 return Err(SynaError::ShapeMismatch {
276 data_size: end,
277 expected_size: self.mmap.len(),
278 });
279 }
280
281 let bytes = &self.mmap[offset..end];
282 // Safety: We've verified bounds above
283 Ok(unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f32, count) })
284 }
285
286 /// Get tensor data as f64 slice (zero-copy).
287 ///
288 /// This method reinterprets the raw bytes as a slice of f64 values
289 /// without copying the data. The data must have been written as
290 /// little-endian f64 values.
291 ///
292 /// # Arguments
293 ///
294 /// * `offset` - Starting byte offset (must be 8-byte aligned for best performance)
295 /// * `count` - Number of f64 elements to read
296 ///
297 /// # Returns
298 ///
299 /// A slice of f64 values referencing the memory-mapped data.
300 ///
301 /// # Panics
302 ///
303 /// Panics if the requested range exceeds the file size.
304 /// Use [`try_as_f64_slice`](Self::try_as_f64_slice) for a non-panicking version.
305 ///
306 /// # Safety
307 ///
308 /// This method uses unsafe code to reinterpret bytes as f64.
309 /// It is safe when:
310 /// - The data was originally written as f64 values
311 /// - The platform uses little-endian byte order
312 ///
313 /// # Examples
314 ///
315 /// ```rust,no_run
316 /// use synadb::mmap::MmapReader;
317 ///
318 /// let reader = MmapReader::open("data.db").unwrap();
319 /// let doubles = reader.as_f64_slice(0, 100);
320 /// println!("Sum: {}", doubles.iter().sum::<f64>());
321 /// ```
322 ///
323 /// _Requirements: 2.4_
324 #[inline]
325 pub fn as_f64_slice(&self, offset: usize, count: usize) -> &[f64] {
326 let byte_len = count * std::mem::size_of::<f64>();
327 let bytes = &self.mmap[offset..offset + byte_len];
328 // Safety: We ensure bounds are valid above. The caller is responsible
329 // for ensuring the data was written as f64 values.
330 unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f64, count) }
331 }
332
333 /// Try to get tensor data as f64 slice (zero-copy).
334 ///
335 /// This is a non-panicking version of [`as_f64_slice`](Self::as_f64_slice).
336 ///
337 /// # Arguments
338 ///
339 /// * `offset` - Starting byte offset
340 /// * `count` - Number of f64 elements to read
341 ///
342 /// # Returns
343 ///
344 /// `Ok(&[f64])` if the range is valid, `Err` otherwise.
345 ///
346 /// # Examples
347 ///
348 /// ```rust,no_run
349 /// use synadb::mmap::MmapReader;
350 ///
351 /// let reader = MmapReader::open("data.db").unwrap();
352 /// match reader.try_as_f64_slice(0, 100) {
353 /// Ok(doubles) => println!("Read {} doubles", doubles.len()),
354 /// Err(e) => println!("Error: {}", e),
355 /// }
356 /// ```
357 pub fn try_as_f64_slice(&self, offset: usize, count: usize) -> Result<&[f64]> {
358 let byte_len =
359 count
360 .checked_mul(std::mem::size_of::<f64>())
361 .ok_or(SynaError::ShapeMismatch {
362 data_size: usize::MAX,
363 expected_size: 0,
364 })?;
365
366 let end = offset
367 .checked_add(byte_len)
368 .ok_or_else(|| SynaError::ShapeMismatch {
369 data_size: usize::MAX,
370 expected_size: self.mmap.len(),
371 })?;
372
373 if end > self.mmap.len() {
374 return Err(SynaError::ShapeMismatch {
375 data_size: end,
376 expected_size: self.mmap.len(),
377 });
378 }
379
380 let bytes = &self.mmap[offset..end];
381 // Safety: We've verified bounds above
382 Ok(unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const f64, count) })
383 }
384
385 /// Get tensor data as i32 slice (zero-copy).
386 ///
387 /// # Arguments
388 ///
389 /// * `offset` - Starting byte offset
390 /// * `count` - Number of i32 elements to read
391 ///
392 /// # Returns
393 ///
394 /// A slice of i32 values referencing the memory-mapped data.
395 ///
396 /// # Panics
397 ///
398 /// Panics if the requested range exceeds the file size.
399 #[inline]
400 pub fn as_i32_slice(&self, offset: usize, count: usize) -> &[i32] {
401 let byte_len = count * std::mem::size_of::<i32>();
402 let bytes = &self.mmap[offset..offset + byte_len];
403 unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const i32, count) }
404 }
405
406 /// Get tensor data as i64 slice (zero-copy).
407 ///
408 /// # Arguments
409 ///
410 /// * `offset` - Starting byte offset
411 /// * `count` - Number of i64 elements to read
412 ///
413 /// # Returns
414 ///
415 /// A slice of i64 values referencing the memory-mapped data.
416 ///
417 /// # Panics
418 ///
419 /// Panics if the requested range exceeds the file size.
420 #[inline]
421 pub fn as_i64_slice(&self, offset: usize, count: usize) -> &[i64] {
422 let byte_len = count * std::mem::size_of::<i64>();
423 let bytes = &self.mmap[offset..offset + byte_len];
424 unsafe { std::slice::from_raw_parts(bytes.as_ptr() as *const i64, count) }
425 }
426
427 /// Get the raw pointer to the memory-mapped data.
428 ///
429 /// This is useful for advanced use cases where direct pointer access
430 /// is needed, such as GPU memory transfers.
431 ///
432 /// # Safety
433 ///
434 /// The returned pointer is valid only as long as this `MmapReader`
435 /// instance exists. Do not use the pointer after dropping the reader.
436 ///
437 /// # Returns
438 ///
439 /// A raw pointer to the start of the memory-mapped region.
440 #[inline]
441 pub fn as_ptr(&self) -> *const u8 {
442 self.mmap.as_ptr()
443 }
444}
445
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file containing exactly `contents`, flushed so the
    /// bytes are visible to a subsequent mapping.
    fn temp_file_with(contents: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file.flush().unwrap();
        file
    }

    /// Maps `file` for reading. The caller keeps `file` alive for the test.
    fn reader_for(file: &NamedTempFile) -> MmapReader {
        MmapReader::open(file.path()).unwrap()
    }

    #[test]
    fn test_mmap_reader_open() {
        let file = temp_file_with(b"Hello, World!");
        let reader = reader_for(&file);

        assert_eq!(reader.len(), 13);
        assert!(!reader.is_empty());
    }

    #[test]
    fn test_mmap_reader_slice() {
        let file = temp_file_with(b"Hello, World!");
        let reader = reader_for(&file);

        let hello = reader.slice(0, 5);
        assert_eq!(hello, b"Hello");

        let world = reader.slice(7, 5);
        assert_eq!(world, b"World");
    }

    #[test]
    fn test_mmap_reader_try_slice() {
        let file = temp_file_with(b"Hello");
        let reader = reader_for(&file);

        // A range that fits inside the five mapped bytes.
        assert!(reader.try_slice(0, 5).is_some());

        // Ranges that end, or start, past the end of the mapping.
        assert!(reader.try_slice(0, 100).is_none());
        assert!(reader.try_slice(10, 1).is_none());
    }

    #[test]
    fn test_mmap_reader_f32_slice() {
        let payload: Vec<u8> = [1.0f32, 2.0, 3.0, 4.0]
            .iter()
            .flat_map(|v| v.to_le_bytes())
            .collect();
        let file = temp_file_with(&payload);
        let reader = reader_for(&file);

        let floats = reader.as_f32_slice(0, 4);

        assert_eq!(floats.len(), 4);
        assert_eq!(floats[0], 1.0);
        assert_eq!(floats[1], 2.0);
        assert_eq!(floats[2], 3.0);
        assert_eq!(floats[3], 4.0);
    }

    #[test]
    fn test_mmap_reader_f64_slice() {
        let payload: Vec<u8> = [1.5f64, 2.5, 3.5]
            .iter()
            .flat_map(|v| v.to_le_bytes())
            .collect();
        let file = temp_file_with(&payload);
        let reader = reader_for(&file);

        let doubles = reader.as_f64_slice(0, 3);

        assert_eq!(doubles.len(), 3);
        assert_eq!(doubles[0], 1.5);
        assert_eq!(doubles[1], 2.5);
        assert_eq!(doubles[2], 3.5);
    }

    #[test]
    fn test_mmap_reader_try_f32_slice_bounds() {
        let payload: Vec<u8> = [1.0f32, 2.0]
            .iter()
            .flat_map(|v| v.to_le_bytes())
            .collect();
        let file = temp_file_with(&payload);
        let reader = reader_for(&file);

        // Exactly the two stored values.
        assert!(reader.try_as_f32_slice(0, 2).is_ok());

        // Far more elements than the file holds.
        assert!(reader.try_as_f32_slice(0, 100).is_err());
    }

    #[test]
    fn test_mmap_reader_try_f64_slice_bounds() {
        let payload: Vec<u8> = [1.0f64, 2.0]
            .iter()
            .flat_map(|v| v.to_le_bytes())
            .collect();
        let file = temp_file_with(&payload);
        let reader = reader_for(&file);

        // Exactly the two stored values.
        assert!(reader.try_as_f64_slice(0, 2).is_ok());

        // Far more elements than the file holds.
        assert!(reader.try_as_f64_slice(0, 100).is_err());
    }

    #[test]
    fn test_mmap_reader_i32_slice() {
        let payload: Vec<u8> = [10i32, 20, 30]
            .iter()
            .flat_map(|v| v.to_le_bytes())
            .collect();
        let file = temp_file_with(&payload);
        let reader = reader_for(&file);

        let ints = reader.as_i32_slice(0, 3);

        assert_eq!(ints, &[10, 20, 30]);
    }

    #[test]
    fn test_mmap_reader_i64_slice() {
        let payload: Vec<u8> = [100i64, 200, 300]
            .iter()
            .flat_map(|v| v.to_le_bytes())
            .collect();
        let file = temp_file_with(&payload);
        let reader = reader_for(&file);

        let ints = reader.as_i64_slice(0, 3);

        assert_eq!(ints, &[100, 200, 300]);
    }

    #[test]
    fn test_mmap_reader_offset_access() {
        // Layout: 8-byte zeroed header followed by three f32 values.
        let mut payload = vec![0u8; 8];
        payload.extend([1.0f32, 2.0, 3.0].iter().flat_map(|v| v.to_le_bytes()));
        let file = temp_file_with(&payload);
        let reader = reader_for(&file);

        // The f32 data begins right after the header, at offset 8.
        let floats = reader.as_f32_slice(8, 3);
        assert_eq!(floats, &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_mmap_reader_empty_file() {
        let file = NamedTempFile::new().unwrap();
        let reader = reader_for(&file);

        assert_eq!(reader.len(), 0);
        assert!(reader.is_empty());
    }

    #[test]
    fn test_mmap_reader_as_ptr() {
        let file = temp_file_with(b"test");
        let reader = reader_for(&file);

        let first_byte = reader.as_ptr();

        // Reading through the pointer proves it addresses the mapped data.
        unsafe {
            assert_eq!(*first_byte, b't');
        }
    }
}