Skip to main content

oxigdal_streaming/
mmap.rs

//! Memory-mapped file support for local reads.
//!
//! For local files, this module provides an in-memory, buffer-backed view that
//! simulates mmap semantics using pure Rust `std::fs` I/O. On platforms where
//! OS-level mmap is available, a future feature flag could swap in a proper
//! implementation without changing the public API.
7
8use std::fs::File;
9use std::io::{Read, Seek, SeekFrom};
10use std::path::{Path, PathBuf};
11
12use crate::error::StreamingError;
13
/// An in-memory copy of a file's bytes that offers mmap-like random access.
pub struct MappedFile {
    /// Bytes of the file held in memory.
    data: Vec<u8>,
    /// Size of the file in bytes.
    file_size: u64,
    /// Location the file was opened from.
    path: PathBuf,
}
20
21impl MappedFile {
22    /// Opens `path` and loads its entire contents into memory.
23    ///
24    /// # Errors
25    /// Returns [`StreamingError`] on any I/O failure.
26    pub fn open(path: impl AsRef<Path>) -> Result<Self, StreamingError> {
27        let path = path.as_ref().to_path_buf();
28        let mut file = File::open(&path)?;
29        let file_size = file.seek(SeekFrom::End(0))?;
30        file.seek(SeekFrom::Start(0))?;
31
32        let mut data = Vec::with_capacity(file_size as usize);
33        file.read_to_end(&mut data)?;
34
35        Ok(Self {
36            path,
37            data,
38            file_size,
39        })
40    }
41
42    /// Returns a slice covering bytes `[start, start + len)`.
43    ///
44    /// # Errors
45    /// Returns an error if the requested range falls outside the file.
46    pub fn read_range(&self, start: u64, len: usize) -> Result<&[u8], StreamingError> {
47        let start_usize = start as usize;
48        let end = start_usize + len;
49        if end > self.data.len() {
50            return Err(StreamingError::Other(format!(
51                "Range [{start_usize}, {end}) out of bounds (file size {})",
52                self.file_size
53            )));
54        }
55        Ok(&self.data[start_usize..end])
56    }
57
58    /// Returns a slice of all file bytes.
59    #[must_use]
60    pub fn as_slice(&self) -> &[u8] {
61        &self.data
62    }
63
64    /// Returns the file size in bytes.
65    #[must_use]
66    pub fn file_size(&self) -> u64 {
67        self.file_size
68    }
69
70    /// Returns the path used to open this file.
71    #[must_use]
72    pub fn path(&self) -> &Path {
73        &self.path
74    }
75
76    /// Returns `true` if the file contains no bytes.
77    #[must_use]
78    pub fn is_empty(&self) -> bool {
79        self.data.is_empty()
80    }
81
82    /// Reads multiple `(start, len)` ranges simultaneously.
83    ///
84    /// The returned `Vec` preserves the order of the input slice.
85    pub fn read_ranges(&self, ranges: &[(u64, usize)]) -> Vec<Result<&[u8], StreamingError>> {
86        ranges
87            .iter()
88            .map(|(start, len)| self.read_range(*start, *len))
89            .collect()
90    }
91}
92
93// ── Prefetch support ─────────────────────────────────────────────────────────
94
/// Priority level for a prefetch hint.
///
/// Variants are listed from least to most urgent. The enum carries no data, so
/// it is `Copy` and can be used as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrefetchPriority {
    /// Background, best-effort prefetch.
    Low,
    /// Default prefetch priority.
    Normal,
    /// Urgent prefetch — schedule first.
    High,
}
105
106impl PartialOrd for PrefetchPriority {
107    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
108        Some(self.cmp(other))
109    }
110}
111
112impl Ord for PrefetchPriority {
113    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
114        let rank = |p: &PrefetchPriority| match p {
115            PrefetchPriority::Low => 0u8,
116            PrefetchPriority::Normal => 1,
117            PrefetchPriority::High => 2,
118        };
119        rank(self).cmp(&rank(other))
120    }
121}
122
123/// A hint advising the scheduler to prefetch a region of a file.
124#[derive(Debug, Clone)]
125pub struct PrefetchHint {
126    /// Byte offset where the region starts.
127    pub offset: u64,
128    /// Number of bytes to prefetch.
129    pub length: usize,
130    /// Priority of this prefetch hint.
131    pub priority: PrefetchPriority,
132}
133
134/// Collects prefetch hints and can return them in priority order.
135pub struct PrefetchScheduler {
136    hints: Vec<PrefetchHint>,
137    max_prefetch_bytes: usize,
138}
139
140impl PrefetchScheduler {
141    /// Creates a new `PrefetchScheduler` with the given byte cap.
142    #[must_use]
143    pub fn new(max_prefetch_bytes: usize) -> Self {
144        Self {
145            hints: Vec::new(),
146            max_prefetch_bytes,
147        }
148    }
149
150    /// Adds a prefetch hint.
151    pub fn add_hint(&mut self, hint: PrefetchHint) {
152        self.hints.push(hint);
153    }
154
155    /// Returns hints sorted by descending priority, then ascending offset.
156    #[must_use]
157    pub fn sorted_hints(&self) -> Vec<&PrefetchHint> {
158        let mut sorted: Vec<&PrefetchHint> = self.hints.iter().collect();
159        sorted.sort_by(|a, b| b.priority.cmp(&a.priority).then(a.offset.cmp(&b.offset)));
160        sorted
161    }
162
163    /// Returns the number of hints currently held.
164    #[must_use]
165    pub fn hint_count(&self) -> usize {
166        self.hints.len()
167    }
168
169    /// Returns the sum of all hinted lengths.
170    #[must_use]
171    pub fn total_bytes_hinted(&self) -> usize {
172        self.hints.iter().map(|h| h.length).sum()
173    }
174
175    /// Returns the configured maximum prefetch byte limit.
176    #[must_use]
177    pub fn max_prefetch_bytes(&self) -> usize {
178        self.max_prefetch_bytes
179    }
180
181    /// Clears all stored hints.
182    pub fn clear(&mut self) {
183        self.hints.clear();
184    }
185}