mmap_io/
iterator.rs

1//! Iterator-based access for efficient sequential processing of memory-mapped files.
2
3use crate::errors::Result;
4use crate::mmap::MemoryMappedFile;
5use crate::utils::page_size;
6use std::marker::PhantomData;
7
8/// Iterator over fixed-size chunks of a memory-mapped file.
9///
10/// # Examples
11///
12/// ```no_run
13/// use mmap_io::MemoryMappedFile;
14///
15/// let mmap = MemoryMappedFile::open_ro("data.bin")?;
16///
17/// // Iterate over 4KB chunks
18/// for (offset, chunk) in mmap.chunks(4096).enumerate() {
19///     let chunk_data = chunk?;
20///     println!("Chunk {} at offset {}: {} bytes",
21///              offset, offset * 4096, chunk_data.len());
22/// }
23/// # Ok::<(), mmap_io::MmapIoError>(())
24/// ```
25pub struct ChunkIterator<'a> {
26    mmap: &'a MemoryMappedFile,
27    chunk_size: usize,
28    current_offset: u64,
29    total_len: u64,
30    // Reusable buffer to avoid allocations on each iteration
31    buffer: Vec<u8>,
32}
33
34impl<'a> ChunkIterator<'a> {
35    /// Create a new chunk iterator.
36    pub(crate) fn new(mmap: &'a MemoryMappedFile, chunk_size: usize) -> Result<Self> {
37        let total_len = mmap.current_len()?;
38        // Pre-allocate buffer with chunk_size capacity
39        let buffer = Vec::with_capacity(chunk_size);
40        Ok(Self {
41            mmap,
42            chunk_size,
43            current_offset: 0,
44            total_len,
45            buffer,
46        })
47    }
48}
49
50impl<'a> Iterator for ChunkIterator<'a> {
51    type Item = Result<Vec<u8>>;
52
53    fn next(&mut self) -> Option<Self::Item> {
54        if self.current_offset >= self.total_len {
55            return None;
56        }
57
58        let remaining = self.total_len - self.current_offset;
59        let chunk_len = remaining.min(self.chunk_size as u64);
60
61        // Resize the reusable buffer to the exact chunk size needed
62        self.buffer.resize(chunk_len as usize, 0);
63
64        // For RW mappings, we need to use read_into
65        match self.mmap.read_into(self.current_offset, &mut self.buffer) {
66            Ok(()) => {
67                self.current_offset += chunk_len;
68                // Clone the buffer data to return ownership
69                // This is necessary because we reuse the buffer
70                Some(Ok(self.buffer.clone()))
71            }
72            Err(e) => Some(Err(e)),
73        }
74    }
75
76    fn size_hint(&self) -> (usize, Option<usize>) {
77        let remaining = self.total_len.saturating_sub(self.current_offset);
78        let chunks = (remaining as usize).div_ceil(self.chunk_size);
79        (chunks, Some(chunks))
80    }
81}
82
83impl<'a> ExactSizeIterator for ChunkIterator<'a> {}
84
85/// Iterator over page-aligned chunks of a memory-mapped file.
86///
87/// Pages are aligned to the system's page size for optimal performance.
88///
89/// # Examples
90///
91/// ```no_run
92/// use mmap_io::MemoryMappedFile;
93///
94/// let mmap = MemoryMappedFile::open_ro("data.bin")?;
95///
96/// // Iterate over system pages
97/// for page in mmap.pages() {
98///     let page_data = page?;
99///     // Process page...
100/// }
101/// # Ok::<(), mmap_io::MmapIoError>(())
102/// ```
103pub struct PageIterator<'a> {
104    inner: ChunkIterator<'a>,
105}
106
107impl<'a> PageIterator<'a> {
108    /// Create a new page iterator.
109    pub(crate) fn new(mmap: &'a MemoryMappedFile) -> Result<Self> {
110        let ps = page_size();
111        Ok(Self {
112            inner: ChunkIterator::new(mmap, ps)?,
113        })
114    }
115}
116
117impl<'a> Iterator for PageIterator<'a> {
118    type Item = Result<Vec<u8>>;
119
120    fn next(&mut self) -> Option<Self::Item> {
121        self.inner.next()
122    }
123
124    fn size_hint(&self) -> (usize, Option<usize>) {
125        self.inner.size_hint()
126    }
127}
128
129impl<'a> ExactSizeIterator for PageIterator<'a> {}
130
131/// Mutable iterator over fixed-size chunks of a memory-mapped file.
132///
133/// This iterator provides mutable access to chunks, but due to Rust's borrowing
134/// rules, it cannot yield multiple mutable references simultaneously. Instead,
135/// it provides a callback-based interface.
136pub struct ChunkIteratorMut<'a> {
137    mmap: &'a MemoryMappedFile,
138    chunk_size: usize,
139    current_offset: u64,
140    total_len: u64,
141    _phantom: PhantomData<&'a mut [u8]>,
142}
143
144impl<'a> ChunkIteratorMut<'a> {
145    /// Create a new mutable chunk iterator.
146    pub(crate) fn new(mmap: &'a MemoryMappedFile, chunk_size: usize) -> Result<Self> {
147        let total_len = mmap.current_len()?;
148        Ok(Self {
149            mmap,
150            chunk_size,
151            current_offset: 0,
152            total_len,
153            _phantom: PhantomData,
154        })
155    }
156
157    /// Process each chunk with a callback function.
158    ///
159    /// The callback receives the offset and a mutable slice for each chunk.
160    pub fn for_each_mut<F, E>(mut self, mut f: F) -> Result<std::result::Result<(), E>>
161    where
162        F: FnMut(u64, &mut [u8]) -> std::result::Result<(), E>,
163    {
164        while self.current_offset < self.total_len {
165            let remaining = self.total_len - self.current_offset;
166            let chunk_len = remaining.min(self.chunk_size as u64);
167
168            let mut guard = self.mmap.as_slice_mut(self.current_offset, chunk_len)?;
169            let slice = guard.as_mut();
170
171            match f(self.current_offset, slice) {
172                Ok(()) => {}
173                Err(e) => return Ok(Err(e)),
174            }
175
176            self.current_offset += chunk_len;
177        }
178        Ok(Ok(()))
179    }
180}
181
182impl MemoryMappedFile {
183    /// Create an iterator over fixed-size chunks of the file.
184    ///
185    /// For read-only and copy-on-write mappings, this returns immutable slices.
186    /// For read-write mappings, use `chunks_mut()` for mutable access.
187    ///
188    /// # Arguments
189    ///
190    /// * `chunk_size` - Size of each chunk in bytes
191    ///
192    /// # Examples
193    ///
194    /// ```no_run
195    /// use mmap_io::MemoryMappedFile;
196    ///
197    /// let mmap = MemoryMappedFile::open_ro("data.bin")?;
198    ///
199    /// // Process file in 1MB chunks
200    /// for chunk in mmap.chunks(1024 * 1024) {
201    ///     let data = chunk?;
202    ///     // Process chunk...
203    /// }
204    /// # Ok::<(), mmap_io::MmapIoError>(())
205    /// ```
206    #[cfg(feature = "iterator")]
207    pub fn chunks(&self, chunk_size: usize) -> ChunkIterator<'_> {
208        ChunkIterator::new(self, chunk_size).expect("chunk iterator creation should not fail")
209    }
210
211    /// Create an iterator over page-aligned chunks of the file.
212    ///
213    /// Pages are aligned to the system's page size, which is typically 4KB on most systems.
214    /// This can provide better performance for certain access patterns.
215    ///
216    /// # Examples
217    ///
218    /// ```no_run
219    /// use mmap_io::MemoryMappedFile;
220    ///
221    /// let mmap = MemoryMappedFile::open_ro("data.bin")?;
222    ///
223    /// // Process file page by page
224    /// for page in mmap.pages() {
225    ///     let data = page?;
226    ///     // Process page...
227    /// }
228    /// # Ok::<(), mmap_io::MmapIoError>(())
229    /// ```
230    #[cfg(feature = "iterator")]
231    pub fn pages(&self) -> PageIterator<'_> {
232        PageIterator::new(self).expect("page iterator creation should not fail")
233    }
234
235    /// Create a mutable iterator over fixed-size chunks of the file.
236    ///
237    /// This is only available for read-write mappings. Due to Rust's borrowing rules,
238    /// this returns an iterator that processes chunks through a callback.
239    ///
240    /// # Arguments
241    ///
242    /// * `chunk_size` - Size of each chunk in bytes
243    ///
244    /// # Examples
245    ///
246    /// ```no_run
247    /// use mmap_io::{MemoryMappedFile, MmapMode};
248    ///
249    /// let mmap = MemoryMappedFile::open_rw("data.bin")?;
250    ///
251    /// // Zero out file in 4KB chunks
252    /// mmap.chunks_mut(4096).for_each_mut(|offset, chunk| {
253    ///     chunk.fill(0);
254    ///     Ok::<(), std::io::Error>(())
255    /// })??;
256    /// # Ok::<(), mmap_io::MmapIoError>(())
257    /// ```
258    #[cfg(feature = "iterator")]
259    pub fn chunks_mut(&self, chunk_size: usize) -> ChunkIteratorMut<'_> {
260        ChunkIteratorMut::new(self, chunk_size)
261            .expect("mutable chunk iterator creation should not fail")
262    }
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use crate::create_mmap;
    use std::fs;
    use std::path::PathBuf;

    /// Build a per-process scratch path inside the system temp directory.
    fn tmp_path(name: &str) -> PathBuf {
        let file_name = format!(
            "mmap_io_iterator_test_{}_{}",
            name,
            std::process::id()
        );
        std::env::temp_dir().join(file_name)
    }

    /// Fixed-size chunking with an evenly dividing and a ragged chunk size.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_chunk_iterator() {
        let path = tmp_path("chunk_iter");
        let _ = fs::remove_file(&path);

        // Ten 1 KiB blocks, block `n` filled with byte value `n`
        let mmap = create_mmap(&path, 10240).expect("create");
        for block in 0..10 {
            let fill = vec![block as u8; 1024];
            mmap.update_region(block * 1024, &fill).expect("write");
        }
        mmap.flush().expect("flush");

        // Chunk size that divides the file evenly
        let even: Result<Vec<_>> = mmap.chunks(1024).collect();
        let even = even.expect("collect chunks");

        assert_eq!(even.len(), 10);
        for (index, chunk) in even.iter().enumerate() {
            assert_eq!(chunk.len(), 1024);
            assert!(chunk.iter().all(|&byte| byte == index as u8));
        }

        // Chunk size that leaves a shorter trailing chunk
        let ragged: Result<Vec<_>> = mmap.chunks(3000).collect();
        let ragged = ragged.expect("collect chunks");

        assert_eq!(ragged.len(), 4); // 3000, 3000, 3000, 1240
        assert_eq!(ragged[3].len(), 1240);

        fs::remove_file(&path).expect("cleanup");
    }

    /// Page iteration over three full pages plus a 100-byte tail.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_page_iterator() {
        let path = tmp_path("page_iter");
        let _ = fs::remove_file(&path);

        let ps = page_size();
        let file_size = ps * 3 + 100;

        let mmap = create_mmap(&path, file_size as u64).expect("create");

        let pages: Result<Vec<_>> = mmap.pages().collect();
        let pages = pages.expect("collect pages");

        assert_eq!(pages.len(), 4); // 3 full + 1 partial
        for full_page in &pages[..3] {
            assert_eq!(full_page.len(), ps);
        }
        assert_eq!(pages[3].len(), 100);

        fs::remove_file(&path).expect("cleanup");
    }

    /// Writes made through `for_each_mut` are visible to later reads.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_mutable_chunk_iterator() {
        let path = tmp_path("mut_chunk_iter");
        let _ = fs::remove_file(&path);

        let mmap = create_mmap(&path, 4096).expect("create");

        // Stamp each 1 KiB chunk with its own index
        let outcome = mmap.chunks_mut(1024).for_each_mut(|offset, chunk| {
            chunk.fill((offset / 1024) as u8);
            Ok::<(), std::io::Error>(())
        });

        assert!(outcome.is_ok());
        assert!(outcome.unwrap().is_ok());

        mmap.flush().expect("flush");

        // Read every chunk back and check its stamp
        let mut readback = [0u8; 1024];
        for index in 0..4 {
            mmap.read_into(index * 1024, &mut readback).expect("read");
            assert!(readback.iter().all(|&byte| byte == index as u8));
        }

        fs::remove_file(&path).expect("cleanup");
    }

    /// `size_hint` reports the exact chunk count before iteration starts.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_iterator_size_hint() {
        let path = tmp_path("size_hint");
        let _ = fs::remove_file(&path);

        let mmap = create_mmap(&path, 10000).expect("create");

        let even = mmap.chunks(1000);
        assert_eq!(even.size_hint(), (10, Some(10)));

        let ragged = mmap.chunks(3000);
        assert_eq!(ragged.size_hint(), (4, Some(4)));

        fs::remove_file(&path).expect("cleanup");
    }

    /// Smallest file used here: one byte yields one one-byte chunk.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_empty_file_iteration() {
        let path = tmp_path("empty_iter");
        let _ = fs::remove_file(&path);

        let mmap = create_mmap(&path, 1).expect("create"); // Can't create 0-size
        mmap.resize(1).expect("resize"); // Keep it minimal

        let chunks: Result<Vec<_>> = mmap.chunks(1024).collect();
        let chunks = chunks.expect("collect");

        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].len(), 1);

        fs::remove_file(&path).expect("cleanup");
    }
}