mmap_io/
iterator.rs

1//! Iterator-based access for efficient sequential processing of memory-mapped files.
2
3use crate::errors::Result;
4use crate::mmap::MemoryMappedFile;
5use crate::utils::page_size;
6use std::marker::PhantomData;
7
8/// Iterator over fixed-size chunks of a memory-mapped file.
9///
10/// # Examples
11///
12/// ```no_run
13/// use mmap_io::MemoryMappedFile;
14///
15/// let mmap = MemoryMappedFile::open_ro("data.bin")?;
16/// 
17/// // Iterate over 4KB chunks
18/// for (offset, chunk) in mmap.chunks(4096).enumerate() {
19///     let chunk_data = chunk?;
20///     println!("Chunk {} at offset {}: {} bytes", 
21///              offset, offset * 4096, chunk_data.len());
22/// }
23/// # Ok::<(), mmap_io::MmapIoError>(())
24/// ```
25pub struct ChunkIterator<'a> {
26    mmap: &'a MemoryMappedFile,
27    chunk_size: usize,
28    current_offset: u64,
29    total_len: u64,
30    // Reusable buffer to avoid allocations on each iteration
31    buffer: Vec<u8>,
32}
33
34impl<'a> ChunkIterator<'a> {
35    /// Create a new chunk iterator.
36    pub(crate) fn new(mmap: &'a MemoryMappedFile, chunk_size: usize) -> Result<Self> {
37        let total_len = mmap.current_len()?;
38        // Pre-allocate buffer with chunk_size capacity
39        let buffer = Vec::with_capacity(chunk_size);
40        Ok(Self {
41            mmap,
42            chunk_size,
43            current_offset: 0,
44            total_len,
45            buffer,
46        })
47    }
48}
49
50impl<'a> Iterator for ChunkIterator<'a> {
51    type Item = Result<Vec<u8>>;
52
53    fn next(&mut self) -> Option<Self::Item> {
54        if self.current_offset >= self.total_len {
55            return None;
56        }
57
58        let remaining = self.total_len - self.current_offset;
59        let chunk_len = remaining.min(self.chunk_size as u64);
60
61        // Resize the reusable buffer to the exact chunk size needed
62        self.buffer.resize(chunk_len as usize, 0);
63        
64        // For RW mappings, we need to use read_into
65        match self.mmap.read_into(self.current_offset, &mut self.buffer) {
66            Ok(()) => {
67                self.current_offset += chunk_len;
68                // Clone the buffer data to return ownership
69                // This is necessary because we reuse the buffer
70                Some(Ok(self.buffer.clone()))
71            }
72            Err(e) => Some(Err(e)),
73        }
74    }
75
76    fn size_hint(&self) -> (usize, Option<usize>) {
77        let remaining = self.total_len.saturating_sub(self.current_offset);
78        let chunks = (remaining as usize).div_ceil(self.chunk_size);
79        (chunks, Some(chunks))
80    }
81}
82
83impl<'a> ExactSizeIterator for ChunkIterator<'a> {}
84
85/// Iterator over page-aligned chunks of a memory-mapped file.
86///
87/// Pages are aligned to the system's page size for optimal performance.
88///
89/// # Examples
90///
91/// ```no_run
92/// use mmap_io::MemoryMappedFile;
93///
94/// let mmap = MemoryMappedFile::open_ro("data.bin")?;
95/// 
96/// // Iterate over system pages
97/// for page in mmap.pages() {
98///     let page_data = page?;
99///     // Process page...
100/// }
101/// # Ok::<(), mmap_io::MmapIoError>(())
102/// ```
103pub struct PageIterator<'a> {
104    inner: ChunkIterator<'a>,
105}
106
107impl<'a> PageIterator<'a> {
108    /// Create a new page iterator.
109    pub(crate) fn new(mmap: &'a MemoryMappedFile) -> Result<Self> {
110        let ps = page_size();
111        Ok(Self {
112            inner: ChunkIterator::new(mmap, ps)?,
113        })
114    }
115}
116
117impl<'a> Iterator for PageIterator<'a> {
118    type Item = Result<Vec<u8>>;
119
120    fn next(&mut self) -> Option<Self::Item> {
121        self.inner.next()
122    }
123
124    fn size_hint(&self) -> (usize, Option<usize>) {
125        self.inner.size_hint()
126    }
127}
128
129impl<'a> ExactSizeIterator for PageIterator<'a> {}
130
131/// Mutable iterator over fixed-size chunks of a memory-mapped file.
132///
133/// This iterator provides mutable access to chunks, but due to Rust's borrowing
134/// rules, it cannot yield multiple mutable references simultaneously. Instead,
135/// it provides a callback-based interface.
136pub struct ChunkIteratorMut<'a> {
137    mmap: &'a MemoryMappedFile,
138    chunk_size: usize,
139    current_offset: u64,
140    total_len: u64,
141    _phantom: PhantomData<&'a mut [u8]>,
142}
143
144impl<'a> ChunkIteratorMut<'a> {
145    /// Create a new mutable chunk iterator.
146    pub(crate) fn new(mmap: &'a MemoryMappedFile, chunk_size: usize) -> Result<Self> {
147        let total_len = mmap.current_len()?;
148        Ok(Self {
149            mmap,
150            chunk_size,
151            current_offset: 0,
152            total_len,
153            _phantom: PhantomData,
154        })
155    }
156
157    /// Process each chunk with a callback function.
158    ///
159    /// The callback receives the offset and a mutable slice for each chunk.
160    pub fn for_each_mut<F, E>(mut self, mut f: F) -> Result<std::result::Result<(), E>>
161    where
162        F: FnMut(u64, &mut [u8]) -> std::result::Result<(), E>,
163    {
164        while self.current_offset < self.total_len {
165            let remaining = self.total_len - self.current_offset;
166            let chunk_len = remaining.min(self.chunk_size as u64);
167
168            let mut guard = self.mmap.as_slice_mut(self.current_offset, chunk_len)?;
169            let slice = guard.as_mut();
170            
171            match f(self.current_offset, slice) {
172                Ok(()) => {},
173                Err(e) => return Ok(Err(e)),
174            }
175
176            self.current_offset += chunk_len;
177        }
178        Ok(Ok(()))
179    }
180}
181
182impl MemoryMappedFile {
183    /// Create an iterator over fixed-size chunks of the file.
184    ///
185    /// For read-only and copy-on-write mappings, this returns immutable slices.
186    /// For read-write mappings, use `chunks_mut()` for mutable access.
187    ///
188    /// # Arguments
189    ///
190    /// * `chunk_size` - Size of each chunk in bytes
191    ///
192    /// # Examples
193    ///
194    /// ```no_run
195    /// use mmap_io::MemoryMappedFile;
196    ///
197    /// let mmap = MemoryMappedFile::open_ro("data.bin")?;
198    /// 
199    /// // Process file in 1MB chunks
200    /// for chunk in mmap.chunks(1024 * 1024) {
201    ///     let data = chunk?;
202    ///     // Process chunk...
203    /// }
204    /// # Ok::<(), mmap_io::MmapIoError>(())
205    /// ```
206    #[cfg(feature = "iterator")]
207    pub fn chunks(&self, chunk_size: usize) -> ChunkIterator<'_> {
208        ChunkIterator::new(self, chunk_size).expect("chunk iterator creation should not fail")
209    }
210
211    /// Create an iterator over page-aligned chunks of the file.
212    ///
213    /// Pages are aligned to the system's page size, which is typically 4KB on most systems.
214    /// This can provide better performance for certain access patterns.
215    ///
216    /// # Examples
217    ///
218    /// ```no_run
219    /// use mmap_io::MemoryMappedFile;
220    ///
221    /// let mmap = MemoryMappedFile::open_ro("data.bin")?;
222    /// 
223    /// // Process file page by page
224    /// for page in mmap.pages() {
225    ///     let data = page?;
226    ///     // Process page...
227    /// }
228    /// # Ok::<(), mmap_io::MmapIoError>(())
229    /// ```
230    #[cfg(feature = "iterator")]
231    pub fn pages(&self) -> PageIterator<'_> {
232        PageIterator::new(self).expect("page iterator creation should not fail")
233    }
234
235    /// Create a mutable iterator over fixed-size chunks of the file.
236    ///
237    /// This is only available for read-write mappings. Due to Rust's borrowing rules,
238    /// this returns an iterator that processes chunks through a callback.
239    ///
240    /// # Arguments
241    ///
242    /// * `chunk_size` - Size of each chunk in bytes
243    ///
244    /// # Examples
245    ///
246    /// ```no_run
247    /// use mmap_io::{MemoryMappedFile, MmapMode};
248    ///
249    /// let mmap = MemoryMappedFile::open_rw("data.bin")?;
250    /// 
251    /// // Zero out file in 4KB chunks
252    /// mmap.chunks_mut(4096).for_each_mut(|offset, chunk| {
253    ///     chunk.fill(0);
254    ///     Ok::<(), std::io::Error>(())
255    /// })??;
256    /// # Ok::<(), mmap_io::MmapIoError>(())
257    /// ```
258    #[cfg(feature = "iterator")]
259    pub fn chunks_mut(&self, chunk_size: usize) -> ChunkIteratorMut<'_> {
260        ChunkIteratorMut::new(self, chunk_size).expect("mutable chunk iterator creation should not fail")
261    }
262}
263
#[cfg(test)]
mod tests {
    use super::*;
    use crate::create_mmap;
    use std::fs;
    use std::path::PathBuf;

    /// Build a scratch-file path in the system temp directory, made unique per
    /// test name and per process id.
    fn tmp_path(name: &str) -> PathBuf {
        let file_name = format!("mmap_io_iterator_test_{}_{}", name, std::process::id());
        std::env::temp_dir().join(file_name)
    }

    /// Chunks come back in order with the expected lengths and contents, both
    /// when the chunk size divides the file length and when it does not.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_chunk_iterator() {
        let file_path = tmp_path("chunk_iter");
        let _ = fs::remove_file(&file_path);

        // Ten 1024-byte blocks; block `i` is filled with the byte value `i`.
        let mapped = create_mmap(&file_path, 10240).expect("create");
        for i in 0..10 {
            let block = vec![i as u8; 1024];
            mapped.update_region(i * 1024, &block).expect("write");
        }
        mapped.flush().expect("flush");

        // Chunk size equal to the block size: one chunk per block.
        let aligned: Vec<_> = mapped
            .chunks(1024)
            .collect::<Result<Vec<_>>>()
            .expect("collect chunks");

        assert_eq!(aligned.len(), 10);
        for (expected, chunk) in aligned.iter().enumerate() {
            assert_eq!(chunk.len(), 1024);
            assert!(chunk.iter().all(|&byte| byte == expected as u8));
        }

        // Chunk size that does not divide the file length: 3000, 3000, 3000, 1240.
        let unaligned: Vec<_> = mapped
            .chunks(3000)
            .collect::<Result<Vec<_>>>()
            .expect("collect chunks");

        assert_eq!(unaligned.len(), 4);
        assert_eq!(unaligned[3].len(), 1240);

        fs::remove_file(&file_path).expect("cleanup");
    }

    /// A file of three pages plus 100 bytes yields three full pages and a
    /// 100-byte tail.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_page_iterator() {
        let file_path = tmp_path("page_iter");
        let _ = fs::remove_file(&file_path);

        let page = page_size();
        let total = page * 3 + 100;

        let mapped = create_mmap(&file_path, total as u64).expect("create");

        let pages: Vec<_> = mapped
            .pages()
            .collect::<Result<Vec<_>>>()
            .expect("collect pages");

        assert_eq!(pages.len(), 4);
        for full_page in &pages[..3] {
            assert_eq!(full_page.len(), page);
        }
        assert_eq!(pages[3].len(), 100);

        fs::remove_file(&file_path).expect("cleanup");
    }

    /// Writes made through `for_each_mut` are visible when the file is read back.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_mutable_chunk_iterator() {
        let file_path = tmp_path("mut_chunk_iter");
        let _ = fs::remove_file(&file_path);

        let mapped = create_mmap(&file_path, 4096).expect("create");

        // Fill each 1024-byte chunk with its own chunk index.
        let outcome = mapped.chunks_mut(1024).for_each_mut(|offset, chunk| {
            chunk.fill((offset / 1024) as u8);
            Ok::<(), std::io::Error>(())
        });

        assert!(outcome.is_ok());
        assert!(outcome.unwrap().is_ok());

        mapped.flush().expect("flush");

        // Read every chunk back and check its fill value.
        let mut readback = [0u8; 1024];
        for i in 0..4 {
            mapped.read_into(i * 1024, &mut readback).expect("read");
            assert!(readback.iter().all(|&byte| byte == i as u8));
        }

        fs::remove_file(&file_path).expect("cleanup");
    }

    /// `size_hint` reports the exact chunk count, rounding a partial tail up.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_iterator_size_hint() {
        let file_path = tmp_path("size_hint");
        let _ = fs::remove_file(&file_path);

        let mapped = create_mmap(&file_path, 10000).expect("create");

        let even_split = mapped.chunks(1000);
        assert_eq!(even_split.size_hint(), (10, Some(10)));

        let with_tail = mapped.chunks(3000);
        assert_eq!(with_tail.size_hint(), (4, Some(4)));

        fs::remove_file(&file_path).expect("cleanup");
    }

    /// Smallest file this test can create (one byte): a single one-byte chunk.
    #[test]
    #[cfg(feature = "iterator")]
    fn test_empty_file_iteration() {
        let file_path = tmp_path("empty_iter");
        let _ = fs::remove_file(&file_path);

        // Can't create 0-size, so use a single byte and keep it that size.
        let mapped = create_mmap(&file_path, 1).expect("create");
        mapped.resize(1).expect("resize");

        let collected: Vec<_> = mapped
            .chunks(1024)
            .collect::<Result<Vec<_>>>()
            .expect("collect");

        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0].len(), 1);

        fs::remove_file(&file_path).expect("cleanup");
    }
}