scirs2-core 0.4.2

Core utilities and common functionality for SciRS2 (scirs2-core)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
//! Chunked processing extension for memory-mapped arrays.
//!
//! This module provides utilities for working with large memory-mapped arrays
//! in a memory-efficient manner by processing them in smaller chunks.
//!
//! ## Overview
//!
//! Memory-mapped arrays allow working with data that doesn't fit entirely in RAM,
//! but processing such large arrays can still be challenging. This module extends
//! `MemoryMappedArray` with chunked processing capabilities, enabling efficient
//! processing of large datasets through a combination of:
//!
//! - Processing data in manageable chunks to control memory usage
//! - Providing both iterator-based and callback-based processing APIs
//! - Supporting various chunking strategies to optimize for different workloads
//! - Ensuring mutations are properly persisted to the underlying memory-mapped file
//!
//! ## Usage
//!
//! There are four main ways to process chunks:
//!
//! 1. Using `process_chunks` for reading/processing chunks:
//!    ```no_run
//!    # use scirs2_core::memory_efficient::{MemoryMappedArray, MemoryMappedChunks, ChunkingStrategy};
//!    # let mmap: MemoryMappedArray<f64> = unimplemented!();
//!    // Process chunks of a large array and collect results
//!    let results = mmap.process_chunks(
//!        ChunkingStrategy::Fixed(1000),
//!        |chunk_data, chunk_idx| {
//!            // Process this chunk and return a result
//!            chunk_data.iter().sum::<f64>()
//!        }
//!    );
//!    ```
//!
//! 2. Using `process_chunks_mut` for mutating chunks:
//!    ```no_run
//!    # use scirs2_core::memory_efficient::{MemoryMappedArray, MemoryMappedChunks, ChunkingStrategy};
//!    # let mut mmap: MemoryMappedArray<f64> = unimplemented!();
//!    // Modify each chunk in-place
//!    mmap.process_chunks_mut(
//!        ChunkingStrategy::NumChunks(10),
//!        |chunk_data, chunk_idx| {
//!            // Modify the chunk data in-place
//!            for i in 0..chunk_data.len() {
//!                chunk_data[i] *= 2.0;
//!            }
//!        }
//!    );
//!    ```
//!
//! 3. Using the `chunks` iterator for element-by-element processing:
//!    ```no_run
//!    # use scirs2_core::memory_efficient::{MemoryMappedArray, MemoryMappedChunkIter, ChunkingStrategy};
//!    # let mmap: MemoryMappedArray<f64> = unimplemented!();
//!    // Process chunks using iterator
//!    for chunk in mmap.chunks(ChunkingStrategy::Auto) {
//!        // Each chunk is an Array1 of the appropriate type
//!        println!("Chunk sum: {}", chunk.sum());
//!    }
//!    ```
//!
//! 4. If you have the `parallel` feature enabled, you can also use parallel processing:
//!    ```ignore
//!    # #[cfg(feature = "parallel")]
//!    # {
//!    # use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunksParallel};
//!    # use scirs2_core::ndarray::Array1;
//!    # let data = Array1::<f64>::zeros(1000);
//!    # let mmap = create_mmap(&data, "/tmp/data.bin", AccessMode::Write, 0).expect("Operation failed");
//!    // Process chunks in parallel
//!    let results = mmap.process_chunks_parallel(
//!        ChunkingStrategy::Fixed(1000),
//!        |chunk_data, chunk_idx| {
//!            chunk_data.iter().sum::<f64>()
//!        }
//!    );
//!    # }
//!    ```
//!
//! ## Chunking Strategies
//!
//! This module supports different chunking strategies:
//!
//! - `ChunkingStrategy::Fixed(size)`: Fixed-size chunks
//! - `ChunkingStrategy::NumChunks(n)`: Divide the array into a specific number of chunks
//! - `ChunkingStrategy::Auto`: Automatically determine a reasonable chunk size
//! - `ChunkingStrategy::FixedBytes(bytes)`: Chunks with a specific size in bytes
//!
//! ## Limitations
//!
//! - Currently only works with one-dimensional (1D) arrays
//! - For mutating operations, the module uses direct file I/O to ensure changes are
//!   properly persisted to disk, which may be slower than memory-only operations

use crate::memory_efficient::chunked::ChunkingStrategy;
use crate::memory_efficient::memmap::MemoryMappedArray;
use ::ndarray::Array1;
use std::fs::OpenOptions;
use std::io::{Seek, SeekFrom, Write};

#[cfg(feature = "parallel")]
use crate::parallel_ops::*;

/// Extension trait for MemoryMappedArray to enable chunked processing of large datasets.
///
/// This trait extends `MemoryMappedArray` with methods for processing large datasets
/// in manageable chunks, which helps to control memory usage and enables working with
/// arrays that might be too large to fit entirely in memory.
pub trait MemoryMappedChunks<A: Clone + Copy + 'static + Send + Sync> {
    /// Get the number of chunks for the given chunking strategy.
    ///
    /// Note: for `ChunkingStrategy::NumChunks(n)` the value `n` is returned
    /// verbatim, even if it exceeds the number of elements in the array.
    ///
    /// # Arguments
    ///
    /// * `strategy` - The chunking strategy to determine chunk sizes
    ///
    /// # Returns
    ///
    /// The number of chunks that the array will be divided into
    ///
    /// # Examples
    ///
    /// ```
    /// use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunks};
    /// use scirs2_core::ndarray::Array1;
    ///
    /// // Create a memory-mapped array with 100 elements
    /// let data = Array1::<f64>::linspace(0., 99., 100);
    /// let file_path = "example.bin";  // In practice, use a proper temporary path
    /// let mmap = create_mmap(&data, file_path.as_ref(), AccessMode::Write, 0).expect("Operation failed");
    ///
    /// // Check how many chunks we'll get with different strategies
    /// assert_eq!(mmap.chunk_count(ChunkingStrategy::Fixed(10)), 10);  // 10 chunks of 10 elements each
    /// assert_eq!(mmap.chunk_count(ChunkingStrategy::NumChunks(5)), 5);  // 5 chunks of 20 elements each
    /// ```
    fn chunk_count(&self, strategy: ChunkingStrategy) -> usize;

    /// Process each chunk with a function and collect the results.
    ///
    /// This method divides the array into chunks according to the given strategy,
    /// applies the provided function to each chunk, and collects the results into a vector.
    /// It is efficient for read-only operations on large arrays.
    ///
    /// Each chunk is copied out of the memory mapping before `f` is invoked, so
    /// `f` receives an owned snapshot of the chunk's data. The final chunk may be
    /// shorter than the others when the array length is not an exact multiple of
    /// the chunk size.
    ///
    /// # Arguments
    ///
    /// * `strategy` - The chunking strategy to determine chunk sizes
    /// * `f` - A function that processes each chunk and returns a result; it is
    ///   called with the chunk's data and the chunk's zero-based index
    ///
    /// # Returns
    ///
    /// A vector containing the results from processing each chunk, in chunk order
    ///
    /// # Examples
    ///
    /// ```
    /// use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunks};
    /// use scirs2_core::ndarray::Array1;
    ///
    /// // Create a memory-mapped array with 20 elements (small numbers to avoid overflow)
    /// let data = Array1::<f64>::from_vec((0..20).map(|x| x as f64).collect());
    /// let file_path = "example.bin";  // In practice, use a proper temporary path
    /// let mmap = create_mmap(&data, file_path.as_ref(), AccessMode::Write, 0).expect("Operation failed");
    ///
    /// // Calculate the sum of each chunk
    /// let chunk_sums = mmap.process_chunks(
    ///     ChunkingStrategy::Fixed(5),
    ///     |chunk, chunk_idx| chunk.iter().sum::<f64>()
    /// );
    ///
    /// // We should have 4 chunks with sums of elements 0-4, 5-9, 10-14, 15-19
    /// assert_eq!(chunk_sums.len(), 4);
    /// ```
    fn process_chunks<F, R>(&self, strategy: ChunkingStrategy, f: F) -> Vec<R>
    where
        F: Fn(&[A], usize) -> R;

    /// Process each chunk with a mutable function that modifies the data in-place.
    ///
    /// This method divides the array into chunks according to the given strategy
    /// and applies the provided mutable function to each chunk. The function can
    /// modify the chunk data in-place, and the changes will be saved to the
    /// underlying memory-mapped file.
    ///
    /// # Arguments
    ///
    /// * `strategy` - The chunking strategy to determine chunk sizes
    /// * `f` - A function that processes and potentially modifies each chunk;
    ///   it is called with a mutable view of the chunk's data and the chunk's
    ///   zero-based index
    ///
    /// # Examples
    ///
    /// ```
    /// use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunks};
    /// use scirs2_core::ndarray::Array1;
    ///
    /// // Create a memory-mapped array with 100 zeros
    /// let data = Array1::<f64>::zeros(100);
    /// let file_path = "example.bin";  // In practice, use a proper temporary path
    /// let mut mmap = create_mmap(&data, file_path.as_ref(), AccessMode::Write, 0).expect("Operation failed");
    ///
    /// // Modify each chunk: set elements to their index
    /// mmap.process_chunks_mut(
    ///     ChunkingStrategy::Fixed(10),
    ///     |chunk, chunk_idx| {
    ///         for (i, elem) in chunk.iter_mut().enumerate() {
    ///             *elem = (chunk_idx * 10 + i) as f64;
    ///         }
    ///     }
    /// );
    ///
    /// // Now the array contains [0, 1, 2, ..., 99]
    /// ```
    ///
    /// # Notes
    ///
    /// This method uses direct file I/O to ensure changes are properly persisted to disk,
    /// which may be slower than memory-only operations but is more reliable for ensuring
    /// data is properly saved, especially with large datasets.
    fn process_chunks_mut<F>(&mut self, strategy: ChunkingStrategy, f: F)
    where
        F: Fn(&mut [A], usize);
}

/// Extension trait for parallel processing of memory-mapped arrays.
///
/// This trait is only available when the 'parallel' feature is enabled.
/// It extends the `MemoryMappedChunks` trait with parallel processing capabilities.
#[cfg(feature = "parallel")]
pub trait MemoryMappedChunksParallel<A: Clone + Copy + 'static + Send + Sync>:
    MemoryMappedChunks<A>
{
    /// Process chunks in parallel and collect the results.
    ///
    /// This method works like `process_chunks` but processes the chunks in parallel using Rayon.
    /// It's useful for computationally intensive operations on large datasets.
    ///
    /// # Arguments
    ///
    /// * `strategy` - The chunking strategy to determine chunk sizes
    /// * `f` - A function that processes each chunk and returns a result
    ///
    /// # Returns
    ///
    /// A vector containing the results from processing each chunk, in chunk order
    ///
    /// # Examples
    ///
    /// ```ignore
    /// # #[cfg(feature = "parallel")]
    /// # {
    /// use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunks, MemoryMappedChunksParallel};
    /// use scirs2_core::ndarray::Array1;
    ///
    /// // Create a memory-mapped array with 20 elements (small numbers to avoid overflow)
    /// let data = Array1::<i32>::from_vec((1..=20).collect());
    /// let file_path = "example.bin";  // In practice, use a proper temporary path
    /// let mmap = create_mmap(&data, file_path.as_ref(), AccessMode::Write, 0).expect("Operation failed");
    ///
    /// // Calculate the sum of each chunk in parallel
    /// let chunk_sums = mmap.process_chunks_parallel(
    ///     ChunkingStrategy::Fixed(5),
    ///     |chunk, chunk_idx| chunk.iter().sum::<i32>()
    /// );
    ///
    /// // We should have 4 chunks with reasonable sums
    /// assert_eq!(chunk_sums.len(), 4);
    /// # }
    /// ```
    fn process_chunks_parallel<F, R>(&self, strategy: ChunkingStrategy, f: F) -> Vec<R>
    where
        F: Fn(&[A], usize) -> R + Send + Sync,
        R: Send;

    /// Process chunks in parallel with a mutable function.
    ///
    /// This method works like `process_chunks_mut` but processes the chunks in parallel using Rayon.
    /// It's useful for computationally intensive operations on large datasets.
    ///
    /// # Arguments
    ///
    /// * `strategy` - The chunking strategy to determine chunk sizes
    /// * `f` - A function that processes and potentially modifies each chunk
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "parallel")]
    /// # {
    /// use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunks, MemoryMappedChunksParallel};
    /// use scirs2_core::ndarray::Array1;
    ///
    /// // Create a memory-mapped array with 100 zeros
    /// let data = Array1::<f64>::zeros(100);
    /// let file_path = "example.bin";  // In practice, use a proper temporary path
    /// let mut mmap = create_mmap(&data, file_path.as_ref(), AccessMode::Write, 0).expect("Operation failed");
    ///
    /// // Modify each chunk in parallel: set elements to their index
    /// mmap.process_chunks_mut_parallel(
    ///     ChunkingStrategy::Fixed(10),
    ///     |chunk, chunk_idx| {
    ///         for (i, elem) in chunk.iter_mut().enumerate() {
    ///             *elem = (chunk_idx * 10 + i) as f64;
    ///         }
    ///     }
    /// );
    /// # }
    /// ```
    ///
    /// # Notes
    ///
    /// Even when used in parallel, this method ensures that file writes are safe
    /// and do not conflict with each other by collecting all modifications and
    /// applying them sequentially.
    fn process_chunks_mut_parallel<F>(&mut self, strategy: ChunkingStrategy, f: F)
    where
        F: Fn(&mut [A], usize) + Send + Sync;
}

/// Iterator over chunks of a memory-mapped array (for 1D arrays only).
///
/// This iterator provides a convenient way to process a memory-mapped array in chunks,
/// returning each chunk as an `Array1<A>`. It's particularly useful for operations where
/// you want to process chunks sequentially and don't need to collect results.
///
/// Each yielded chunk is an owned copy of the corresponding slice of the array;
/// the final chunk may be shorter than the others when the array length is not
/// an exact multiple of the chunk size.
///
/// # Examples
///
/// ```
/// use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunkIter};
/// use scirs2_core::ndarray::Array1;
///
/// // Create a memory-mapped array
/// let data = Array1::<f64>::linspace(0., 99., 100);
/// let file_path = "example.bin";  // In practice, use a proper temporary path
/// let mmap = create_mmap(&data, file_path.as_ref(), AccessMode::Write, 0).expect("Operation failed");
///
/// // Process chunks using iterator
/// let mut sum = 0.0;
/// for chunk in mmap.chunks(ChunkingStrategy::Fixed(10)) {
///     // Each chunk is an Array1<f64>
///     sum += chunk.sum();
/// }
///
/// // The sum should be the same as summing the original array
/// assert!((sum - data.sum()).abs() < 1e-10);
/// ```
pub struct ChunkIter<'a, A>
where
    A: Clone + Copy + 'static + Send + Sync,
{
    /// Reference to the memory-mapped array being iterated
    array: &'a MemoryMappedArray<A>,
    /// Index of the next chunk to yield
    current_idx: usize,
    /// Total number of chunks for the chosen strategy
    total_chunks: usize,
    /// Strategy used to derive each chunk's element range
    strategy: ChunkingStrategy,
}

impl<A> Iterator for ChunkIter<'_, A>
where
    A: Clone + Copy + 'static + Send + Sync,
{
    type Item = Array1<A>;

    /// Yield the next chunk as an owned `Array1`, or `None` when exhausted
    /// (or when the underlying array cannot be viewed as 1-D).
    fn next(&mut self) -> Option<Self::Item> {
        // Guard: iteration is finished once all chunks have been produced.
        if self.current_idx >= self.total_chunks {
            return None;
        }

        let idx = self.current_idx;
        self.current_idx += 1;

        // Elements per chunk implied by the strategy (constant across chunks).
        let elems_per_chunk = match self.strategy {
            ChunkingStrategy::Fixed(size) => size,
            ChunkingStrategy::NumChunks(n) => self.array.size.div_ceil(n),
            ChunkingStrategy::FixedBytes(bytes) => {
                // At least one element per chunk, even for tiny byte budgets.
                (bytes / std::mem::size_of::<A>()).max(1)
            }
            // Auto targets roughly 100 chunks; advanced strategies fall back
            // to the same sizing.
            ChunkingStrategy::Auto | ChunkingStrategy::Advanced(_) => {
                (self.array.size / 100).max(1)
            }
        };

        // The final chunk is clamped to the end of the array.
        let start = idx * elems_per_chunk;
        let end = (start + elems_per_chunk).min(self.array.size);

        // Copy the chunk out of the mapping; a mapping that cannot be viewed
        // as 1-D silently ends the iteration.
        self.array
            .as_array::<crate::ndarray::Ix1>()
            .ok()
            .map(|arr| arr.slice(crate::s![start..end]).to_owned())
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let left = self.total_chunks - self.current_idx;
        (left, Some(left))
    }
}

// `size_hint` is exact, so the iterator can report a precise length.
impl<A> ExactSizeIterator for ChunkIter<'_, A> where A: Clone + Copy + 'static + Send + Sync {}

/// Extension trait for MemoryMappedArray to enable chunked iteration.
///
/// This trait extends `MemoryMappedArray` with the ability to iterate over chunks
/// of the array, which provides a convenient way to process large arrays sequentially
/// in smaller, manageable pieces.
pub trait MemoryMappedChunkIter<A: Clone + Copy + 'static + Send + Sync> {
    /// Create an iterator over chunks of the array (for 1D arrays only).
    ///
    /// This method returns an iterator that yields chunks of the array as
    /// `Array1<A>` values, making it easy to process the array in smaller pieces.
    /// Each chunk is an owned copy of the corresponding slice; the final chunk
    /// may be shorter when the array length is not a multiple of the chunk size.
    ///
    /// # Arguments
    ///
    /// * `strategy` - The chunking strategy to determine chunk sizes
    ///
    /// # Returns
    ///
    /// An iterator that yields `Array1<A>` chunks of the memory-mapped array
    ///
    /// # Examples
    ///
    /// ```
    /// use scirs2_core::memory_efficient::{create_mmap, AccessMode, ChunkingStrategy, MemoryMappedChunkIter};
    /// use scirs2_core::ndarray::Array1;
    ///
    /// // Create a memory-mapped array
    /// let data = Array1::<f64>::linspace(0., 99., 100);
    /// let file_path = "example.bin";  // In practice, use a proper temporary path
    /// let mmap = create_mmap(&data, file_path.as_ref(), AccessMode::Write, 0).expect("Operation failed");
    ///
    /// // Create a chunk iterator with chunks of size 25
    /// let mut iter = mmap.chunks(ChunkingStrategy::Fixed(25));
    ///
    /// // Get the first chunk (elements 0-24)
    /// let chunk1 = iter.next().expect("Operation failed");
    /// assert_eq!(chunk1.len(), 25);
    /// assert_eq!(chunk1[0], 0.0);
    /// assert_eq!(chunk1[24], 24.0);
    /// ```
    fn chunks(&self, strategy: ChunkingStrategy) -> ChunkIter<A>;
}

/// Number of elements in each chunk implied by `strategy` for an array of
/// `total` elements.
///
/// All strategies except `Fixed` guarantee a length of at least 1;
/// `Fixed(size)` is passed through unchanged, matching the original
/// call-site behavior. `Advanced` strategies fall back to the `Auto` sizing
/// (roughly 100 chunks).
fn chunk_len<A>(strategy: ChunkingStrategy, total: usize) -> usize {
    match strategy {
        ChunkingStrategy::Fixed(size) => size,
        // Divide the array into (up to) `n` equal parts, rounding up.
        ChunkingStrategy::NumChunks(n) => total.div_ceil(n),
        // Target roughly 100 chunks; advanced strategies use the same fallback.
        ChunkingStrategy::Auto | ChunkingStrategy::Advanced(_) => (total / 100).max(1),
        ChunkingStrategy::FixedBytes(bytes) => {
            let element_size = std::mem::size_of::<A>();
            // At least one element per chunk, even when `bytes` is smaller
            // than a single element.
            (bytes / element_size).max(1)
        }
    }
}

impl<A: Clone + Copy + 'static + Send + Sync> MemoryMappedChunks<A> for MemoryMappedArray<A> {
    fn chunk_count(&self, strategy: ChunkingStrategy) -> usize {
        match strategy {
            // The number of chunks was specified explicitly; it is returned
            // verbatim, even if it exceeds the element count.
            ChunkingStrategy::NumChunks(n) => n,
            // Otherwise the count follows from the per-chunk element length.
            _ => self.size.div_ceil(chunk_len::<A>(strategy, self.size)),
        }
    }

    fn process_chunks<F, R>(&self, strategy: ChunkingStrategy, f: F) -> Vec<R>
    where
        F: Fn(&[A], usize) -> R,
    {
        let total_chunks = self.chunk_count(strategy);
        // The per-chunk element count is invariant across the loop.
        let chunk_size = chunk_len::<A>(strategy, self.size);
        let mut results = Vec::with_capacity(total_chunks);

        for chunk_idx in 0..total_chunks {
            let start_idx = chunk_idx * chunk_size;
            // The final chunk is clamped to the end of the array.
            let end_idx = (start_idx + chunk_size).min(self.size);

            // Copy this chunk out of the mapping so `f` gets an owned
            // snapshot with no lifetime tie to the mapping. Chunks whose
            // 1-D view cannot be obtained are silently skipped.
            if let Ok(array_1d) = self.as_array::<crate::ndarray::Ix1>() {
                let chunk_data = array_1d.slice(crate::s![start_idx..end_idx]).to_vec();
                results.push(f(&chunk_data, chunk_idx));
            }
        }

        results
    }

    fn process_chunks_mut<F>(&mut self, strategy: ChunkingStrategy, f: F)
    where
        F: Fn(&mut [A], usize),
    {
        let total_chunks = self.chunk_count(strategy);
        let element_size = std::mem::size_of::<A>();
        // The per-chunk element count is invariant across the loop.
        let chunk_size = chunk_len::<A>(strategy, self.size);

        // Open the backing file once; each chunk write seeks within it.
        // If the file cannot be opened, chunk closures still run but the
        // modifications are dropped — preserving the original best-effort
        // (errors-ignored) semantics.
        let mut file = OpenOptions::new().write(true).open(&self.file_path).ok();

        for chunk_idx in 0..total_chunks {
            let start_idx = chunk_idx * chunk_size;
            let end_idx = (start_idx + chunk_size).min(self.size);

            // Copy the chunk out of the mapping so the closure can mutate it
            // without aliasing the mapped memory.
            let mut chunk_data = Vec::with_capacity(end_idx - start_idx);
            if let Ok(array_1d) = self.as_array::<crate::ndarray::Ix1>() {
                chunk_data.extend_from_slice(
                    array_1d
                        .slice(crate::s![start_idx..end_idx])
                        .as_slice()
                        .expect("1-D slice of a contiguous array must be contiguous"),
                );
            } else {
                continue;
            }

            // Let the caller modify the chunk in place.
            f(&mut chunk_data, chunk_idx);

            // Persist the modified chunk with direct file I/O; writing through
            // the mapping alone is not relied upon to reach disk.
            if let Some(file) = file.as_mut() {
                // Effective byte offset = data offset within the file plus the
                // chunk's element position.
                let effective_offset = self.offset + start_idx * element_size;

                if file.seek(SeekFrom::Start(effective_offset as u64)).is_ok() {
                    // SAFETY: `chunk_data` is a live, initialized `Vec<A>` of
                    // `Copy` elements; reinterpreting its buffer as
                    // `len * size_of::<A>()` bytes is in-bounds and valid for
                    // the lifetime of this read.
                    let bytes = unsafe {
                        std::slice::from_raw_parts(
                            chunk_data.as_ptr() as *const u8,
                            chunk_data.len() * element_size,
                        )
                    };

                    // Best-effort write: I/O errors are deliberately ignored,
                    // matching the original behavior.
                    let _ = file.write_all(bytes);
                    let _ = file.flush();
                }
            }
        }

        // Reload the memory mapping so the on-disk changes become visible.
        let _ = self.reload();
    }
}

// Add the parallel methods directly to the existing implementation
#[cfg(feature = "parallel")]
impl<A: Clone + Copy + 'static + Send + Sync> MemoryMappedChunksParallel<A>
    for MemoryMappedArray<A>
{
    fn process_chunks_parallel<F, R>(&self, strategy: ChunkingStrategy, f: F) -> Vec<R>
    where
        F: Fn(&[A], usize) -> R + Send + Sync,
        R: Send,
    {
        let total_chunks = self.chunk_count(strategy);

        // The per-chunk element count is invariant, so compute it once
        // instead of per chunk index.
        let chunk_size = match strategy {
            ChunkingStrategy::Fixed(size) => size,
            ChunkingStrategy::NumChunks(n) => self.size.div_ceil(n),
            // Auto targets roughly 100 chunks; advanced strategies fall back
            // to the same sizing.
            ChunkingStrategy::Auto | ChunkingStrategy::Advanced(_) => (self.size / 100).max(1),
            ChunkingStrategy::FixedBytes(bytes) => {
                let element_size = std::mem::size_of::<A>();
                // At least one element per chunk.
                (bytes / element_size).max(1)
            }
        };

        // Precompute each chunk's (index, start, end) range; the final chunk
        // is clamped to the end of the array.
        let chunks_info: Vec<_> = (0..total_chunks)
            .map(|chunk_idx| {
                let start_idx = chunk_idx * chunk_size;
                let end_idx = (start_idx + chunk_size).min(self.size);
                (chunk_idx, start_idx, end_idx)
            })
            .collect();

        // A 1-D view of the full array; if unavailable, there is nothing to
        // process (preserves the original silent-failure behavior).
        let array_1d = match self.as_array::<crate::ndarray::Ix1>() {
            Ok(arr) => arr,
            Err(_) => return Vec::new(),
        };

        // Process chunks in parallel; `collect` preserves chunk order.
        chunks_info
            .into_par_iter()
            .map(|(chunk_idx, start_idx, end_idx)| {
                // Copy the chunk so `f` gets an owned snapshot.
                let chunk_data = array_1d.slice(crate::s![start_idx..end_idx]).to_vec();
                f(&chunk_data, chunk_idx)
            })
            .collect()
    }

    fn process_chunks_mut_parallel<F>(&mut self, strategy: ChunkingStrategy, f: F)
    where
        F: Fn(&mut [A], usize) + Send + Sync,
    {
        let total_chunks = self.chunk_count(strategy);
        let element_size = std::mem::size_of::<A>();

        // The per-chunk element count is invariant, so compute it once.
        let chunk_size = match strategy {
            ChunkingStrategy::Fixed(size) => size,
            ChunkingStrategy::NumChunks(n) => self.size.div_ceil(n),
            // Auto targets roughly 100 chunks; advanced strategies fall back
            // to the same sizing.
            ChunkingStrategy::Auto | ChunkingStrategy::Advanced(_) => (self.size / 100).max(1),
            ChunkingStrategy::FixedBytes(bytes) => (bytes / element_size).max(1),
        };

        // Precompute each chunk's (index, start, end) range.
        let chunks_info: Vec<_> = (0..total_chunks)
            .map(|chunk_idx| {
                let start_idx = chunk_idx * chunk_size;
                let end_idx = (start_idx + chunk_size).min(self.size);
                (chunk_idx, start_idx, end_idx)
            })
            .collect();

        // Clone the path/offset so the write-back below does not borrow `self`.
        let file_path = self.file_path.clone();
        let offset = self.offset;

        // A 1-D view of the full array; bail out silently if unavailable.
        let array_1d = match self.as_array::<crate::ndarray::Ix1>() {
            Ok(arr) => arr,
            Err(_) => return,
        };

        // Run the closures in parallel on owned copies of each chunk and
        // collect the modified data.
        let modifications: Vec<_> = chunks_info
            .into_par_iter()
            .map(|(chunk_idx, start_idx, end_idx)| {
                let mut chunk_data = array_1d.slice(crate::s![start_idx..end_idx]).to_vec();
                f(&mut chunk_data, chunk_idx);
                (chunk_idx, start_idx, chunk_data)
            })
            .collect();

        // Apply all modifications to the file sequentially so writes never
        // conflict. Open the file once; I/O errors are deliberately ignored
        // (best-effort, matching the original behavior).
        if let Ok(mut file) = OpenOptions::new().write(true).open(&file_path) {
            for (_, start_idx, chunk_data) in modifications {
                // Effective byte offset = data offset within the file plus
                // the chunk's element position.
                let effective_offset = offset + start_idx * element_size;

                if file.seek(SeekFrom::Start(effective_offset as u64)).is_ok() {
                    // SAFETY: `chunk_data` is a live, initialized `Vec<A>` of
                    // `Copy` elements; reinterpreting its buffer as
                    // `len * size_of::<A>()` bytes is in-bounds and valid for
                    // the lifetime of this read.
                    let bytes = unsafe {
                        std::slice::from_raw_parts(
                            chunk_data.as_ptr() as *const u8,
                            chunk_data.len() * element_size,
                        )
                    };

                    let _ = file.write_all(bytes);
                }
            }
            let _ = file.flush();
        }

        // Reload the memory mapping so the on-disk changes become visible.
        let _ = self.reload();
    }
}

impl<A: Clone + Copy + 'static + Send + Sync> MemoryMappedChunkIter<A> for MemoryMappedArray<A> {
    /// Build a chunk iterator over this array for the given strategy.
    fn chunks(&self, strategy: ChunkingStrategy) -> ChunkIter<A> {
        // Precompute the chunk count so the iterator can report an exact size.
        let total_chunks = self.chunk_count(strategy);
        ChunkIter {
            array: self,
            current_idx: 0,
            total_chunks,
            strategy,
        }
    }
}