Skip to main content

winx_code_agent/utils/
mmap.rs

1use memmap2::{Mmap, MmapOptions};
2use rayon::prelude::*;
3use std::cmp::min;
4use std::fs::File;
5use std::io::{BufReader, Read, Seek, SeekFrom};
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use tracing::{debug, info, trace, warn};
9
10use crate::errors::{Result, WinxError};
11
/// Upper bound (exclusive) on the size of files read with plain file I/O: 10 MB (decimal)
pub const DIRECT_READ_THRESHOLD: u64 = 10 * 1_000_000;

/// Upper bound (exclusive) on the size of files read through a single memory map: 1 GB (decimal)
pub const MAX_MMAP_SIZE: u64 = 1_000 * 1_000_000;

/// Upper bound (exclusive) on the size of files read through segmented memory maps: 4 GB (decimal)
pub const MAX_SEGMENTED_MMAP_SIZE: u64 = 4_000 * 1_000_000;

/// Size of each mapped segment when a large file is read segment by segment: 256 MB (decimal)
pub const SEGMENT_SIZE: u64 = 256 * 1_000_000;

/// Scratch-buffer size (1 MiB) used by the chunked loop of the direct reader
const DIRECT_READ_CHUNK_SIZE: usize = 1 << 20;
/// Chunk size (1 MiB) used when a memory map is copied out in parallel
const MMAP_PARALLEL_CHUNK_SIZE: usize = 1 << 20;
/// Scratch-buffer size (4 MiB) used by the streaming reader
const STREAMING_CHUNK_SIZE: usize = 4 << 20;
27
28/// Read file contents optimally based on file size
29///
30/// This function chooses the optimal reading strategy based on file size:
31/// - Small files: Direct read with standard File I/O
32/// - Medium files: Memory-mapped reading for performance
33/// - Large files: Segmented memory-mapped reading
34/// - Extreme files: Windowed access with streaming
35///
36/// # Arguments
37///
38/// * `path` - Path to the file to read
39/// * `max_file_size` - Maximum allowed file size
40///
41/// # Returns
42///
43/// A vector containing the file contents
44///
45/// # Errors
46///
47/// Returns an error if the file cannot be read or exceeds the size limit
48pub fn read_file_optimized(path: &Path, max_file_size: u64) -> Result<Vec<u8>> {
49    // Get file metadata
50    let file = File::open(path).map_err(|e| WinxError::FileAccessError {
51        path: path.to_path_buf(),
52        message: format!("Error opening file: {e}"),
53    })?;
54
55    let metadata = file.metadata().map_err(|e| WinxError::FileAccessError {
56        path: path.to_path_buf(),
57        message: format!("Failed to get file metadata: {e}"),
58    })?;
59
60    // Check file size
61    let file_size = metadata.len();
62    if file_size > max_file_size {
63        return Err(WinxError::FileTooLarge {
64            path: path.to_path_buf(),
65            size: file_size,
66            max_size: max_file_size,
67        });
68    }
69
70    // Choose reading strategy based on file size
71    if file_size < DIRECT_READ_THRESHOLD {
72        debug!("Using direct read for file: {}", path.display());
73        read_direct(&file, file_size, path)
74    } else if file_size < MAX_MMAP_SIZE {
75        debug!("Using memory-mapped read for file: {}", path.display());
76        read_mmap(&file, path)
77    } else if file_size < MAX_SEGMENTED_MMAP_SIZE {
78        debug!("Using segmented memory-mapped read for file: {}", path.display());
79        read_segmented_mmap(&file, file_size, path)
80    } else {
81        debug!("Using streaming read for extremely large file: {}", path.display());
82        read_streaming(&file, file_size, path)
83    }
84}
85
86/// Read file contents directly using standard I/O
87///
88/// This is efficient for small files.
89///
90/// # Arguments
91///
92/// * `file` - Open file handle
93/// * `file_size` - Size of the file
94/// * `path` - Path to the file (for error reporting)
95///
96/// # Returns
97///
98/// A vector containing the file contents
99///
100/// # Errors
101///
102/// Returns an error if the file cannot be read
103fn read_direct(file: &File, file_size: u64, path: &Path) -> Result<Vec<u8>> {
104    // For very small files (< 1MB), use an optimized approach
105    if file_size < 1_000_000 {
106        // Pre-allocate an exact-sized buffer
107        let mut buffer = Vec::with_capacity(file_size as usize);
108
109        // Create a mutable file handle and seek to the beginning
110        let mut file_handle = file.try_clone().map_err(|e| WinxError::FileAccessError {
111            path: path.to_path_buf(),
112            message: format!("Error cloning file handle: {e}"),
113        })?;
114
115        file_handle.seek(SeekFrom::Start(0)).map_err(|e| WinxError::FileAccessError {
116            path: path.to_path_buf(),
117            message: format!("Error seeking to start of file: {e}"),
118        })?;
119
120        // Use a BufReader with an appropriate buffer size (4K-64K)
121        let mut reader = BufReader::with_capacity(min(file_size as usize, 64 * 1024), file_handle);
122
123        // Read directly to the end
124        reader.read_to_end(&mut buffer).map_err(|e| WinxError::FileAccessError {
125            path: path.to_path_buf(),
126            message: format!("Error reading file: {e}"),
127        })?;
128
129        return Ok(buffer);
130    }
131
132    // For larger files, use a chunked reading approach with progress tracking
133    let mut buffer = Vec::with_capacity(file_size as usize);
134
135    // Create a mutable file handle and seek to the beginning
136    let mut file_handle = file.try_clone().map_err(|e| WinxError::FileAccessError {
137        path: path.to_path_buf(),
138        message: format!("Error cloning file handle: {e}"),
139    })?;
140
141    file_handle.seek(SeekFrom::Start(0)).map_err(|e| WinxError::FileAccessError {
142        path: path.to_path_buf(),
143        message: format!("Error seeking to start of file: {e}"),
144    })?;
145
146    let mut reader = BufReader::with_capacity(262_144, file_handle); // 256KB buffer
147
148    let mut chunk = vec![0; DIRECT_READ_CHUNK_SIZE];
149    let mut bytes_read = 0;
150
151    loop {
152        match reader.read(&mut chunk) {
153            Ok(0) => break, // EOF
154            Ok(n) => {
155                buffer.extend_from_slice(&chunk[..n]);
156                bytes_read += n as u64;
157
158                // Log progress for larger files
159                if file_size > 5_000_000 && bytes_read % 5_000_000 < DIRECT_READ_CHUNK_SIZE as u64 {
160                    trace!(
161                        "Read progress for {}: {}MB/{}MB ({}%)",
162                        path.display(),
163                        bytes_read / 1_000_000,
164                        file_size / 1_000_000,
165                        bytes_read * 100 / file_size
166                    );
167                }
168            }
169            Err(e) => {
170                return Err(WinxError::FileAccessError {
171                    path: path.to_path_buf(),
172                    message: format!("Error reading file chunk: {e}"),
173                });
174            }
175        }
176    }
177
178    Ok(buffer)
179}
180
181/// Read file contents using memory mapping
182///
183/// This is efficient for larger files as it avoids loading the entire
184/// file into memory at once.
185///
186/// # Arguments
187///
188/// * `file` - Open file handle
189/// * `path` - Path to the file (for error reporting)
190///
191/// # Returns
192///
193/// A vector containing the file contents
194///
195/// # Errors
196///
197/// Returns an error if the file cannot be mapped
198fn read_mmap(file: &File, path: &Path) -> Result<Vec<u8>> {
199    // Check for empty file to avoid mmap error
200    if file.metadata().map(|m| m.len()).unwrap_or(0) == 0 {
201        return Ok(Vec::new());
202    }
203
204    // SAFETY: Memory mapping a file is inherently unsafe because:
205    // - The file could be modified by another process during access
206    // - The file could be truncated, causing access to invalid memory
207    // We mitigate these risks by:
208    // - Using the mapped data immediately and converting to Vec<u8>
209    // - Not holding the mmap across async boundaries
210    // - File size was verified before this call
211    let mmap = unsafe { MmapOptions::new().map(file) }.map_err(|e| WinxError::FileAccessError {
212        path: path.to_path_buf(),
213        message: format!("Failed to memory-map file: {e}"),
214    })?;
215
216    // Use Rayon for parallel processing if the file is large enough
217    if mmap.len() > 10_000_000 {
218        // 10MB threshold for parallel processing
219        debug!("Using parallel processing for large mmap file: {}", path.display());
220
221        // Process in parallel chunks
222        let chunk_count = mmap.len().div_ceil(MMAP_PARALLEL_CHUNK_SIZE);
223        let mut result = vec![0; mmap.len()];
224
225        // Process in parallel with Rayon - use collect for parallel map
226        let chunks: Vec<_> = (0..chunk_count)
227            .into_par_iter()
228            .map(|i| {
229                let start = i * MMAP_PARALLEL_CHUNK_SIZE;
230                let end = min((i + 1) * MMAP_PARALLEL_CHUNK_SIZE, mmap.len());
231
232                if start < mmap.len() {
233                    // Extract chunk from mmap
234                    let src = &mmap[start..end];
235                    (start, end, src.to_vec())
236                } else {
237                    (start, start, Vec::new())
238                }
239            })
240            .collect();
241
242        // Now apply all chunks to the result sequentially
243        for (start, end, chunk) in chunks {
244            if start < end {
245                result[start..end].copy_from_slice(&chunk);
246            }
247        }
248
249        Ok(result)
250    } else {
251        // For smaller files, just copy the entire map to a Vec
252        Ok(mmap.to_vec())
253    }
254}
255
256/// Read large file with segmented memory mapping
257///
258/// This function reads a large file using multiple memory mapped segments,
259/// which allows handling files larger than the maximum mapping size.
260///
261/// # Arguments
262///
263/// * `file` - Open file handle
264/// * `file_size` - Size of the file
265/// * `path` - Path to the file (for error reporting)
266///
267/// # Returns
268///
269/// A vector containing the file contents
270///
271/// # Errors
272///
273/// Returns an error if the file cannot be read or mapped
274fn read_segmented_mmap(_file: &File, file_size: u64, path: &Path) -> Result<Vec<u8>> {
275    // Calculate number of segments needed
276    let segment_count = file_size.div_ceil(SEGMENT_SIZE);
277    debug!(
278        "Reading file {} in {} segments of {}MB each",
279        path.display(),
280        segment_count,
281        SEGMENT_SIZE / 1_000_000
282    );
283
284    // Pre-allocate result vector
285    let mut result = Vec::with_capacity(file_size as usize);
286
287    // Process each segment
288    for i in 0..segment_count {
289        let segment_start = i * SEGMENT_SIZE;
290        let segment_size = min(SEGMENT_SIZE, file_size - segment_start);
291
292        info!(
293            "Processing segment {}/{} of file {} ({:.1}%)",
294            i + 1,
295            segment_count,
296            path.display(),
297            (segment_start as f64 / file_size as f64) * 100.0
298        );
299
300        // Open a new file handle for each segment to avoid position conflicts
301        let segment_file = File::open(path).map_err(|e| WinxError::FileAccessError {
302            path: path.to_path_buf(),
303            message: format!("Error opening file for segment {i}: {e}"),
304        })?;
305
306        // Seek to segment start
307        let mut segment_file = segment_file;
308        segment_file.seek(SeekFrom::Start(segment_start)).map_err(|e| {
309            WinxError::FileAccessError {
310                path: path.to_path_buf(),
311                message: format!("Error seeking to segment start: {e}"),
312            }
313        })?;
314
315        // SAFETY: Memory mapping a segment is safe here because:
316        // - File handle is freshly opened and seeked to correct position
317        // - Segment bounds are calculated from verified file size
318        // - Data is immediately copied to Vec, not held across boundaries
319        let segment_mmap = unsafe {
320            MmapOptions::new().offset(segment_start).len(segment_size as usize).map(&segment_file)
321        }
322        .map_err(|e| WinxError::FileAccessError {
323            path: path.to_path_buf(),
324            message: format!("Failed to memory-map file segment {i}: {e}"),
325        })?;
326
327        // Append segment data to result
328        result.extend_from_slice(&segment_mmap);
329    }
330
331    Ok(result)
332}
333
334/// Read extremely large file with streaming
335///
336/// This function reads an extremely large file using a streaming approach,
337/// which minimizes memory usage by processing the file in small chunks.
338///
339/// # Arguments
340///
341/// * `file` - Open file handle
342/// * `file_size` - Size of the file
343/// * `path` - Path to the file (for error reporting)
344///
345/// # Returns
346///
347/// A vector containing the file contents
348///
349/// # Errors
350///
351/// Returns an error if the file cannot be read
352fn read_streaming(file: &File, file_size: u64, path: &Path) -> Result<Vec<u8>> {
353    warn!(
354        "Reading extremely large file ({}GB) with streaming approach: {}",
355        file_size / 1_000_000_000,
356        path.display()
357    );
358
359    // For extreme files, pre-allocate a reasonably sized vector and grow as needed
360    let initial_capacity = min(file_size as usize, 1_000_000_000); // 1GB initial max
361    let mut buffer = Vec::with_capacity(initial_capacity);
362
363    let mut reader = BufReader::with_capacity(STREAMING_CHUNK_SIZE, file);
364    let mut chunk = vec![0; STREAMING_CHUNK_SIZE];
365    let mut bytes_read = 0;
366
367    loop {
368        match reader.read(&mut chunk) {
369            Ok(0) => break, // EOF
370            Ok(n) => {
371                buffer.extend_from_slice(&chunk[..n]);
372                bytes_read += n as u64;
373
374                // Log progress every 100MB
375                if bytes_read % 100_000_000 < STREAMING_CHUNK_SIZE as u64 {
376                    info!(
377                        "Read progress for large file {}: {:.2}GB/{:.2}GB ({:.1}%)",
378                        path.display(),
379                        bytes_read as f64 / 1_000_000_000.0,
380                        file_size as f64 / 1_000_000_000.0,
381                        bytes_read as f64 * 100.0 / file_size as f64
382                    );
383                }
384            }
385            Err(e) => {
386                return Err(WinxError::FileAccessError {
387                    path: path.to_path_buf(),
388                    message: format!("Error reading file chunk at position {bytes_read}: {e}"),
389                });
390            }
391        }
392    }
393
394    Ok(buffer)
395}
396
397/// Read a specific segment of a file
398///
399/// This function reads a specific segment of a file using memory mapping
400/// or direct I/O, depending on the segment size.
401///
402/// # Arguments
403///
404/// * `path` - Path to the file
405/// * `offset` - Starting offset in bytes
406/// * `length` - Length of segment to read in bytes
407/// * `max_file_size` - Maximum allowed file size
408///
409/// # Returns
410///
411/// A vector containing the file segment contents
412///
413/// # Errors
414///
415/// Returns an error if the file cannot be read or exceeds the size limit
416pub fn read_file_segment(
417    path: &Path,
418    offset: u64,
419    length: u64,
420    max_file_size: u64,
421) -> Result<Vec<u8>> {
422    // Get file metadata
423    let file = File::open(path).map_err(|e| WinxError::FileAccessError {
424        path: path.to_path_buf(),
425        message: format!("Error opening file: {e}"),
426    })?;
427
428    let metadata = file.metadata().map_err(|e| WinxError::FileAccessError {
429        path: path.to_path_buf(),
430        message: format!("Failed to get file metadata: {e}"),
431    })?;
432
433    // Check file size
434    let file_size = metadata.len();
435    if file_size > max_file_size {
436        return Err(WinxError::FileTooLarge {
437            path: path.to_path_buf(),
438            size: file_size,
439            max_size: max_file_size,
440        });
441    }
442
443    // Validate offset and length
444    if offset >= file_size {
445        return Err(WinxError::FileAccessError {
446            path: path.to_path_buf(),
447            message: format!("Offset {offset} exceeds file size {file_size}"),
448        });
449    }
450
451    // Adjust length if needed to stay within file bounds
452    let length = min(length, file_size - offset);
453
454    // Choose reading strategy based on segment size
455    if length < DIRECT_READ_THRESHOLD {
456        debug!("Using direct read for file segment: {}", path.display());
457        read_segment_direct(&file, offset, length, path)
458    } else {
459        debug!("Using memory-mapped read for file segment: {}", path.display());
460        read_segment_mmap(&file, offset, length, path)
461    }
462}
463
464/// Read a file segment directly using standard I/O
465///
466/// # Arguments
467///
468/// * `file` - Open file handle
469/// * `offset` - Starting offset in bytes
470/// * `length` - Length of segment to read in bytes
471/// * `path` - Path to the file (for error reporting)
472///
473/// # Returns
474///
475/// A vector containing the file segment contents
476///
477/// # Errors
478///
479/// Returns an error if the file segment cannot be read
480fn read_segment_direct(file: &File, offset: u64, length: u64, path: &Path) -> Result<Vec<u8>> {
481    // Create a new file object that can be seeked
482    let mut seekable_file = file.try_clone().map_err(|e| WinxError::FileAccessError {
483        path: path.to_path_buf(),
484        message: format!("Failed to clone file handle: {e}"),
485    })?;
486
487    // Seek to the specified offset
488    seekable_file.seek(SeekFrom::Start(offset)).map_err(|e| WinxError::FileAccessError {
489        path: path.to_path_buf(),
490        message: format!("Failed to seek to offset {offset}: {e}"),
491    })?;
492
493    // Read the specified length
494    let mut buffer = Vec::with_capacity(length as usize);
495    let reader = BufReader::with_capacity(min(length as usize, 64 * 1024), seekable_file);
496
497    // Use take to limit the read to the specified length
498    reader.take(length).read_to_end(&mut buffer).map_err(|e| WinxError::FileAccessError {
499        path: path.to_path_buf(),
500        message: format!("Error reading file segment: {e}"),
501    })?;
502
503    Ok(buffer)
504}
505
506/// Read a file segment using memory mapping
507///
508/// # Arguments
509///
510/// * `file` - Open file handle
511/// * `offset` - Starting offset in bytes
512/// * `length` - Length of segment to read in bytes
513/// * `path` - Path to the file (for error reporting)
514///
515/// # Returns
516///
517/// A vector containing the file segment contents
518///
519/// # Errors
520///
521/// Returns an error if the file segment cannot be mapped
522fn read_segment_mmap(file: &File, offset: u64, length: u64, path: &Path) -> Result<Vec<u8>> {
523    // SAFETY: Memory mapping is safe here because:
524    // - Offset and length were validated against file size by caller
525    // - Data is immediately copied to Vec<u8>, not held
526    // - File handle remains valid for duration of the map operation
527    let segment_mmap = unsafe { MmapOptions::new().offset(offset).len(length as usize).map(file) }
528        .map_err(|e| WinxError::FileAccessError {
529            path: path.to_path_buf(),
530            message: format!("Failed to memory-map file segment: {e}"),
531        })?;
532
533    // Copy segment data to result
534    Ok(segment_mmap.to_vec())
535}
536
537/// Read a text file as a string using the optimal reading strategy
538///
539/// This function reads a file as text, using the most efficient strategy
540/// based on the file size.
541///
542/// # Arguments
543///
544/// * `path` - Path to the file to read
545/// * `max_file_size` - Maximum allowed file size
546///
547/// # Returns
548///
549/// A string containing the file contents
550///
551/// # Errors
552///
553/// Returns an error if the file cannot be read or exceeds the size limit
554pub fn read_file_to_string(path: &Path, max_file_size: u64) -> Result<String> {
555    let bytes = read_file_optimized(path, max_file_size)?;
556
557    String::from_utf8(bytes).map_err(|e| WinxError::FileAccessError {
558        path: path.to_path_buf(),
559        message: format!("Failed to decode file as UTF-8: {e}"),
560    })
561}
562
563/// Read a text file in a parallel, line-by-line fashion
564///
565/// This processes lines in parallel using Rayon for faster processing
566/// of large text files.
567///
568/// # Arguments
569///
570/// * `path` - Path to the file to read
571/// * `max_file_size` - Maximum allowed file size
572/// * `line_processor` - Function to process each line
573///
574/// # Returns
575///
576/// Result indicating success or failure
577///
578/// # Errors
579///
580/// Returns an error if the file cannot be read or exceeds the size limit
581pub fn process_text_file_parallel<F>(
582    path: &Path,
583    max_file_size: u64,
584    line_processor: F,
585) -> Result<()>
586where
587    F: Fn(&str) + Sync,
588{
589    let content = read_file_to_string(path, max_file_size)?;
590
591    // For larger files, use parallel processing
592    if content.len() > 1_000_000 {
593        // 1MB
594        content.par_lines().for_each(|line| {
595            line_processor(line);
596        });
597    } else {
598        // For smaller files, process sequentially
599        content.lines().for_each(|line| {
600            line_processor(line);
601        });
602    }
603
604    Ok(())
605}
606
607/// Read a text file segment as a string
608///
609/// # Arguments
610///
611/// * `path` - Path to the file to read
612/// * `offset` - Starting offset in bytes
613/// * `length` - Length of segment to read in bytes
614/// * `max_file_size` - Maximum allowed file size
615///
616/// # Returns
617///
618/// A string containing the file segment contents
619///
620/// # Errors
621///
622/// Returns an error if the file segment cannot be read
623pub fn read_file_segment_to_string(
624    path: &Path,
625    offset: u64,
626    length: u64,
627    max_file_size: u64,
628) -> Result<String> {
629    let bytes = read_file_segment(path, offset, length, max_file_size)?;
630
631    String::from_utf8(bytes).map_err(|e| WinxError::FileAccessError {
632        path: path.to_path_buf(),
633        message: format!("Failed to decode file segment as UTF-8: {e}"),
634    })
635}
636
637/// `ShareableMap` provides a thread-safe memory-mapped file access
638///
639/// This is useful for providing read-only access to multiple threads
640/// without copying the data, especially for large files.
641#[derive(Clone)]
642pub struct ShareableMap {
643    /// The memory-mapped file data
644    data: Arc<Mmap>,
645    /// The path to the mapped file
646    path: PathBuf,
647}
648
649impl ShareableMap {
650    /// Create a new `ShareableMap` from a file
651    ///
652    /// # Arguments
653    ///
654    /// * `path` - Path to the file to map
655    ///
656    /// # Returns
657    ///
658    /// A Result containing the `ShareableMap` or an error
659    ///
660    /// # Errors
661    ///
662    /// Returns an error if the file cannot be mapped
663    pub fn new(path: &Path) -> Result<Self> {
664        let file = File::open(path).map_err(|e| WinxError::FileAccessError {
665            path: path.to_path_buf(),
666            message: format!("Error opening file: {e}"),
667        })?;
668
669        // Check for empty file
670        if file
671            .metadata()
672            .map_err(|e| WinxError::FileAccessError {
673                path: path.to_path_buf(),
674                message: format!("Failed to get metadata: {e}"),
675            })?
676            .len()
677            == 0
678        {
679            return Err(WinxError::FileAccessError {
680                path: path.to_path_buf(),
681                message: "Cannot memory map empty file".to_string(),
682            });
683        }
684
685        // SAFETY: ShareableMap wraps the Mmap in Arc for thread-safe sharing.
686        // The mapped data is read-only and the Arc ensures the Mmap outlives
687        // all references. Users must be aware the underlying file should not
688        // be modified while ShareableMap is in use.
689        let mmap =
690            unsafe { MmapOptions::new().map(&file) }.map_err(|e| WinxError::FileAccessError {
691                path: path.to_path_buf(),
692                message: format!("Failed to memory-map file: {e}"),
693            })?;
694
695        Ok(Self { data: Arc::new(mmap), path: path.to_path_buf() })
696    }
697
698    /// Create a new `ShareableMap` for a segment of a file
699    ///
700    /// # Arguments
701    ///
702    /// * `path` - Path to the file to map
703    /// * `offset` - Starting offset in bytes
704    /// * `length` - Length of segment to map in bytes
705    ///
706    /// # Returns
707    ///
708    /// A Result containing the `ShareableMap` or an error
709    ///
710    /// # Errors
711    ///
712    /// Returns an error if the file segment cannot be mapped
713    pub fn new_segment(path: &Path, offset: u64, length: u64) -> Result<Self> {
714        if length == 0 {
715            return Err(WinxError::FileAccessError {
716                path: path.to_path_buf(),
717                message: "Cannot memory map segment of length 0".to_string(),
718            });
719        }
720
721        let file = File::open(path).map_err(|e| WinxError::FileAccessError {
722            path: path.to_path_buf(),
723            message: format!("Error opening file: {e}"),
724        })?;
725
726        // SAFETY: Same as ShareableMap::new, plus:
727        // - Caller is responsible for ensuring offset+length is within file bounds
728        // - The segment mapping is wrapped in Arc for safe sharing
729        let mmap = unsafe { MmapOptions::new().offset(offset).len(length as usize).map(&file) }
730            .map_err(|e| WinxError::FileAccessError {
731                path: path.to_path_buf(),
732                message: format!("Failed to memory-map file segment: {e}"),
733            })?;
734
735        Ok(Self { data: Arc::new(mmap), path: path.to_path_buf() })
736    }
737
738    /// Get the data as a byte slice
739    pub fn as_slice(&self) -> &[u8] {
740        &self.data
741    }
742
743    /// Get the path to the mapped file
744    pub fn path(&self) -> &Path {
745        &self.path
746    }
747
748    /// Get the size of the mapped data
749    pub fn len(&self) -> usize {
750        self.data.len()
751    }
752
753    /// Check if the mapped data is empty
754    pub fn is_empty(&self) -> bool {
755        self.data.is_empty()
756    }
757}
758
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Create a temporary file of `size` bytes filled with a repeating 0..=255
    /// pattern and return it together with the bytes that were written.
    fn create_test_file(size: usize) -> Result<(NamedTempFile, Vec<u8>)> {
        let mut file = NamedTempFile::new()?;

        // Pattern data (more realistic than zeros)
        let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();

        file.write_all(&data)?;
        file.flush()?;

        Ok((file, data))
    }

    #[test]
    fn test_direct_read_small_file() -> Result<()> {
        let size = 10 * 1024; // 10KB
        let (file, expected_data) = create_test_file(size)?;

        let result = read_direct(file.as_file(), size as u64, file.path())?;
        assert_eq!(result, expected_data);
        Ok(())
    }

    /// Files of 1MB and more take the chunked branch of `read_direct`.
    #[test]
    fn test_direct_read_chunked_file() -> Result<()> {
        let size = 2 * 1024 * 1024 + 123; // just over 2MB, not a multiple of the chunk size
        let (file, expected_data) = create_test_file(size)?;

        let result = read_direct(file.as_file(), size as u64, file.path())?;
        // Plain `assert!` keeps a failure from dumping megabytes of bytes.
        assert!(result == expected_data);
        Ok(())
    }

    #[test]
    fn test_mmap_read() -> Result<()> {
        let size = 1024 * 1024; // 1MB
        let (file, expected_data) = create_test_file(size)?;

        let result = read_mmap(file.as_file(), file.path())?;
        assert_eq!(result, expected_data);
        Ok(())
    }

    /// Maps larger than 10MB take the parallel copy branch of `read_mmap`.
    #[test]
    fn test_mmap_read_parallel_branch() -> Result<()> {
        let size = 11 * 1024 * 1024 + 17; // > 10_000_000 bytes, with a short last chunk
        let (file, expected_data) = create_test_file(size)?;

        let result = read_mmap(file.as_file(), file.path())?;
        assert!(result == expected_data);
        Ok(())
    }

    #[test]
    fn test_mmap_read_empty_file() -> Result<()> {
        let (file, _) = create_test_file(0)?;

        let result = read_mmap(file.as_file(), file.path())?;
        assert!(result.is_empty());
        Ok(())
    }

    #[test]
    fn test_read_file_optimized_size_limit() -> Result<()> {
        let size = 64 * 1024; // 64KB
        let (file, expected_data) = create_test_file(size)?;

        let result = read_file_optimized(file.path(), size as u64)?;
        assert_eq!(result, expected_data);

        // One byte under the actual size must be rejected.
        let too_large = read_file_optimized(file.path(), size as u64 - 1);
        assert!(matches!(too_large, Err(WinxError::FileTooLarge { .. })));
        Ok(())
    }

    #[test]
    fn test_file_segment_read() -> Result<()> {
        let size = 1024 * 1024; // 1MB
        let (file, data) = create_test_file(size)?;

        // Read a segment from the middle
        let offset = 100 * 1024; // 100KB
        let length = 200 * 1024; // 200KB
        let expected_segment = &data[offset as usize..(offset + length) as usize];

        let result = read_segment_direct(file.as_file(), offset, length, file.path())?;
        assert_eq!(result, expected_segment);

        let result = read_segment_mmap(file.as_file(), offset, length, file.path())?;
        assert_eq!(result, expected_segment);
        Ok(())
    }

    #[test]
    fn test_file_segment_bounds() -> Result<()> {
        let size = 4 * 1024; // 4KB
        let (file, data) = create_test_file(size)?;

        // A range running past the end is clamped to the end of the file.
        let tail = read_file_segment(file.path(), 4000, 10_000, u64::MAX)?;
        assert_eq!(tail, &data[4000..]);

        // A range starting at or after the end is an error.
        assert!(read_file_segment(file.path(), size as u64, 1, u64::MAX).is_err());
        Ok(())
    }

    #[test]
    fn test_shareable_map() -> Result<()> {
        let size = 100 * 1024; // 100KB
        let (file, data) = create_test_file(size)?;

        let map = ShareableMap::new(file.path())?;
        assert_eq!(map.as_slice(), &data);

        // Test segment
        let offset = 10 * 1024; // 10KB
        let length = 20 * 1024; // 20KB
        let segment_map = ShareableMap::new_segment(file.path(), offset, length)?;
        assert_eq!(segment_map.as_slice(), &data[offset as usize..(offset + length) as usize]);
        Ok(())
    }

    /// Small input (< 1MB): lines are processed sequentially.
    #[test]
    fn test_parallel_processing() -> Result<()> {
        // Create a test file with lines
        let mut file = NamedTempFile::new()?;
        let mut lines = Vec::new();

        for i in 0..1000 {
            let line = format!("Line {i}\n");
            file.write_all(line.as_bytes())?;
            lines.push(format!("Line {i}"));
        }
        file.flush()?;

        let processed_lines = std::sync::Mutex::new(Vec::new());

        process_text_file_parallel(file.path(), 1_000_000, |line| {
            if let Ok(mut lines) = processed_lines.lock() {
                lines.push(line.to_string());
            }
        })?;

        let result =
            processed_lines.lock().map_err(|error| WinxError::ResourceAllocationError {
                message: format!("Failed to lock processed lines: {error}"),
            })?;
        assert_eq!(result.len(), lines.len());

        // Check that all lines are present
        for line in &lines {
            assert!(result.contains(line));
        }
        Ok(())
    }

    /// Input above 1MB: lines go through the Rayon branch, in any order.
    #[test]
    fn test_parallel_processing_large_file() -> Result<()> {
        let line_count = 120_000;
        let mut content = String::new();
        let mut expected = Vec::with_capacity(line_count);

        for i in 0..line_count {
            let line = format!("Line {i}");
            content.push_str(&line);
            content.push('\n');
            expected.push(line);
        }
        // Guard the premise of the test: the parallel branch needs > 1MB of text.
        assert!(content.len() > 1_000_000);

        let mut file = NamedTempFile::new()?;
        file.write_all(content.as_bytes())?;
        file.flush()?;

        let processed_lines = std::sync::Mutex::new(Vec::with_capacity(line_count));

        process_text_file_parallel(file.path(), 10_000_000, |line| {
            if let Ok(mut lines) = processed_lines.lock() {
                lines.push(line.to_string());
            }
        })?;

        let mut result =
            processed_lines.into_inner().map_err(|error| WinxError::ResourceAllocationError {
                message: format!("Failed to lock processed lines: {error}"),
            })?;

        // Order is unspecified in the parallel branch, so compare sorted copies
        // (this also avoids a quadratic `contains` scan over 120k lines).
        result.sort_unstable();
        expected.sort_unstable();
        assert!(result == expected);
        Ok(())
    }
}
869}