fast_yaml_cli/batch/
reader.rs

1//! Smart file reading with automatic strategy selection based on file size.
2
3use std::fs::File;
4use std::path::Path;
5
6use memmap2::Mmap;
7
8use super::error::ProcessingError;
9
/// Default size, in bytes, at or above which files are memory-mapped: 512 KiB (524,288 bytes).
const MMAP_THRESHOLD: u64 = 512 << 10;
12
13/// File content holder that abstracts over in-memory strings and memory-mapped files.
14pub enum FileContent {
15    /// Content loaded into memory as a String
16    String(String),
17    /// Content accessed via memory-mapped file
18    Mmap(Mmap),
19}
20
21impl FileContent {
22    /// Returns the content as a string slice.
23    ///
24    /// For String variant, returns the string directly.
25    /// For Mmap variant, validates UTF-8 encoding first.
26    pub fn as_str(&self) -> Result<&str, ProcessingError> {
27        match self {
28            Self::String(s) => Ok(s),
29            Self::Mmap(mmap) => std::str::from_utf8(mmap).map_err(ProcessingError::Utf8Error),
30        }
31    }
32
33    /// Returns true if content is memory-mapped
34    pub const fn is_mmap(&self) -> bool {
35        matches!(self, Self::Mmap(_))
36    }
37
38    /// Returns the size of the content in bytes
39    pub fn len(&self) -> usize {
40        match self {
41            Self::String(s) => s.len(),
42            Self::Mmap(mmap) => mmap.len(),
43        }
44    }
45
46    /// Returns true if the content is empty
47    pub fn is_empty(&self) -> bool {
48        self.len() == 0
49    }
50}
51
/// Smart file reader that chooses a reading strategy based on file size.
///
/// Files smaller than the configured threshold are read with `std::fs::read_to_string`.
/// Files at or above the threshold are memory-mapped to avoid loading the entire content
/// into the heap.
#[derive(Debug, Clone)]
pub struct SmartFileReader {
    /// File size, in bytes, at or above which memory mapping is attempted.
    mmap_threshold: u64,
}
59
60impl SmartFileReader {
61    /// Creates a new `SmartFileReader` with the default threshold (1MB)
62    pub const fn new() -> Self {
63        Self::with_threshold(MMAP_THRESHOLD)
64    }
65
66    /// Creates a new `SmartFileReader` with a custom threshold
67    pub const fn with_threshold(threshold: u64) -> Self {
68        Self {
69            mmap_threshold: threshold,
70        }
71    }
72
73    /// Reads file content using the optimal strategy based on file size.
74    ///
75    /// Returns `FileContent` and automatically chooses between:
76    /// - `read_to_string` for files < threshold
77    /// - `mmap` for files >= threshold
78    ///
79    /// Falls back to `read_to_string` if mmap fails.
80    pub fn read(&self, path: &Path) -> Result<FileContent, ProcessingError> {
81        let metadata = std::fs::metadata(path).map_err(ProcessingError::ReadError)?;
82
83        let size = metadata.len();
84
85        if size >= self.mmap_threshold {
86            Self::read_mmap(path).or_else(|_| {
87                // Fallback to read_to_string if mmap fails
88                Self::read_string(path)
89            })
90        } else {
91            Self::read_string(path)
92        }
93    }
94
95    /// Reads file into memory as a String
96    fn read_string(path: &Path) -> Result<FileContent, ProcessingError> {
97        let content = std::fs::read_to_string(path).map_err(ProcessingError::ReadError)?;
98        Ok(FileContent::String(content))
99    }
100
101    /// Reads file using memory-mapped file
102    ///
103    /// Uses unsafe memory-mapping for performance. See SAFETY note below.
104    #[allow(unsafe_code)]
105    fn read_mmap(path: &Path) -> Result<FileContent, ProcessingError> {
106        let file = File::open(path).map_err(ProcessingError::MmapError)?;
107
108        // SAFETY: We're opening the file read-only and mapping it.
109        // The file could be modified by another process during reading,
110        // but this is acceptable for a formatter tool:
111        // - If modified, worst case is a parse error (which is handled)
112        // - User expectation is that files aren't modified during formatting
113        // - Same race condition exists with read_to_string
114        // - The mmap is read-only, so we won't write to mapped memory
115        // - Mmap type ensures memory is unmapped when dropped
116        let mmap = unsafe { Mmap::map(&file).map_err(ProcessingError::MmapError)? };
117
118        Ok(FileContent::Mmap(mmap))
119    }
120}
121
122impl Default for SmartFileReader {
123    fn default() -> Self {
124        Self::new()
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding exactly `bytes`.
    fn temp_file_with(bytes: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(bytes).unwrap();
        file.flush().unwrap();
        file
    }

    #[test]
    fn test_file_content_as_str_string() {
        let content = FileContent::String("test content".to_string());
        assert_eq!(content.as_str().unwrap(), "test content");
        assert!(!content.is_mmap());
        assert_eq!(content.len(), 12);
        assert!(!content.is_empty());
    }

    #[test]
    fn test_file_content_is_empty() {
        let content = FileContent::String(String::new());
        assert!(content.is_empty());
        assert_eq!(content.len(), 0);
    }

    #[test]
    fn test_file_content_len() {
        let content = FileContent::String("hello".to_string());
        assert_eq!(content.len(), 5);
    }

    #[test]
    fn test_reader_small_file_uses_string() {
        let file = temp_file_with(b"small: content");

        let reader = SmartFileReader::new();
        let content = reader.read(file.path()).unwrap();

        assert!(!content.is_mmap());
        assert_eq!(content.as_str().unwrap(), "small: content");
    }

    #[test]
    fn test_reader_empty_file() {
        let file = temp_file_with(b"");

        let reader = SmartFileReader::new();
        let content = reader.read(file.path()).unwrap();

        // An empty file is below the default threshold, so it is read as a String.
        assert!(!content.is_mmap());
        assert!(content.is_empty());
        assert_eq!(content.as_str().unwrap(), "");
    }

    #[test]
    fn test_reader_large_file_uses_mmap() {
        // 2 MiB of content, well above the default threshold (`MMAP_THRESHOLD`, 512 KiB).
        let large_content = "x".repeat(2 * 1024 * 1024);
        let file = temp_file_with(large_content.as_bytes());

        let reader = SmartFileReader::new();
        let content = reader.read(file.path()).unwrap();

        assert!(content.is_mmap());
        assert_eq!(content.len(), large_content.len());
        // `assert!` instead of `assert_eq!` so a failure does not dump 2 MiB of text.
        assert!(content.as_str().unwrap() == large_content);
    }

    #[test]
    fn test_reader_custom_threshold() {
        let file = temp_file_with(b"test content");

        // Threshold of 5 bytes should trigger mmap for our 12-byte file
        let reader = SmartFileReader::with_threshold(5);
        let content = reader.read(file.path()).unwrap();

        // Should use mmap since file > 5 bytes
        assert!(content.is_mmap());
        assert_eq!(content.as_str().unwrap(), "test content");
    }

    #[test]
    fn test_reader_threshold_boundary_is_inclusive() {
        let file = temp_file_with(b"test content");

        // File size equal to the threshold is memory-mapped (`>=` comparison).
        let at_threshold = SmartFileReader::with_threshold(12);
        assert!(at_threshold.read(file.path()).unwrap().is_mmap());

        // One byte below the threshold falls back to the String strategy.
        let above_size = SmartFileReader::with_threshold(13);
        assert!(!above_size.read(file.path()).unwrap().is_mmap());
    }

    #[test]
    fn test_reader_default_equals_new() {
        let reader1 = SmartFileReader::new();
        let reader2 = SmartFileReader::default();

        assert_eq!(reader1.mmap_threshold, reader2.mmap_threshold);
        assert_eq!(reader1.mmap_threshold, MMAP_THRESHOLD);
    }

    #[test]
    fn test_read_nonexistent_file() {
        let reader = SmartFileReader::new();
        let result = reader.read(Path::new("/nonexistent/file.yaml"));
        assert!(result.is_err());
    }

    #[test]
    fn test_read_utf8_validation_with_mmap() {
        // 20 bytes x 100_000 = 2_000_000 bytes of valid UTF-8, above the default threshold
        let content = "valid: utf8 content\n".repeat(100_000);
        let file = temp_file_with(content.as_bytes());

        let reader = SmartFileReader::new();
        let file_content = reader.read(file.path()).unwrap();

        // Should be mmap and valid UTF-8
        assert!(file_content.is_mmap());
        assert!(file_content.as_str().is_ok());
    }

    #[test]
    fn test_mmap_invalid_utf8_rejected_by_as_str() {
        let file = temp_file_with(&[b'a', 0xFF, 0xFE, b'b']);

        // Force the mmap strategy: reading succeeds, validation is deferred to `as_str`.
        let reader = SmartFileReader::with_threshold(1);
        let content = reader.read(file.path()).unwrap();

        assert!(content.is_mmap());
        assert_eq!(content.len(), 4);
        assert!(content.as_str().is_err());
    }

    #[test]
    fn test_small_invalid_utf8_rejected_by_read() {
        let file = temp_file_with(&[b'a', 0xFF, 0xFE, b'b']);

        // Below the threshold `read_to_string` is used, which validates UTF-8 eagerly.
        let reader = SmartFileReader::new();
        assert!(reader.read(file.path()).is_err());
    }

    #[test]
    #[cfg(unix)]
    fn test_symlink_handling() {
        use std::os::unix::fs::symlink;

        let temp_dir = tempfile::tempdir().unwrap();
        let target = temp_dir.path().join("target.yaml");
        let link = temp_dir.path().join("link.yaml");

        // Create target file
        std::fs::write(&target, "key: value\n").unwrap();

        // Create symlink
        symlink(&target, &link).unwrap();

        // Reader should follow symlink and read content
        let reader = SmartFileReader::new();
        let content = reader.read(&link).unwrap();

        assert_eq!(content.as_str().unwrap(), "key: value\n");
    }

    #[test]
    #[cfg(unix)]
    fn test_broken_symlink_error() {
        use std::os::unix::fs::symlink;

        let temp_dir = tempfile::tempdir().unwrap();
        let nonexistent = temp_dir.path().join("nonexistent.yaml");
        let link = temp_dir.path().join("broken_link.yaml");

        // Create symlink to nonexistent file
        symlink(&nonexistent, &link).unwrap();

        // Reading broken symlink should fail
        let reader = SmartFileReader::new();
        let result = reader.read(&link);

        assert!(result.is_err());
    }

    #[test]
    #[cfg(unix)]
    fn test_symlink_loop_detection() {
        use std::os::unix::fs::symlink;

        let temp_dir = tempfile::tempdir().unwrap();
        let link1 = temp_dir.path().join("link1.yaml");
        let link2 = temp_dir.path().join("link2.yaml");

        // Create symlink loop: link1 -> link2 -> link1
        symlink(&link2, &link1).unwrap();
        symlink(&link1, &link2).unwrap();

        // Reading symlink loop should fail (OS detects ELOOP)
        let reader = SmartFileReader::new();
        let result = reader.read(&link1);

        assert!(result.is_err());
    }

    #[test]
    #[cfg(unix)]
    fn test_symlink_to_directory_error() {
        use std::os::unix::fs::symlink;

        let temp_dir = tempfile::tempdir().unwrap();
        let dir = temp_dir.path().join("subdir");
        let link = temp_dir.path().join("dir_link.yaml");

        // Create directory and symlink to it
        std::fs::create_dir(&dir).unwrap();
        symlink(&dir, &link).unwrap();

        // Reading symlink to directory should fail
        let reader = SmartFileReader::new();
        let result = reader.read(&link);

        assert!(result.is_err());
    }
}