cowfile/cowfile.rs
1//! The main [`CowFile`] type with OS-level copy-on-write and a pending write log.
2//!
3//! `CowFile` wraps binary data (from a [`Vec<u8>`] or a copy-on-write memory map)
4//! with a pending log that tracks writes. The committed buffer is accessible as
5//! `&[u8]` via [`data`](CowFile::data), while [`read`](CowFile::read) and typed
6//! accessors composite pending writes over the committed state.
7//!
8//! # Thread Safety
9//!
10//! `CowFile` is [`Send`] and [`Sync`]. The committed buffer can be read
11//! concurrently via [`data`](CowFile::data) from multiple threads. Writes
12//! to the pending log are serialised by an internal [`RwLock`](std::sync::RwLock).
13
14use std::{
15 fmt,
16 io::Write,
17 path::{Path, PathBuf},
18 sync::{
19 atomic::{AtomicBool, Ordering},
20 RwLock,
21 },
22};
23
24use crate::{
25 cursor::CowFileCursor,
26 error::{Error, Result},
27 primitives::Primitive,
28 traits::{ReadFrom, WriteTo},
29};
30
/// Size cut-off, in bytes, at or above which [`to_file`](CowFile::to_file)
/// writes through a writable memory map instead of materialising the output
/// in memory first. Equal to 64 MiB.
const MMAP_WRITE_THRESHOLD: usize = 64 << 20;
34
/// Inner storage for `CowFile`.
///
/// Holds the committed bytes in one of two backends. The methods on this type
/// expose either backend uniformly as a byte slice.
enum Inner {
    /// Owned byte vector, directly mutable.
    Vec(Vec<u8>),
    /// Copy-on-write memory map (`MAP_PRIVATE`). Writes are process-private.
    Mmap(memmap2::MmapMut),
}
42
43impl Inner {
44 fn as_slice(&self) -> &[u8] {
45 match self {
46 Inner::Vec(v) => v.as_slice(),
47 Inner::Mmap(m) => m,
48 }
49 }
50
51 fn as_mut_slice(&mut self) -> &mut [u8] {
52 match self {
53 Inner::Vec(v) => v.as_mut_slice(),
54 Inner::Mmap(m) => m.as_mut(),
55 }
56 }
57
58 fn len(&self) -> usize {
59 match self {
60 Inner::Vec(v) => v.len(),
61 Inner::Mmap(m) => m.len(),
62 }
63 }
64}
65
/// A single pending write recorded in the log.
struct PendingWrite {
    /// Absolute byte offset in the file at which `data` begins.
    offset: usize,
    /// Replacement bytes; an owned copy of what the caller passed to `write`.
    data: Vec<u8>,
}
71
72/// A copy-on-write file abstraction backed by memory or a file.
73///
74/// Writes accumulate in a pending log and are applied to the committed buffer
75/// on [`commit`](CowFile::commit). The committed buffer is accessible as
76/// `&[u8]` via [`data`](CowFile::data), while [`read`](CowFile::read) and
77/// typed I/O methods composite pending writes over the committed state.
78///
79/// # Architecture
80///
81/// ```text
82/// Committed Buffer Pending Log
83/// +---------------------+ +-------------------------+
84/// | Vec<u8> or MmapMut | <--- | Vec<PendingWrite> |
85/// | (OS-level CoW) | | (applied on commit) |
86/// +---------------------+ +-------------------------+
87/// ```
88///
89/// For memory-mapped files, the buffer is created with
90/// [`map_copy`](memmap2::MmapOptions::map_copy), which uses `MAP_PRIVATE` on
91/// Unix and `PAGE_WRITECOPY` on Windows. Only pages touched by
92/// [`commit`](CowFile::commit) are copied into anonymous memory — the rest
93/// of the file remains demand-paged from disk.
94///
95/// # Examples
96///
97/// ```
98/// use cowfile::CowFile;
99///
100/// let pf = CowFile::from_vec(vec![0u8; 100]);
101///
102/// // Writes go to the pending log
103/// pf.write(10, &[0xFF, 0xFE]).unwrap();
104///
105/// // data() returns committed state
106/// assert_eq!(pf.data()[10], 0x00);
107///
108/// // read() composites pending writes
109/// assert_eq!(pf.read_byte(10).unwrap(), 0xFF);
110///
111/// // Commit applies pending to the buffer
112/// let mut pf = pf;
113/// pf.commit().unwrap();
114/// assert_eq!(pf.data()[10], 0xFF);
115/// ```
pub struct CowFile {
    /// Committed buffer — only mutated by `commit()`.
    buffer: Inner,
    /// Pending writes in insertion order, accumulated via interior mutability
    /// (the lock allows appending through `&self`).
    pending: RwLock<Vec<PendingWrite>>,
    /// Fast check to skip empty pending iteration: set by `write()`, cleared by
    /// `commit()` and `discard()`.
    dirty: AtomicBool,
    /// Original file path (set by `open()`, `None` otherwise).
    source_path: Option<PathBuf>,
}
126
127// Static assertion: CowFile must be Send + Sync.
128const _: () = {
129 fn assert_send_sync<T: Send + Sync>() {}
130 fn check() {
131 assert_send_sync::<CowFile>();
132 }
133 let _ = check;
134};
135
136impl fmt::Debug for CowFile {
137 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
138 f.debug_struct("CowFile")
139 .field("len", &self.buffer.len())
140 .field(
141 "backend",
142 &match &self.buffer {
143 Inner::Vec(_) => "Vec",
144 Inner::Mmap(_) => "Mmap",
145 },
146 )
147 .field("dirty", &self.dirty.load(Ordering::Relaxed))
148 .finish_non_exhaustive()
149 }
150}
151
152impl CowFile {
153 /// Creates a `CowFile` from an owned byte vector.
154 ///
155 /// The provided bytes become the committed buffer. No copies are made
156 /// during construction — the vector is moved into the `CowFile`.
157 ///
158 /// # Examples
159 ///
160 /// ```
161 /// use cowfile::CowFile;
162 ///
163 /// let pf = CowFile::from_vec(vec![0x4D, 0x5A, 0x90, 0x00]);
164 /// assert_eq!(pf.len(), 4);
165 /// ```
166 pub fn from_vec(data: Vec<u8>) -> Self {
167 CowFile {
168 buffer: Inner::Vec(data),
169 pending: RwLock::new(Vec::new()),
170 dirty: AtomicBool::new(false),
171 source_path: None,
172 }
173 }
174
175 /// Creates a `CowFile` by memory-mapping a file from the given path.
176 ///
177 /// The file is mapped with copy-on-write semantics (`MAP_PRIVATE` on Unix,
178 /// `PAGE_WRITECOPY` on Windows). The original file is never modified.
179 /// Only pages touched by [`commit`](CowFile::commit) are copied into
180 /// anonymous memory — the rest of the file remains demand-paged from disk.
181 ///
182 /// # Errors
183 ///
184 /// Returns [`Error::Io`] if the file cannot be opened or memory-mapped.
185 ///
186 /// # Examples
187 ///
188 /// ```no_run
189 /// use cowfile::CowFile;
190 ///
191 /// let pf = CowFile::open("binary.exe").unwrap();
192 /// println!("File size: {} bytes", pf.len());
193 /// ```
194 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
195 let path = path.as_ref();
196 let file = std::fs::File::open(path)?;
197 let mut cow = Self::from_file(file)?;
198 cow.source_path = Some(path.to_path_buf());
199 Ok(cow)
200 }
201
202 /// Creates a `CowFile` from an already-opened [`std::fs::File`].
203 ///
204 /// The file is mapped with copy-on-write semantics. The original file is
205 /// never modified.
206 ///
207 /// Empty files (0 bytes) are handled by using a `Vec` backend instead of
208 /// mmap, since memory-mapping an empty file is not supported on all
209 /// platforms.
210 ///
211 /// # Errors
212 ///
213 /// Returns [`Error::Io`] if the file cannot be memory-mapped.
214 ///
215 /// # Examples
216 ///
217 /// ```no_run
218 /// use cowfile::CowFile;
219 ///
220 /// let file = std::fs::File::open("binary.exe").unwrap();
221 /// let pf = CowFile::from_file(file).unwrap();
222 /// println!("File size: {} bytes", pf.len());
223 /// ```
224 pub fn from_file(file: std::fs::File) -> Result<Self> {
225 let metadata = file.metadata()?;
226 if metadata.len() == 0 {
227 return Ok(Self::from_vec(Vec::new()));
228 }
229
230 // SAFETY: We use map_copy which creates a private CoW mapping.
231 // The file must not be modified externally while the mapping is alive.
232 // This is the same contract as any memory-mapped file in Rust.
233 let mmap = unsafe { memmap2::MmapOptions::new().map_copy(&file)? };
234 Ok(CowFile {
235 buffer: Inner::Mmap(mmap),
236 pending: RwLock::new(Vec::new()),
237 dirty: AtomicBool::new(false),
238 source_path: None,
239 })
240 }
241
242 /// Returns the committed buffer as a byte slice.
243 ///
244 /// This is a true zero-cost `&[u8]` reference into the committed buffer.
245 /// For mmap-backed files, only accessed pages are loaded into physical
246 /// memory by the OS.
247 ///
248 /// Pending writes are **not** visible through this method. Use
249 /// [`read`](CowFile::read) or [`read_le`](CowFile::read_le) for a view
250 /// that composites pending writes, or call [`commit`](CowFile::commit)
251 /// first.
252 ///
253 /// # Examples
254 ///
255 /// ```
256 /// use cowfile::CowFile;
257 ///
258 /// let pf = CowFile::from_vec(vec![1, 2, 3]);
259 /// let data: &[u8] = pf.data();
260 /// assert_eq!(data, &[1, 2, 3]);
261 /// ```
262 pub fn data(&self) -> &[u8] {
263 self.buffer.as_slice()
264 }
265
266 /// Returns the total length of the data in bytes.
267 pub fn len(&self) -> usize {
268 self.buffer.len()
269 }
270
271 /// Returns `true` if the data is empty (zero bytes).
272 pub fn is_empty(&self) -> bool {
273 self.buffer.len() == 0
274 }
275
276 /// Returns `true` if there are uncommitted pending writes.
277 ///
278 /// # Examples
279 ///
280 /// ```
281 /// use cowfile::CowFile;
282 ///
283 /// let pf = CowFile::from_vec(vec![0u8; 10]);
284 /// assert!(!pf.has_pending());
285 ///
286 /// pf.write(0, &[0xFF]).unwrap();
287 /// assert!(pf.has_pending());
288 /// ```
289 pub fn has_pending(&self) -> bool {
290 self.dirty.load(Ordering::Relaxed)
291 }
292
293 /// Reads `length` bytes starting at `offset`, compositing pending writes.
294 ///
295 /// The returned bytes reflect pending writes applied over the committed
296 /// buffer. When there are no pending writes, this is equivalent to
297 /// slicing [`data`](CowFile::data).
298 ///
299 /// # Errors
300 ///
301 /// Returns [`Error::OutOfBounds`] if the requested range exceeds the data size.
302 ///
303 /// # Examples
304 ///
305 /// ```
306 /// use cowfile::CowFile;
307 ///
308 /// let pf = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
309 /// pf.write(2, &[0xFF]).unwrap();
310 ///
311 /// let data = pf.read(1, 3).unwrap();
312 /// assert_eq!(data, vec![2, 0xFF, 4]);
313 /// ```
314 pub fn read(&self, offset: usize, length: usize) -> Result<Vec<u8>> {
315 self.check_bounds(offset, length)?;
316
317 if length == 0 {
318 return Ok(Vec::new());
319 }
320
321 let mut buf = self.buffer.as_slice()[offset..offset + length].to_vec();
322
323 if self.dirty.load(Ordering::Relaxed) {
324 let pending = self
325 .pending
326 .read()
327 .map_err(|e| Error::LockPoisoned(e.to_string()))?;
328 apply_pending(&mut buf, offset, length, &pending);
329 }
330
331 Ok(buf)
332 }
333
334 /// Reads a single byte at the given offset, compositing pending writes.
335 ///
336 /// # Errors
337 ///
338 /// Returns [`Error::OutOfBounds`] if the offset is beyond the data size.
339 ///
340 /// # Examples
341 ///
342 /// ```
343 /// use cowfile::CowFile;
344 ///
345 /// let pf = CowFile::from_vec(vec![0xAA, 0xBB, 0xCC]);
346 /// assert_eq!(pf.read_byte(1).unwrap(), 0xBB);
347 ///
348 /// pf.write_byte(1, 0xFF).unwrap();
349 /// assert_eq!(pf.read_byte(1).unwrap(), 0xFF);
350 /// ```
351 pub fn read_byte(&self, offset: usize) -> Result<u8> {
352 self.check_bounds(offset, 1)?;
353
354 if self.dirty.load(Ordering::Relaxed) {
355 let pending = self
356 .pending
357 .read()
358 .map_err(|e| Error::LockPoisoned(e.to_string()))?;
359 // Scan in reverse — last write wins.
360 for pw in pending.iter().rev() {
361 let pw_end = pw.offset + pw.data.len();
362 if offset >= pw.offset && offset < pw_end {
363 return Ok(pw.data[offset - pw.offset]);
364 }
365 }
366 }
367
368 Ok(self.buffer.as_slice()[offset])
369 }
370
371 /// Writes `data` at the given `offset` into the pending log.
372 ///
373 /// The committed buffer is not modified. Pending writes are composited
374 /// into reads via [`read`](CowFile::read) and applied to the buffer on
375 /// [`commit`](CowFile::commit).
376 ///
377 /// Empty writes (zero-length data) are silently ignored.
378 ///
379 /// # Errors
380 ///
381 /// Returns [`Error::OutOfBounds`] if the write extends beyond the data size.
382 ///
383 /// # Examples
384 ///
385 /// ```
386 /// use cowfile::CowFile;
387 ///
388 /// let pf = CowFile::from_vec(vec![0u8; 100]);
389 /// pf.write(50, &[0xDE, 0xAD, 0xBE, 0xEF]).unwrap();
390 ///
391 /// let data = pf.read(50, 4).unwrap();
392 /// assert_eq!(data, vec![0xDE, 0xAD, 0xBE, 0xEF]);
393 /// ```
394 pub fn write(&self, offset: usize, data: &[u8]) -> Result<()> {
395 if data.is_empty() {
396 return Ok(());
397 }
398
399 self.check_bounds(offset, data.len())?;
400
401 self.pending
402 .write()
403 .map_err(|e| Error::LockPoisoned(e.to_string()))?
404 .push(PendingWrite {
405 offset,
406 data: data.to_vec(),
407 });
408 self.dirty.store(true, Ordering::Relaxed);
409 Ok(())
410 }
411
412 /// Writes a single byte at the given offset into the pending log.
413 ///
414 /// # Errors
415 ///
416 /// Returns [`Error::OutOfBounds`] if the offset is beyond the data size.
417 pub fn write_byte(&self, offset: usize, byte: u8) -> Result<()> {
418 self.write(offset, &[byte])
419 }
420
421 /// Applies all pending writes to the committed buffer and clears the log.
422 ///
423 /// For mmap-backed files, only the OS pages touched by writes are copied
424 /// into anonymous memory (`MAP_PRIVATE` CoW). The rest of the file remains
425 /// demand-paged from disk.
426 ///
427 /// # Errors
428 ///
    /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned.
    /// Pending writes are bounds-checked when they are recorded, so applying
    /// them is not expected to fail.
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// use cowfile::CowFile;
436 ///
437 /// let mut pf = CowFile::from_vec(vec![0u8; 10]);
438 ///
439 /// pf.write(0, &[0xAA]).unwrap();
440 /// assert_eq!(pf.data()[0], 0x00); // Not yet committed
441 ///
442 /// pf.commit().unwrap();
443 /// assert_eq!(pf.data()[0], 0xAA); // Now committed
444 /// assert!(!pf.has_pending());
445 /// ```
446 pub fn commit(&mut self) -> Result<()> {
447 if !*self.dirty.get_mut() {
448 return Ok(());
449 }
450
451 let pending = self
452 .pending
453 .get_mut()
454 .map_err(|e| Error::LockPoisoned(e.to_string()))?;
455 let buf = self.buffer.as_mut_slice();
456
457 for pw in pending.drain(..) {
458 buf[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
459 }
460
461 *self.dirty.get_mut() = false;
462 Ok(())
463 }
464
465 /// Discards all pending writes without applying them.
466 ///
467 /// # Errors
468 ///
469 /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned.
470 ///
471 /// # Examples
472 ///
473 /// ```
474 /// use cowfile::CowFile;
475 ///
476 /// let mut pf = CowFile::from_vec(vec![0u8; 10]);
477 /// pf.write(0, &[0xFF]).unwrap();
478 /// assert!(pf.has_pending());
479 ///
480 /// pf.discard().unwrap();
481 /// assert!(!pf.has_pending());
482 /// assert_eq!(pf.data()[0], 0x00);
483 /// ```
484 pub fn discard(&mut self) -> Result<()> {
485 self.pending
486 .get_mut()
487 .map_err(|e| Error::LockPoisoned(e.to_string()))?
488 .clear();
489 *self.dirty.get_mut() = false;
490 Ok(())
491 }
492
493 /// Reads a primitive value in little-endian byte order at the given offset.
494 ///
495 /// Composites pending writes over the committed state.
496 ///
497 /// # Errors
498 ///
499 /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
500 ///
501 /// # Examples
502 ///
503 /// ```
504 /// use cowfile::CowFile;
505 ///
506 /// let pf = CowFile::from_vec(vec![0xEF, 0xBE, 0xAD, 0xDE, 0, 0, 0, 0]);
507 /// assert_eq!(pf.read_le::<u32>(0).unwrap(), 0xDEADBEEF);
508 /// ```
509 pub fn read_le<T: Primitive>(&self, offset: usize) -> Result<T> {
510 let data = self.read(offset, T::SIZE)?;
511 Ok(T::from_le_bytes(&data))
512 }
513
514 /// Reads a primitive value in big-endian byte order at the given offset.
515 ///
516 /// Composites pending writes over the committed state.
517 ///
518 /// # Errors
519 ///
520 /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
521 ///
522 /// # Examples
523 ///
524 /// ```
525 /// use cowfile::CowFile;
526 ///
527 /// let pf = CowFile::from_vec(vec![0xDE, 0xAD, 0xBE, 0xEF, 0, 0, 0, 0]);
528 /// assert_eq!(pf.read_be::<u32>(0).unwrap(), 0xDEADBEEF);
529 /// ```
530 pub fn read_be<T: Primitive>(&self, offset: usize) -> Result<T> {
531 let data = self.read(offset, T::SIZE)?;
532 Ok(T::from_be_bytes(&data))
533 }
534
535 /// Writes a primitive value in little-endian byte order at the given offset.
536 ///
537 /// The write goes to the pending log.
538 ///
539 /// # Errors
540 ///
541 /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
542 ///
543 /// # Examples
544 ///
545 /// ```
546 /// use cowfile::CowFile;
547 ///
548 /// let pf = CowFile::from_vec(vec![0u8; 8]);
549 /// pf.write_le::<u32>(0, 0xDEADBEEF).unwrap();
550 /// assert_eq!(pf.read(0, 4).unwrap(), vec![0xEF, 0xBE, 0xAD, 0xDE]);
551 /// ```
552 pub fn write_le<T: Primitive>(&self, offset: usize, value: T) -> Result<()> {
553 let mut buf = vec![0u8; T::SIZE];
554 value.write_le_bytes(&mut buf);
555 self.write(offset, &buf)
556 }
557
558 /// Writes a primitive value in big-endian byte order at the given offset.
559 ///
560 /// The write goes to the pending log.
561 ///
562 /// # Errors
563 ///
564 /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
565 ///
566 /// # Examples
567 ///
568 /// ```
569 /// use cowfile::CowFile;
570 ///
571 /// let pf = CowFile::from_vec(vec![0u8; 8]);
572 /// pf.write_be::<u32>(0, 0xDEADBEEF).unwrap();
573 /// assert_eq!(pf.read(0, 4).unwrap(), vec![0xDE, 0xAD, 0xBE, 0xEF]);
574 /// ```
575 pub fn write_be<T: Primitive>(&self, offset: usize, value: T) -> Result<()> {
576 let mut buf = vec![0u8; T::SIZE];
577 value.write_be_bytes(&mut buf);
578 self.write(offset, &buf)
579 }
580
581 /// Reads a user-defined type implementing [`ReadFrom`] at the given offset.
582 ///
583 /// Composites pending writes over the committed state.
584 ///
585 /// # Errors
586 ///
587 /// Returns any error produced by the [`ReadFrom`] implementation.
588 ///
589 /// # Examples
590 ///
591 /// ```
592 /// use cowfile::{CowFile, ReadFrom, Result};
593 ///
594 /// struct Pair { a: u16, b: u16 }
595 ///
596 /// impl ReadFrom for Pair {
597 /// fn read_from(pf: &CowFile, offset: usize) -> Result<Self> {
598 /// Ok(Pair {
599 /// a: pf.read_le::<u16>(offset)?,
600 /// b: pf.read_le::<u16>(offset + 2)?,
601 /// })
602 /// }
603 /// }
604 ///
605 /// let pf = CowFile::from_vec(vec![0x01, 0x00, 0x02, 0x00]);
606 /// let pair: Pair = pf.read_type(0).unwrap();
607 /// assert_eq!(pair.a, 1);
608 /// assert_eq!(pair.b, 2);
609 /// ```
610 pub fn read_type<T: ReadFrom>(&self, offset: usize) -> Result<T> {
611 T::read_from(self, offset)
612 }
613
614 /// Writes a user-defined type implementing [`WriteTo`] at the given offset.
615 ///
616 /// The write goes to the pending log.
617 ///
618 /// # Errors
619 ///
620 /// Returns any error produced by the [`WriteTo`] implementation.
621 ///
622 /// # Examples
623 ///
624 /// ```
625 /// use cowfile::{CowFile, WriteTo, Result};
626 ///
627 /// struct Pair { a: u16, b: u16 }
628 ///
629 /// impl WriteTo for Pair {
630 /// fn write_to(&self, pf: &CowFile, offset: usize) -> Result<()> {
631 /// pf.write_le::<u16>(offset, self.a)?;
632 /// pf.write_le::<u16>(offset + 2, self.b)?;
633 /// Ok(())
634 /// }
635 /// }
636 ///
637 /// let pf = CowFile::from_vec(vec![0u8; 8]);
638 /// pf.write_type(0, &Pair { a: 1, b: 2 }).unwrap();
639 /// assert_eq!(pf.read(0, 4).unwrap(), vec![0x01, 0x00, 0x02, 0x00]);
640 /// ```
641 pub fn write_type<T: WriteTo>(&self, offset: usize, value: &T) -> Result<()> {
642 value.write_to(self, offset)
643 }
644
645 /// Creates a cursor over this `CowFile` at position 0.
646 ///
647 /// The returned [`CowFileCursor`] implements [`std::io::Read`],
648 /// [`std::io::Write`], and [`std::io::Seek`], allowing the `CowFile`
649 /// to be used with any API that expects standard I/O traits.
650 ///
651 /// Multiple cursors can exist over the same `CowFile` simultaneously,
652 /// each with its own independent position.
653 ///
654 /// # Examples
655 ///
656 /// ```
657 /// use std::io::{Read, Write, Seek, SeekFrom};
658 /// use cowfile::CowFile;
659 ///
660 /// let pf = CowFile::from_vec(vec![0u8; 32]);
661 /// let mut cursor = pf.cursor();
662 ///
663 /// cursor.write_all(&[1, 2, 3, 4]).unwrap();
664 /// cursor.seek(SeekFrom::Start(0)).unwrap();
665 ///
666 /// let mut buf = [0u8; 4];
667 /// cursor.read_exact(&mut buf).unwrap();
668 /// assert_eq!(buf, [1, 2, 3, 4]);
669 /// ```
670 pub fn cursor(&self) -> CowFileCursor<'_> {
671 CowFileCursor::new(self)
672 }
673
    /// Returns the path this instance was opened from, if it was created via [`open`](CowFile::open).
    ///
    /// Returns `None` for instances created via [`from_vec`](CowFile::from_vec) or [`from_file`](CowFile::from_file).
677 ///
678 /// # Examples
679 ///
680 /// ```no_run
681 /// use cowfile::CowFile;
682 ///
683 /// let pf = CowFile::open("binary.exe").unwrap();
684 /// assert!(pf.source_path().is_some());
685 ///
686 /// let pf = CowFile::from_vec(vec![0u8; 10]);
687 /// assert!(pf.source_path().is_none());
688 /// ```
689 pub fn source_path(&self) -> Option<&Path> {
690 self.source_path.as_deref()
691 }
692
693 /// Creates an independent copy of this `CowFile`.
694 ///
695 /// For mmap-backed files with a known source path, re-opens the original
696 /// file — a new `MAP_PRIVATE` mmap that shares physical read pages with
697 /// the parent via OS-level copy-on-write. For vec-backed files or those
698 /// without a source path, clones the data.
699 ///
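    /// Pending writes are **not** carried over — the fork starts clean. When the
    /// fork is produced by re-opening the source file, changes this instance has
    /// already applied with [`commit`](CowFile::commit) are not carried over
    /// either, because they exist only in its private mapping, not in the file.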
701 ///
702 /// # Errors
703 ///
704 /// Returns [`Error::Io`] if the source file cannot be reopened.
705 ///
706 /// # Examples
707 ///
708 /// ```no_run
709 /// use cowfile::CowFile;
710 ///
711 /// let pf = CowFile::open("binary.exe").unwrap();
712 /// pf.write(0, &[0xFF]).unwrap();
713 ///
714 /// let forked = pf.fork().unwrap();
715 /// // Fork starts clean — no pending writes
716 /// assert!(!forked.has_pending());
717 /// // But reads the same committed data
718 /// assert_eq!(forked.data()[0], pf.data()[0]);
719 /// ```
720 pub fn fork(&self) -> Result<CowFile> {
721 match &self.source_path {
722 Some(path) => CowFile::open(path),
723 None => Ok(CowFile::from_vec(self.buffer.as_slice().to_vec())),
724 }
725 }
726
727 /// Produces a `Vec<u8>` with all pending writes composited over the
728 /// committed buffer.
729 ///
730 /// # Errors
731 ///
732 /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned.
733 ///
734 /// # Examples
735 ///
736 /// ```
737 /// use cowfile::CowFile;
738 ///
739 /// let pf = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
740 /// pf.write(0, &[0xFF]).unwrap();
741 ///
742 /// let output = pf.to_vec().unwrap();
743 /// assert_eq!(output, vec![0xFF, 2, 3, 4, 5]);
744 /// ```
745 pub fn to_vec(&self) -> Result<Vec<u8>> {
746 let mut output = self.buffer.as_slice().to_vec();
747
748 if self.dirty.load(Ordering::Relaxed) {
749 let pending = self
750 .pending
751 .read()
752 .map_err(|e| Error::LockPoisoned(e.to_string()))?;
753 for pw in pending.iter() {
754 output[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
755 }
756 }
757
758 Ok(output)
759 }
760
761 /// Writes the data with all pending writes applied to disk.
762 ///
763 /// For files smaller than 64 MiB, this uses buffered I/O. For larger
764 /// files, this uses a writable memory map for efficient output.
765 ///
766 /// # Errors
767 ///
768 /// Returns [`Error::Io`] if the file cannot be created or written.
769 ///
770 /// # Examples
771 ///
772 /// ```no_run
773 /// use cowfile::CowFile;
774 ///
775 /// let pf = CowFile::from_vec(vec![0u8; 1024]);
776 /// pf.write(0, &[0x4D, 0x5A]).unwrap();
777 /// pf.to_file("output.bin").unwrap();
778 /// ```
779 pub fn to_file(&self, path: impl AsRef<Path>) -> Result<()> {
780 let size = self.buffer.len();
781
782 if size >= MMAP_WRITE_THRESHOLD {
783 self.to_file_mmap(path.as_ref())
784 } else {
785 let output = self.to_vec()?;
786 let mut file = std::fs::File::create(path.as_ref())?;
787 file.write_all(&output)?;
788 file.flush()?;
789 Ok(())
790 }
791 }
792
793 /// Consumes the `CowFile` and returns the data as an owned `Vec<u8>`.
794 ///
795 /// If there are no pending writes and the backend is a `Vec`, this is a
796 /// zero-copy move. Otherwise, the data is materialized with pending writes
797 /// applied.
798 ///
799 /// # Errors
800 ///
801 /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned.
802 ///
803 /// # Examples
804 ///
805 /// ```
806 /// use cowfile::CowFile;
807 ///
808 /// let pf = CowFile::from_vec(vec![1, 2, 3]);
809 /// let data = pf.into_vec().unwrap();
810 /// assert_eq!(data, vec![1, 2, 3]);
811 /// ```
812 pub fn into_vec(self) -> Result<Vec<u8>> {
813 let dirty = self.dirty.load(Ordering::Relaxed);
814
815 if !dirty {
816 return Ok(match self.buffer {
817 Inner::Vec(v) => v,
818 Inner::Mmap(m) => m.as_ref().to_vec(),
819 });
820 }
821
822 let pending = self
823 .pending
824 .into_inner()
825 .map_err(|e| Error::LockPoisoned(e.to_string()))?;
826 let mut output = match self.buffer {
827 Inner::Vec(v) => v,
828 Inner::Mmap(m) => m.as_ref().to_vec(),
829 };
830
831 for pw in pending {
832 output[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
833 }
834
835 Ok(output)
836 }
837
838 /// Validates that `[offset, offset + length)` is within bounds.
839 fn check_bounds(&self, offset: usize, length: usize) -> Result<()> {
840 let end = offset.checked_add(length).ok_or(Error::OutOfBounds {
841 offset,
842 length,
843 file_size: self.buffer.len(),
844 })?;
845
846 if end > self.buffer.len() {
847 return Err(Error::OutOfBounds {
848 offset,
849 length,
850 file_size: self.buffer.len(),
851 });
852 }
853
854 Ok(())
855 }
856
857 /// Writes to a file using a writable memory map (for large files).
858 fn to_file_mmap(&self, path: &Path) -> Result<()> {
859 let base = self.buffer.as_slice();
860 let size = base.len() as u64;
861
862 let file = std::fs::OpenOptions::new()
863 .read(true)
864 .write(true)
865 .create(true)
866 .truncate(true)
867 .open(path)?;
868 file.set_len(size)?;
869
870 // SAFETY: The file was just created and truncated. We have exclusive write
871 // access. The mmap is flushed before being dropped.
872 let mut mmap = unsafe { memmap2::MmapMut::map_mut(&file)? };
873 mmap.copy_from_slice(base);
874
875 if self.dirty.load(Ordering::Relaxed) {
876 let pending = self
877 .pending
878 .read()
879 .map_err(|e| Error::LockPoisoned(e.to_string()))?;
880 for pw in pending.iter() {
881 mmap[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
882 }
883 }
884
885 mmap.flush()?;
886 Ok(())
887 }
888}
889
890/// Applies pending writes that overlap `[read_offset..read_offset+read_len)` to `buf`.
891///
892/// Writes are applied in order — later writes overwrite earlier ones.
893fn apply_pending(buf: &mut [u8], read_offset: usize, read_len: usize, pending: &[PendingWrite]) {
894 let read_end = read_offset + read_len;
895 for pw in pending {
896 let pw_end = pw.offset + pw.data.len();
897 // Check for overlap.
898 if pw.offset < read_end && pw_end > read_offset {
899 let start = pw.offset.max(read_offset);
900 let end = pw_end.min(read_end);
901 let buf_start = start - read_offset;
902 let pw_start = start - pw.offset;
903 buf[buf_start..buf_start + (end - start)]
904 .copy_from_slice(&pw.data[pw_start..pw_start + (end - start)]);
905 }
906 }
907}
908
909#[cfg(test)]
910mod tests {
911 use crate::{
912 traits::{ReadFrom, WriteTo},
913 CowFile,
914 };
915
916 #[test]
917 fn test_from_vec_basic() {
918 let pf = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
919 assert_eq!(pf.len(), 5);
920 assert!(!pf.is_empty());
921 assert_eq!(pf.data(), &[1, 2, 3, 4, 5]);
922 }
923
924 #[test]
925 fn test_from_vec_empty() {
926 let pf = CowFile::from_vec(vec![]);
927 assert_eq!(pf.len(), 0);
928 assert!(pf.is_empty());
929 }
930
931 #[test]
932 fn test_open_basic() {
933 use std::io::Write;
934 let mut tmpfile = tempfile::NamedTempFile::new().unwrap();
935 tmpfile.write_all(&[0xDE, 0xAD, 0xBE, 0xEF]).unwrap();
936 tmpfile.flush().unwrap();
937
938 let pf = CowFile::open(tmpfile.path()).unwrap();
939 assert_eq!(pf.len(), 4);
940 assert_eq!(pf.data(), &[0xDE, 0xAD, 0xBE, 0xEF]);
941 }
942
943 #[test]
944 fn test_open_nonexistent() {
945 let result = CowFile::open("/nonexistent/path.bin");
946 assert!(result.is_err());
947 }
948
949 #[test]
950 fn test_write_and_read() {
951 let pf = CowFile::from_vec(vec![0u8; 10]);
952 pf.write(2, &[0xFF, 0xFE]).unwrap();
953
954 // data() shows committed state.
955 assert_eq!(pf.data()[2], 0x00);
956
957 // read() composites pending.
958 let data = pf.read(0, 10).unwrap();
959 assert_eq!(data[2], 0xFF);
960 assert_eq!(data[3], 0xFE);
961 assert_eq!(data[0], 0x00);
962 }
963
964 #[test]
965 fn test_write_byte_and_read_byte() {
966 let pf = CowFile::from_vec(vec![0u8; 10]);
967 pf.write_byte(5, 0xAA).unwrap();
968 assert_eq!(pf.read_byte(5).unwrap(), 0xAA);
969 assert_eq!(pf.read_byte(4).unwrap(), 0x00);
970 }
971
972 #[test]
973 fn test_write_empty_is_noop() {
974 let pf = CowFile::from_vec(vec![0u8; 10]);
975 pf.write(5, &[]).unwrap();
976 assert!(!pf.has_pending());
977 }
978
979 #[test]
980 fn test_commit_and_read() {
981 let mut pf = CowFile::from_vec(vec![0u8; 10]);
982 pf.write(0, &[0xAA]).unwrap();
983 assert!(pf.has_pending());
984
985 pf.commit().unwrap();
986 assert!(!pf.has_pending());
987 assert_eq!(pf.data()[0], 0xAA);
988 }
989
990 #[test]
991 fn test_multi_commit_cycle() {
992 let mut pf = CowFile::from_vec(vec![0u8; 20]);
993
994 // Pass 1.
995 pf.write(0, &[0xAA]).unwrap();
996 pf.write(10, &[0xBB]).unwrap();
997 pf.commit().unwrap();
998
999 // Pass 2.
1000 pf.write(5, &[0xCC]).unwrap();
1001 pf.commit().unwrap();
1002
1003 let output = pf.to_vec().unwrap();
1004 assert_eq!(output[0], 0xAA);
1005 assert_eq!(output[5], 0xCC);
1006 assert_eq!(output[10], 0xBB);
1007 }
1008
1009 #[test]
1010 fn test_to_vec_no_modifications() {
1011 let original = vec![1, 2, 3, 4, 5];
1012 let pf = CowFile::from_vec(original.clone());
1013 let output = pf.to_vec().unwrap();
1014 assert_eq!(output, original);
1015 }
1016
1017 #[test]
1018 fn test_to_vec_with_modifications() {
1019 let pf = CowFile::from_vec(vec![0u8; 10]);
1020 pf.write(0, &[0xFF]).unwrap();
1021 pf.write(9, &[0xEE]).unwrap();
1022
1023 let output = pf.to_vec().unwrap();
1024 assert_eq!(output[0], 0xFF);
1025 assert_eq!(output[9], 0xEE);
1026 assert_eq!(output[5], 0x00);
1027 }
1028
1029 #[test]
1030 fn test_to_file_and_read_back() {
1031 let pf = CowFile::from_vec(vec![0u8; 100]);
1032 pf.write(0, &[0x4D, 0x5A]).unwrap();
1033 pf.write(50, &[0xDE, 0xAD]).unwrap();
1034
1035 let tmpfile = tempfile::NamedTempFile::new().unwrap();
1036 pf.to_file(tmpfile.path()).unwrap();
1037
1038 let contents = std::fs::read(tmpfile.path()).unwrap();
1039 assert_eq!(contents.len(), 100);
1040 assert_eq!(contents[0], 0x4D);
1041 assert_eq!(contents[1], 0x5A);
1042 assert_eq!(contents[50], 0xDE);
1043 assert_eq!(contents[51], 0xAD);
1044 assert_eq!(contents[10], 0x00);
1045 }
1046
1047 #[test]
1048 fn test_out_of_bounds_read() {
1049 let pf = CowFile::from_vec(vec![0u8; 10]);
1050 let result = pf.read(8, 5);
1051 assert!(result.is_err());
1052 }
1053
1054 #[test]
1055 fn test_out_of_bounds_write() {
1056 let pf = CowFile::from_vec(vec![0u8; 10]);
1057 let result = pf.write(8, &[0xFF; 5]);
1058 assert!(result.is_err());
1059 }
1060
1061 #[test]
1062 fn test_out_of_bounds_at_exact_end() {
1063 let pf = CowFile::from_vec(vec![0u8; 10]);
1064 let result = pf.read(10, 0);
1065 assert!(result.is_ok());
1066 assert!(result.unwrap().is_empty());
1067 }
1068
1069 #[test]
1070 fn test_has_pending() {
1071 let mut pf = CowFile::from_vec(vec![0u8; 10]);
1072 assert!(!pf.has_pending());
1073
1074 pf.write(0, &[0xFF]).unwrap();
1075 assert!(pf.has_pending());
1076
1077 pf.commit().unwrap();
1078 assert!(!pf.has_pending());
1079 }
1080
1081 #[test]
1082 fn test_data_shows_committed_state() {
1083 let mut pf = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
1084 pf.write(0, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF]).unwrap();
1085
1086 // data() should still show original.
1087 assert_eq!(pf.data(), &[1, 2, 3, 4, 5]);
1088
1089 pf.commit().unwrap();
1090
1091 // After commit, data() shows the changes.
1092 assert_eq!(pf.data(), &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF]);
1093 }
1094
/// A borrowed `data()` view can be held across a `write`; the view keeps
/// showing the committed byte while `read_byte` sees the pending one.
#[test]
fn test_data_while_writing() {
    let cow = CowFile::from_vec(vec![0u8; 100]);

    // Borrow the committed buffer first, then queue a write while the
    // borrow is still alive.
    let committed = cow.data();
    cow.write(10, &[0xFF]).unwrap();

    // The borrowed view is unaffected by the pending write.
    assert_eq!(committed[10], 0x00);

    // The compositing accessor returns the pending value.
    let composited = cow.read_byte(10).unwrap();
    assert_eq!(composited, 0xFF);
}
1109
/// `discard` drops queued writes so that reads fall back to the committed
/// bytes.
#[test]
fn test_discard() {
    let mut cow = CowFile::from_vec(vec![0u8; 10]);

    cow.write(0, &[0xFF]).unwrap();
    assert!(cow.has_pending());

    cow.discard().unwrap();

    // Nothing is queued any more and the original byte is visible again.
    assert!(!cow.has_pending());
    let first = cow.read_byte(0).unwrap();
    assert_eq!(first, 0x00);
}
1120
/// Compile-time check of the thread-safety guarantees stated in the module
/// documentation: `CowFile` must be both `Send` and `Sync`.
///
/// The helpers have empty bodies; the test passes as soon as it compiles.
/// They are kept separate so a compile error names the missing bound.
#[test]
fn test_send_static_assertion() {
    fn assert_send<T: Send>() {}
    fn assert_sync<T: Sync>() {}

    assert_send::<CowFile>();
    assert_sync::<CowFile>();
}
1126
/// A `u16` written little-endian round-trips and is stored least-significant
/// byte first.
#[test]
fn test_read_write_le_u16() {
    let value: u16 = 0xCAFE;
    let cow = CowFile::from_vec(vec![0u8; 16]);

    cow.write_le::<u16>(0, value).unwrap();

    // Typed round trip.
    assert_eq!(cow.read_le::<u16>(0).unwrap(), value);
    // Raw layout: low byte at the lower offset.
    let raw = cow.read(0, 2).unwrap();
    assert_eq!(raw, vec![0xFE, 0xCA]);
}
1134
/// A `u32` written little-endian at a non-zero offset round-trips and is
/// stored least-significant byte first.
///
/// The raw byte check matters: a round trip alone would also pass if
/// `write_le` and `read_le` both used the wrong byte order.
#[test]
fn test_read_write_le_u32() {
    let pf = CowFile::from_vec(vec![0u8; 16]);
    pf.write_le::<u32>(4, 0xDEADBEEF).unwrap();

    // Typed round trip.
    assert_eq!(pf.read_le::<u32>(4).unwrap(), 0xDEADBEEF);
    // Raw layout: 0xDEADBEEF in little-endian order is EF BE AD DE.
    assert_eq!(pf.read(4, 4).unwrap(), vec![0xEF, 0xBE, 0xAD, 0xDE]);
}
1141
/// A `u64` written little-endian round-trips and is stored least-significant
/// byte first.
///
/// The raw byte check matters: a round trip alone would also pass if
/// `write_le` and `read_le` both used the wrong byte order.
#[test]
fn test_read_write_le_u64() {
    let pf = CowFile::from_vec(vec![0u8; 16]);
    pf.write_le::<u64>(0, 0x0123456789ABCDEF).unwrap();

    // Typed round trip.
    assert_eq!(pf.read_le::<u64>(0).unwrap(), 0x0123456789ABCDEF);
    // Raw layout: the lowest-order byte (0xEF) sits at the lowest offset.
    assert_eq!(
        pf.read(0, 8).unwrap(),
        vec![0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01]
    );
}
1148
/// A `u32` written big-endian round-trips and is stored most-significant
/// byte first.
#[test]
fn test_read_write_be_u32() {
    let value: u32 = 0xDEADBEEF;
    let cow = CowFile::from_vec(vec![0u8; 16]);

    cow.write_be::<u32>(0, value).unwrap();

    // Typed round trip.
    assert_eq!(cow.read_be::<u32>(0).unwrap(), value);
    // Raw layout: high byte at the lower offset.
    let raw = cow.read(0, 4).unwrap();
    assert_eq!(raw, vec![0xDE, 0xAD, 0xBE, 0xEF]);
}
1156
/// A typed read that needs more bytes than the buffer holds is rejected.
#[test]
fn test_read_le_out_of_bounds() {
    // Three bytes available, four required for a `u32`.
    let cow = CowFile::from_vec(vec![0u8; 3]);
    assert!(cow.read_le::<u32>(0).is_err());
}
1163
/// A typed write that needs more bytes than the buffer holds is rejected.
#[test]
fn test_write_le_out_of_bounds() {
    // Three bytes available, four required for a `u32`.
    let cow = CowFile::from_vec(vec![0u8; 3]);
    assert!(cow.write_le::<u32>(0, 42).is_err());
}
1170
/// A user-defined type implementing `WriteTo` and `ReadFrom` round-trips
/// through `write_type` / `read_type`.
#[test]
fn test_read_write_type() {
    // Seven-byte little-endian record: u32 at +0, u16 at +4, u8 at +6.
    struct TestStruct {
        magic: u32,
        version: u16,
        flags: u8,
    }

    impl WriteTo for TestStruct {
        fn write_to(&self, pf: &CowFile, offset: usize) -> crate::Result<()> {
            pf.write_le::<u32>(offset, self.magic)?;
            pf.write_le::<u16>(offset + 4, self.version)?;
            pf.write_le::<u8>(offset + 6, self.flags)?;
            Ok(())
        }
    }

    impl ReadFrom for TestStruct {
        fn read_from(pf: &CowFile, offset: usize) -> crate::Result<Self> {
            // Fields are read in layout order, mirroring `write_to`.
            let magic = pf.read_le::<u32>(offset)?;
            let version = pf.read_le::<u16>(offset + 4)?;
            let flags = pf.read_le::<u8>(offset + 6)?;
            Ok(Self {
                magic,
                version,
                flags,
            })
        }
    }

    let cow = CowFile::from_vec(vec![0u8; 16]);
    let written = TestStruct {
        magic: 0x4D5A9000,
        version: 3,
        flags: 0xFF,
    };
    cow.write_type(0, &written).unwrap();

    let restored: TestStruct = cow.read_type(0).unwrap();
    assert_eq!(restored.magic, 0x4D5A9000);
    assert_eq!(restored.version, 3);
    assert_eq!(restored.flags, 0xFF);
}
1211
/// `from_file` accepts an already-open `std::fs::File` and exposes the
/// file's bytes through `len` and `data`.
#[test]
fn test_from_file() {
    use std::io::Write;

    const PAYLOAD: [u8; 4] = [0xDE, 0xAD, 0xBE, 0xEF];

    // Write the fixture and flush it before the file is reopened by path.
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    fixture.write_all(&PAYLOAD).unwrap();
    fixture.flush().unwrap();

    let handle = std::fs::File::open(fixture.path()).unwrap();
    let cow = CowFile::from_file(handle).unwrap();

    assert_eq!(cow.len(), 4);
    assert_eq!(cow.data(), &PAYLOAD);
}
1224
/// With no writes queued, `into_vec` returns the original bytes unchanged.
#[test]
fn test_into_vec_no_modifications() {
    let original = vec![1u8, 2, 3, 4, 5];
    let cow = CowFile::from_vec(original.clone());

    let recovered = cow.into_vec().unwrap();
    assert_eq!(recovered, original);
}
1231
/// `into_vec` includes pending writes in the returned buffer and leaves
/// unwritten bytes alone.
#[test]
fn test_into_vec_with_modifications() {
    let cow = CowFile::from_vec(vec![0u8; 10]);
    // Patch the first and the last byte.
    cow.write(0, &[0xFF]).unwrap();
    cow.write(9, &[0xEE]).unwrap();

    let bytes = cow.into_vec().unwrap();

    assert_eq!(bytes[0], 0xFF);
    assert_eq!(bytes[9], 0xEE);
    // A byte between the two patches is still zero.
    assert_eq!(bytes[5], 0x00);
}
1242
/// A `CowFile` opened from a path converts into a `Vec` holding the file's
/// contents.
#[test]
fn test_into_vec_from_mmap() {
    use std::io::Write;

    let payload = [0xDE_u8, 0xAD, 0xBE, 0xEF];

    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    fixture.write_all(&payload).unwrap();
    fixture.flush().unwrap();

    let cow = CowFile::open(fixture.path()).unwrap();
    let bytes = cow.into_vec().unwrap();

    assert_eq!(bytes, vec![0xDE, 0xAD, 0xBE, 0xEF]);
}
1254
/// A pending write on a `CowFile` opened from a path is reflected in the
/// `Vec` returned by `into_vec`.
#[test]
fn test_into_vec_from_mmap_with_modifications() {
    use std::io::Write;

    // Start from a four-byte, all-zero file.
    let mut fixture = tempfile::NamedTempFile::new().unwrap();
    fixture.write_all(&[0x00, 0x00, 0x00, 0x00]).unwrap();
    fixture.flush().unwrap();

    let cow = CowFile::open(fixture.path()).unwrap();
    cow.write(0, &[0xFF]).unwrap();

    // Only the first byte differs from the file contents.
    let bytes = cow.into_vec().unwrap();
    assert_eq!(bytes, vec![0xFF, 0x00, 0x00, 0x00]);
}
1267
/// The cursor returned by `cursor()` supports the standard `Write`, `Seek`
/// and `Read` traits: bytes written through it can be read back after
/// seeking to the start.
#[test]
fn test_cursor_basic() {
    use std::io::{Read, Seek, SeekFrom, Write};

    let payload = [0xAA_u8, 0xBB, 0xCC];

    let cow = CowFile::from_vec(vec![0u8; 32]);
    let mut cur = cow.cursor();

    // Write at position 0, then return to the start for the read.
    cur.write_all(&payload).unwrap();
    cur.seek(SeekFrom::Start(0)).unwrap();

    let mut read_back = [0u8; 3];
    cur.read_exact(&mut read_back).unwrap();
    assert_eq!(read_back, [0xAA, 0xBB, 0xCC]);
}
1282
/// When two pending writes overlap, the later write takes precedence in the
/// overlapping region.
#[test]
fn test_overlapping_pending_writes() {
    let cow = CowFile::from_vec(vec![0u8; 20]);

    // First write covers 0..10, second covers 5..15 and overlaps 5..10.
    cow.write(0, &[0xAA; 10]).unwrap();
    cow.write(5, &[0xBB; 10]).unwrap();

    let merged = cow.read(0, 20).unwrap();

    // Expected layout: first write only, second write, then untouched bytes.
    let expected_regions = [(0..5, 0xAA_u8), (5..15, 0xBB_u8), (15..20, 0x00_u8)];
    for (region, expected) in expected_regions {
        assert!(merged[region].iter().all(|&b| b == expected));
    }
}
1295
/// When the same offset is written twice, `read_byte` returns the value from
/// the most recent write.
#[test]
fn test_read_byte_pending_last_wins() {
    let cow = CowFile::from_vec(vec![0u8; 10]);

    // Queue two writes to offset 5; the second should shadow the first.
    for value in [0xAA_u8, 0xBB_u8] {
        cow.write_byte(5, value).unwrap();
    }

    assert_eq!(cow.read_byte(5).unwrap(), 0xBB);
}
1303}