Skip to main content

cowfile/
cowfile.rs

1//! The main [`CowFile`] type with OS-level copy-on-write and a pending write log.
2//!
3//! `CowFile` wraps binary data (from a [`Vec<u8>`] or a copy-on-write memory map)
4//! with a pending log that tracks writes. The committed buffer is accessible as
5//! `&[u8]` via [`data`](CowFile::data), while [`read`](CowFile::read) and typed
6//! accessors composite pending writes over the committed state.
7//!
8//! # Thread Safety
9//!
10//! `CowFile` is [`Send`] and [`Sync`]. The committed buffer can be read
11//! concurrently via [`data`](CowFile::data) from multiple threads. Writes
12//! to the pending log are serialised by an internal [`RwLock`](std::sync::RwLock).
13
14use std::{
15    fmt,
16    io::Write,
17    path::{Path, PathBuf},
18    sync::{
19        atomic::{AtomicBool, Ordering},
20        RwLock,
21    },
22};
23
24use crate::{
25    cursor::CowFileCursor,
26    error::{Error, Result},
27    primitives::Primitive,
28    traits::{ReadFrom, WriteTo},
29};
30
/// Size cut-over for [`to_file`](CowFile::to_file): outputs of at least this
/// many bytes (64 MiB) are written through a writable memory map, smaller ones
/// through ordinary file I/O.
const MMAP_WRITE_THRESHOLD: usize = 64 << 20;
34
/// Backing storage for the committed bytes of a `CowFile`.
enum Inner {
    /// Heap-allocated bytes owned by the `CowFile`; directly mutable.
    Vec(Vec<u8>),
    /// Private copy-on-write memory map (`MAP_PRIVATE`). Bytes stored through
    /// it stay process-private and are not written back to the mapped file.
    Mmap(memmap2::MmapMut),
}
42
43impl Inner {
44    fn as_slice(&self) -> &[u8] {
45        match self {
46            Inner::Vec(v) => v.as_slice(),
47            Inner::Mmap(m) => m,
48        }
49    }
50
51    fn as_mut_slice(&mut self) -> &mut [u8] {
52        match self {
53            Inner::Vec(v) => v.as_mut_slice(),
54            Inner::Mmap(m) => m.as_mut(),
55        }
56    }
57
58    fn len(&self) -> usize {
59        match self {
60            Inner::Vec(v) => v.len(),
61            Inner::Mmap(m) => m.len(),
62        }
63    }
64}
65
/// One entry of the pending write log: `data` replaces the bytes that start
/// at `offset` in the committed buffer.
struct PendingWrite {
    /// Byte offset into the committed buffer at which `data` begins.
    offset: usize,
    /// Replacement bytes. Entries recorded by `CowFile::write` are never empty,
    /// because zero-length writes return before anything is logged.
    data: Vec<u8>,
}
71
72/// A copy-on-write file abstraction backed by memory or a file.
73///
74/// Writes accumulate in a pending log and are applied to the committed buffer
75/// on [`commit`](CowFile::commit). The committed buffer is accessible as
76/// `&[u8]` via [`data`](CowFile::data), while [`read`](CowFile::read) and
77/// typed I/O methods composite pending writes over the committed state.
78///
79/// # Architecture
80///
81/// ```text
82///  Committed Buffer               Pending Log
83/// +---------------------+      +-------------------------+
84/// | Vec<u8> or MmapMut  | <--- | Vec<PendingWrite>       |
85/// | (OS-level CoW)      |      | (applied on commit)     |
86/// +---------------------+      +-------------------------+
87/// ```
88///
89/// For memory-mapped files, the buffer is created with
90/// [`map_copy`](memmap2::MmapOptions::map_copy), which uses `MAP_PRIVATE` on
91/// Unix and `PAGE_WRITECOPY` on Windows. Only pages touched by
92/// [`commit`](CowFile::commit) are copied into anonymous memory — the rest
93/// of the file remains demand-paged from disk.
94///
95/// # Examples
96///
97/// ```
98/// use cowfile::CowFile;
99///
100/// let pf = CowFile::from_vec(vec![0u8; 100]);
101///
102/// // Writes go to the pending log
103/// pf.write(10, &[0xFF, 0xFE]).unwrap();
104///
105/// // data() returns committed state
106/// assert_eq!(pf.data()[10], 0x00);
107///
108/// // read() composites pending writes
109/// assert_eq!(pf.read_byte(10).unwrap(), 0xFF);
110///
111/// // Commit applies pending to the buffer
112/// let mut pf = pf;
113/// pf.commit().unwrap();
114/// assert_eq!(pf.data()[10], 0xFF);
115/// ```
pub struct CowFile {
    /// Committed bytes. Methods taking `&self` only read them; `commit()` is
    /// the only method that stores into this buffer.
    buffer: Inner,
    /// Writes not yet applied to `buffer`, in the order they were issued.
    /// Wrapped in a lock so `write` can append through `&self`.
    pending: RwLock<Vec<PendingWrite>>,
    /// Raised by `write` after logging an entry, cleared by `commit()` and
    /// `discard()`. Readers check it first to skip the lock while the log is empty.
    dirty: AtomicBool,
    /// Path given to `open()`; `None` for instances built with `from_vec()`
    /// or `from_file()`.
    source_path: Option<PathBuf>,
}
126
127// Static assertion: CowFile must be Send + Sync.
128const _: () = {
129    fn assert_send_sync<T: Send + Sync>() {}
130    fn check() {
131        assert_send_sync::<CowFile>();
132    }
133    let _ = check;
134};
135
136impl fmt::Debug for CowFile {
137    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
138        f.debug_struct("CowFile")
139            .field("len", &self.buffer.len())
140            .field(
141                "backend",
142                &match &self.buffer {
143                    Inner::Vec(_) => "Vec",
144                    Inner::Mmap(_) => "Mmap",
145                },
146            )
147            .field("dirty", &self.dirty.load(Ordering::Relaxed))
148            .finish_non_exhaustive()
149    }
150}
151
152impl CowFile {
153    /// Creates a `CowFile` from an owned byte vector.
154    ///
155    /// The provided bytes become the committed buffer. No copies are made
156    /// during construction — the vector is moved into the `CowFile`.
157    ///
158    /// # Examples
159    ///
160    /// ```
161    /// use cowfile::CowFile;
162    ///
163    /// let pf = CowFile::from_vec(vec![0x4D, 0x5A, 0x90, 0x00]);
164    /// assert_eq!(pf.len(), 4);
165    /// ```
166    pub fn from_vec(data: Vec<u8>) -> Self {
167        CowFile {
168            buffer: Inner::Vec(data),
169            pending: RwLock::new(Vec::new()),
170            dirty: AtomicBool::new(false),
171            source_path: None,
172        }
173    }
174
175    /// Creates a `CowFile` by memory-mapping a file from the given path.
176    ///
177    /// The file is mapped with copy-on-write semantics (`MAP_PRIVATE` on Unix,
178    /// `PAGE_WRITECOPY` on Windows). The original file is never modified.
179    /// Only pages touched by [`commit`](CowFile::commit) are copied into
180    /// anonymous memory — the rest of the file remains demand-paged from disk.
181    ///
182    /// # Errors
183    ///
184    /// Returns [`Error::Io`] if the file cannot be opened or memory-mapped.
185    ///
186    /// # Examples
187    ///
188    /// ```no_run
189    /// use cowfile::CowFile;
190    ///
191    /// let pf = CowFile::open("binary.exe").unwrap();
192    /// println!("File size: {} bytes", pf.len());
193    /// ```
194    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
195        let path = path.as_ref();
196        let file = std::fs::File::open(path)?;
197        let mut cow = Self::from_file(file)?;
198        cow.source_path = Some(path.to_path_buf());
199        Ok(cow)
200    }
201
202    /// Creates a `CowFile` from an already-opened [`std::fs::File`].
203    ///
204    /// The file is mapped with copy-on-write semantics. The original file is
205    /// never modified.
206    ///
207    /// Empty files (0 bytes) are handled by using a `Vec` backend instead of
208    /// mmap, since memory-mapping an empty file is not supported on all
209    /// platforms.
210    ///
211    /// # Errors
212    ///
213    /// Returns [`Error::Io`] if the file cannot be memory-mapped.
214    ///
215    /// # Examples
216    ///
217    /// ```no_run
218    /// use cowfile::CowFile;
219    ///
220    /// let file = std::fs::File::open("binary.exe").unwrap();
221    /// let pf = CowFile::from_file(file).unwrap();
222    /// println!("File size: {} bytes", pf.len());
223    /// ```
224    pub fn from_file(file: std::fs::File) -> Result<Self> {
225        let metadata = file.metadata()?;
226        if metadata.len() == 0 {
227            return Ok(Self::from_vec(Vec::new()));
228        }
229
230        // SAFETY: We use map_copy which creates a private CoW mapping.
231        // The file must not be modified externally while the mapping is alive.
232        // This is the same contract as any memory-mapped file in Rust.
233        let mmap = unsafe { memmap2::MmapOptions::new().map_copy(&file)? };
234        Ok(CowFile {
235            buffer: Inner::Mmap(mmap),
236            pending: RwLock::new(Vec::new()),
237            dirty: AtomicBool::new(false),
238            source_path: None,
239        })
240    }
241
242    /// Returns the committed buffer as a byte slice.
243    ///
244    /// This is a true zero-cost `&[u8]` reference into the committed buffer.
245    /// For mmap-backed files, only accessed pages are loaded into physical
246    /// memory by the OS.
247    ///
248    /// Pending writes are **not** visible through this method. Use
249    /// [`read`](CowFile::read) or [`read_le`](CowFile::read_le) for a view
250    /// that composites pending writes, or call [`commit`](CowFile::commit)
251    /// first.
252    ///
253    /// # Examples
254    ///
255    /// ```
256    /// use cowfile::CowFile;
257    ///
258    /// let pf = CowFile::from_vec(vec![1, 2, 3]);
259    /// let data: &[u8] = pf.data();
260    /// assert_eq!(data, &[1, 2, 3]);
261    /// ```
262    pub fn data(&self) -> &[u8] {
263        self.buffer.as_slice()
264    }
265
266    /// Returns the total length of the data in bytes.
267    pub fn len(&self) -> usize {
268        self.buffer.len()
269    }
270
271    /// Returns `true` if the data is empty (zero bytes).
272    pub fn is_empty(&self) -> bool {
273        self.buffer.len() == 0
274    }
275
276    /// Returns `true` if there are uncommitted pending writes.
277    ///
278    /// # Examples
279    ///
280    /// ```
281    /// use cowfile::CowFile;
282    ///
283    /// let pf = CowFile::from_vec(vec![0u8; 10]);
284    /// assert!(!pf.has_pending());
285    ///
286    /// pf.write(0, &[0xFF]).unwrap();
287    /// assert!(pf.has_pending());
288    /// ```
289    pub fn has_pending(&self) -> bool {
290        self.dirty.load(Ordering::Relaxed)
291    }
292
293    /// Reads `length` bytes starting at `offset`, compositing pending writes.
294    ///
295    /// The returned bytes reflect pending writes applied over the committed
296    /// buffer. When there are no pending writes, this is equivalent to
297    /// slicing [`data`](CowFile::data).
298    ///
299    /// # Errors
300    ///
301    /// Returns [`Error::OutOfBounds`] if the requested range exceeds the data size.
302    ///
303    /// # Examples
304    ///
305    /// ```
306    /// use cowfile::CowFile;
307    ///
308    /// let pf = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
309    /// pf.write(2, &[0xFF]).unwrap();
310    ///
311    /// let data = pf.read(1, 3).unwrap();
312    /// assert_eq!(data, vec![2, 0xFF, 4]);
313    /// ```
314    pub fn read(&self, offset: usize, length: usize) -> Result<Vec<u8>> {
315        self.check_bounds(offset, length)?;
316
317        if length == 0 {
318            return Ok(Vec::new());
319        }
320
321        let mut buf = self.buffer.as_slice()[offset..offset + length].to_vec();
322
323        if self.dirty.load(Ordering::Relaxed) {
324            let pending = self
325                .pending
326                .read()
327                .map_err(|e| Error::LockPoisoned(e.to_string()))?;
328            apply_pending(&mut buf, offset, length, &pending);
329        }
330
331        Ok(buf)
332    }
333
334    /// Reads a single byte at the given offset, compositing pending writes.
335    ///
336    /// # Errors
337    ///
338    /// Returns [`Error::OutOfBounds`] if the offset is beyond the data size.
339    ///
340    /// # Examples
341    ///
342    /// ```
343    /// use cowfile::CowFile;
344    ///
345    /// let pf = CowFile::from_vec(vec![0xAA, 0xBB, 0xCC]);
346    /// assert_eq!(pf.read_byte(1).unwrap(), 0xBB);
347    ///
348    /// pf.write_byte(1, 0xFF).unwrap();
349    /// assert_eq!(pf.read_byte(1).unwrap(), 0xFF);
350    /// ```
351    pub fn read_byte(&self, offset: usize) -> Result<u8> {
352        self.check_bounds(offset, 1)?;
353
354        if self.dirty.load(Ordering::Relaxed) {
355            let pending = self
356                .pending
357                .read()
358                .map_err(|e| Error::LockPoisoned(e.to_string()))?;
359            // Scan in reverse — last write wins.
360            for pw in pending.iter().rev() {
361                let pw_end = pw.offset + pw.data.len();
362                if offset >= pw.offset && offset < pw_end {
363                    return Ok(pw.data[offset - pw.offset]);
364                }
365            }
366        }
367
368        Ok(self.buffer.as_slice()[offset])
369    }
370
371    /// Writes `data` at the given `offset` into the pending log.
372    ///
373    /// The committed buffer is not modified. Pending writes are composited
374    /// into reads via [`read`](CowFile::read) and applied to the buffer on
375    /// [`commit`](CowFile::commit).
376    ///
377    /// Empty writes (zero-length data) are silently ignored.
378    ///
379    /// # Errors
380    ///
381    /// Returns [`Error::OutOfBounds`] if the write extends beyond the data size.
382    ///
383    /// # Examples
384    ///
385    /// ```
386    /// use cowfile::CowFile;
387    ///
388    /// let pf = CowFile::from_vec(vec![0u8; 100]);
389    /// pf.write(50, &[0xDE, 0xAD, 0xBE, 0xEF]).unwrap();
390    ///
391    /// let data = pf.read(50, 4).unwrap();
392    /// assert_eq!(data, vec![0xDE, 0xAD, 0xBE, 0xEF]);
393    /// ```
394    pub fn write(&self, offset: usize, data: &[u8]) -> Result<()> {
395        if data.is_empty() {
396            return Ok(());
397        }
398
399        self.check_bounds(offset, data.len())?;
400
401        self.pending
402            .write()
403            .map_err(|e| Error::LockPoisoned(e.to_string()))?
404            .push(PendingWrite {
405                offset,
406                data: data.to_vec(),
407            });
408        self.dirty.store(true, Ordering::Relaxed);
409        Ok(())
410    }
411
412    /// Writes a single byte at the given offset into the pending log.
413    ///
414    /// # Errors
415    ///
416    /// Returns [`Error::OutOfBounds`] if the offset is beyond the data size.
417    pub fn write_byte(&self, offset: usize, byte: u8) -> Result<()> {
418        self.write(offset, &[byte])
419    }
420
421    /// Applies all pending writes to the committed buffer and clears the log.
422    ///
423    /// For mmap-backed files, only the OS pages touched by writes are copied
424    /// into anonymous memory (`MAP_PRIVATE` CoW). The rest of the file remains
425    /// demand-paged from disk.
426    ///
427    /// # Errors
428    ///
    /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned. Pending
    /// writes are bounds-checked when they are recorded, so applying them does not fail.
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// use cowfile::CowFile;
436    ///
437    /// let mut pf = CowFile::from_vec(vec![0u8; 10]);
438    ///
439    /// pf.write(0, &[0xAA]).unwrap();
440    /// assert_eq!(pf.data()[0], 0x00); // Not yet committed
441    ///
442    /// pf.commit().unwrap();
443    /// assert_eq!(pf.data()[0], 0xAA); // Now committed
444    /// assert!(!pf.has_pending());
445    /// ```
446    pub fn commit(&mut self) -> Result<()> {
447        if !*self.dirty.get_mut() {
448            return Ok(());
449        }
450
451        let pending = self
452            .pending
453            .get_mut()
454            .map_err(|e| Error::LockPoisoned(e.to_string()))?;
455        let buf = self.buffer.as_mut_slice();
456
457        for pw in pending.drain(..) {
458            buf[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
459        }
460
461        *self.dirty.get_mut() = false;
462        Ok(())
463    }
464
465    /// Discards all pending writes without applying them.
466    ///
467    /// # Errors
468    ///
469    /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned.
470    ///
471    /// # Examples
472    ///
473    /// ```
474    /// use cowfile::CowFile;
475    ///
476    /// let mut pf = CowFile::from_vec(vec![0u8; 10]);
477    /// pf.write(0, &[0xFF]).unwrap();
478    /// assert!(pf.has_pending());
479    ///
480    /// pf.discard().unwrap();
481    /// assert!(!pf.has_pending());
482    /// assert_eq!(pf.data()[0], 0x00);
483    /// ```
484    pub fn discard(&mut self) -> Result<()> {
485        self.pending
486            .get_mut()
487            .map_err(|e| Error::LockPoisoned(e.to_string()))?
488            .clear();
489        *self.dirty.get_mut() = false;
490        Ok(())
491    }
492
493    /// Reads a primitive value in little-endian byte order at the given offset.
494    ///
495    /// Composites pending writes over the committed state.
496    ///
497    /// # Errors
498    ///
499    /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
500    ///
501    /// # Examples
502    ///
503    /// ```
504    /// use cowfile::CowFile;
505    ///
506    /// let pf = CowFile::from_vec(vec![0xEF, 0xBE, 0xAD, 0xDE, 0, 0, 0, 0]);
507    /// assert_eq!(pf.read_le::<u32>(0).unwrap(), 0xDEADBEEF);
508    /// ```
509    pub fn read_le<T: Primitive>(&self, offset: usize) -> Result<T> {
510        let data = self.read(offset, T::SIZE)?;
511        Ok(T::from_le_bytes(&data))
512    }
513
514    /// Reads a primitive value in big-endian byte order at the given offset.
515    ///
516    /// Composites pending writes over the committed state.
517    ///
518    /// # Errors
519    ///
520    /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
521    ///
522    /// # Examples
523    ///
524    /// ```
525    /// use cowfile::CowFile;
526    ///
527    /// let pf = CowFile::from_vec(vec![0xDE, 0xAD, 0xBE, 0xEF, 0, 0, 0, 0]);
528    /// assert_eq!(pf.read_be::<u32>(0).unwrap(), 0xDEADBEEF);
529    /// ```
530    pub fn read_be<T: Primitive>(&self, offset: usize) -> Result<T> {
531        let data = self.read(offset, T::SIZE)?;
532        Ok(T::from_be_bytes(&data))
533    }
534
535    /// Writes a primitive value in little-endian byte order at the given offset.
536    ///
537    /// The write goes to the pending log.
538    ///
539    /// # Errors
540    ///
541    /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
542    ///
543    /// # Examples
544    ///
545    /// ```
546    /// use cowfile::CowFile;
547    ///
548    /// let pf = CowFile::from_vec(vec![0u8; 8]);
549    /// pf.write_le::<u32>(0, 0xDEADBEEF).unwrap();
550    /// assert_eq!(pf.read(0, 4).unwrap(), vec![0xEF, 0xBE, 0xAD, 0xDE]);
551    /// ```
552    pub fn write_le<T: Primitive>(&self, offset: usize, value: T) -> Result<()> {
553        let mut buf = vec![0u8; T::SIZE];
554        value.write_le_bytes(&mut buf);
555        self.write(offset, &buf)
556    }
557
558    /// Writes a primitive value in big-endian byte order at the given offset.
559    ///
560    /// The write goes to the pending log.
561    ///
562    /// # Errors
563    ///
564    /// Returns [`Error::OutOfBounds`] if there are not enough bytes at `offset`.
565    ///
566    /// # Examples
567    ///
568    /// ```
569    /// use cowfile::CowFile;
570    ///
571    /// let pf = CowFile::from_vec(vec![0u8; 8]);
572    /// pf.write_be::<u32>(0, 0xDEADBEEF).unwrap();
573    /// assert_eq!(pf.read(0, 4).unwrap(), vec![0xDE, 0xAD, 0xBE, 0xEF]);
574    /// ```
575    pub fn write_be<T: Primitive>(&self, offset: usize, value: T) -> Result<()> {
576        let mut buf = vec![0u8; T::SIZE];
577        value.write_be_bytes(&mut buf);
578        self.write(offset, &buf)
579    }
580
581    /// Reads a user-defined type implementing [`ReadFrom`] at the given offset.
582    ///
583    /// Composites pending writes over the committed state.
584    ///
585    /// # Errors
586    ///
587    /// Returns any error produced by the [`ReadFrom`] implementation.
588    ///
589    /// # Examples
590    ///
591    /// ```
592    /// use cowfile::{CowFile, ReadFrom, Result};
593    ///
594    /// struct Pair { a: u16, b: u16 }
595    ///
596    /// impl ReadFrom for Pair {
597    ///     fn read_from(pf: &CowFile, offset: usize) -> Result<Self> {
598    ///         Ok(Pair {
599    ///             a: pf.read_le::<u16>(offset)?,
600    ///             b: pf.read_le::<u16>(offset + 2)?,
601    ///         })
602    ///     }
603    /// }
604    ///
605    /// let pf = CowFile::from_vec(vec![0x01, 0x00, 0x02, 0x00]);
606    /// let pair: Pair = pf.read_type(0).unwrap();
607    /// assert_eq!(pair.a, 1);
608    /// assert_eq!(pair.b, 2);
609    /// ```
610    pub fn read_type<T: ReadFrom>(&self, offset: usize) -> Result<T> {
611        T::read_from(self, offset)
612    }
613
614    /// Writes a user-defined type implementing [`WriteTo`] at the given offset.
615    ///
616    /// The write goes to the pending log.
617    ///
618    /// # Errors
619    ///
620    /// Returns any error produced by the [`WriteTo`] implementation.
621    ///
622    /// # Examples
623    ///
624    /// ```
625    /// use cowfile::{CowFile, WriteTo, Result};
626    ///
627    /// struct Pair { a: u16, b: u16 }
628    ///
629    /// impl WriteTo for Pair {
630    ///     fn write_to(&self, pf: &CowFile, offset: usize) -> Result<()> {
631    ///         pf.write_le::<u16>(offset, self.a)?;
632    ///         pf.write_le::<u16>(offset + 2, self.b)?;
633    ///         Ok(())
634    ///     }
635    /// }
636    ///
637    /// let pf = CowFile::from_vec(vec![0u8; 8]);
638    /// pf.write_type(0, &Pair { a: 1, b: 2 }).unwrap();
639    /// assert_eq!(pf.read(0, 4).unwrap(), vec![0x01, 0x00, 0x02, 0x00]);
640    /// ```
641    pub fn write_type<T: WriteTo>(&self, offset: usize, value: &T) -> Result<()> {
642        value.write_to(self, offset)
643    }
644
645    /// Creates a cursor over this `CowFile` at position 0.
646    ///
647    /// The returned [`CowFileCursor`] implements [`std::io::Read`],
648    /// [`std::io::Write`], and [`std::io::Seek`], allowing the `CowFile`
649    /// to be used with any API that expects standard I/O traits.
650    ///
651    /// Multiple cursors can exist over the same `CowFile` simultaneously,
652    /// each with its own independent position.
653    ///
654    /// # Examples
655    ///
656    /// ```
657    /// use std::io::{Read, Write, Seek, SeekFrom};
658    /// use cowfile::CowFile;
659    ///
660    /// let pf = CowFile::from_vec(vec![0u8; 32]);
661    /// let mut cursor = pf.cursor();
662    ///
663    /// cursor.write_all(&[1, 2, 3, 4]).unwrap();
664    /// cursor.seek(SeekFrom::Start(0)).unwrap();
665    ///
666    /// let mut buf = [0u8; 4];
667    /// cursor.read_exact(&mut buf).unwrap();
668    /// assert_eq!(buf, [1, 2, 3, 4]);
669    /// ```
670    pub fn cursor(&self) -> CowFileCursor<'_> {
671        CowFileCursor::new(self)
672    }
673
674    /// Returns the original file path for mmap-backed instances opened via [`open`](CowFile::open).
675    ///
676    /// Returns `None` for vec-backed instances or those created via [`from_file`](CowFile::from_file).
677    ///
678    /// # Examples
679    ///
680    /// ```no_run
681    /// use cowfile::CowFile;
682    ///
683    /// let pf = CowFile::open("binary.exe").unwrap();
684    /// assert!(pf.source_path().is_some());
685    ///
686    /// let pf = CowFile::from_vec(vec![0u8; 10]);
687    /// assert!(pf.source_path().is_none());
688    /// ```
689    pub fn source_path(&self) -> Option<&Path> {
690        self.source_path.as_deref()
691    }
692
693    /// Creates an independent copy of this `CowFile`.
694    ///
695    /// For mmap-backed files with a known source path, re-opens the original
696    /// file — a new `MAP_PRIVATE` mmap that shares physical read pages with
697    /// the parent via OS-level copy-on-write. For vec-backed files or those
698    /// without a source path, clones the data.
699    ///
700    /// Pending writes are **not** carried over — the fork starts clean.
701    ///
702    /// # Errors
703    ///
704    /// Returns [`Error::Io`] if the source file cannot be reopened.
705    ///
706    /// # Examples
707    ///
708    /// ```no_run
709    /// use cowfile::CowFile;
710    ///
711    /// let pf = CowFile::open("binary.exe").unwrap();
712    /// pf.write(0, &[0xFF]).unwrap();
713    ///
714    /// let forked = pf.fork().unwrap();
715    /// // Fork starts clean — no pending writes
716    /// assert!(!forked.has_pending());
717    /// // But reads the same committed data
718    /// assert_eq!(forked.data()[0], pf.data()[0]);
719    /// ```
720    pub fn fork(&self) -> Result<CowFile> {
721        match &self.source_path {
722            Some(path) => CowFile::open(path),
723            None => Ok(CowFile::from_vec(self.buffer.as_slice().to_vec())),
724        }
725    }
726
727    /// Produces a `Vec<u8>` with all pending writes composited over the
728    /// committed buffer.
729    ///
730    /// # Errors
731    ///
732    /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned.
733    ///
734    /// # Examples
735    ///
736    /// ```
737    /// use cowfile::CowFile;
738    ///
739    /// let pf = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
740    /// pf.write(0, &[0xFF]).unwrap();
741    ///
742    /// let output = pf.to_vec().unwrap();
743    /// assert_eq!(output, vec![0xFF, 2, 3, 4, 5]);
744    /// ```
745    pub fn to_vec(&self) -> Result<Vec<u8>> {
746        let mut output = self.buffer.as_slice().to_vec();
747
748        if self.dirty.load(Ordering::Relaxed) {
749            let pending = self
750                .pending
751                .read()
752                .map_err(|e| Error::LockPoisoned(e.to_string()))?;
753            for pw in pending.iter() {
754                output[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
755            }
756        }
757
758        Ok(output)
759    }
760
761    /// Writes the data with all pending writes applied to disk.
762    ///
763    /// For files smaller than 64 MiB, this uses buffered I/O. For larger
764    /// files, this uses a writable memory map for efficient output.
765    ///
766    /// # Errors
767    ///
768    /// Returns [`Error::Io`] if the file cannot be created or written.
769    ///
770    /// # Examples
771    ///
772    /// ```no_run
773    /// use cowfile::CowFile;
774    ///
775    /// let pf = CowFile::from_vec(vec![0u8; 1024]);
776    /// pf.write(0, &[0x4D, 0x5A]).unwrap();
777    /// pf.to_file("output.bin").unwrap();
778    /// ```
779    pub fn to_file(&self, path: impl AsRef<Path>) -> Result<()> {
780        let size = self.buffer.len();
781
782        if size >= MMAP_WRITE_THRESHOLD {
783            self.to_file_mmap(path.as_ref())
784        } else {
785            let output = self.to_vec()?;
786            let mut file = std::fs::File::create(path.as_ref())?;
787            file.write_all(&output)?;
788            file.flush()?;
789            Ok(())
790        }
791    }
792
793    /// Consumes the `CowFile` and returns the data as an owned `Vec<u8>`.
794    ///
795    /// If there are no pending writes and the backend is a `Vec`, this is a
796    /// zero-copy move. Otherwise, the data is materialized with pending writes
797    /// applied.
798    ///
799    /// # Errors
800    ///
801    /// Returns [`Error::LockPoisoned`] if the internal lock was poisoned.
802    ///
803    /// # Examples
804    ///
805    /// ```
806    /// use cowfile::CowFile;
807    ///
808    /// let pf = CowFile::from_vec(vec![1, 2, 3]);
809    /// let data = pf.into_vec().unwrap();
810    /// assert_eq!(data, vec![1, 2, 3]);
811    /// ```
812    pub fn into_vec(self) -> Result<Vec<u8>> {
813        let dirty = self.dirty.load(Ordering::Relaxed);
814
815        if !dirty {
816            return Ok(match self.buffer {
817                Inner::Vec(v) => v,
818                Inner::Mmap(m) => m.as_ref().to_vec(),
819            });
820        }
821
822        let pending = self
823            .pending
824            .into_inner()
825            .map_err(|e| Error::LockPoisoned(e.to_string()))?;
826        let mut output = match self.buffer {
827            Inner::Vec(v) => v,
828            Inner::Mmap(m) => m.as_ref().to_vec(),
829        };
830
831        for pw in pending {
832            output[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
833        }
834
835        Ok(output)
836    }
837
838    /// Validates that `[offset, offset + length)` is within bounds.
839    fn check_bounds(&self, offset: usize, length: usize) -> Result<()> {
840        let end = offset.checked_add(length).ok_or(Error::OutOfBounds {
841            offset,
842            length,
843            file_size: self.buffer.len(),
844        })?;
845
846        if end > self.buffer.len() {
847            return Err(Error::OutOfBounds {
848                offset,
849                length,
850                file_size: self.buffer.len(),
851            });
852        }
853
854        Ok(())
855    }
856
857    /// Writes to a file using a writable memory map (for large files).
858    fn to_file_mmap(&self, path: &Path) -> Result<()> {
859        let base = self.buffer.as_slice();
860        let size = base.len() as u64;
861
862        let file = std::fs::OpenOptions::new()
863            .read(true)
864            .write(true)
865            .create(true)
866            .truncate(true)
867            .open(path)?;
868        file.set_len(size)?;
869
870        // SAFETY: The file was just created and truncated. We have exclusive write
871        // access. The mmap is flushed before being dropped.
872        let mut mmap = unsafe { memmap2::MmapMut::map_mut(&file)? };
873        mmap.copy_from_slice(base);
874
875        if self.dirty.load(Ordering::Relaxed) {
876            let pending = self
877                .pending
878                .read()
879                .map_err(|e| Error::LockPoisoned(e.to_string()))?;
880            for pw in pending.iter() {
881                mmap[pw.offset..pw.offset + pw.data.len()].copy_from_slice(&pw.data);
882            }
883        }
884
885        mmap.flush()?;
886        Ok(())
887    }
888}
889
890/// Applies pending writes that overlap `[read_offset..read_offset+read_len)` to `buf`.
891///
892/// Writes are applied in order — later writes overwrite earlier ones.
893fn apply_pending(buf: &mut [u8], read_offset: usize, read_len: usize, pending: &[PendingWrite]) {
894    let read_end = read_offset + read_len;
895    for pw in pending {
896        let pw_end = pw.offset + pw.data.len();
897        // Check for overlap.
898        if pw.offset < read_end && pw_end > read_offset {
899            let start = pw.offset.max(read_offset);
900            let end = pw_end.min(read_end);
901            let buf_start = start - read_offset;
902            let pw_start = start - pw.offset;
903            buf[buf_start..buf_start + (end - start)]
904                .copy_from_slice(&pw.data[pw_start..pw_start + (end - start)]);
905        }
906    }
907}
908
#[cfg(test)]
mod tests {
    use crate::{
        traits::{ReadFrom, WriteTo},
        CowFile,
    };

    /// Builds a `CowFile` over `len` zero bytes.
    fn zeroed(len: usize) -> CowFile {
        CowFile::from_vec(vec![0u8; len])
    }

    /// Creates a named temporary file containing exactly `bytes`, flushed before returning.
    fn temp_file_with(bytes: &[u8]) -> tempfile::NamedTempFile {
        use std::io::Write;
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        tmp.write_all(bytes).unwrap();
        tmp.flush().unwrap();
        tmp
    }

    /// A vector-backed file reports its length and exposes the bytes it was built from.
    #[test]
    fn test_from_vec_basic() {
        let cow = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
        assert!(!cow.is_empty());
        assert_eq!(cow.len(), 5);
        assert_eq!(cow.data(), &[1, 2, 3, 4, 5]);
    }

    /// An empty vector yields an empty file.
    #[test]
    fn test_from_vec_empty() {
        let cow = CowFile::from_vec(Vec::new());
        assert!(cow.is_empty());
        assert_eq!(cow.len(), 0);
    }

    /// Opening a file by path exposes its on-disk contents.
    #[test]
    fn test_open_basic() {
        let bytes: [u8; 4] = [0xDE, 0xAD, 0xBE, 0xEF];
        let tmp = temp_file_with(&bytes);

        let cow = CowFile::open(tmp.path()).unwrap();
        assert_eq!(cow.len(), 4);
        assert_eq!(cow.data(), &bytes);
    }

    /// Opening a path that does not exist reports an error.
    #[test]
    fn test_open_nonexistent() {
        let outcome = CowFile::open("/nonexistent/path.bin");
        assert!(outcome.is_err());
    }

    /// `data()` keeps showing committed bytes while `read()` includes pending writes.
    #[test]
    fn test_write_and_read() {
        let cow = zeroed(10);
        cow.write(2, &[0xFF, 0xFE]).unwrap();

        // Committed view is unchanged by the pending write.
        assert_eq!(cow.data()[2], 0x00);

        // Composited view carries the pending bytes.
        let merged = cow.read(0, 10).unwrap();
        assert_eq!([merged[0], merged[2], merged[3]], [0x00, 0xFF, 0xFE]);
    }

    /// A single-byte write is visible through `read_byte`; its neighbour is not affected.
    #[test]
    fn test_write_byte_and_read_byte() {
        let cow = zeroed(10);
        cow.write_byte(5, 0xAA).unwrap();
        let written = cow.read_byte(5).unwrap();
        let neighbour = cow.read_byte(4).unwrap();
        assert_eq!([written, neighbour], [0xAA, 0x00]);
    }

    /// Writing an empty slice succeeds and leaves nothing pending.
    #[test]
    fn test_write_empty_is_noop() {
        let cow = zeroed(10);
        cow.write(5, &[]).unwrap();
        assert!(!cow.has_pending());
    }

    /// `commit` clears the pending state and makes the write visible through `data()`.
    #[test]
    fn test_commit_and_read() {
        let mut cow = zeroed(10);
        cow.write(0, &[0xAA]).unwrap();
        assert!(cow.has_pending());

        cow.commit().unwrap();
        assert!(!cow.has_pending());
        assert_eq!(cow.data()[0], 0xAA);
    }

    /// Writes from successive commit passes are all present in the final output.
    #[test]
    fn test_multi_commit_cycle() {
        let mut cow = zeroed(20);

        // Each pass is a batch of (offset, byte) writes followed by one commit.
        let passes: [&[(usize, u8)]; 2] = [&[(0, 0xAA), (10, 0xBB)], &[(5, 0xCC)]];
        for pass in passes.iter() {
            for &(offset, byte) in pass.iter() {
                cow.write(offset, &[byte]).unwrap();
            }
            cow.commit().unwrap();
        }

        let output = cow.to_vec().unwrap();
        assert_eq!([output[0], output[5], output[10]], [0xAA, 0xCC, 0xBB]);
    }

    /// `to_vec` on an unmodified file returns the original bytes.
    #[test]
    fn test_to_vec_no_modifications() {
        let original = vec![1, 2, 3, 4, 5];
        let cow = CowFile::from_vec(original.clone());
        assert_eq!(cow.to_vec().unwrap(), original);
    }

    /// `to_vec` includes uncommitted writes and leaves other bytes untouched.
    #[test]
    fn test_to_vec_with_modifications() {
        let cow = zeroed(10);
        let edits: [(usize, u8); 2] = [(0, 0xFF), (9, 0xEE)];
        for &(offset, byte) in edits.iter() {
            cow.write(offset, &[byte]).unwrap();
        }

        let output = cow.to_vec().unwrap();
        assert_eq!([output[0], output[9], output[5]], [0xFF, 0xEE, 0x00]);
    }

    /// `to_file` writes the full buffer, pending writes included, to the target path.
    #[test]
    fn test_to_file_and_read_back() {
        let cow = zeroed(100);
        cow.write(0, &[0x4D, 0x5A]).unwrap();
        cow.write(50, &[0xDE, 0xAD]).unwrap();

        let target = tempfile::NamedTempFile::new().unwrap();
        cow.to_file(target.path()).unwrap();

        let written = std::fs::read(target.path()).unwrap();
        assert_eq!(written.len(), 100);
        assert_eq!(&written[..2], &[0x4D, 0x5A]);
        assert_eq!(&written[50..52], &[0xDE, 0xAD]);
        assert_eq!(written[10], 0x00);
    }

    /// A read extending past the end of the file is rejected.
    #[test]
    fn test_out_of_bounds_read() {
        let cow = zeroed(10);
        assert!(cow.read(8, 5).is_err());
    }

    /// A write extending past the end of the file is rejected.
    #[test]
    fn test_out_of_bounds_write() {
        let cow = zeroed(10);
        assert!(cow.write(8, &[0xFF; 5]).is_err());
    }

    /// A zero-length read positioned exactly at the end succeeds and yields no bytes.
    #[test]
    fn test_out_of_bounds_at_exact_end() {
        let cow = zeroed(10);
        let tail = cow.read(10, 0);
        assert!(tail.is_ok());
        assert!(tail.unwrap().is_empty());
    }

    /// `has_pending` is false initially, true after a write, and false again after commit.
    #[test]
    fn test_has_pending() {
        let mut cow = zeroed(10);
        assert!(!cow.has_pending());

        cow.write(0, &[0xFF]).unwrap();
        assert!(cow.has_pending());

        cow.commit().unwrap();
        assert!(!cow.has_pending());
    }

    /// `data()` changes only once pending writes have been committed.
    #[test]
    fn test_data_shows_committed_state() {
        let before: [u8; 5] = [1, 2, 3, 4, 5];
        let after: [u8; 5] = [0xFF; 5];

        let mut cow = CowFile::from_vec(before.to_vec());
        cow.write(0, &after).unwrap();

        // Still the original bytes before commit.
        assert_eq!(cow.data(), &before);

        cow.commit().unwrap();

        // The overwrite is visible after commit.
        assert_eq!(cow.data(), &after);
    }

    /// A slice obtained from `data()` can be held across a write and keeps its contents.
    #[test]
    fn test_data_while_writing() {
        let cow = zeroed(100);

        // Borrow the committed view first, then write while it is still alive.
        let committed = cow.data();
        cow.write(10, &[0xFF]).unwrap();

        // The borrowed view is unchanged.
        assert_eq!(committed[10], 0x00);

        // The composited single-byte read sees the pending value.
        assert_eq!(cow.read_byte(10).unwrap(), 0xFF);
    }

    /// `discard` drops pending writes so reads return the committed bytes again.
    #[test]
    fn test_discard() {
        let mut cow = zeroed(10);
        cow.write(0, &[0xFF]).unwrap();
        assert!(cow.has_pending());

        cow.discard().unwrap();
        assert!(!cow.has_pending());
        assert_eq!(cow.read_byte(0).unwrap(), 0x00);
    }

    /// Compile-time check that `CowFile` implements `Send`.
    #[test]
    fn test_send_static_assertion() {
        fn requires_send<T>()
        where
            T: Send,
        {
        }
        requires_send::<CowFile>();
    }

    /// A little-endian `u16` round-trips and is stored low byte first.
    #[test]
    fn test_read_write_le_u16() {
        let cow = zeroed(16);
        cow.write_le::<u16>(0, 0xCAFE).unwrap();
        assert_eq!(cow.read_le::<u16>(0).unwrap(), 0xCAFE);
        assert_eq!(cow.read(0, 2).unwrap(), vec![0xFE, 0xCA]);
    }

    /// A little-endian `u32` round-trips at a non-zero offset.
    #[test]
    fn test_read_write_le_u32() {
        let cow = zeroed(16);
        cow.write_le::<u32>(4, 0xDEADBEEF).unwrap();
        assert_eq!(cow.read_le::<u32>(4).unwrap(), 0xDEADBEEF);
    }

    /// A little-endian `u64` round-trips.
    #[test]
    fn test_read_write_le_u64() {
        let cow = zeroed(16);
        cow.write_le::<u64>(0, 0x0123456789ABCDEF).unwrap();
        assert_eq!(cow.read_le::<u64>(0).unwrap(), 0x0123456789ABCDEF);
    }

    /// A big-endian `u32` round-trips and is stored high byte first.
    #[test]
    fn test_read_write_be_u32() {
        let cow = zeroed(16);
        cow.write_be::<u32>(0, 0xDEADBEEF).unwrap();
        assert_eq!(cow.read_be::<u32>(0).unwrap(), 0xDEADBEEF);
        assert_eq!(cow.read(0, 4).unwrap(), vec![0xDE, 0xAD, 0xBE, 0xEF]);
    }

    /// Reading a `u32` from a three-byte file fails.
    #[test]
    fn test_read_le_out_of_bounds() {
        let cow = zeroed(3);
        assert!(cow.read_le::<u32>(0).is_err());
    }

    /// Writing a `u32` into a three-byte file fails.
    #[test]
    fn test_write_le_out_of_bounds() {
        let cow = zeroed(3);
        assert!(cow.write_le::<u32>(0, 42).is_err());
    }

    /// A user type implementing `ReadFrom` and `WriteTo` round-trips through
    /// `write_type` / `read_type`.
    #[test]
    fn test_read_write_type() {
        // Seven-byte little-endian layout: u32 at +0, u16 at +4, u8 at +6.
        struct Header {
            magic: u32,
            version: u16,
            flags: u8,
        }

        impl ReadFrom for Header {
            fn read_from(file: &CowFile, offset: usize) -> crate::Result<Self> {
                let magic = file.read_le::<u32>(offset)?;
                let version = file.read_le::<u16>(offset + 4)?;
                let flags = file.read_le::<u8>(offset + 6)?;
                Ok(Header {
                    magic,
                    version,
                    flags,
                })
            }
        }

        impl WriteTo for Header {
            fn write_to(&self, file: &CowFile, offset: usize) -> crate::Result<()> {
                file.write_le::<u32>(offset, self.magic)?;
                file.write_le::<u16>(offset + 4, self.version)?;
                file.write_le::<u8>(offset + 6, self.flags)?;
                Ok(())
            }
        }

        let cow = zeroed(16);
        let header = Header {
            magic: 0x4D5A9000,
            version: 3,
            flags: 0xFF,
        };

        cow.write_type(0, &header).unwrap();
        let decoded: Header = cow.read_type(0).unwrap();
        assert_eq!(decoded.magic, 0x4D5A9000);
        assert_eq!(decoded.version, 3);
        assert_eq!(decoded.flags, 0xFF);
    }

    /// Building from an already-open `std::fs::File` exposes the file's contents.
    #[test]
    fn test_from_file() {
        let bytes: [u8; 4] = [0xDE, 0xAD, 0xBE, 0xEF];
        let tmp = temp_file_with(&bytes);

        let handle = std::fs::File::open(tmp.path()).unwrap();
        let cow = CowFile::from_file(handle).unwrap();
        assert_eq!(cow.len(), 4);
        assert_eq!(cow.data(), &bytes);
    }

    /// `into_vec` on an unmodified vector-backed file returns the original bytes.
    #[test]
    fn test_into_vec_no_modifications() {
        let cow = CowFile::from_vec(vec![1, 2, 3, 4, 5]);
        let bytes = cow.into_vec().unwrap();
        assert_eq!(bytes, vec![1, 2, 3, 4, 5]);
    }

    /// `into_vec` includes uncommitted writes and leaves other bytes untouched.
    #[test]
    fn test_into_vec_with_modifications() {
        let cow = zeroed(10);
        cow.write(0, &[0xFF]).unwrap();
        cow.write(9, &[0xEE]).unwrap();

        let bytes = cow.into_vec().unwrap();
        assert_eq!([bytes[0], bytes[9], bytes[5]], [0xFF, 0xEE, 0x00]);
    }

    /// `into_vec` on a file opened by path returns the on-disk bytes.
    #[test]
    fn test_into_vec_from_mmap() {
        let tmp = temp_file_with(&[0xDE, 0xAD, 0xBE, 0xEF]);

        let cow = CowFile::open(tmp.path()).unwrap();
        let bytes = cow.into_vec().unwrap();
        assert_eq!(bytes, vec![0xDE, 0xAD, 0xBE, 0xEF]);
    }

    /// `into_vec` on a file opened by path includes an uncommitted write.
    #[test]
    fn test_into_vec_from_mmap_with_modifications() {
        let tmp = temp_file_with(&[0x00, 0x00, 0x00, 0x00]);

        let cow = CowFile::open(tmp.path()).unwrap();
        cow.write(0, &[0xFF]).unwrap();
        let bytes = cow.into_vec().unwrap();
        assert_eq!(bytes, vec![0xFF, 0x00, 0x00, 0x00]);
    }

    /// Bytes written through a cursor can be read back after seeking to the start.
    #[test]
    fn test_cursor_basic() {
        use std::io::{Read, Seek, SeekFrom, Write};

        let cow = zeroed(32);
        let mut cursor = cow.cursor();

        cursor.write_all(&[0xAA, 0xBB, 0xCC]).unwrap();
        cursor.seek(SeekFrom::Start(0)).unwrap();

        let mut readback = [0u8; 3];
        cursor.read_exact(&mut readback).unwrap();
        assert_eq!(readback, [0xAA, 0xBB, 0xCC]);
    }

    /// Where two pending writes overlap, the later one wins in the composited read.
    #[test]
    fn test_overlapping_pending_writes() {
        let cow = zeroed(20);

        cow.write(0, &[0xAA; 10]).unwrap();
        cow.write(5, &[0xBB; 10]).unwrap();

        let merged = cow.read(0, 20).unwrap();
        // (start, end, expected byte) for each region of the composited result.
        let regions: [(usize, usize, u8); 3] = [(0, 5, 0xAA), (5, 15, 0xBB), (15, 20, 0x00)];
        for &(start, end, expected) in regions.iter() {
            assert!(merged[start..end].iter().all(|&b| b == expected));
        }
    }

    /// Two pending single-byte writes to the same offset: `read_byte` returns the later one.
    #[test]
    fn test_read_byte_pending_last_wins() {
        let cow = zeroed(10);
        cow.write_byte(5, 0xAA).unwrap();
        cow.write_byte(5, 0xBB).unwrap();
        assert_eq!(cow.read_byte(5).unwrap(), 0xBB);
    }
}