Skip to main content

zipatch_rs/index/
source.rs

1//! [`PatchSource`] trait and built-in implementations.
2//!
3//! [`PatchSource`] is the byte-fetching seam between a [`crate::index::Plan`]
4//! and the patch files the plan was built from. [`IndexApplier`](crate::index::IndexApplier)
5//! never touches the network or owns a file handle — it calls
6//! [`PatchSource::read`] with a `(patch, offset)` tuple and an exact-size
7//! buffer, and the caller decides how those reads are served (local files,
8//! in-memory cache, HTTP multi-range request, …).
9//!
10//! # Chain semantics
11//!
12//! `patch: u32` indexes into [`crate::index::Plan::patches`]. A single-patch
13//! plan uses index `0`; multi-patch chains use successive indices in the
14//! order patches were fed to [`crate::index::PlanBuilder::add_patch`]. The
15//! built-in sources store one file/buffer per patch and reject out-of-range
16//! indices with [`crate::ZiPatchError::PatchIndexOutOfRange`].
17//!
18//! # Caller-fills-buffer
19//!
20//! The destination buffer is supplied by the applier at the exact length it
21//! needs (every region carries its length). Returning bytes via `&[u8]` would
22//! force either an extra copy or a `Cow` that allocates for compressed
23//! regions; this shape composes cleanly with HTTP multi-range responses and
24//! pre-allocated scratch buffers without either compromise.
25//!
26//! # Built-in implementations
27//!
28//! - [`FilePatchSource`] — wraps one or more owned [`std::fs::File`]s,
29//!   single-threaded. Use [`FilePatchSource::open`] for one patch or
30//!   [`FilePatchSource::open_chain`] for many. Callers wanting concurrent
31//!   reads should construct one source per thread.
32//! - [`MemoryPatchSource`] (test-only) — wraps one or more `Vec<u8>`s.
33//!   Gated behind the `test-utils` feature.
34
35use crate::{Result, ZiPatchError};
36use std::fs::File;
37use std::io::{Read, Seek, SeekFrom};
38use std::path::Path;
39
40/// Source of patch-file bytes for an [`IndexApplier`](crate::index::IndexApplier).
41///
42/// Implementations must fill `dst` completely with `dst.len()` source bytes
43/// starting at `offset` in the patch identified by `patch`. A short read —
44/// fewer than `dst.len()` bytes available — must surface as
45/// [`ZiPatchError::PatchSourceTooShort`], not as a partial fill. An
46/// out-of-range `patch` must surface as
47/// [`ZiPatchError::PatchIndexOutOfRange`].
48pub trait PatchSource {
49    /// Fill `dst` with `dst.len()` source bytes starting at `offset` in the
50    /// patch indexed by `patch`.
51    ///
52    /// # Errors
53    ///
54    /// - [`ZiPatchError::PatchIndexOutOfRange`] — `patch` is `>= count`.
55    /// - [`ZiPatchError::PatchSourceTooShort`] — the source has fewer than
56    ///   `dst.len()` bytes at `offset` in patch `patch`.
57    /// - [`ZiPatchError::Io`] — underlying I/O failure (only meaningful for
58    ///   sources backed by real I/O, e.g. [`FilePatchSource`]).
59    fn read(&mut self, patch: u32, offset: u64, dst: &mut [u8]) -> Result<()>;
60}
61
/// File-backed [`PatchSource`]: owns one open [`std::fs::File`] for each
/// patch in the chain.
///
/// There is no internal `Mutex`, so a value is meant for single-threaded
/// use; callers that need concurrent reads should build a separate source
/// for every thread.
#[derive(Debug)]
pub struct FilePatchSource {
    /// Open patch files; the element at position `i` serves patch index `i`.
    files: Vec<File>,
}
71
72impl FilePatchSource {
73    /// Open a single patch file and wrap it as a one-patch chain.
74    ///
75    /// Equivalent to [`FilePatchSource::open_chain`] with a one-element iterator.
76    ///
77    /// # Errors
78    ///
79    /// Returns [`ZiPatchError::Io`] if the file cannot be opened.
80    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
81        let file = File::open(path)?;
82        Ok(Self { files: vec![file] })
83    }
84
85    /// Open every patch file in `paths` and wrap them as a chain. The first
86    /// path becomes patch index `0`, the second index `1`, and so on, matching
87    /// the order [`crate::index::PlanBuilder::add_patch`] consumed them.
88    ///
89    /// # Errors
90    ///
91    /// Returns [`ZiPatchError::Io`] on the first path that fails to open.
92    pub fn open_chain<I, P>(paths: I) -> Result<Self>
93    where
94        I: IntoIterator<Item = P>,
95        P: AsRef<Path>,
96    {
97        let iter = paths.into_iter();
98        // `IntoIterator` doesn't promise `ExactSizeIterator`, but `Vec` /
99        // `&[P]` inputs (the common case) do report an accurate lower bound
100        // through `size_hint().0` and so skip the geometric regrow.
101        let mut files = Vec::with_capacity(iter.size_hint().0);
102        for p in iter {
103            files.push(File::open(p).map_err(ZiPatchError::Io)?);
104        }
105        Ok(Self { files })
106    }
107
108    /// Wrap an already-open [`File`] as a one-patch chain.
109    #[must_use]
110    pub fn from_file(file: File) -> Self {
111        Self { files: vec![file] }
112    }
113
114    /// Number of patch files in the chain.
115    #[must_use]
116    pub fn patch_count(&self) -> usize {
117        self.files.len()
118    }
119}
120
121impl PatchSource for FilePatchSource {
122    fn read(&mut self, patch: u32, offset: u64, dst: &mut [u8]) -> Result<()> {
123        let count = self.files.len();
124        let file = self
125            .files
126            .get_mut(patch as usize)
127            .ok_or(ZiPatchError::PatchIndexOutOfRange { patch, count })?;
128        file.seek(SeekFrom::Start(offset))?;
129        match file.read_exact(dst) {
130            Ok(()) => Ok(()),
131            Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
132                Err(ZiPatchError::PatchSourceTooShort {
133                    offset,
134                    requested: dst.len(),
135                })
136            }
137            Err(e) => Err(ZiPatchError::Io(e)),
138        }
139    }
140}
141
/// In-memory [`PatchSource`]: keeps one byte buffer for each patch in the
/// chain.
///
/// Intended for tests only — compiled under `cfg(test)` or when the
/// `test-utils` feature is enabled.
#[cfg(any(test, feature = "test-utils"))]
#[derive(Debug, Clone)]
pub struct MemoryPatchSource {
    /// Patch contents; the element at position `i` serves patch index `i`.
    /// Each buffer sits behind an `Arc`, so the derived `Clone` shares the
    /// bytes instead of copying them.
    bufs: Vec<std::sync::Arc<[u8]>>,
}
151
#[cfg(any(test, feature = "test-utils"))]
impl MemoryPatchSource {
    /// Wrap a single `Vec<u8>` as a one-patch chain, taking ownership of its
    /// contents.
    #[must_use]
    pub fn new(buf: Vec<u8>) -> Self {
        Self {
            bufs: vec![std::sync::Arc::<[u8]>::from(buf)],
        }
    }

    /// Wrap a single byte slice as a one-patch chain (copying it).
    #[must_use]
    pub fn from_slice(buf: &[u8]) -> Self {
        // `Arc<[u8]>: From<&[u8]>` copies the bytes straight into the shared
        // allocation; going through a temporary `Vec` would copy them twice.
        Self {
            bufs: vec![std::sync::Arc::<[u8]>::from(buf)],
        }
    }

    /// Wrap multiple `Vec<u8>`s as a multi-patch chain. The vector at
    /// position `i` becomes patch index `i`.
    #[must_use]
    pub fn new_chain(bufs: Vec<Vec<u8>>) -> Self {
        Self {
            bufs: bufs
                .into_iter()
                .map(std::sync::Arc::<[u8]>::from)
                .collect(),
        }
    }

    /// Wrap multiple byte slices as a multi-patch chain (copying each). The
    /// slice at position `i` becomes patch index `i`.
    #[must_use]
    pub fn from_slices(bufs: &[&[u8]]) -> Self {
        // One allocation and one copy per slice, as in `from_slice`.
        Self {
            bufs: bufs
                .iter()
                .map(|&bytes| std::sync::Arc::<[u8]>::from(bytes))
                .collect(),
        }
    }

    /// Number of patches in the chain.
    #[must_use]
    pub fn patch_count(&self) -> usize {
        self.bufs.len()
    }
}
192
#[cfg(any(test, feature = "test-utils"))]
impl PatchSource for MemoryPatchSource {
    /// Copy `dst.len()` bytes starting at `offset` of buffer `patch` into
    /// `dst`.
    ///
    /// # Errors
    ///
    /// - [`ZiPatchError::PatchIndexOutOfRange`] — no buffer is stored at
    ///   index `patch`.
    /// - [`ZiPatchError::PatchSourceTooShort`] — the range
    ///   `offset .. offset + dst.len()` does not fit in `usize` or reaches
    ///   past the end of the buffer.
    fn read(&mut self, patch: u32, offset: u64, dst: &mut [u8]) -> Result<()> {
        let count = self.bufs.len();
        let buf = match self.bufs.get(patch as usize) {
            Some(buf) => buf,
            None => return Err(ZiPatchError::PatchIndexOutOfRange { patch, count }),
        };

        let requested = dst.len();
        // Every way the range can be invalid — offset too large for `usize`,
        // end overflowing, or end beyond the buffer — collapses to `None`
        // and is reported identically below.
        let wanted = usize::try_from(offset)
            .ok()
            .and_then(|start| start.checked_add(requested).map(|end| start..end))
            .and_then(|range| buf.get(range));

        match wanted {
            Some(bytes) => {
                dst.copy_from_slice(bytes);
                Ok(())
            }
            None => Err(ZiPatchError::PatchSourceTooShort { offset, requested }),
        }
    }
}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Write `contents` to a file called `name` inside `dir` and return the
    /// file's path.
    fn write_fixture(dir: &tempfile::TempDir, name: &str, contents: &[u8]) -> std::path::PathBuf {
        let path = dir.path().join(name);
        std::fs::write(&path, contents).unwrap();
        path
    }

    #[test]
    fn memory_source_round_trips_arbitrary_ranges() {
        let bytes: Vec<u8> = (0..=255u8).collect();
        let mut src = MemoryPatchSource::new(bytes.clone());

        // Head, middle and tail of the buffer, as (offset, length) pairs.
        for &(offset, len) in &[(0u64, 16usize), (100, 32), (240, 16)] {
            let mut got = vec![0u8; len];
            src.read(0, offset, &mut got).unwrap();
            let start = offset as usize;
            assert_eq!(&got[..], &bytes[start..start + len]);
        }

        // A zero-length read succeeds at any in-range offset, including the
        // one-past-the-end position.
        let mut nothing = [0u8; 0];
        for &offset in &[0u64, 256] {
            src.read(0, offset, &mut nothing).unwrap();
        }
    }

    #[test]
    fn memory_source_out_of_range_returns_too_short() {
        let mut src = MemoryPatchSource::new(vec![0u8; 16]);
        let mut buf = [0u8; 4];

        // The range starts inside the buffer but runs past its end.
        match src
            .read(0, 15, &mut buf)
            .expect_err("read past end must fail")
        {
            ZiPatchError::PatchSourceTooShort { offset, requested } => {
                assert_eq!(offset, 15);
                assert_eq!(requested, 4);
            }
            other => panic!("expected PatchSourceTooShort, got {other:?}"),
        }

        // The range starts well beyond the end of the buffer.
        let far = src
            .read(0, 1_000_000, &mut buf)
            .expect_err("read far past end must fail");
        assert!(matches!(far, ZiPatchError::PatchSourceTooShort { .. }));
    }

    #[test]
    fn memory_source_chain_indexes_each_patch() {
        // Patch 0 holds 0xA0..=0xAF and patch 1 holds 0xB0..=0xBF, so every
        // byte identifies the patch it came from.
        let first: Vec<u8> = (0xA0..0xB0u8).collect();
        let second: Vec<u8> = (0xB0..0xC0u8).collect();
        let mut src = MemoryPatchSource::new_chain(vec![first.clone(), second.clone()]);

        let mut buf = [0u8; 4];
        src.read(0, 0, &mut buf).unwrap();
        assert_eq!(&buf[..], &first[..4]);
        src.read(1, 0, &mut buf).unwrap();
        assert_eq!(&buf[..], &second[..4]);
        src.read(0, 12, &mut buf).unwrap();
        assert_eq!(&buf[..], &first[12..16]);
    }

    #[test]
    fn memory_source_chain_rejects_out_of_range_patch() {
        let mut src = MemoryPatchSource::new_chain(vec![vec![0u8; 16]]);
        let mut buf = [0u8; 4];

        match src
            .read(1, 0, &mut buf)
            .expect_err("patch 1 must be out of range")
        {
            ZiPatchError::PatchIndexOutOfRange { patch, count } => {
                assert_eq!(patch, 1);
                assert_eq!(count, 1);
            }
            other => panic!("expected PatchIndexOutOfRange, got {other:?}"),
        }
    }

    #[test]
    fn file_source_round_trips_arbitrary_ranges() {
        let bytes: Vec<u8> = (0..=255u8).collect();
        let dir = tempfile::tempdir().unwrap();
        let path = write_fixture(&dir, "source.bin", &bytes);

        let mut src = FilePatchSource::open(&path).unwrap();

        // Head and middle of the file, as (offset, length) pairs.
        for &(offset, len) in &[(0u64, 16usize), (100, 32)] {
            let mut got = vec![0u8; len];
            src.read(0, offset, &mut got).unwrap();
            let start = offset as usize;
            assert_eq!(&got[..], &bytes[start..start + len]);
        }
    }

    #[test]
    fn file_source_short_returns_too_short() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_fixture(&dir, "source.bin", &[0u8; 16]);

        let mut src = FilePatchSource::open(&path).unwrap();

        // Ask for twice as many bytes as the file contains.
        let mut buf = [0u8; 32];
        let err = src
            .read(0, 0, &mut buf)
            .expect_err("read past end must fail");
        assert!(matches!(err, ZiPatchError::PatchSourceTooShort { .. }));
    }

    #[test]
    fn file_source_chain_indexes_each_file() {
        let dir = tempfile::tempdir().unwrap();
        let first = write_fixture(&dir, "p0.bin", b"AAAAAAAA");
        let second = write_fixture(&dir, "p1.bin", b"BBBBBBBB");

        let mut src = FilePatchSource::open_chain([&first, &second]).unwrap();
        assert_eq!(src.patch_count(), 2);

        let mut buf = [0u8; 4];
        src.read(0, 0, &mut buf).unwrap();
        assert_eq!(&buf, b"AAAA");
        src.read(1, 4, &mut buf).unwrap();
        assert_eq!(&buf, b"BBBB");
    }
}