Skip to main content

fits_well/reader/
source.rs

1//! Byte sources the reader fetches header and data units from.
2//!
3//! A [`Source`] hands back the bytes for a `[offset, offset+len)` range. In-memory
4//! sources ([`SliceSource`], and `MmapSource` under the `mmap` feature) return a
5//! zero-copy borrow, so decoding reads straight from the resident bytes; a
6//! streaming source ([`StreamSource`] over any `Read + Seek`) copies the range into
7//! the reader's reused scratch first. For in-memory sources that saves a whole
8//! memory pass over the data — the staging copy the seeking path can't avoid.
9
10use std::io::Read;
11use std::io::Seek;
12use std::io::SeekFrom;
13
14use crate::error::FitsError;
15use crate::error::Result;
16
17/// Seals [`Source`] so it is a closed set implemented only by the in-tree source
18/// types (`StreamSource`/`SliceSource`/`MmapSource`) — not an extension point, and
19/// its `slice`/`read_owned` plumbing is not a public contract.
20mod sealed {
21    pub trait Sealed {}
22}
23impl<R> sealed::Sealed for StreamSource<R> {}
24impl sealed::Sealed for SliceSource<'_> {}
25#[cfg(feature = "mmap")]
26impl sealed::Sealed for MmapSource {}
27
28/// A seekable byte source the reader fetches HDU header and data units from.
29/// Sealed — implemented only by this crate's source types, never externally.
30pub trait Source: sealed::Sealed {
31    /// Total byte length of the source. Fixed for the source's lifetime and used to
32    /// reject ranges that run past the end before allocating for them.
33    fn size(&self) -> u64;
34
35    /// The `len` bytes at `offset`, borrowed. In-memory sources return a slice of
36    /// themselves (zero-copy); a streaming source reads into `scratch` and returns a
37    /// slice of that. Errors if the range runs past the source.
38    fn slice<'a>(
39        &'a mut self,
40        offset: u64,
41        len: usize,
42        scratch: &'a mut Vec<u8>,
43    ) -> Result<&'a [u8]>;
44
45    /// The `len` bytes at `offset` in a fresh owned buffer — used where the bytes
46    /// must outlive the read (the parsed table / ASCII-table backing store). Kept
47    /// distinct from `slice().to_vec()` so a streaming source reads straight into
48    /// the owned buffer (one copy) instead of staging through `scratch` first (two).
49    fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>>;
50}
51
52/// Reject `[offset, offset+len)` that overflows or runs past `size`. A hostile
53/// header can claim a unit far larger than the file; refusing up front avoids a
54/// huge allocation that would only fail at `read_exact`.
55fn check_range(offset: u64, len: usize, size: u64) -> Result<()> {
56    if offset.checked_add(len as u64).is_none_or(|end| end > size) {
57        return Err(FitsError::UnexpectedEof);
58    }
59    Ok(())
60}
61
62/// Borrow `[offset, offset+len)` from an already-resident byte slice — the shared
63/// bounds-checked fetch behind the in-memory sources ([`SliceSource`], [`MmapSource`]).
64fn mem_slice(bytes: &[u8], offset: u64, len: usize) -> Result<&[u8]> {
65    check_range(offset, len, bytes.len() as u64)?;
66    let off = offset as usize;
67    Ok(&bytes[off..off + len])
68}
69
70/// Owned copy of [`mem_slice`] — the `read_owned` form for the in-memory sources.
71fn mem_owned(bytes: &[u8], offset: u64, len: usize) -> Result<Vec<u8>> {
72    Ok(mem_slice(bytes, offset, len)?.to_vec())
73}
74
75/// A streaming `Read + Seek` source. Each fetch seeks and copies the range out —
76/// there is no resident image to borrow, so reads cost one extra memory pass.
77#[derive(Debug)]
78pub struct StreamSource<R> {
79    inner: R,
80    len: u64,
81}
82
83impl<R: Read + Seek> StreamSource<R> {
84    /// Capture the source length once (a single seek to the end), so later reads
85    /// bounds-check without re-seeking.
86    pub(crate) fn new(mut inner: R) -> Result<StreamSource<R>> {
87        let len = inner.seek(SeekFrom::End(0))?;
88        Ok(StreamSource { inner, len })
89    }
90}
91
92impl<R: Read + Seek> Source for StreamSource<R> {
93    fn size(&self) -> u64 {
94        self.len
95    }
96
97    fn slice<'a>(
98        &'a mut self,
99        offset: u64,
100        len: usize,
101        scratch: &'a mut Vec<u8>,
102    ) -> Result<&'a [u8]> {
103        check_range(offset, len, self.len)?;
104        self.inner.seek(SeekFrom::Start(offset))?;
105        // Resize keeps `scratch`'s capacity across calls, so a reused buffer
106        // reallocates only when a larger unit appears.
107        scratch.resize(len, 0);
108        self.inner.read_exact(scratch.as_mut_slice())?;
109        Ok(&scratch[..len])
110    }
111
112    fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>> {
113        check_range(offset, len, self.len)?;
114        self.inner.seek(SeekFrom::Start(offset))?;
115        let mut buf = vec![0u8; len];
116        self.inner.read_exact(&mut buf)?;
117        Ok(buf)
118    }
119}
120
121/// An in-memory byte source: the whole file already resident as a slice (e.g. an
122/// mmap, or bytes read up front). Fetches return a zero-copy borrow, so a decode
123/// reads straight from these bytes with no staging copy.
124#[derive(Debug)]
125pub struct SliceSource<'a> {
126    bytes: &'a [u8],
127}
128
129impl<'a> SliceSource<'a> {
130    pub(crate) fn new(bytes: &'a [u8]) -> SliceSource<'a> {
131        SliceSource { bytes }
132    }
133}
134
135impl Source for SliceSource<'_> {
136    fn size(&self) -> u64 {
137        self.bytes.len() as u64
138    }
139
140    fn slice<'a>(
141        &'a mut self,
142        offset: u64,
143        len: usize,
144        _scratch: &'a mut Vec<u8>,
145    ) -> Result<&'a [u8]> {
146        mem_slice(self.bytes, offset, len)
147    }
148
149    fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>> {
150        mem_owned(self.bytes, offset, len)
151    }
152}
153
/// Source backed by a memory-mapped file. The kernel pages the file in as it is
/// touched, and data units are decoded directly from the mapping — no staging
/// copy and no read syscalls. Owning the [`memmap2::Mmap`] here keeps the mapping
/// alive for as long as the reader holds this source.
#[cfg(feature = "mmap")]
#[derive(Debug)]
pub struct MmapSource {
    /// The read-only mapping that fetches borrow from.
    map: memmap2::Mmap,
}

#[cfg(feature = "mmap")]
impl MmapSource {
    /// Open the file at `path` and map it read-only.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error if the file cannot be opened or mapped.
    pub(crate) fn open(path: &std::path::Path) -> Result<Self> {
        let file = std::fs::File::open(path)?;
        // SAFETY: the usual mmap contract. The mapping is read-only and owned by
        // this value; no mutable view of it is ever handed out. The one inherent
        // caveat is that another process truncating or rewriting the file can
        // change the bytes underneath us — opting into `mmap` accepts that,
        // exactly as in cfitsio/astropy.
        let mapped = unsafe { memmap2::Mmap::map(&file) };
        Ok(Self { map: mapped? })
    }
}

#[cfg(feature = "mmap")]
impl Source for MmapSource {
    /// Length of the mapping in bytes.
    fn size(&self) -> u64 {
        self.map.len() as u64
    }

    /// Bounds-checked sub-slice of the mapping. The scratch buffer is not used.
    fn slice<'m>(
        &'m mut self,
        offset: u64,
        len: usize,
        _staging: &'m mut Vec<u8>,
    ) -> Result<&'m [u8]> {
        let mapped: &[u8] = &self.map;
        mem_slice(mapped, offset, len)
    }

    /// Bounds-checked copy of the requested range into a new `Vec`.
    fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>> {
        let mapped: &[u8] = &self.map;
        mem_owned(mapped, offset, len)
    }
}