fits_well/reader/source.rs
1//! Byte sources the reader fetches header and data units from.
2//!
3//! A [`Source`] hands back the bytes for a `[offset, offset+len)` range. In-memory
4//! sources ([`SliceSource`], and `MmapSource` under the `mmap` feature) return a
5//! zero-copy borrow, so decoding reads straight from the resident bytes; a
6//! streaming source ([`StreamSource`] over any `Read + Seek`) copies the range into
7//! the reader's reused scratch first. For in-memory sources that saves a whole
8//! memory pass over the data — the staging copy the seeking path can't avoid.
9
10use std::io::Read;
11use std::io::Seek;
12use std::io::SeekFrom;
13
14use crate::error::FitsError;
15use crate::error::Result;
16
17/// Seals [`Source`] so it is a closed set implemented only by the in-tree source
18/// types (`StreamSource`/`SliceSource`/`MmapSource`) — not an extension point, and
19/// its `slice`/`read_owned` plumbing is not a public contract.
20mod sealed {
21 pub trait Sealed {}
22}
23impl<R> sealed::Sealed for StreamSource<R> {}
24impl sealed::Sealed for SliceSource<'_> {}
25#[cfg(feature = "mmap")]
26impl sealed::Sealed for MmapSource {}
27
28/// A seekable byte source the reader fetches HDU header and data units from.
29/// Sealed — implemented only by this crate's source types, never externally.
30pub trait Source: sealed::Sealed {
31 /// Total byte length of the source. Fixed for the source's lifetime and used to
32 /// reject ranges that run past the end before allocating for them.
33 fn size(&self) -> u64;
34
35 /// The `len` bytes at `offset`, borrowed. In-memory sources return a slice of
36 /// themselves (zero-copy); a streaming source reads into `scratch` and returns a
37 /// slice of that. Errors if the range runs past the source.
38 fn slice<'a>(
39 &'a mut self,
40 offset: u64,
41 len: usize,
42 scratch: &'a mut Vec<u8>,
43 ) -> Result<&'a [u8]>;
44
45 /// The `len` bytes at `offset` in a fresh owned buffer — used where the bytes
46 /// must outlive the read (the parsed table / ASCII-table backing store). Kept
47 /// distinct from `slice().to_vec()` so a streaming source reads straight into
48 /// the owned buffer (one copy) instead of staging through `scratch` first (two).
49 fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>>;
50}
51
52/// Reject `[offset, offset+len)` that overflows or runs past `size`. A hostile
53/// header can claim a unit far larger than the file; refusing up front avoids a
54/// huge allocation that would only fail at `read_exact`.
55fn check_range(offset: u64, len: usize, size: u64) -> Result<()> {
56 if offset.checked_add(len as u64).is_none_or(|end| end > size) {
57 return Err(FitsError::UnexpectedEof);
58 }
59 Ok(())
60}
61
62/// Borrow `[offset, offset+len)` from an already-resident byte slice — the shared
63/// bounds-checked fetch behind the in-memory sources ([`SliceSource`], [`MmapSource`]).
64fn mem_slice(bytes: &[u8], offset: u64, len: usize) -> Result<&[u8]> {
65 check_range(offset, len, bytes.len() as u64)?;
66 let off = offset as usize;
67 Ok(&bytes[off..off + len])
68}
69
70/// Owned copy of [`mem_slice`] — the `read_owned` form for the in-memory sources.
71fn mem_owned(bytes: &[u8], offset: u64, len: usize) -> Result<Vec<u8>> {
72 Ok(mem_slice(bytes, offset, len)?.to_vec())
73}
74
75/// A streaming `Read + Seek` source. Each fetch seeks and copies the range out —
76/// there is no resident image to borrow, so reads cost one extra memory pass.
77#[derive(Debug)]
78pub struct StreamSource<R> {
79 inner: R,
80 len: u64,
81}
82
83impl<R: Read + Seek> StreamSource<R> {
84 /// Capture the source length once (a single seek to the end), so later reads
85 /// bounds-check without re-seeking.
86 pub(crate) fn new(mut inner: R) -> Result<StreamSource<R>> {
87 let len = inner.seek(SeekFrom::End(0))?;
88 Ok(StreamSource { inner, len })
89 }
90}
91
92impl<R: Read + Seek> Source for StreamSource<R> {
93 fn size(&self) -> u64 {
94 self.len
95 }
96
97 fn slice<'a>(
98 &'a mut self,
99 offset: u64,
100 len: usize,
101 scratch: &'a mut Vec<u8>,
102 ) -> Result<&'a [u8]> {
103 check_range(offset, len, self.len)?;
104 self.inner.seek(SeekFrom::Start(offset))?;
105 // Resize keeps `scratch`'s capacity across calls, so a reused buffer
106 // reallocates only when a larger unit appears.
107 scratch.resize(len, 0);
108 self.inner.read_exact(scratch.as_mut_slice())?;
109 Ok(&scratch[..len])
110 }
111
112 fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>> {
113 check_range(offset, len, self.len)?;
114 self.inner.seek(SeekFrom::Start(offset))?;
115 let mut buf = vec![0u8; len];
116 self.inner.read_exact(&mut buf)?;
117 Ok(buf)
118 }
119}
120
/// An in-memory byte source: the whole file already resident as a slice (e.g. an
/// mmap, or bytes read up front). Fetches return a zero-copy borrow, so a decode
/// reads straight from these bytes with no staging copy.
pub struct SliceSource<'a> {
    /// The complete file image this source serves ranges from.
    bytes: &'a [u8],
}

/// Hand-written rather than derived: a derived `Debug` would format every byte
/// of the borrowed file image, so `{:?}` output would grow with the file size.
/// Only the length is reported.
impl std::fmt::Debug for SliceSource<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SliceSource")
            .field("len", &self.bytes.len())
            .finish()
    }
}

impl<'a> SliceSource<'a> {
    /// Wrap an already-resident file image. The bytes are borrowed, not copied.
    pub(crate) fn new(bytes: &'a [u8]) -> Self {
        Self { bytes }
    }
}
134
135impl Source for SliceSource<'_> {
136 fn size(&self) -> u64 {
137 self.bytes.len() as u64
138 }
139
140 fn slice<'a>(
141 &'a mut self,
142 offset: u64,
143 len: usize,
144 _scratch: &'a mut Vec<u8>,
145 ) -> Result<&'a [u8]> {
146 mem_slice(self.bytes, offset, len)
147 }
148
149 fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>> {
150 mem_owned(self.bytes, offset, len)
151 }
152}
153
/// Source backed by a memory-mapped file. The kernel pages the file in on
/// access, and data units decode straight from the mapping (no staging copy, no
/// read syscalls). The [`memmap2::Mmap`] is owned here, which keeps the mapping
/// alive for the reader's lifetime.
#[cfg(feature = "mmap")]
#[derive(Debug)]
pub struct MmapSource {
    /// Read-only mapping of the whole file.
    map: memmap2::Mmap,
}

#[cfg(feature = "mmap")]
impl MmapSource {
    /// Open the file at `path` and map it read-only.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or the mapping cannot be created.
    pub(crate) fn open(path: &std::path::Path) -> Result<Self> {
        let file = std::fs::File::open(path)?;
        // SAFETY: standard mmap contract. The mapping is read-only and owned by
        // this value, and no mutable view of it is ever handed out. The one
        // inherent caveat is that another process truncating or rewriting the
        // file can change the bytes underneath; opting into `mmap` accepts
        // that, exactly as in cfitsio/astropy.
        let map = unsafe { memmap2::Mmap::map(&file) }?;
        Ok(Self { map })
    }
}
175
#[cfg(feature = "mmap")]
impl Source for MmapSource {
    /// Length of the mapping in bytes.
    fn size(&self) -> u64 {
        let mapped = self.map.len();
        mapped as u64
    }

    /// Zero-copy window into the mapping; the scratch buffer is never touched.
    fn slice<'buf>(
        &'buf mut self,
        offset: u64,
        len: usize,
        _scratch: &'buf mut Vec<u8>,
    ) -> Result<&'buf [u8]> {
        let mapped: &[u8] = &self.map;
        mem_slice(mapped, offset, len)
    }

    /// Owned copy of the requested range of the mapping.
    fn read_owned(&mut self, offset: u64, len: usize) -> Result<Vec<u8>> {
        let mapped: &[u8] = &self.map;
        mem_owned(mapped, offset, len)
    }
}