fits_well/reader/mod.rs
1use std::io::Read;
2use std::io::Seek;
3use std::ops::Range;
4
5use crate::ascii::AsciiTable;
6use crate::bitpix::Bitpix;
7use crate::block::BLOCK_SIZE;
8use crate::block::CARD_SIZE;
9use crate::block::padded_len;
10use crate::checksum;
11use crate::data::ImageView;
12use crate::data::RawImage;
13use crate::data::Scaling;
14use crate::data::shape_product;
15use crate::data::swap_into_words;
16use crate::data::view_words;
17use crate::error::FitsError;
18use crate::error::Result;
19use crate::groups::RandomGroups;
20use crate::hdu::HduKind;
21use crate::hdu::data_extent;
22use crate::header::Header;
23use crate::table::BinTable;
24
25pub(crate) mod source;
26
27use source::SliceSource;
28use source::Source;
29use source::StreamSource;
30
31#[cfg(feature = "compression")]
32use crate::compress::{decompress_image, uncompress_table};
33#[cfg(feature = "compression")]
34use crate::data::Image;
35#[cfg(feature = "compression")]
36use crate::data::copy_samples_into_words;
37
38/// One Header/Data Unit located by the reader.
39///
40/// The data unit itself is read lazily via [`FitsReader::read_data_raw`]; this
41/// record only carries the parsed header, the inferred [`HduKind`], and the
42/// data unit's byte range within the source.
43#[derive(Debug)]
44pub struct Hdu {
45 pub header: Header,
46 pub kind: HduKind,
47 /// The raw, block-padded header-unit bytes as read — retained for checksum
48 /// verification (the exact bytes matter).
49 pub(crate) header_bytes: Vec<u8>,
50 /// Byte offset of the data unit from the start of the source.
51 pub(crate) data_offset: u64,
52 /// Unpadded data length (`Nbits / 8`) — where the meaningful data ends within
53 /// the padded unit. The on-disk length is `padded_len(data_bytes)`.
54 pub(crate) data_bytes: u64,
55}
56
57impl Hdu {
58 /// Validate that this HDU is a readable plain image array — a `Primary`/`Image`
59 /// kind with no group structure — the shared precondition of
60 /// [`FitsReader::read_image`] and [`FitsReader::read_image_view`].
61 fn ensure_plain_image(&self) -> Result<()> {
62 if !matches!(self.kind, HduKind::Primary | HduKind::Image) {
63 return Err(FitsError::NotAnImage);
64 }
65 // §4.3: a plain image array has no group structure. A non-conforming
66 // `PCOUNT`/`GCOUNT` would make `data_extent` size extra bytes, so reject it
67 // up front (on untrusted input) rather than expose mismatched samples.
68 if self.header.get_integer("PCOUNT").unwrap_or(0) != 0
69 || self.header.get_integer("GCOUNT").unwrap_or(1) != 1
70 {
71 return Err(FitsError::ImageHasGroups);
72 }
73 Ok(())
74 }
75}
76
77/// A data unit read from the source: the full block-padded bytes plus the range
78/// within them holding the actual data. The bytes after `data_range` are FITS
79/// block fill, not part of the data array.
80#[derive(Debug, Clone)]
81pub struct DataUnit {
82 /// The on-disk data unit, padded to the 2880-byte block grid.
83 pub bytes: Vec<u8>,
84 /// The sub-range of `bytes` that is meaningful data (`0..Nbits/8`).
85 pub data_range: Range<usize>,
86}
87
88impl DataUnit {
89 /// The meaningful data with the trailing block fill sliced off — what a
90 /// decoder should consume.
91 pub fn data(&self) -> &[u8] {
92 &self.bytes[self.data_range.clone()]
93 }
94}
95
96/// A FITS file opened over a seekable byte source. Opening scans HDU boundaries
97/// from headers alone (no data is read); data units are fetched on demand.
98#[derive(Debug)]
99pub struct FitsReader<S> {
100 source: S,
101 /// The scanned HDU records; exposed read-only via [`FitsReader::hdus`].
102 pub(crate) hdus: Vec<Hdu>,
103 /// Reused staging buffer for the seeking-source reads: a [`StreamSource`] copies
104 /// each data unit here before decoding (an in-memory source borrows instead, so
105 /// this stays empty). Grows once to the largest unit touched, then holds.
106 scratch: Vec<u8>,
107}
108
109/// A [`FitsReader`] over a seeking byte source (`Read + Seek`, e.g. a `File`) — the
110/// type [`FitsReader::open`] returns. A friendlier name for `FitsReader<StreamSource<R>>`.
111pub type StreamReader<R> = FitsReader<StreamSource<R>>;
112
113/// A [`FitsReader`] over an in-memory byte slice — the type [`FitsReader::from_bytes`]
114/// returns. The lifetime is that of the borrowed bytes.
115pub type SliceReader<'a> = FitsReader<SliceSource<'a>>;
116
117/// A [`FitsReader`] over a memory-mapped file — the type [`FitsReader::open_mmap`]
118/// returns. Requires the `mmap` feature.
119#[cfg(feature = "mmap")]
120pub type MmapReader = FitsReader<source::MmapSource>;
121
122impl<R: Read + Seek> FitsReader<StreamSource<R>> {
123 /// Open a seekable byte source (file, cursor). Data units are copied into the
124 /// reader's scratch on demand; for an in-memory file prefer
125 /// [`FitsReader::from_bytes`], which decodes straight from the bytes.
126 pub fn open(source: R) -> Result<StreamReader<R>> {
127 FitsReader::from_source(StreamSource::new(source)?)
128 }
129}
130
131impl<'a> FitsReader<SliceSource<'a>> {
132 /// Open an in-memory FITS file — the whole thing as a byte slice (e.g. an mmap,
133 /// or bytes already in RAM). Data units decode straight from the borrowed bytes
134 /// with no staging copy, and no scratch allocation.
135 pub fn from_bytes(bytes: &'a [u8]) -> Result<SliceReader<'a>> {
136 FitsReader::from_source(SliceSource::new(bytes))
137 }
138}
139
140#[cfg(feature = "mmap")]
141impl FitsReader<source::MmapSource> {
142 /// Memory-map a FITS file and read it zero-copy: data units decode straight from
143 /// the mapped pages (no staging copy, no read syscalls). Best for large files
144 /// and random HDU access. Requires the `mmap` feature.
145 pub fn open_mmap(path: impl AsRef<std::path::Path>) -> Result<MmapReader> {
146 FitsReader::from_source(source::MmapSource::open(path.as_ref())?)
147 }
148}
149
150impl<S: Source> FitsReader<S> {
151 /// Scan the whole HDU sequence, parsing every header and recording the byte
152 /// range of each data unit — without reading any data.
153 fn from_source(mut source: S) -> Result<FitsReader<S>> {
154 let mut scratch = Vec::new();
155 let mut hdus = Vec::new();
156 let mut offset = 0u64;
157 loop {
158 match scan_header_unit(&mut source, &mut offset, &mut scratch)? {
159 NextHeader::Found(header_bytes) => {
160 let header = Header::parse(&header_bytes)?;
161 let kind = HduKind::classify(&header);
162 let data_offset = offset;
163 let extent = data_extent(&header)?;
164 let next = data_offset
165 .checked_add(extent.padded_bytes)
166 .ok_or(FitsError::DataUnitOverflow)?;
167 hdus.push(Hdu {
168 header,
169 kind,
170 header_bytes,
171 data_offset,
172 data_bytes: extent.data_bytes,
173 });
174 // Skip past the data unit to the next header. Clamp at the source
175 // end so a declared unit larger than the file just ends the scan
176 // (the HDU is still recorded; a later read bounds-checks it).
177 offset = next.min(source.size());
178 }
179 NextHeader::End => break,
180 // §3.5/§3.6: special records and a trailing partial / fill block may
181 // follow the last HDU; a reader disregards them. But the same shape
182 // *before* any valid HDU means there is no conforming primary.
183 NextHeader::Trailing if hdus.is_empty() => return Err(FitsError::UnexpectedEof),
184 NextHeader::Trailing => break,
185 }
186 }
187 Ok(FitsReader {
188 source,
189 hdus,
190 scratch,
191 })
192 }
193
194 /// The HDU at `index`, or [`FitsError::HduIndexOutOfBounds`] — the checked form
195 /// the `read_*` methods bound-check through.
196 fn checked_hdu(&self, index: usize) -> Result<&Hdu> {
197 self.hdus.get(index).ok_or(FitsError::HduIndexOutOfBounds {
198 index,
199 len: self.hdus.len(),
200 })
201 }
202
203 /// The scanned HDU records, read-only and in file order — each carrying its
204 /// parsed [`Header`] and [`HduKind`]. Index, iterate, or `.len()` the slice; pick
205 /// an index for a `read_*` method (or use [`FitsReader::image_indices`] /
206 /// [`FitsReader::hdu_index`] to find one).
207 pub fn hdus(&self) -> &[Hdu] {
208 &self.hdus
209 }
210
211 /// Index of the extension named `name` by its `EXTNAME` keyword (compared
212 /// case-insensitively, as `EXTNAME` is conventionally matched), or `None`. When
213 /// `version` is `Some`, also require a matching `EXTVER` (which defaults to `1`
214 /// where the card is absent, §4.4.1) — the way duplicate extensions like
215 /// `('SCI', 1)` and `('SCI', 2)` are told apart. The primary array has no
216 /// `EXTNAME`. Pair the returned index with a `read_*` method.
217 pub fn hdu_index(&self, name: &str, version: Option<i64>) -> Option<usize> {
218 self.hdus.iter().position(|h| {
219 h.header
220 .get_text("EXTNAME")
221 .is_some_and(|n| n.eq_ignore_ascii_case(name))
222 && version.is_none_or(|v| h.header.get_integer("EXTVER").unwrap_or(1) == v)
223 })
224 }
225
226 /// The indices of every HDU [`FitsReader::read_image`] can read as an image: image
227 /// extensions, tiled-compressed images, and a non-empty primary array (an empty
228 /// `NAXIS = 0` primary is a container, not an image, and is skipped). A FITS file
229 /// may hold any number of images — pick an `index` from this list to pass to
230 /// [`FitsReader::read_image`] without inspecting [`HduKind`] yourself.
231 pub fn image_indices(&self) -> Vec<usize> {
232 self.hdus
233 .iter()
234 .enumerate()
235 .filter(|(_, h)| match h.kind {
236 HduKind::Image | HduKind::CompressedImage => true,
237 HduKind::Primary => h.header.naxis().is_ok_and(|n| n > 0),
238 _ => false,
239 })
240 .map(|(i, _)| i)
241 .collect()
242 }
243
244 /// Read the raw, still-encoded (big-endian, unscaled) data unit into a fresh,
245 /// caller-owned buffer. The returned [`DataUnit`] carries the full block-padded
246 /// bytes plus the range of actual data within them, so a decoder consumes
247 /// [`DataUnit::data`] and the block fill is never mistaken for samples.
248 ///
249 /// This is the owned form, backing the table readers (which keep the bytes as
250 /// the parsed table's storage). Image and random-groups reads instead stage
251 /// through the reader's reused internal scratch — see [`FitsReader::read_image`].
252 pub fn read_data_raw(&mut self, index: usize) -> Result<DataUnit> {
253 let hdu = self.checked_hdu(index)?;
254 let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
255 let bytes = self
256 .source
257 .read_owned(data_offset, padded_len(data_bytes) as usize)?;
258 Ok(DataUnit {
259 bytes,
260 data_range: 0..data_bytes as usize,
261 })
262 }
263
264 /// Decompress a tiled-compressed (`ZIMAGE`) HDU into its owned [`Image`] — the
265 /// shared step behind [`FitsReader::read_image`] and
266 /// [`FitsReader::read_image_view`], which then package the result differently.
267 #[cfg(feature = "compression")]
268 fn decompress_at(&mut self, index: usize) -> Result<Image> {
269 let table = self.read_table(index)?;
270 decompress_image(&self.hdus[index].header, &table)
271 }
272
273 /// Read an HDU's image as a [`RawImage`], transparently handling **both** plain
274 /// and tiled-compressed (`ZIMAGE`) images — the caller doesn't need to know which.
275 /// Errors with [`FitsError::NotAnImage`] for tables, random groups, and unmodelled
276 /// extensions.
277 ///
278 /// A plain image is **zero-copy**: its big-endian bytes are viewed in place over
279 /// the source (or the reader's reused scratch for a seeking source), decoded only
280 /// when you ask. A compressed image is decompressed into an owned buffer (with the
281 /// `compression` feature; without it a `ZIMAGE` HDU reads as a plain `BINTABLE`, so
282 /// this returns [`FitsError::NotAnImage`]). Either way, reach for the samples via
283 /// [`RawImage::u8`] (zero-copy `BITPIX = 8`), [`RawImage::decode`] (host-endian),
284 /// or [`RawImage::physical`] (scaled). The result borrows the reader, so handle
285 /// one image before reading the next.
286 pub fn read_image(&mut self, index: usize) -> Result<RawImage<'_>> {
287 // §10.1: a tiled-compressed image is classified [`HduKind::CompressedImage`]
288 // (a `ZIMAGE` BINTABLE). Route it through the decompressor so callers see one
289 // image API regardless of storage.
290 #[cfg(feature = "compression")]
291 if self.checked_hdu(index)?.kind == HduKind::CompressedImage {
292 let img = self.decompress_at(index)?;
293 return Ok(RawImage::decoded(img.samples, img.shape, img.scaling));
294 }
295
296 let hdu = self.checked_hdu(index)?;
297 hdu.ensure_plain_image()?;
298 let bitpix = hdu.header.bitpix()?;
299 let shape = hdu.header.axes()?;
300 let scaling = Scaling::from_header(&hdu.header);
301 let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
302 let unit = self.source.slice(
303 data_offset,
304 padded_len(data_bytes) as usize,
305 &mut self.scratch,
306 )?;
307 let bytes = &unit[..data_bytes as usize];
308
309 // With PCOUNT=0/GCOUNT=1 (checked above), `data_extent` sized the unit as
310 // `elem · Π(axes)`, so the borrowed data is exactly `shape_product` elements
311 // wide. This is an invariant between `data_extent` and the shape, not a
312 // runtime failure mode — assert it rather than return an error that can't occur.
313 debug_assert_eq!(
314 bytes.len(),
315 shape_product(&shape) * bitpix.elem_size(),
316 "image data length must match the axis product"
317 );
318 Ok(RawImage::raw(shape, bitpix, scaling, bytes))
319 }
320
321 /// Read an image as a borrowed, host-endian [`ImageView`], byte-swapping into the
322 /// caller-owned `scratch` — the fast, low-copy path for a loop that processes each
323 /// image and moves on. Where [`read_image`](FitsReader::read_image)`.decode()`
324 /// allocates a fresh owned buffer per call (page-fault-bound — profiling found
325 /// that dominates a plain typed read), this reuses `scratch`, so a hot loop pays
326 /// the output allocation once and reuses it across reads — even across differing
327 /// `BITPIX`. The caller owns `scratch`, so the reader retains nothing image-sized;
328 /// pass the same `Vec` each call and drop it when the loop ends.
329 ///
330 /// `scratch` is `Vec<u64>` so the swapped samples stay 8-byte aligned for the
331 /// typed views. A `BITPIX = 8` image needs no swap and the view borrows the source
332 /// bytes directly (zero-copy, `scratch` untouched); a compressed image is
333 /// decompressed and copied into `scratch`. The view borrows the reader and
334 /// `scratch`, so handle one image before reading the next. For samples you need to
335 /// keep, use [`RawImage::decode`].
336 pub fn read_image_view<'a>(
337 &'a mut self,
338 index: usize,
339 scratch: &'a mut Vec<u64>,
340 ) -> Result<ImageView<'a>> {
341 // §10.1: a compressed image has no on-disk byte form to borrow — decompress
342 // and copy the host-endian pixels into the caller's scratch, then view that.
343 #[cfg(feature = "compression")]
344 if self.checked_hdu(index)?.kind == HduKind::CompressedImage {
345 let img = self.decompress_at(index)?;
346 let bitpix = img.samples.bitpix();
347 let nbytes = copy_samples_into_words(&img.samples, scratch);
348 return Ok(view_words(scratch, bitpix, nbytes));
349 }
350
351 let hdu = self.checked_hdu(index)?;
352 hdu.ensure_plain_image()?;
353 let bitpix = hdu.header.bitpix()?;
354 let data_bytes = hdu.data_bytes as usize;
355 let padded = padded_len(hdu.data_bytes) as usize;
356 let data_offset = hdu.data_offset;
357 // `hdu` (the self.hdus borrow) is unused past here, so the source/scratch
358 // borrows below don't conflict — same staging as `read_image`.
359 let unit = self.source.slice(data_offset, padded, &mut self.scratch)?;
360 let be = &unit[..data_bytes];
361 if bitpix == Bitpix::U8 {
362 // No byte-swap: the on-disk bytes already are the host-endian samples, so
363 // borrow them straight (zero-copy) — `scratch` stays untouched.
364 return Ok(ImageView::U8(be));
365 }
366 swap_into_words(be, bitpix, scratch);
367 Ok(view_words(scratch, bitpix, data_bytes))
368 }
369
370 /// Read a `BINTABLE` extension and parse its column structure. Decode
371 /// individual columns lazily with [`BinTable::column_by_idx`]. Errors with
372 /// [`FitsError::NotABinTable`] for any other HDU kind.
373 pub fn read_table(&mut self, index: usize) -> Result<BinTable> {
374 let unit = self.read_data_raw(index)?; // also bounds-checks the index
375 let hdu = &self.hdus[index];
376 // Compressed images/tables are structurally BINTABLEs; the compression layer
377 // reads their raw table form through here, so accept those kinds too.
378 if !matches!(
379 hdu.kind,
380 HduKind::BinTable | HduKind::CompressedImage | HduKind::CompressedTable
381 ) {
382 return Err(FitsError::NotABinTable);
383 }
384 BinTable::from_data(&hdu.header, unit.bytes)
385 }
386
387 /// Read an `TABLE` (ASCII table) extension and parse its column structure.
388 /// Errors with [`FitsError::NotAnAsciiTable`] for any other HDU.
389 pub fn read_ascii_table(&mut self, index: usize) -> Result<AsciiTable> {
390 let unit = self.read_data_raw(index)?;
391 let hdu = &self.hdus[index];
392 if hdu.kind != HduKind::AsciiTable {
393 return Err(FitsError::NotAnAsciiTable);
394 }
395 AsciiTable::from_data(&hdu.header, unit.bytes)
396 }
397
398 /// Read and decode a random-groups primary array (§6). Errors with
399 /// [`FitsError::NotRandomGroups`] for any other HDU.
400 pub fn read_groups(&mut self, index: usize) -> Result<RandomGroups> {
401 let hdu = self.checked_hdu(index)?;
402 if hdu.kind != HduKind::RandomGroups {
403 return Err(FitsError::NotRandomGroups);
404 }
405 let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
406 let unit = self.source.slice(
407 data_offset,
408 padded_len(data_bytes) as usize,
409 &mut self.scratch,
410 )?;
411 RandomGroups::from_data(&self.hdus[index].header, &unit[..data_bytes as usize])
412 }
413
414 /// Read a tiled-compressed table (§10.3) — a `BINTABLE` with `ZTABLE = T` —
415 /// and uncompress it into the original [`BinTable`]. Fixed-width columns only
416 /// (`GZIP_1`/`GZIP_2`/`RICE_1`). Requires the `compression` feature.
417 #[cfg(feature = "compression")]
418 pub fn read_compressed_table(&mut self, index: usize) -> Result<BinTable> {
419 let table = self.read_table(index)?;
420 let header = self.hdus[index].header.clone();
421 let parts = uncompress_table(&header, &table)?;
422 BinTable::from_data(&parts.header, parts.data)
423 }
424
425 /// Verify the `DATASUM`/`CHECKSUM` integrity keywords of an HDU (§J). Each
426 /// field of the report is `None` if that keyword is absent, else `Some(true)`
427 /// when it matches the recomputed checksum.
428 pub fn verify_checksum(&mut self, index: usize) -> Result<ChecksumReport> {
429 let hdu = self.checked_hdu(index)?;
430 let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
431 // The block-padded data unit (length = the padded size — the checksum covers
432 // the block fill too).
433 let unit = self.source.slice(
434 data_offset,
435 padded_len(data_bytes) as usize,
436 &mut self.scratch,
437 )?;
438 let data_sum = checksum::accumulate(unit, 0);
439 let hdu = &self.hdus[index];
440 // Whole HDU = header (incl. the stored CHECKSUM card) then data.
441 let hdu_sum = checksum::accumulate(unit, checksum::accumulate(&hdu.header_bytes, 0));
442 Ok(ChecksumReport {
443 datasum_ok: hdu
444 .header
445 .get_text("DATASUM")
446 .map(|s| s.trim().parse::<u32>().ok() == Some(data_sum)),
447 checksum_ok: hdu
448 .header
449 .get_text("CHECKSUM")
450 .map(|_| hdu_sum == 0xFFFF_FFFF),
451 })
452 }
453}
454
455/// Result of [`FitsReader::verify_checksum`]. A field is `None` when its keyword
456/// is absent.
457#[derive(Debug, Clone, Copy, PartialEq, Eq)]
458pub struct ChecksumReport {
459 pub datasum_ok: Option<bool>,
460 pub checksum_ok: Option<bool>,
461}
462
463/// Outcome of scanning for the next header unit.
464enum NextHeader {
465 /// A complete header unit terminated by an `END` card.
466 Found(Vec<u8>),
467 /// Clean end of stream at a block boundary — no more HDUs.
468 End,
469 /// Trailing bytes carrying no `END`: special records (§3.5) or a trailing
470 /// partial / fill block (§3.6). Disregarded after the last HDU.
471 Trailing,
472}
473
474/// Read one header unit at `*offset`, advancing `offset` past each consumed block,
475/// until a block carries the `END` record. Blocks come through [`Source::slice`], so
476/// the same scan drives both seeking and in-memory sources.
477fn scan_header_unit<S: Source>(
478 source: &mut S,
479 offset: &mut u64,
480 scratch: &mut Vec<u8>,
481) -> Result<NextHeader> {
482 let size = source.size();
483 // Most headers are a single block; reserve it so the common case parses with one
484 // allocation and only multi-block headers grow.
485 let mut bytes = Vec::with_capacity(BLOCK_SIZE);
486 loop {
487 match size - *offset {
488 // Clean end at a block boundary, or trailing blocks with no `END`.
489 0 if bytes.is_empty() => return Ok(NextHeader::End),
490 0 => return Ok(NextHeader::Trailing),
491 // A sub-block remnant before EOF: trailing content (§3.6).
492 avail if avail < BLOCK_SIZE as u64 => return Ok(NextHeader::Trailing),
493 _ => {}
494 }
495 let block = source.slice(*offset, BLOCK_SIZE, scratch)?;
496 *offset += BLOCK_SIZE as u64;
497 bytes.extend_from_slice(block);
498 if block_has_end(block) {
499 return Ok(NextHeader::Found(bytes));
500 }
501 }
502}
503
504fn block_has_end(block: &[u8]) -> bool {
505 block
506 .chunks_exact(CARD_SIZE)
507 .any(|card| &card[..3] == b"END" && card[3..].iter().all(|&b| b == b' '))
508}
509
510#[cfg(test)]
511mod tests;