Skip to main content

fits_well/reader/
mod.rs

1use std::io::Read;
2use std::io::Seek;
3use std::ops::Range;
4
5use crate::ascii::AsciiTable;
6use crate::bitpix::Bitpix;
7use crate::block::BLOCK_SIZE;
8use crate::block::CARD_SIZE;
9use crate::block::padded_len;
10use crate::checksum;
11use crate::data::ImageView;
12use crate::data::RawImage;
13use crate::data::Scaling;
14use crate::data::shape_product;
15use crate::data::swap_into_words;
16use crate::data::view_words;
17use crate::error::FitsError;
18use crate::error::Result;
19use crate::groups::RandomGroups;
20use crate::hdu::HduKind;
21use crate::hdu::data_extent;
22use crate::header::Header;
23use crate::table::BinTable;
24
25pub(crate) mod source;
26
27use source::SliceSource;
28use source::Source;
29use source::StreamSource;
30
31#[cfg(feature = "compression")]
32use crate::compress::{decompress_image, uncompress_table};
33#[cfg(feature = "compression")]
34use crate::data::Image;
35#[cfg(feature = "compression")]
36use crate::data::copy_samples_into_words;
37
38/// One Header/Data Unit located by the reader.
39///
40/// The data unit itself is read lazily via [`FitsReader::read_data_raw`]; this
41/// record only carries the parsed header, the inferred [`HduKind`], and the
42/// data unit's byte range within the source.
43#[derive(Debug)]
44pub struct Hdu {
45    pub header: Header,
46    pub kind: HduKind,
47    /// The raw, block-padded header-unit bytes as read — retained for checksum
48    /// verification (the exact bytes matter).
49    pub(crate) header_bytes: Vec<u8>,
50    /// Byte offset of the data unit from the start of the source.
51    pub(crate) data_offset: u64,
52    /// Unpadded data length (`Nbits / 8`) — where the meaningful data ends within
53    /// the padded unit. The on-disk length is `padded_len(data_bytes)`.
54    pub(crate) data_bytes: u64,
55}
56
57impl Hdu {
58    /// Validate that this HDU is a readable plain image array — a `Primary`/`Image`
59    /// kind with no group structure — the shared precondition of
60    /// [`FitsReader::read_image`] and [`FitsReader::read_image_view`].
61    fn ensure_plain_image(&self) -> Result<()> {
62        if !matches!(self.kind, HduKind::Primary | HduKind::Image) {
63            return Err(FitsError::NotAnImage);
64        }
65        // §4.3: a plain image array has no group structure. A non-conforming
66        // `PCOUNT`/`GCOUNT` would make `data_extent` size extra bytes, so reject it
67        // up front (on untrusted input) rather than expose mismatched samples.
68        if self.header.get_integer("PCOUNT").unwrap_or(0) != 0
69            || self.header.get_integer("GCOUNT").unwrap_or(1) != 1
70        {
71            return Err(FitsError::ImageHasGroups);
72        }
73        Ok(())
74    }
75}
76
77/// A data unit read from the source: the full block-padded bytes plus the range
78/// within them holding the actual data. The bytes after `data_range` are FITS
79/// block fill, not part of the data array.
80#[derive(Debug, Clone)]
81pub struct DataUnit {
82    /// The on-disk data unit, padded to the 2880-byte block grid.
83    pub bytes: Vec<u8>,
84    /// The sub-range of `bytes` that is meaningful data (`0..Nbits/8`).
85    pub data_range: Range<usize>,
86}
87
88impl DataUnit {
89    /// The meaningful data with the trailing block fill sliced off — what a
90    /// decoder should consume.
91    pub fn data(&self) -> &[u8] {
92        &self.bytes[self.data_range.clone()]
93    }
94}
95
96/// A FITS file opened over a seekable byte source. Opening scans HDU boundaries
97/// from headers alone (no data is read); data units are fetched on demand.
98#[derive(Debug)]
99pub struct FitsReader<S> {
100    source: S,
101    /// The scanned HDU records; exposed read-only via [`FitsReader::hdus`].
102    pub(crate) hdus: Vec<Hdu>,
103    /// Reused staging buffer for the seeking-source reads: a [`StreamSource`] copies
104    /// each data unit here before decoding (an in-memory source borrows instead, so
105    /// this stays empty). Grows once to the largest unit touched, then holds.
106    scratch: Vec<u8>,
107}
108
109/// A [`FitsReader`] over a seeking byte source (`Read + Seek`, e.g. a `File`) — the
110/// type [`FitsReader::open`] returns. A friendlier name for `FitsReader<StreamSource<R>>`.
111pub type StreamReader<R> = FitsReader<StreamSource<R>>;
112
113/// A [`FitsReader`] over an in-memory byte slice — the type [`FitsReader::from_bytes`]
114/// returns. The lifetime is that of the borrowed bytes.
115pub type SliceReader<'a> = FitsReader<SliceSource<'a>>;
116
117/// A [`FitsReader`] over a memory-mapped file — the type [`FitsReader::open_mmap`]
118/// returns. Requires the `mmap` feature.
119#[cfg(feature = "mmap")]
120pub type MmapReader = FitsReader<source::MmapSource>;
121
122impl<R: Read + Seek> FitsReader<StreamSource<R>> {
123    /// Open a seekable byte source (file, cursor). Data units are copied into the
124    /// reader's scratch on demand; for an in-memory file prefer
125    /// [`FitsReader::from_bytes`], which decodes straight from the bytes.
126    pub fn open(source: R) -> Result<StreamReader<R>> {
127        FitsReader::from_source(StreamSource::new(source)?)
128    }
129}
130
131impl<'a> FitsReader<SliceSource<'a>> {
132    /// Open an in-memory FITS file — the whole thing as a byte slice (e.g. an mmap,
133    /// or bytes already in RAM). Data units decode straight from the borrowed bytes
134    /// with no staging copy, and no scratch allocation.
135    pub fn from_bytes(bytes: &'a [u8]) -> Result<SliceReader<'a>> {
136        FitsReader::from_source(SliceSource::new(bytes))
137    }
138}
139
140#[cfg(feature = "mmap")]
141impl FitsReader<source::MmapSource> {
142    /// Memory-map a FITS file and read it zero-copy: data units decode straight from
143    /// the mapped pages (no staging copy, no read syscalls). Best for large files
144    /// and random HDU access. Requires the `mmap` feature.
145    pub fn open_mmap(path: impl AsRef<std::path::Path>) -> Result<MmapReader> {
146        FitsReader::from_source(source::MmapSource::open(path.as_ref())?)
147    }
148}
149
150impl<S: Source> FitsReader<S> {
151    /// Scan the whole HDU sequence, parsing every header and recording the byte
152    /// range of each data unit — without reading any data.
153    fn from_source(mut source: S) -> Result<FitsReader<S>> {
154        let mut scratch = Vec::new();
155        let mut hdus = Vec::new();
156        let mut offset = 0u64;
157        loop {
158            match scan_header_unit(&mut source, &mut offset, &mut scratch)? {
159                NextHeader::Found(header_bytes) => {
160                    let header = Header::parse(&header_bytes)?;
161                    let kind = HduKind::classify(&header);
162                    let data_offset = offset;
163                    let extent = data_extent(&header)?;
164                    let next = data_offset
165                        .checked_add(extent.padded_bytes)
166                        .ok_or(FitsError::DataUnitOverflow)?;
167                    hdus.push(Hdu {
168                        header,
169                        kind,
170                        header_bytes,
171                        data_offset,
172                        data_bytes: extent.data_bytes,
173                    });
174                    // Skip past the data unit to the next header. Clamp at the source
175                    // end so a declared unit larger than the file just ends the scan
176                    // (the HDU is still recorded; a later read bounds-checks it).
177                    offset = next.min(source.size());
178                }
179                NextHeader::End => break,
180                // §3.5/§3.6: special records and a trailing partial / fill block may
181                // follow the last HDU; a reader disregards them. But the same shape
182                // *before* any valid HDU means there is no conforming primary.
183                NextHeader::Trailing if hdus.is_empty() => return Err(FitsError::UnexpectedEof),
184                NextHeader::Trailing => break,
185            }
186        }
187        Ok(FitsReader {
188            source,
189            hdus,
190            scratch,
191        })
192    }
193
194    /// The HDU at `index`, or [`FitsError::HduIndexOutOfBounds`] — the checked form
195    /// the `read_*` methods bound-check through.
196    fn checked_hdu(&self, index: usize) -> Result<&Hdu> {
197        self.hdus.get(index).ok_or(FitsError::HduIndexOutOfBounds {
198            index,
199            len: self.hdus.len(),
200        })
201    }
202
203    /// The scanned HDU records, read-only and in file order — each carrying its
204    /// parsed [`Header`] and [`HduKind`]. Index, iterate, or `.len()` the slice; pick
205    /// an index for a `read_*` method (or use [`FitsReader::image_indices`] /
206    /// [`FitsReader::hdu_index`] to find one).
207    pub fn hdus(&self) -> &[Hdu] {
208        &self.hdus
209    }
210
211    /// Index of the extension named `name` by its `EXTNAME` keyword (compared
212    /// case-insensitively, as `EXTNAME` is conventionally matched), or `None`. When
213    /// `version` is `Some`, also require a matching `EXTVER` (which defaults to `1`
214    /// where the card is absent, §4.4.1) — the way duplicate extensions like
215    /// `('SCI', 1)` and `('SCI', 2)` are told apart. The primary array has no
216    /// `EXTNAME`. Pair the returned index with a `read_*` method.
217    pub fn hdu_index(&self, name: &str, version: Option<i64>) -> Option<usize> {
218        self.hdus.iter().position(|h| {
219            h.header
220                .get_text("EXTNAME")
221                .is_some_and(|n| n.eq_ignore_ascii_case(name))
222                && version.is_none_or(|v| h.header.get_integer("EXTVER").unwrap_or(1) == v)
223        })
224    }
225
226    /// The indices of every HDU [`FitsReader::read_image`] can read as an image: image
227    /// extensions, tiled-compressed images, and a non-empty primary array (an empty
228    /// `NAXIS = 0` primary is a container, not an image, and is skipped). A FITS file
229    /// may hold any number of images — pick an `index` from this list to pass to
230    /// [`FitsReader::read_image`] without inspecting [`HduKind`] yourself.
231    pub fn image_indices(&self) -> Vec<usize> {
232        self.hdus
233            .iter()
234            .enumerate()
235            .filter(|(_, h)| match h.kind {
236                HduKind::Image | HduKind::CompressedImage => true,
237                HduKind::Primary => h.header.naxis().is_ok_and(|n| n > 0),
238                _ => false,
239            })
240            .map(|(i, _)| i)
241            .collect()
242    }
243
244    /// Read the raw, still-encoded (big-endian, unscaled) data unit into a fresh,
245    /// caller-owned buffer. The returned [`DataUnit`] carries the full block-padded
246    /// bytes plus the range of actual data within them, so a decoder consumes
247    /// [`DataUnit::data`] and the block fill is never mistaken for samples.
248    ///
249    /// This is the owned form, backing the table readers (which keep the bytes as
250    /// the parsed table's storage). Image and random-groups reads instead stage
251    /// through the reader's reused internal scratch — see [`FitsReader::read_image`].
252    pub fn read_data_raw(&mut self, index: usize) -> Result<DataUnit> {
253        let hdu = self.checked_hdu(index)?;
254        let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
255        let bytes = self
256            .source
257            .read_owned(data_offset, padded_len(data_bytes) as usize)?;
258        Ok(DataUnit {
259            bytes,
260            data_range: 0..data_bytes as usize,
261        })
262    }
263
264    /// Decompress a tiled-compressed (`ZIMAGE`) HDU into its owned [`Image`] — the
265    /// shared step behind [`FitsReader::read_image`] and
266    /// [`FitsReader::read_image_view`], which then package the result differently.
267    #[cfg(feature = "compression")]
268    fn decompress_at(&mut self, index: usize) -> Result<Image> {
269        let table = self.read_table(index)?;
270        decompress_image(&self.hdus[index].header, &table)
271    }
272
273    /// Read an HDU's image as a [`RawImage`], transparently handling **both** plain
274    /// and tiled-compressed (`ZIMAGE`) images — the caller doesn't need to know which.
275    /// Errors with [`FitsError::NotAnImage`] for tables, random groups, and unmodelled
276    /// extensions.
277    ///
278    /// A plain image is **zero-copy**: its big-endian bytes are viewed in place over
279    /// the source (or the reader's reused scratch for a seeking source), decoded only
280    /// when you ask. A compressed image is decompressed into an owned buffer (with the
281    /// `compression` feature; without it a `ZIMAGE` HDU reads as a plain `BINTABLE`, so
282    /// this returns [`FitsError::NotAnImage`]). Either way, reach for the samples via
283    /// [`RawImage::u8`] (zero-copy `BITPIX = 8`), [`RawImage::decode`] (host-endian),
284    /// or [`RawImage::physical`] (scaled). The result borrows the reader, so handle
285    /// one image before reading the next.
286    pub fn read_image(&mut self, index: usize) -> Result<RawImage<'_>> {
287        // §10.1: a tiled-compressed image is classified [`HduKind::CompressedImage`]
288        // (a `ZIMAGE` BINTABLE). Route it through the decompressor so callers see one
289        // image API regardless of storage.
290        #[cfg(feature = "compression")]
291        if self.checked_hdu(index)?.kind == HduKind::CompressedImage {
292            let img = self.decompress_at(index)?;
293            return Ok(RawImage::decoded(img.samples, img.shape, img.scaling));
294        }
295
296        let hdu = self.checked_hdu(index)?;
297        hdu.ensure_plain_image()?;
298        let bitpix = hdu.header.bitpix()?;
299        let shape = hdu.header.axes()?;
300        let scaling = Scaling::from_header(&hdu.header);
301        let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
302        let unit = self.source.slice(
303            data_offset,
304            padded_len(data_bytes) as usize,
305            &mut self.scratch,
306        )?;
307        let bytes = &unit[..data_bytes as usize];
308
309        // With PCOUNT=0/GCOUNT=1 (checked above), `data_extent` sized the unit as
310        // `elem · Π(axes)`, so the borrowed data is exactly `shape_product` elements
311        // wide. This is an invariant between `data_extent` and the shape, not a
312        // runtime failure mode — assert it rather than return an error that can't occur.
313        debug_assert_eq!(
314            bytes.len(),
315            shape_product(&shape) * bitpix.elem_size(),
316            "image data length must match the axis product"
317        );
318        Ok(RawImage::raw(shape, bitpix, scaling, bytes))
319    }
320
321    /// Read an image as a borrowed, host-endian [`ImageView`], byte-swapping into the
322    /// caller-owned `scratch` — the fast, low-copy path for a loop that processes each
323    /// image and moves on. Where [`read_image`](FitsReader::read_image)`.decode()`
324    /// allocates a fresh owned buffer per call (page-fault-bound — profiling found
325    /// that dominates a plain typed read), this reuses `scratch`, so a hot loop pays
326    /// the output allocation once and reuses it across reads — even across differing
327    /// `BITPIX`. The caller owns `scratch`, so the reader retains nothing image-sized;
328    /// pass the same `Vec` each call and drop it when the loop ends.
329    ///
330    /// `scratch` is `Vec<u64>` so the swapped samples stay 8-byte aligned for the
331    /// typed views. A `BITPIX = 8` image needs no swap and the view borrows the source
332    /// bytes directly (zero-copy, `scratch` untouched); a compressed image is
333    /// decompressed and copied into `scratch`. The view borrows the reader and
334    /// `scratch`, so handle one image before reading the next. For samples you need to
335    /// keep, use [`RawImage::decode`].
336    pub fn read_image_view<'a>(
337        &'a mut self,
338        index: usize,
339        scratch: &'a mut Vec<u64>,
340    ) -> Result<ImageView<'a>> {
341        // §10.1: a compressed image has no on-disk byte form to borrow — decompress
342        // and copy the host-endian pixels into the caller's scratch, then view that.
343        #[cfg(feature = "compression")]
344        if self.checked_hdu(index)?.kind == HduKind::CompressedImage {
345            let img = self.decompress_at(index)?;
346            let bitpix = img.samples.bitpix();
347            let nbytes = copy_samples_into_words(&img.samples, scratch);
348            return Ok(view_words(scratch, bitpix, nbytes));
349        }
350
351        let hdu = self.checked_hdu(index)?;
352        hdu.ensure_plain_image()?;
353        let bitpix = hdu.header.bitpix()?;
354        let data_bytes = hdu.data_bytes as usize;
355        let padded = padded_len(hdu.data_bytes) as usize;
356        let data_offset = hdu.data_offset;
357        // `hdu` (the self.hdus borrow) is unused past here, so the source/scratch
358        // borrows below don't conflict — same staging as `read_image`.
359        let unit = self.source.slice(data_offset, padded, &mut self.scratch)?;
360        let be = &unit[..data_bytes];
361        if bitpix == Bitpix::U8 {
362            // No byte-swap: the on-disk bytes already are the host-endian samples, so
363            // borrow them straight (zero-copy) — `scratch` stays untouched.
364            return Ok(ImageView::U8(be));
365        }
366        swap_into_words(be, bitpix, scratch);
367        Ok(view_words(scratch, bitpix, data_bytes))
368    }
369
370    /// Read a `BINTABLE` extension and parse its column structure. Decode
371    /// individual columns lazily with [`BinTable::column_by_idx`]. Errors with
372    /// [`FitsError::NotABinTable`] for any other HDU kind.
373    pub fn read_table(&mut self, index: usize) -> Result<BinTable> {
374        let unit = self.read_data_raw(index)?; // also bounds-checks the index
375        let hdu = &self.hdus[index];
376        // Compressed images/tables are structurally BINTABLEs; the compression layer
377        // reads their raw table form through here, so accept those kinds too.
378        if !matches!(
379            hdu.kind,
380            HduKind::BinTable | HduKind::CompressedImage | HduKind::CompressedTable
381        ) {
382            return Err(FitsError::NotABinTable);
383        }
384        BinTable::from_data(&hdu.header, unit.bytes)
385    }
386
387    /// Read an `TABLE` (ASCII table) extension and parse its column structure.
388    /// Errors with [`FitsError::NotAnAsciiTable`] for any other HDU.
389    pub fn read_ascii_table(&mut self, index: usize) -> Result<AsciiTable> {
390        let unit = self.read_data_raw(index)?;
391        let hdu = &self.hdus[index];
392        if hdu.kind != HduKind::AsciiTable {
393            return Err(FitsError::NotAnAsciiTable);
394        }
395        AsciiTable::from_data(&hdu.header, unit.bytes)
396    }
397
398    /// Read and decode a random-groups primary array (§6). Errors with
399    /// [`FitsError::NotRandomGroups`] for any other HDU.
400    pub fn read_groups(&mut self, index: usize) -> Result<RandomGroups> {
401        let hdu = self.checked_hdu(index)?;
402        if hdu.kind != HduKind::RandomGroups {
403            return Err(FitsError::NotRandomGroups);
404        }
405        let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
406        let unit = self.source.slice(
407            data_offset,
408            padded_len(data_bytes) as usize,
409            &mut self.scratch,
410        )?;
411        RandomGroups::from_data(&self.hdus[index].header, &unit[..data_bytes as usize])
412    }
413
414    /// Read a tiled-compressed table (§10.3) — a `BINTABLE` with `ZTABLE = T` —
415    /// and uncompress it into the original [`BinTable`]. Fixed-width columns only
416    /// (`GZIP_1`/`GZIP_2`/`RICE_1`). Requires the `compression` feature.
417    #[cfg(feature = "compression")]
418    pub fn read_compressed_table(&mut self, index: usize) -> Result<BinTable> {
419        let table = self.read_table(index)?;
420        let header = self.hdus[index].header.clone();
421        let parts = uncompress_table(&header, &table)?;
422        BinTable::from_data(&parts.header, parts.data)
423    }
424
425    /// Verify the `DATASUM`/`CHECKSUM` integrity keywords of an HDU (§J). Each
426    /// field of the report is `None` if that keyword is absent, else `Some(true)`
427    /// when it matches the recomputed checksum.
428    pub fn verify_checksum(&mut self, index: usize) -> Result<ChecksumReport> {
429        let hdu = self.checked_hdu(index)?;
430        let (data_offset, data_bytes) = (hdu.data_offset, hdu.data_bytes);
431        // The block-padded data unit (length = the padded size — the checksum covers
432        // the block fill too).
433        let unit = self.source.slice(
434            data_offset,
435            padded_len(data_bytes) as usize,
436            &mut self.scratch,
437        )?;
438        let data_sum = checksum::accumulate(unit, 0);
439        let hdu = &self.hdus[index];
440        // Whole HDU = header (incl. the stored CHECKSUM card) then data.
441        let hdu_sum = checksum::accumulate(unit, checksum::accumulate(&hdu.header_bytes, 0));
442        Ok(ChecksumReport {
443            datasum_ok: hdu
444                .header
445                .get_text("DATASUM")
446                .map(|s| s.trim().parse::<u32>().ok() == Some(data_sum)),
447            checksum_ok: hdu
448                .header
449                .get_text("CHECKSUM")
450                .map(|_| hdu_sum == 0xFFFF_FFFF),
451        })
452    }
453}
454
455/// Result of [`FitsReader::verify_checksum`]. A field is `None` when its keyword
456/// is absent.
457#[derive(Debug, Clone, Copy, PartialEq, Eq)]
458pub struct ChecksumReport {
459    pub datasum_ok: Option<bool>,
460    pub checksum_ok: Option<bool>,
461}
462
463/// Outcome of scanning for the next header unit.
464enum NextHeader {
465    /// A complete header unit terminated by an `END` card.
466    Found(Vec<u8>),
467    /// Clean end of stream at a block boundary — no more HDUs.
468    End,
469    /// Trailing bytes carrying no `END`: special records (§3.5) or a trailing
470    /// partial / fill block (§3.6). Disregarded after the last HDU.
471    Trailing,
472}
473
474/// Read one header unit at `*offset`, advancing `offset` past each consumed block,
475/// until a block carries the `END` record. Blocks come through [`Source::slice`], so
476/// the same scan drives both seeking and in-memory sources.
477fn scan_header_unit<S: Source>(
478    source: &mut S,
479    offset: &mut u64,
480    scratch: &mut Vec<u8>,
481) -> Result<NextHeader> {
482    let size = source.size();
483    // Most headers are a single block; reserve it so the common case parses with one
484    // allocation and only multi-block headers grow.
485    let mut bytes = Vec::with_capacity(BLOCK_SIZE);
486    loop {
487        match size - *offset {
488            // Clean end at a block boundary, or trailing blocks with no `END`.
489            0 if bytes.is_empty() => return Ok(NextHeader::End),
490            0 => return Ok(NextHeader::Trailing),
491            // A sub-block remnant before EOF: trailing content (§3.6).
492            avail if avail < BLOCK_SIZE as u64 => return Ok(NextHeader::Trailing),
493            _ => {}
494        }
495        let block = source.slice(*offset, BLOCK_SIZE, scratch)?;
496        *offset += BLOCK_SIZE as u64;
497        bytes.extend_from_slice(block);
498        if block_has_end(block) {
499            return Ok(NextHeader::Found(bytes));
500        }
501    }
502}
503
504fn block_has_end(block: &[u8]) -> bool {
505    block
506        .chunks_exact(CARD_SIZE)
507        .any(|card| &card[..3] == b"END" && card[3..].iter().all(|&b| b == b' '))
508}
509
510#[cfg(test)]
511mod tests;