Skip to main content

fits_well/table/
mod.rs

1//! Binary-table (`BINTABLE`) reading (§7.3).
2//!
3//! A binary table is `NAXIS2` rows of `NAXIS1` bytes; each of `TFIELDS` columns
4//! occupies a fixed byte range in every row, typed by its `TFORMn` code. This
5//! module parses that structure into [`Column`] descriptors; decoding goes through
6//! a [`ColumnReader`] (from [`BinTable::column_by_idx`] / [`BinTable::column_by_name`]),
7//! whose methods yield typed [`ColumnData`] ([`ColumnReader::raw`]), the
8//! `TSCALn`/`TZEROn` physical plane ([`ColumnReader::physical`]), and `P`/`Q`
9//! variable-length arrays out of the heap ([`ColumnReader::vla`]).
10
11use std::ops::Index;
12
13use bitvec::order::Msb0;
14use bitvec::slice::BitSlice;
15use bitvec::view::BitView;
16use num_complex::Complex;
17
18use crate::data::U16_OFFSET;
19use crate::data::U32_OFFSET;
20use crate::data::U64_OFFSET;
21use crate::data::UnsignedView;
22use crate::endian::decode_be;
23use crate::error::FitsError;
24use crate::error::Result;
25use crate::header::Header;
26use crate::keyword::key;
27
/// The element type of a binary-table column, selected by the type letter of
/// its `TFORMn` code (Table 18).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TformKind {
    /// `L` — logical (one ASCII `T`/`F` byte per element).
    Logical,
    /// `X` — bit array (`repeat` bits packed into `ceil(repeat/8)` bytes).
    Bit,
    /// `B` — unsigned byte.
    Byte,
    /// `I` — 16-bit integer.
    I16,
    /// `J` — 32-bit integer.
    I32,
    /// `K` — 64-bit integer.
    I64,
    /// `A` — character (a `repeat`-length string per row).
    Char,
    /// `E` — single-precision float.
    F32,
    /// `D` — double-precision float.
    F64,
    /// `C` — single-precision complex (real, imaginary).
    ComplexF32,
    /// `M` — double-precision complex.
    ComplexF64,
    /// `P` — 32-bit variable-length-array descriptor (into the heap).
    ArrayDesc32,
    /// `Q` — 64-bit variable-length-array descriptor.
    ArrayDesc64,
}

impl TformKind {
    /// Look up the kind for a `TFORMn` type letter. Only the thirteen upper-case
    /// letters below are recognised; every other byte yields `None`.
    fn from_code(code: u8) -> Option<TformKind> {
        match code {
            b'L' => Some(Self::Logical),
            b'X' => Some(Self::Bit),
            b'B' => Some(Self::Byte),
            b'I' => Some(Self::I16),
            b'J' => Some(Self::I32),
            b'K' => Some(Self::I64),
            b'A' => Some(Self::Char),
            b'E' => Some(Self::F32),
            b'D' => Some(Self::F64),
            b'C' => Some(Self::ComplexF32),
            b'M' => Some(Self::ComplexF64),
            b'P' => Some(Self::ArrayDesc32),
            b'Q' => Some(Self::ArrayDesc64),
            _ => None,
        }
    }

    /// The `TFORMn` letter for this kind (the inverse of the letter lookup above).
    pub fn code(self) -> char {
        let letter = match self {
            Self::Logical => b'L',
            Self::Bit => b'X',
            Self::Byte => b'B',
            Self::I16 => b'I',
            Self::I32 => b'J',
            Self::I64 => b'K',
            Self::Char => b'A',
            Self::F32 => b'E',
            Self::F64 => b'D',
            Self::ComplexF32 => b'C',
            Self::ComplexF64 => b'M',
            Self::ArrayDesc32 => b'P',
            Self::ArrayDesc64 => b'Q',
        };
        char::from(letter)
    }

    /// Bytes per element. For `X` this is the per-*bit* size (1) — use
    /// [`Tform::byte_width`] for a column's true in-row width.
    pub(crate) fn elem_size(self) -> usize {
        match self {
            // Two 8-byte halves: a double-complex pair or a 64-bit descriptor.
            Self::ComplexF64 | Self::ArrayDesc64 => 16,
            Self::I64 | Self::F64 | Self::ComplexF32 | Self::ArrayDesc32 => 8,
            Self::I32 | Self::F32 => 4,
            Self::I16 => 2,
            Self::Logical | Self::Bit | Self::Byte | Self::Char => 1,
        }
    }
}
111
112/// A parsed `TFORMn` value: a repeat count, an element kind, and (for the `P`/`Q`
113/// variable-length-array descriptors) the kind of the array elements in the heap.
114/// The `rTa` form's trailing `(emax)` size hint is not retained.
115#[derive(Debug, Clone, Copy, PartialEq, Eq)]
116pub struct Tform {
117    pub repeat: usize,
118    pub kind: TformKind,
119    /// For `P`/`Q` columns, the element kind of the heap array (the `t` in
120    /// `rPt(emax)`); `None` for fixed-width columns.
121    pub vla_elem: Option<TformKind>,
122}
123
124impl Tform {
125    /// Parse a `TFORMn` value such as `"8A"`, `"3D"`, `"1J"`, `"E"`, or `"1PE(5)"`.
126    pub fn parse(value: &str) -> Result<Tform> {
127        let s = value.trim();
128        let invalid = || FitsError::InvalidTform {
129            tform: value.to_string(),
130        };
131        let pos = s
132            .bytes()
133            .position(|b| b.is_ascii_alphabetic())
134            .ok_or_else(invalid)?;
135        let repeat = if pos == 0 {
136            1
137        } else {
138            s[..pos].parse().map_err(|_| invalid())?
139        };
140        let kind = TformKind::from_code(s.as_bytes()[pos]).ok_or_else(invalid)?;
141        // A P/Q descriptor is followed by its heap element-type letter (`rPt`).
142        let vla_elem = if matches!(kind, TformKind::ArrayDesc32 | TformKind::ArrayDesc64) {
143            let elem = s.as_bytes().get(pos + 1).copied().ok_or_else(invalid)?;
144            // §6.3: a `P`/`Q` descriptor's repeat count is restricted to 0 or 1.
145            if repeat > 1 {
146                return Err(invalid());
147            }
148            Some(TformKind::from_code(elem).ok_or_else(invalid)?)
149        } else {
150            None
151        };
152        Ok(Tform {
153            repeat,
154            kind,
155            vla_elem,
156        })
157    }
158
159    /// The number of bytes this column occupies in every row.
160    pub fn byte_width(self) -> usize {
161        match self.kind {
162            TformKind::Bit => self.repeat.div_ceil(8),
163            // Saturating: an absurd `repeat` from a hostile `TFORMn` saturates to
164            // `usize::MAX` rather than wrapping to a small width that could slip
165            // past the row-width check in `from_data`.
166            _ => self.repeat.saturating_mul(self.kind.elem_size()),
167        }
168    }
169}
170
/// The format letter(s) of a `TDISPn` display format (§7.3.4, Table 20).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TDispKind {
    /// `Aw` character.
    Char,
    /// `Lw` logical.
    Logical,
    /// `Iw[.m]` integer.
    Integer,
    /// `Bw[.m]` binary.
    Binary,
    /// `Ow[.m]` octal.
    Octal,
    /// `Zw[.m]` hexadecimal.
    Hex,
    /// `Fw.d` fixed-point float.
    Float,
    /// `Ew.d[Ee]` exponential.
    Exponential,
    /// `ENw.d` engineering (exponent a multiple of 3).
    Engineering,
    /// `ESw.d` scientific (mantissa 1–10).
    Scientific,
    /// `Gw.d[Ee]` general.
    General,
    /// `Dw.d[Ee]` double-precision exponential.
    Double,
}

/// A parsed `TDISPn` display format.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TDisp {
    /// Which format letter(s) introduced the value.
    pub kind: TDispKind,
    /// The field width `w`.
    pub width: usize,
    /// The digits after the `.` (`.d`/`.m`), when present.
    pub decimals: Option<usize>,
    /// The exponent width from a trailing `Ee`, when present.
    pub exponent: Option<usize>,
}

impl TDisp {
    /// Parse a `TDISPn` value such as `"I5"`, `"F8.2"`, `"E12.5E3"`, `"ES15.6"`, or
    /// `"A20"`.
    ///
    /// Matching is case-insensitive and ignores surrounding whitespace. Returns
    /// `None` when the format letter is unknown, or when the width, the decimals
    /// after a `.`, or the exponent after an `E` is missing or not a number.
    pub fn parse(s: &str) -> Option<TDisp> {
        let upper = s.trim().to_ascii_uppercase();

        // The two-letter prefixes must be tried before the bare `E`.
        let (kind, spec) = match (upper.strip_prefix("EN"), upper.strip_prefix("ES")) {
            (Some(tail), _) => (TDispKind::Engineering, tail),
            (None, Some(tail)) => (TDispKind::Scientific, tail),
            (None, None) => {
                let letter = *upper.as_bytes().first()?;
                let kind = match letter {
                    b'A' => TDispKind::Char,
                    b'L' => TDispKind::Logical,
                    b'I' => TDispKind::Integer,
                    b'B' => TDispKind::Binary,
                    b'O' => TDispKind::Octal,
                    b'Z' => TDispKind::Hex,
                    b'F' => TDispKind::Float,
                    b'E' => TDispKind::Exponential,
                    b'G' => TDispKind::General,
                    b'D' => TDispKind::Double,
                    _ => return None,
                };
                // The matched letter is ASCII, so byte 1 is a char boundary.
                (kind, &upper[1..])
            }
        };

        // spec = width[.decimals][E exponent]; peel the pieces off right to left.
        let (fields, exponent) = if let Some((head, tail)) = spec.split_once('E') {
            (head, Some(tail.parse::<usize>().ok()?))
        } else {
            (spec, None)
        };
        let (width_text, decimals) = if let Some((head, tail)) = fields.split_once('.') {
            (head, Some(tail.parse::<usize>().ok()?))
        } else {
            (fields, None)
        };
        let width = width_text.parse::<usize>().ok()?;

        Some(TDisp {
            kind,
            width,
            decimals,
            exponent,
        })
    }
}
252
/// One column of a binary table: its `TFORMn` format, optional name/unit, the
/// `TSCALn`/`TZEROn`/`TNULLn` metadata, and its byte offset within a row.
///
/// Built once per `n` in `1..=TFIELDS` by `BinTable::from_data`.
#[derive(Debug, Clone)]
pub struct Column {
    /// `TTYPEn`, the column name; `None` when the keyword is absent or empty.
    pub name: Option<String>,
    /// `TUNITn`, the physical unit; `None` when the keyword is absent or empty.
    pub unit: Option<String>,
    /// The parsed `TFORMn` value (repeat count, element kind, heap element kind).
    pub tform: Tform,
    /// `TSCALn` (default 1.0); applied by [`ColumnReader::physical`].
    pub tscale: f64,
    /// `TZEROn` (default 0.0); applied by [`ColumnReader::physical`].
    pub tzero: f64,
    /// `TNULLn`, the integer value denoting an undefined element, if declared.
    pub tnull: Option<i64>,
    /// `TDIMn` array shape (e.g. `'(4,4)'` → `[4, 4]`), if declared — reshapes the
    /// `repeat` elements of each row into a multidimensional array (§7.3.2).
    /// A value that fails to parse is treated as not declared.
    pub tdim: Option<Vec<usize>>,
    /// `TDISPn` display format (§7.3.4), parsed, if declared. A value that fails
    /// to parse is treated as not declared.
    pub tdisp: Option<TDisp>,
    /// Byte offset of this column from the start of a row (the summed byte widths
    /// of the columns before it).
    pub byte_offset: usize,
}
274
/// A decoded column, flattened across all rows in row order. For array columns
/// (`repeat > 1`) each row contributes `repeat` consecutive elements; for `A`,
/// each row contributes one [`String`]. Values are raw (big-endian decoded but
/// not `TSCALn`/`TZEROn`-scaled).
///
/// The same type also carries one row's heap array for variable-length columns
/// (see [`ColumnReader::vla`]).
#[derive(Debug, Clone, PartialEq)]
pub enum ColumnData {
    /// `L` — `Some(true)`/`Some(false)`, or `None` for the `0x00` null value (§7.3.3).
    Logical(Vec<Option<bool>>),
    /// `B` (bytes) and `X` (packed bits).
    Bytes(Vec<u8>),
    /// 16-bit signed integers.
    I16(Vec<i16>),
    /// 32-bit signed integers.
    I32(Vec<i32>),
    /// 64-bit signed integers.
    I64(Vec<i64>),
    /// Single-precision floats.
    F32(Vec<f32>),
    /// Double-precision floats.
    F64(Vec<f64>),
    /// Single-precision complex values.
    ComplexF32(Vec<Complex<f32>>),
    /// Double-precision complex values.
    ComplexF64(Vec<Complex<f64>>),
    /// `A` — one string per row, trailing spaces and NULs trimmed.
    Text(Vec<String>),
}
295
296impl ColumnData {
297    /// Total element count across all rows (the backing `Vec`'s length).
298    pub fn element_count(&self) -> usize {
299        match self {
300            ColumnData::Logical(v) => v.len(),
301            ColumnData::Bytes(v) => v.len(),
302            ColumnData::I16(v) => v.len(),
303            ColumnData::I32(v) => v.len(),
304            ColumnData::I64(v) => v.len(),
305            ColumnData::F32(v) => v.len(),
306            ColumnData::F64(v) => v.len(),
307            ColumnData::ComplexF32(v) => v.len(),
308            ColumnData::ComplexF64(v) => v.len(),
309            ColumnData::Text(v) => v.len(),
310        }
311    }
312}
313
/// A binary table's structure plus its data unit.
///
/// Constructed by `BinTable::from_data`, which checks that the column widths sum
/// to the row length and that the data unit holds the whole main table.
#[derive(Debug, Clone)]
pub struct BinTable {
    /// Number of rows (`NAXIS2`).
    pub nrows: usize,
    /// One descriptor per `TFIELDS` column, in header order.
    pub columns: Vec<Column>,
    /// Bytes per row (`NAXIS1`).
    pub(crate) row_len: usize,
    /// Byte offset of the heap within `bytes` (`THEAP`, default = main-table size).
    heap_offset: usize,
    /// Byte offset just past the real heap data (`nrows·row_len + PCOUNT`). `P`/`Q`
    /// spans must lie within `[heap_offset, heap_end)`, never the block fill beyond.
    heap_end: usize,
    /// The whole data unit (the `nrows * row_len` main table, then the heap and
    /// block fill). Fixed-width reads index the main-table prefix; `P`/`Q` columns
    /// follow their descriptors into the heap.
    bytes: Vec<u8>,
}
330
331impl BinTable {
332    /// Build a table from its header and owned data unit (`data` is the main
333    /// table followed by the optional heap, as returned by the reader).
334    pub(crate) fn from_data(header: &Header, data: Vec<u8>) -> Result<BinTable> {
335        let row_len = header
336            .get_integer("NAXIS1")
337            .ok_or(FitsError::MissingKeyword { name: "NAXIS1" })?
338            .max(0) as usize;
339        let nrows = header
340            .get_integer("NAXIS2")
341            .ok_or(FitsError::MissingKeyword { name: "NAXIS2" })?
342            .max(0) as usize;
343        // §7.3.1: `0 ≤ TFIELDS ≤ 999` — also a guard, since `tfields` sizes the
344        // column `Vec` and drives the `TFORMn` loop (an absurd value would abort).
345        let tfields = match header.get_integer("TFIELDS") {
346            Some(t) if (0..=999).contains(&t) => t as usize,
347            Some(_) => return Err(FitsError::KeywordOutOfRange { name: "TFIELDS" }),
348            None => return Err(FitsError::MissingKeyword { name: "TFIELDS" }),
349        };
350
351        let mut columns = Vec::with_capacity(tfields);
352        let mut offset = 0;
353        for n in 1..=tfields {
354            let tform_value = header
355                .get_text(key!("TFORM{n}").as_str())
356                .ok_or(FitsError::MissingKeyword { name: "TFORMn" })?;
357            let tform = Tform::parse(tform_value)?;
358            let tdim = header
359                .get_text(key!("TDIM{n}").as_str())
360                .and_then(parse_tdim);
361            // §7.3.2: for a fixed-width column a `TDIMn` shape must reshape exactly the
362            // repeat count (checked product so a hostile shape can't overflow past the
363            // equality). Variable-length (`P`/`Q`) columns are exempt — there `TDIMn`
364            // describes the heap array's shape, not the descriptor repeat (1), as in a
365            // §10.3 compressed-table container that carries the original column's TDIM.
366            let is_vla = matches!(tform.kind, TformKind::ArrayDesc32 | TformKind::ArrayDesc64);
367            if let Some(dims) = &tdim
368                && !is_vla
369                && dims.iter().try_fold(1usize, |a, &x| a.checked_mul(x)) != Some(tform.repeat)
370            {
371                return Err(FitsError::KeywordOutOfRange { name: "TDIMn" });
372            }
373            columns.push(Column {
374                name: header
375                    .get_text(key!("TTYPE{n}").as_str())
376                    .map(str::to_string)
377                    .filter(|s| !s.is_empty()),
378                unit: header
379                    .get_text(key!("TUNIT{n}").as_str())
380                    .map(str::to_string)
381                    .filter(|s| !s.is_empty()),
382                tform,
383                tscale: header.get_real(key!("TSCAL{n}").as_str()).unwrap_or(1.0),
384                tzero: header.get_real(key!("TZERO{n}").as_str()).unwrap_or(0.0),
385                tnull: header.get_integer(key!("TNULL{n}").as_str()),
386                tdim,
387                tdisp: header
388                    .get_text(key!("TDISP{n}").as_str())
389                    .and_then(TDisp::parse),
390                byte_offset: offset,
391            });
392            offset = offset.saturating_add(tform.byte_width());
393        }
394        if offset != row_len {
395            return Err(FitsError::RowWidthMismatch {
396                computed: offset,
397                declared: row_len,
398            });
399        }
400
401        // `nrows · row_len` from untrusted axes: check once (guards a 32-bit-usize
402        // overflow that `data_extent`'s u64 math wouldn't catch) and reuse.
403        let main_table = nrows.checked_mul(row_len).ok_or(FitsError::UnexpectedEof)?;
404        if data.len() < main_table {
405            return Err(FitsError::UnexpectedEof);
406        }
407        let heap_offset = header
408            .get_integer("THEAP")
409            .map_or(main_table, |t| t.max(0) as usize);
410        // §6.6: the heap follows the main table, so THEAP must be ≥ its size.
411        if heap_offset < main_table {
412            return Err(FitsError::KeywordOutOfRange { name: "THEAP" });
413        }
414        // PCOUNT counts the gap-plus-heap bytes after the main table, so the real
415        // heap ends here — anything past it is block fill (§6.6).
416        let pcount = header
417            .get_integer("PCOUNT")
418            .map_or(0, |p| p.max(0) as usize);
419        let heap_end = main_table
420            .checked_add(pcount)
421            .ok_or(FitsError::UnexpectedEof)?
422            .min(data.len());
423        Ok(BinTable {
424            nrows,
425            columns,
426            row_len,
427            heap_offset,
428            heap_end,
429            bytes: data,
430        })
431    }
432
433    /// The fixed-width main table (`nrows × NAXIS1` bytes), excluding the heap.
434    #[cfg(feature = "compression")]
435    pub(crate) fn raw_rows(&self) -> &[u8] {
436        &self.bytes[..self.nrows * self.row_len]
437    }
438
439    /// The index of the first column whose `TTYPEn` matches `name`, compared
440    /// case-insensitively per §6.7.
441    pub fn column_index(&self, name: &str) -> Option<usize> {
442        self.columns.iter().position(|c| {
443            c.name
444                .as_deref()
445                .is_some_and(|n| n.eq_ignore_ascii_case(name))
446        })
447    }
448
449    fn column_index_checked(&self, name: &str) -> Result<usize> {
450        self.column_index(name)
451            .ok_or_else(|| FitsError::ColumnNotFound {
452                name: name.to_string(),
453            })
454    }
455
456    /// A reader handle for the column at `index`. Decode through it — [`ColumnReader`]
457    /// exposes `raw`/`physical`/`unsigned`/`complex`/`bits` and the `vla*` variants —
458    /// without re-passing the column descriptor. Errors with
459    /// [`FitsError::ColumnIndexOutOfBounds`] for a bad index.
460    pub fn column_by_idx(&self, index: usize) -> Result<ColumnReader<'_>> {
461        if index >= self.columns.len() {
462            return Err(FitsError::ColumnIndexOutOfBounds {
463                index,
464                len: self.columns.len(),
465            });
466        }
467        Ok(ColumnReader { table: self, index })
468    }
469
470    /// A reader handle for the column named `name` (`TTYPEn`, case-insensitive, §6.7).
471    /// Errors with [`FitsError::ColumnNotFound`] if no such column exists.
472    pub fn column_by_name(&self, name: &str) -> Result<ColumnReader<'_>> {
473        let index = self.column_index_checked(name)?;
474        Ok(ColumnReader { table: self, index })
475    }
476
477    /// The `nbytes` of heap at descriptor `offset`, bounds-checked against the heap.
478    /// All arithmetic is checked so a crafted `P`/`Q` descriptor (huge offset/count)
479    /// cannot wrap past the guard or read outside the heap proper.
480    fn bounded_heap(&self, offset: usize, nbytes: usize) -> Result<&[u8]> {
481        let start = self
482            .heap_offset
483            .checked_add(offset)
484            .ok_or(FitsError::UnexpectedEof)?;
485        let end = start.checked_add(nbytes).ok_or(FitsError::UnexpectedEof)?;
486        if end > self.heap_end {
487            return Err(FitsError::UnexpectedEof);
488        }
489        self.bytes.get(start..end).ok_or(FitsError::UnexpectedEof)
490    }
491
492    /// The raw bytes of column `col` in row `r`.
493    fn cell(&self, col: &Column, r: usize) -> &[u8] {
494        let start = r * self.row_len + col.byte_offset;
495        &self.bytes[start..start + col.tform.byte_width()]
496    }
497
498    /// Concatenate the raw cell bytes of `col` across every row.
499    fn flatten(&self, col: &Column) -> Vec<u8> {
500        let mut out = Vec::with_capacity(self.nrows * col.tform.byte_width());
501        for r in 0..self.nrows {
502            out.extend_from_slice(self.cell(col, r));
503        }
504        out
505    }
506}
507
/// A handle to one column of a [`BinTable`], from [`BinTable::column_by_idx`] or
/// [`BinTable::column_by_name`]. Decode through it without re-passing the column
/// descriptor: [`raw`](Self::raw) for the typed values, [`physical`](Self::physical)
/// for the scaled `f64` plane, [`unsigned`](Self::unsigned)/[`complex`](Self::complex)/
/// [`bits`](Self::bits) for the special kinds, and [`vla`](Self::vla) (+
/// [`vla_physical`](Self::vla_physical)/[`vla_bits`](Self::vla_bits)) for
/// variable-length `P`/`Q` columns. Borrows the table, so it cannot outlive it.
#[derive(Debug, Clone, Copy)]
pub struct ColumnReader<'a> {
    /// The table being read.
    table: &'a BinTable,
    /// Position of the column in `table.columns`; both constructors hand out only
    /// an index that exists in that table.
    index: usize,
}
520
521impl<'a> ColumnReader<'a> {
522    /// The column's [`Column`] descriptor — name, `TFORMn`, `TSCALn`/`TZEROn`/`TNULLn`,
523    /// `TDIMn`, `TDISPn`.
524    pub fn descriptor(&self) -> &'a Column {
525        &self.table.columns[self.index]
526    }
527
528    /// Decode a fixed-width column into a typed, row-flattened [`ColumnData`]: `A` is
529    /// one [`String`] per row, every other fixed kind decodes from the concatenated
530    /// cell bytes. Variable-length (`P`/`Q`) columns error here — use
531    /// [`ColumnReader::vla`].
532    pub fn raw(&self) -> Result<ColumnData> {
533        let col = self.descriptor();
534        if matches!(
535            col.tform.kind,
536            TformKind::ArrayDesc32 | TformKind::ArrayDesc64
537        ) {
538            return Err(FitsError::VariableLengthColumn {
539                code: col.tform.kind.code(),
540            });
541        }
542        Ok(if col.tform.kind == TformKind::Char {
543            ColumnData::Text(
544                (0..self.table.nrows)
545                    .map(|r| trim_text(self.table.cell(col, r)))
546                    .collect(),
547            )
548        } else {
549            decode_array(col.tform.kind, &self.table.flatten(col))
550        })
551    }
552
553    /// The numeric column scaled to its physical `f64` plane: `TZEROn + TSCALn × raw`,
554    /// mapping integers equal to `TNULLn` to `NaN`. Errors for the non-numeric kinds
555    /// (`A`/`L`/`X`/`C`/`M`) and variable-length columns.
556    pub fn physical(&self) -> Result<Vec<f64>> {
557        let col = self.descriptor();
558        column_data_physical(
559            &self.raw()?,
560            col.tform.kind,
561            col.tscale,
562            col.tzero,
563            col.tnull,
564        )
565    }
566
567    /// Exact typed integers when the column uses the FITS unsigned (or signed-byte)
568    /// convention — `TSCALn == 1`, no `TNULLn`, `TZEROn` the matching sign-bit offset
569    /// on a `B`/`I`/`J`/`K` column — without the `f64` rounding of
570    /// [`physical`](Self::physical). `Ok(None)` for any other column; errors only for a
571    /// variable-length column. Mirrors [`crate::Image::unsigned`].
572    pub fn unsigned(&self) -> Result<Option<UnsignedView>> {
573        let col = self.descriptor();
574        if col.tscale != 1.0 || col.tnull.is_some() {
575            return Ok(None);
576        }
577        let tzero = col.tzero;
578        Ok(match (self.raw()?, col.tform.kind) {
579            (ColumnData::Bytes(v), TformKind::Byte) if tzero == -128.0 => {
580                Some(UnsignedView::from_signed_byte(&v))
581            }
582            (ColumnData::I16(v), _) if tzero == U16_OFFSET => {
583                Some(UnsignedView::from_offset_i16(&v))
584            }
585            (ColumnData::I32(v), _) if tzero == U32_OFFSET => {
586                Some(UnsignedView::from_offset_i32(&v))
587            }
588            (ColumnData::I64(v), _) if tzero == U64_OFFSET => {
589                Some(UnsignedView::from_offset_i64(&v))
590            }
591            _ => None,
592        })
593    }
594
595    /// A `C`/`M` complex column as [`Complex<f64>`] values, applying `TZEROn + TSCALn ×`
596    /// to each component (§6.4). Errors on non-complex columns.
597    pub fn complex(&self) -> Result<Vec<Complex<f64>>> {
598        let col = self.descriptor();
599        let scale = |re: f64, im: f64| Complex {
600            re: col.tzero + col.tscale * re,
601            im: col.tzero + col.tscale * im,
602        };
603        Ok(match self.raw()? {
604            ColumnData::ComplexF32(v) => v
605                .iter()
606                .map(|&Complex { re, im }| scale(re as f64, im as f64))
607                .collect(),
608            ColumnData::ComplexF64(v) => {
609                v.iter().map(|&Complex { re, im }| scale(re, im)).collect()
610            }
611            _ => {
612                return Err(FitsError::NotAComplexColumn {
613                    code: col.tform.kind.code(),
614                });
615            }
616        })
617    }
618
619    /// An `X` (bit-array) column as a borrowed 2-D [`BitColumn`] — `nrows × repeat`
620    /// bits viewed in place over the data unit, MSB-first (bit 0 is the MSB of the
621    /// first byte, §7.3.2), with no per-row allocation. Errors on any non-`X` column.
622    pub fn bits(&self) -> Result<BitColumn<'a>> {
623        let col = self.descriptor();
624        if col.tform.kind != TformKind::Bit {
625            return Err(FitsError::NotABitColumn {
626                code: col.tform.kind.code(),
627            });
628        }
629        Ok(BitColumn {
630            table: self.table,
631            index: self.index,
632        })
633    }
634
635    /// Decode a variable-length (`P`/`Q`) column: one [`ColumnData`] per row, each
636    /// holding that row's heap array (which may be empty). Errors for fixed-width
637    /// columns.
638    pub fn vla(&self) -> Result<Vec<ColumnData>> {
639        let col = self.descriptor();
640        let (elem, wide) = match (col.tform.kind, col.tform.vla_elem) {
641            (TformKind::ArrayDesc32, Some(e)) => (e, false),
642            (TformKind::ArrayDesc64, Some(e)) => (e, true),
643            _ => {
644                return Err(FitsError::NotAVla {
645                    code: col.tform.kind.code(),
646                });
647            }
648        };
649        let mut out = Vec::with_capacity(self.table.nrows);
650        for r in 0..self.table.nrows {
651            let d = decode_descriptor(self.table.cell(col, r), wide);
652            let nbytes = match elem {
653                TformKind::Bit => d.nelem.div_ceil(8),
654                _ => d
655                    .nelem
656                    .checked_mul(elem.elem_size())
657                    .ok_or(FitsError::UnexpectedEof)?,
658            };
659            out.push(decode_array(
660                elem,
661                self.table.bounded_heap(d.offset, nbytes)?,
662            ));
663        }
664        Ok(out)
665    }
666
667    /// Scale each row of a `P`/`Q` column to its physical plane: `TZEROn + TSCALn ×
668    /// element`, mapping integers equal to `TNULLn` to `NaN` (§6.4 — scaling applies to
669    /// the heap values). Errors for fixed-width or non-numeric-heap columns.
670    pub fn vla_physical(&self) -> Result<Vec<Vec<f64>>> {
671        let rows = self.vla()?; // validates VLA + heap bounds
672        let col = self.descriptor();
673        let elem = col
674            .tform
675            .vla_elem
676            .expect("vla() succeeded ⇒ vla_elem is Some");
677        rows.iter()
678            .map(|row| column_data_physical(row, elem, col.tscale, col.tzero, col.tnull))
679            .collect()
680    }
681
682    /// A variable-length `X` (`1PX`/`1QX`) column as a borrowed 2-D [`BitColumn`],
683    /// MSB-first (§7.3.2/§7.3.5 — the descriptor's element count is the bit count). The
684    /// rows are *jagged* (each its own length), so [`BitColumn::row`]`(r).len()` gives a
685    /// row's width. Errors on any non-bit VLA.
686    pub fn vla_bits(&self) -> Result<BitColumn<'a>> {
687        let col = self.descriptor();
688        let wide = match (col.tform.kind, col.tform.vla_elem) {
689            (TformKind::ArrayDesc32, Some(TformKind::Bit)) => false,
690            (TformKind::ArrayDesc64, Some(TformKind::Bit)) => true,
691            _ => {
692                return Err(FitsError::NotABitColumn {
693                    code: col.tform.kind.code(),
694                });
695            }
696        };
697        // Validate every row's heap span up front (no allocation) so [`BitColumn::row`]
698        // can resolve a row lazily and infallibly — the only place an overrun surfaces.
699        for r in 0..self.table.nrows {
700            let d = decode_descriptor(self.table.cell(col, r), wide);
701            self.table.bounded_heap(d.offset, d.nelem.div_ceil(8))?;
702        }
703        Ok(BitColumn {
704            table: self.table,
705            index: self.index,
706        })
707    }
708}
709
/// A binary table's `X` (bit-array) column as a borrowed, 2-D bit view — from
/// [`ColumnReader::bits`] (rectangular, `nrows × repeat`) or [`ColumnReader::vla_bits`]
/// (jagged `PX`/`QX`). Bits are MSB-first (§7.3.2) and viewed in place over the data
/// unit (zero-copy), so this borrows the table and can't outlive it.
///
/// Index a row (`flags[row]` → a [`BitSlice`]), a bit by nesting (`flags[row][col]`)
/// or by cell (`flags[(row, col)]`), reach for the checked [`get`](Self::get), or take
/// a row with the source lifetime via [`row`](Self::row). Rows are full `bitvec`
/// slices — `count_ones()`, `iter_ones()`, `.to_bitvec()` to own, etc.
///
/// ```ignore
/// let flags = table.column_by_name("DQ")?.bits()?;
/// let bit = flags[(row, 3)];             // bool (panics out of range)
/// let bit = flags[row][3];               // same, via the row slice
/// let bit = flags.get(row, 3);           // Option<bool> (checked)
/// let set = flags[row].count_ones();     // bitvec ops on the row
/// ```
#[derive(Debug, Clone, Copy)]
pub struct BitColumn<'a> {
    /// The table whose data unit backs the bit view.
    table: &'a BinTable,
    /// Position of the bit column in `table.columns`.
    index: usize,
}
732
733impl<'a> BitColumn<'a> {
734    /// The number of rows.
735    pub fn nrows(&self) -> usize {
736        self.table.nrows
737    }
738
739    /// Whether the column has no rows.
740    pub fn is_empty(&self) -> bool {
741        self.table.nrows == 0
742    }
743
744    /// Row `r`'s bits as a borrowed [`BitSlice`], MSB-first — resolved on demand from
745    /// the data unit (no per-row storage). Index it (`row[c]`), iterate it, or
746    /// `.to_bitvec()` to own it. Panics if `r >= nrows()`.
747    pub fn row(&self, r: usize) -> &'a BitSlice<u8, Msb0> {
748        assert!(
749            r < self.table.nrows,
750            "row {r} out of bounds ({} rows)",
751            self.table.nrows
752        );
753        let col = &self.table.columns[self.index];
754        if col.tform.kind == TformKind::Bit {
755            // Fixed `rX`: the row's cell, truncated to `repeat` bits.
756            &self.table.cell(col, r).view_bits::<Msb0>()[..col.tform.repeat]
757        } else {
758            // Variable-length `PX`/`QX`: follow the descriptor into the heap. The span
759            // was bounds-checked by `vla_bits`, so the lookup can't fail here.
760            let wide = col.tform.kind == TformKind::ArrayDesc64;
761            let d = decode_descriptor(self.table.cell(col, r), wide);
762            let cell = self
763                .table
764                .bounded_heap(d.offset, d.nelem.div_ceil(8))
765                .expect("vla_bits validated every heap span");
766            &cell.view_bits::<Msb0>()[..d.nelem]
767        }
768    }
769
770    /// The bit at `(row, col)`, MSB-first — `None` if either index is out of range.
771    pub fn get(&self, row: usize, col: usize) -> Option<bool> {
772        if row >= self.table.nrows {
773            return None;
774        }
775        let bits = self.row(row);
776        (col < bits.len()).then(|| bits[col])
777    }
778
779    /// Iterate the rows, each a borrowed [`BitSlice`], resolved on demand.
780    pub fn iter(&self) -> impl ExactSizeIterator<Item = &'a BitSlice<u8, Msb0>> + '_ {
781        (0..self.table.nrows).map(move |r| self.row(r))
782    }
783}
784
785/// `bits[row]` is row `row`'s [`BitSlice`] (panics out of range, like slice indexing);
786/// `bits[row][col]` is the bit. Use [`BitColumn::get`] for the checked element.
787impl Index<usize> for BitColumn<'_> {
788    type Output = BitSlice<u8, Msb0>;
789
790    fn index(&self, row: usize) -> &BitSlice<u8, Msb0> {
791        self.row(row)
792    }
793}
794
795/// `bits[(row, col)]` is the bit at that cell (panics out of range) — the matrix-style
796/// counterpart of [`BitColumn::get`].
797impl Index<(usize, usize)> for BitColumn<'_> {
798    type Output = bool;
799
800    fn index(&self, (row, col): (usize, usize)) -> &bool {
801        &self.row(row)[col]
802    }
803}
804
/// Parse the axis lengths out of a `TDIMn` value of the form `'(d1,d2,…)'`.
///
/// Whitespace around the whole value and around each entry is ignored. The lengths
/// are returned in the order written (fastest-varying axis first). Returns `None`
/// when the surrounding parentheses are missing or any entry is not a valid `usize`.
fn parse_tdim(value: &str) -> Option<Vec<usize>> {
    let body = value
        .trim()
        .strip_prefix('(')
        .and_then(|rest| rest.strip_suffix(')'))?;

    let mut dims = Vec::new();
    for field in body.split(',') {
        // A single malformed entry invalidates the whole value.
        dims.push(field.trim().parse::<usize>().ok()?);
    }
    Some(dims)
}
813
814/// Scale a decoded numeric [`ColumnData`] to its physical `f64` plane:
815/// `TZEROn + TSCALn × element`, mapping integers equal to `TNULLn` to `NaN`.
816/// `kind` disambiguates `Bytes` (`B` integer vs `X` bits). Errors for the
817/// non-numeric kinds (`A`/`L`/`X`/`C`/`M`).
818fn column_data_physical(
819    data: &ColumnData,
820    kind: TformKind,
821    tscale: f64,
822    tzero: f64,
823    tnull: Option<i64>,
824) -> Result<Vec<f64>> {
825    let scale = |x: f64| tzero + tscale * x;
826    let scaled_int = |xi: i64| {
827        if tnull == Some(xi) {
828            f64::NAN
829        } else {
830            scale(xi as f64)
831        }
832    };
833    Ok(match data {
834        ColumnData::Bytes(v) if kind == TformKind::Byte => {
835            v.iter().map(|&b| scaled_int(b as i64)).collect()
836        }
837        ColumnData::I16(v) => v.iter().map(|&x| scaled_int(x as i64)).collect(),
838        ColumnData::I32(v) => v.iter().map(|&x| scaled_int(x as i64)).collect(),
839        ColumnData::I64(v) => v.iter().map(|&x| scaled_int(x)).collect(),
840        ColumnData::F32(v) => v.iter().map(|&x| scale(x as f64)).collect(),
841        ColumnData::F64(v) => v.iter().map(|&x| scale(x)).collect(),
842        _ => return Err(FitsError::NonNumericColumn { code: kind.code() }),
843    })
844}
845
846/// Decode `bytes` as a contiguous run of `kind` elements. Shared by fixed-width
847/// reads (concatenated cells) and heap arrays.
848fn decode_array(kind: TformKind, bytes: &[u8]) -> ColumnData {
849    match kind {
850        TformKind::Logical => ColumnData::Logical(
851            bytes
852                .iter()
853                .map(|&b| match b {
854                    b'T' => Some(true),
855                    b'F' => Some(false),
856                    _ => None, // 0x00 (or any non-T/F byte) is the undefined value
857                })
858                .collect(),
859        ),
860        TformKind::Byte | TformKind::Bit => ColumnData::Bytes(bytes.to_vec()),
861        TformKind::Char => ColumnData::Text(vec![trim_text(bytes)]),
862        TformKind::I16 => ColumnData::I16(decode_be(bytes, i16::from_be_bytes)),
863        TformKind::I32 => ColumnData::I32(decode_be(bytes, i32::from_be_bytes)),
864        TformKind::I64 => ColumnData::I64(decode_be(bytes, i64::from_be_bytes)),
865        TformKind::F32 => ColumnData::F32(decode_be(bytes, f32::from_be_bytes)),
866        TformKind::F64 => ColumnData::F64(decode_be(bytes, f64::from_be_bytes)),
867        TformKind::ComplexF32 => ColumnData::ComplexF32(decode_be(bytes, |b: [u8; 8]| Complex {
868            re: f32::from_be_bytes([b[0], b[1], b[2], b[3]]),
869            im: f32::from_be_bytes([b[4], b[5], b[6], b[7]]),
870        })),
871        TformKind::ComplexF64 => ColumnData::ComplexF64(decode_be(bytes, |b: [u8; 16]| Complex {
872            re: f64::from_be_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
873            im: f64::from_be_bytes([b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]]),
874        })),
875        // A heap element can't itself be a descriptor; keep the raw bytes.
876        TformKind::ArrayDesc32 | TformKind::ArrayDesc64 => ColumnData::Bytes(bytes.to_vec()),
877    }
878}
879
/// Turn the bytes of an `A`-field cell into a `String`.
///
/// The text ends at the first NUL byte if there is one (§6.3: a NUL terminates the
/// string early); trailing ASCII spaces are then stripped. Leading spaces are kept,
/// and bytes that are not valid UTF-8 are replaced lossily.
fn trim_text(cell: &[u8]) -> String {
    // Everything before the first NUL (the whole cell when there is none).
    let mut text = cell.split(|&b| b == 0).next().unwrap_or_default();
    // Peel trailing spaces off one at a time.
    while let [rest @ .., b' '] = text {
        text = rest;
    }
    String::from_utf8_lossy(text).into_owned()
}
888
/// The two fields of a `P`/`Q` array descriptor after decoding: where one row's
/// variable-length array sits in the heap and how many elements it has.
#[derive(Clone, Copy, Debug)]
struct Descriptor {
    /// Number of elements in the row's heap array.
    nelem: usize,
    /// Byte offset of the array within the heap.
    offset: usize,
}
896
897/// Decode an array descriptor — a pair of 32-bit (`P`) or 64-bit (`Q`) big-endian
898/// integers — from a variable-length column cell.
899fn decode_descriptor(desc: &[u8], wide: bool) -> Descriptor {
900    if wide {
901        Descriptor {
902            nelem: be_u64(&desc[0..8]),
903            offset: be_u64(&desc[8..16]),
904        }
905    } else {
906        Descriptor {
907            nelem: be_u32(&desc[0..4]),
908            offset: be_u32(&desc[4..8]),
909        }
910    }
911}
912
/// Read a big-endian 32-bit `P` descriptor field (an element count or a heap offset)
/// from the first four bytes of `b`.
///
/// The value is taken as unsigned, per the standard. Nothing is clamped here: a value
/// that is too large is left for the heap-bounds check to reject, rather than being
/// silently turned into 0.
fn be_u32(b: &[u8]) -> usize {
    let mut word = [0u8; 4];
    word.copy_from_slice(&b[..4]);
    u32::from_be_bytes(word) as usize
}
919
/// Read a big-endian 64-bit `Q` descriptor field (an element count or a heap offset)
/// from the first eight bytes of `b`.
fn be_u64(b: &[u8]) -> usize {
    let mut word = [0u8; 8];
    word.copy_from_slice(&b[..8]);
    match usize::try_from(u64::from_be_bytes(word)) {
        // On a 64-bit target this is the identity (`usize == u64`).
        Ok(value) => value,
        // On a 32-bit target a `Q` count/offset can exceed `usize`. Saturate so it
        // fails the heap bounds check rather than wrapping into a spuriously
        // in-range value.
        Err(_) => usize::MAX,
    }
}
929
930#[cfg(test)]
931mod tests;