Skip to main content

fits_well/ascii/
mod.rs

1//! ASCII-table extension (§7.2): `TABLE`.
2//!
3//! Rows are fixed-length lines of ASCII text; each column occupies a fixed byte
4//! range starting at `TBCOLn` (1-based), formatted per a Fortran `TFORMn` code
5//! (`Aw`, `Iw`, `Fw.d`, `Ew.d`, `Dw.d`). Decoded values reuse [`ColumnData`]
6//! (`Text`/`I64`/`F64`); ASCII columns are always scalar.
7
8use crate::error::FitsError;
9use crate::error::Result;
10use crate::header::Header;
11use crate::keyword::key;
12use crate::table::ColumnData;
13
/// How the text of an ASCII-table field is interpreted, as selected by the
/// leading letter of the column's `TFORMn` code.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AsciiKind {
    /// Character string (`Aw`).
    Char,
    /// Decimal integer (`Iw`).
    Integer,
    /// Floating point (`Fw.d`, `Ew.d` or `Dw.d`).
    Float,
}
24
25/// One ASCII-table column.
26#[derive(Debug, Clone)]
27pub struct AsciiColumn {
28    pub name: Option<String>,
29    pub unit: Option<String>,
30    pub kind: AsciiKind,
31    /// 0-based byte offset of the field within a row (`TBCOLn − 1`).
32    pub start: usize,
33    pub width: usize,
34    /// Digits after the decimal point (`Fw.d`); 0 for non-floats.
35    pub decimals: usize,
36    /// `TSCALn` / `TZEROn` for the physical plane (`physical = TZERO + TSCAL·raw`).
37    pub tscale: f64,
38    pub tzero: f64,
39    /// `TNULLn`: the exact field text that marks an undefined value (§7.2.5).
40    pub null: Option<String>,
41}
42
43/// A parsed ASCII table plus its row bytes.
44#[derive(Debug, Clone)]
45pub struct AsciiTable {
46    pub nrows: usize,
47    pub columns: Vec<AsciiColumn>,
48    row_len: usize,
49    bytes: Vec<u8>,
50}
51
52impl AsciiTable {
53    pub(crate) fn from_data(header: &Header, data: Vec<u8>) -> Result<AsciiTable> {
54        let row_len = header
55            .get_integer("NAXIS1")
56            .ok_or(FitsError::MissingKeyword { name: "NAXIS1" })?
57            .max(0) as usize;
58        let nrows = header
59            .get_integer("NAXIS2")
60            .ok_or(FitsError::MissingKeyword { name: "NAXIS2" })?
61            .max(0) as usize;
62        // §7.2.1: `0 ≤ TFIELDS ≤ 999` — also a guard, since `tfields` sizes the
63        // column `Vec` and drives the `TFORMn` loop (an absurd value would abort).
64        let tfields = match header.get_integer("TFIELDS") {
65            Some(t) if (0..=999).contains(&t) => t as usize,
66            Some(_) => return Err(FitsError::KeywordOutOfRange { name: "TFIELDS" }),
67            None => return Err(FitsError::MissingKeyword { name: "TFIELDS" }),
68        };
69
70        let mut columns = Vec::with_capacity(tfields);
71        for n in 1..=tfields {
72            let tbcol = header
73                .get_integer(key!("TBCOL{n}").as_str())
74                .ok_or(FitsError::MissingKeyword { name: "TBCOLn" })?;
75            let tform = header
76                .get_text(key!("TFORM{n}").as_str())
77                .ok_or(FitsError::MissingKeyword { name: "TFORMn" })?;
78            let fmt = parse_ascii_tform(tform)?;
79            let start = (tbcol.max(1) - 1) as usize;
80            // §7.2.3: each field must lie within the row (`NAXIS1`). A column declared
81            // past the row width is malformed — reject it rather than let `field()`
82            // silently truncate to empty.
83            if start.checked_add(fmt.width).is_none_or(|end| end > row_len) {
84                return Err(FitsError::KeywordOutOfRange { name: "TBCOLn" });
85            }
86            columns.push(AsciiColumn {
87                name: header
88                    .get_text(key!("TTYPE{n}").as_str())
89                    .map(str::to_string)
90                    .filter(|s| !s.is_empty()),
91                unit: header
92                    .get_text(key!("TUNIT{n}").as_str())
93                    .map(str::to_string)
94                    .filter(|s| !s.is_empty()),
95                kind: fmt.kind,
96                start,
97                width: fmt.width,
98                decimals: fmt.decimals,
99                tscale: header.get_real(key!("TSCAL{n}").as_str()).unwrap_or(1.0),
100                tzero: header.get_real(key!("TZERO{n}").as_str()).unwrap_or(0.0),
101                null: header
102                    .get_text(key!("TNULL{n}").as_str())
103                    .map(|s| s.trim().to_string()),
104            });
105        }
106
107        // `nrows · row_len` from untrusted axes: check the product can't overflow
108        // (a 32-bit-usize hazard `data_extent`'s u64 math wouldn't catch).
109        let total = nrows.checked_mul(row_len).ok_or(FitsError::UnexpectedEof)?;
110        if data.len() < total {
111            return Err(FitsError::UnexpectedEof);
112        }
113        Ok(AsciiTable {
114            nrows,
115            columns,
116            row_len,
117            bytes: data,
118        })
119    }
120
121    /// The index of the first column whose `TTYPEn` matches `name`, compared
122    /// case-insensitively per §7.2.2.
123    pub fn column_index(&self, name: &str) -> Option<usize> {
124        self.columns.iter().position(|c| {
125            c.name
126                .as_deref()
127                .is_some_and(|n| n.eq_ignore_ascii_case(name))
128        })
129    }
130
131    fn column_index_checked(&self, name: &str) -> Result<usize> {
132        self.column_index(name)
133            .ok_or_else(|| FitsError::ColumnNotFound {
134                name: name.to_string(),
135            })
136    }
137
138    /// A reader handle for the column at `index`. Decode through it —
139    /// [`AsciiColumnReader::raw`]/[`physical`](AsciiColumnReader::physical) — without
140    /// re-passing the descriptor. Errors with [`FitsError::ColumnIndexOutOfBounds`].
141    pub fn column_by_idx(&self, index: usize) -> Result<AsciiColumnReader<'_>> {
142        if index >= self.columns.len() {
143            return Err(FitsError::ColumnIndexOutOfBounds {
144                index,
145                len: self.columns.len(),
146            });
147        }
148        Ok(AsciiColumnReader { table: self, index })
149    }
150
151    /// A reader handle for the column named `name` (`TTYPEn`, case-insensitive, §7.2.2).
152    /// Errors with [`FitsError::ColumnNotFound`] if no such column exists.
153    pub fn column_by_name(&self, name: &str) -> Result<AsciiColumnReader<'_>> {
154        let index = self.column_index_checked(name)?;
155        Ok(AsciiColumnReader { table: self, index })
156    }
157
158    /// The trimmed text of column `col` in row `r`. Errors on non-UTF-8 bytes — a
159    /// FITS ASCII table is ASCII, so a non-ASCII field is malformed; surfacing it
160    /// (rather than the old `unwrap_or("")`) stops a corrupt byte from masquerading
161    /// as a blank field and silently decoding to 0 in a numeric column.
162    fn field(&self, col: &AsciiColumn, r: usize) -> Result<&str> {
163        let row = &self.bytes[r * self.row_len..(r + 1) * self.row_len];
164        let end = (col.start + col.width).min(row.len());
165        let raw = if col.start < end {
166            &row[col.start..end]
167        } else {
168            &[]
169        };
170        let text = std::str::from_utf8(raw).map_err(|_| FitsError::InvalidValue {
171            card: "non-UTF-8 bytes in ASCII-table field".to_string(),
172        })?;
173        Ok(text.trim())
174    }
175}
176
177/// A handle to one column of an [`AsciiTable`], from [`AsciiTable::column_by_idx`] or
178/// [`AsciiTable::column_by_name`]. Decode through it without re-passing the
179/// descriptor: [`raw`](Self::raw) for the typed values, [`physical`](Self::physical)
180/// for the scaled plane. Borrows the table, so it cannot outlive it.
181#[derive(Debug, Clone, Copy)]
182pub struct AsciiColumnReader<'a> {
183    table: &'a AsciiTable,
184    index: usize,
185}
186
187impl<'a> AsciiColumnReader<'a> {
188    /// The column's [`AsciiColumn`] descriptor.
189    pub fn descriptor(&self) -> &'a AsciiColumn {
190        &self.table.columns[self.index]
191    }
192
193    /// Decode the column into a typed [`ColumnData`] (`Text`/`I64`/`F64`). A blank
194    /// numeric field decodes to 0 (§7.2.5); a field equal to `TNULLn` decodes to a 0
195    /// placeholder in this raw plane — use [`physical`](Self::physical) for `NaN`. A
196    /// non-blank, non-null unparseable field errors.
197    pub fn raw(&self) -> Result<ColumnData> {
198        let table = self.table;
199        let col = self.descriptor();
200        match col.kind {
201            AsciiKind::Char => Ok(ColumnData::Text(
202                (0..table.nrows)
203                    .map(|r| Ok(table.field(col, r)?.to_string()))
204                    .collect::<Result<_>>()?,
205            )),
206            AsciiKind::Integer => {
207                let mut out = Vec::with_capacity(table.nrows);
208                for r in 0..table.nrows {
209                    let s = table.field(col, r)?;
210                    out.push(if s.is_empty() || col.is_null(s) {
211                        0
212                    } else {
213                        s.parse().map_err(|_| FitsError::InvalidValue {
214                            card: s.to_string(),
215                        })?
216                    });
217                }
218                Ok(ColumnData::I64(out))
219            }
220            AsciiKind::Float => {
221                let mut out = Vec::with_capacity(table.nrows);
222                for r in 0..table.nrows {
223                    let s = table.field(col, r)?;
224                    out.push(if s.is_empty() || col.is_null(s) {
225                        0.0
226                    } else {
227                        parse_ascii_float(s, col.decimals).ok_or_else(|| {
228                            FitsError::InvalidValue {
229                                card: s.to_string(),
230                            }
231                        })?
232                    });
233                }
234                Ok(ColumnData::F64(out))
235            }
236        }
237    }
238
239    /// The numeric column on its physical `f64` plane: `TZEROn + TSCALn × field`
240    /// (§7.2.2). A blank field is 0 before scaling; a field equal to `TNULLn` is
241    /// undefined and maps to `NaN`. Errors on a character column.
242    ///
243    /// Unlike the binary-table [`ColumnReader::physical`](crate::ColumnReader::physical),
244    /// this re-reads the field text (the raw plane has already collapsed nulls to 0),
245    /// which is how `TNULLn` survives as `NaN`.
246    pub fn physical(&self) -> Result<Vec<f64>> {
247        let table = self.table;
248        let col = self.descriptor();
249        if col.kind == AsciiKind::Char {
250            return Err(FitsError::NonNumericColumn { code: 'A' });
251        }
252        let mut out = Vec::with_capacity(table.nrows);
253        for r in 0..table.nrows {
254            let s = table.field(col, r)?;
255            if col.is_null(s) {
256                out.push(f64::NAN);
257                continue;
258            }
259            let raw = if s.is_empty() {
260                0.0
261            } else {
262                parse_ascii_float(s, col.decimals).ok_or_else(|| FitsError::InvalidValue {
263                    card: s.to_string(),
264                })?
265            };
266            out.push(col.tzero + col.tscale * raw);
267        }
268        Ok(out)
269    }
270}
271
272impl AsciiColumn {
273    /// Whether the trimmed field text marks an undefined value (`TNULLn`).
274    fn is_null(&self, field: &str) -> bool {
275        self.null.as_deref() == Some(field)
276    }
277}
278
/// Parse a Fortran `Fw.d`/`Ew.d`/`Dw.d` field into an `f64`.
///
/// `field` is the trimmed field text. When the mantissa carries no explicit `.`,
/// the decimal point is implied `decimals` digits from the right (§7.2.1,
/// deprecated), i.e. the integer mantissa is scaled by `10⁻ᵈ`.
///
/// The explicit exponent and the implied-decimal shift are folded into a single
/// decimal exponent and the number is handed to the standard `f64` parser once,
/// so the result is rounded once. Multiplying by `10f64.powi(e)` instead rounds
/// twice: `1.1E-1` would decode to `0.11000000000000001`, and `0.0E400` to `NaN`
/// (`0 × ∞`).
///
/// Returns `None` when the mantissa or the exponent is not a valid number.
fn parse_ascii_float(field: &str, decimals: usize) -> Option<f64> {
    let (mantissa, exponent) = match split_mantissa_exponent(field) {
        Some((m, e)) => (m, Some(e)),
        None => (field, None),
    };
    // Parse the mantissa on its own first: this validates it and is already
    // the final answer when there is no net power of ten to apply.
    let base: f64 = mantissa.parse().ok()?;
    let explicit = match exponent {
        Some(e) => i128::from(e.trim().parse::<i32>().ok()?),
        None => 0,
    };
    // `usize` → `i128` is a lossless widening, so the subtraction cannot overflow.
    let implied = if mantissa.contains('.') {
        0
    } else {
        decimals as i128
    };
    let shift = explicit - implied;
    if shift == 0 || !base.is_finite() {
        return Some(base);
    }
    // The mantissa parsed as a finite number and contains no exponent letter
    // (the split consumed it), so appending `e<shift>` yields a literal the
    // standard parser accepts and rounds correctly in one step.
    format!("{mantissa}e{shift}").parse().ok()
}

/// Split a numeric string into mantissa and exponent text.
///
/// The exponent is introduced by `E`/`e` or the Fortran double-precision `D`/`d`
/// (§7.2.1), in which case the letter itself is dropped, **or** by a bare `+`/`-`
/// sign past the leading mantissa sign (the letter-less form, §7.2.5 rule 3, e.g.
/// `3.14159-2` = 3.14159 × 10⁻²), in which case the sign stays with the exponent.
/// Returns `None` when the string has no exponent part.
fn split_mantissa_exponent(s: &str) -> Option<(&str, &str)> {
    if let Some(i) = s.find(['E', 'e', 'D', 'd']) {
        // The matched letter is one ASCII byte, so `i + 1` is a char boundary.
        return Some((&s[..i], &s[i + 1..]));
    }
    s.char_indices()
        .find(|&(i, c)| i > 0 && (c == '+' || c == '-'))
        .map(|(i, _)| (&s[..i], &s[i..]))
}
311
312/// A parsed ASCII `TFORMn`: element kind, field width, and decimal count.
313#[derive(Debug, Clone, Copy, PartialEq, Eq)]
314struct AsciiFormat {
315    kind: AsciiKind,
316    width: usize,
317    decimals: usize,
318}
319
320/// Parse an ASCII `TFORMn` (`Aw`, `Iw`, `Fw.d`, `Ew.d`, `Dw.d`).
321fn parse_ascii_tform(value: &str) -> Result<AsciiFormat> {
322    let s = value.trim();
323    let invalid = || FitsError::InvalidTform {
324        tform: value.to_string(),
325    };
326    let letter = s.bytes().next().ok_or_else(invalid)?;
327    let kind = match letter {
328        b'A' => AsciiKind::Char,
329        b'I' => AsciiKind::Integer,
330        b'F' | b'E' | b'D' => AsciiKind::Float,
331        _ => return Err(invalid()),
332    };
333    let rest = &s[1..];
334    let (width, decimals) = match rest.split_once('.') {
335        Some((w, d)) => (
336            w.trim().parse().map_err(|_| invalid())?,
337            d.trim().parse().map_err(|_| invalid())?,
338        ),
339        None => (rest.trim().parse().map_err(|_| invalid())?, 0),
340    };
341    Ok(AsciiFormat {
342        kind,
343        width,
344        decimals,
345    })
346}
347
348#[cfg(test)]
349mod tests;