use std::ops::Index;
use bitvec::order::Msb0;
use bitvec::slice::BitSlice;
use bitvec::view::BitView;
use num_complex::Complex;
use crate::data::U16_OFFSET;
use crate::data::U32_OFFSET;
use crate::data::U64_OFFSET;
use crate::data::UnsignedView;
use crate::endian::decode_be;
use crate::error::FitsError;
use crate::error::Result;
use crate::header::Header;
use crate::keyword::key;
/// Element type selected by the letter code of a `TFORMn` value.
///
/// Every variant maps to exactly one ASCII letter (see [`TformKind::code`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TformKind {
    /// Code `L`, one byte per element.
    Logical,
    /// Code `X`, bit array.
    Bit,
    /// Code `B`, one byte per element.
    Byte,
    /// Code `I`, two bytes per element.
    I16,
    /// Code `J`, four bytes per element.
    I32,
    /// Code `K`, eight bytes per element.
    I64,
    /// Code `A`, one byte per element.
    Char,
    /// Code `E`, four bytes per element.
    F32,
    /// Code `D`, eight bytes per element.
    F64,
    /// Code `C`, eight bytes per element.
    ComplexF32,
    /// Code `M`, sixteen bytes per element.
    ComplexF64,
    /// Code `P`, eight-byte array descriptor.
    ArrayDesc32,
    /// Code `Q`, sixteen-byte array descriptor.
    ArrayDesc64,
}

impl TformKind {
    /// Every variant once; lets `from_code` be derived from `code` so the
    /// letter table exists in a single place.
    const ALL: [TformKind; 13] = [
        Self::Logical,
        Self::Bit,
        Self::Byte,
        Self::I16,
        Self::I32,
        Self::I64,
        Self::Char,
        Self::F32,
        Self::F64,
        Self::ComplexF32,
        Self::ComplexF64,
        Self::ArrayDesc32,
        Self::ArrayDesc64,
    ];

    /// Looks up the kind whose letter is `code`.
    ///
    /// The comparison is case-sensitive; any byte that is not one of the
    /// thirteen upper-case letters yields `None`.
    fn from_code(code: u8) -> Option<TformKind> {
        let letter = char::from(code);
        Self::ALL.into_iter().find(|kind| kind.code() == letter)
    }

    /// Returns the upper-case letter that denotes this kind.
    pub fn code(self) -> char {
        match self {
            Self::Logical => 'L',
            Self::Bit => 'X',
            Self::Byte => 'B',
            Self::I16 => 'I',
            Self::I32 => 'J',
            Self::I64 => 'K',
            Self::Char => 'A',
            Self::F32 => 'E',
            Self::F64 => 'D',
            Self::ComplexF32 => 'C',
            Self::ComplexF64 => 'M',
            Self::ArrayDesc32 => 'P',
            Self::ArrayDesc64 => 'Q',
        }
    }

    /// Size in bytes of one element of this kind.
    ///
    /// `Bit` reports 1 here; callers that need the packed width of a bit
    /// array compute it from the bit count instead.
    pub(crate) fn elem_size(self) -> usize {
        match self {
            Self::ComplexF64 | Self::ArrayDesc64 => 16,
            Self::I64 | Self::F64 | Self::ComplexF32 | Self::ArrayDesc32 => 8,
            Self::I32 | Self::F32 => 4,
            Self::I16 => 2,
            Self::Logical | Self::Bit | Self::Byte | Self::Char => 1,
        }
    }
}
/// Parsed form of a `TFORMn` value: repeat count, type letter and, for
/// array-descriptor kinds, the element type named after the descriptor letter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Tform {
/// Integer written before the type letter; `Tform::parse` uses 1 when the
/// value starts directly with the letter.
pub repeat: usize,
/// Kind selected by the type letter.
pub kind: TformKind,
/// Element kind of the arrays a descriptor points at; `None` unless `kind`
/// is `ArrayDesc32` or `ArrayDesc64`.
pub vla_elem: Option<TformKind>,
}
impl Tform {
pub fn parse(value: &str) -> Result<Tform> {
let s = value.trim();
let invalid = || FitsError::InvalidTform {
tform: value.to_string(),
};
let pos = s
.bytes()
.position(|b| b.is_ascii_alphabetic())
.ok_or_else(invalid)?;
let repeat = if pos == 0 {
1
} else {
s[..pos].parse().map_err(|_| invalid())?
};
let kind = TformKind::from_code(s.as_bytes()[pos]).ok_or_else(invalid)?;
let vla_elem = if matches!(kind, TformKind::ArrayDesc32 | TformKind::ArrayDesc64) {
let elem = s.as_bytes().get(pos + 1).copied().ok_or_else(invalid)?;
if repeat > 1 {
return Err(invalid());
}
Some(TformKind::from_code(elem).ok_or_else(invalid)?)
} else {
None
};
Ok(Tform {
repeat,
kind,
vla_elem,
})
}
pub fn byte_width(self) -> usize {
match self.kind {
TformKind::Bit => self.repeat.div_ceil(8),
_ => self.repeat.saturating_mul(self.kind.elem_size()),
}
}
}
/// Display-format family selected by the leading code of a `TDISPn` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TDispKind {
    /// Code `A`.
    Char,
    /// Code `L`.
    Logical,
    /// Code `I`.
    Integer,
    /// Code `B`.
    Binary,
    /// Code `O`.
    Octal,
    /// Code `Z`.
    Hex,
    /// Code `F`.
    Float,
    /// Code `E`.
    Exponential,
    /// Code `EN`.
    Engineering,
    /// Code `ES`.
    Scientific,
    /// Code `G`.
    General,
    /// Code `D`.
    Double,
}

/// Parsed `TDISPn` display format of the shape `<code><width>[.<decimals>][E<exponent>]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TDisp {
    /// Format family.
    pub kind: TDispKind,
    /// Number written directly after the code.
    pub width: usize,
    /// Number after the `.`, when present.
    pub decimals: Option<usize>,
    /// Number after the trailing `E`, when present.
    pub exponent: Option<usize>,
}

impl TDisp {
    /// Parses a display format such as `"F8.3"`, `"I6"` or `"EN12.4E2"`.
    ///
    /// The input is trimmed and upper-cased (ASCII only) first. Returns `None`
    /// when the code is unknown or any numeric part fails to parse as `usize`;
    /// the width is mandatory.
    pub fn parse(s: &str) -> Option<TDisp> {
        // Two-letter codes come first so that "EN"/"ES" are not taken for "E".
        const PREFIXES: [(&str, TDispKind); 12] = [
            ("EN", TDispKind::Engineering),
            ("ES", TDispKind::Scientific),
            ("A", TDispKind::Char),
            ("L", TDispKind::Logical),
            ("I", TDispKind::Integer),
            ("B", TDispKind::Binary),
            ("O", TDispKind::Octal),
            ("Z", TDispKind::Hex),
            ("F", TDispKind::Float),
            ("E", TDispKind::Exponential),
            ("G", TDispKind::General),
            ("D", TDispKind::Double),
        ];
        let number = |text: &str| text.parse::<usize>().ok();

        let upper = s.trim().to_ascii_uppercase();
        let (kind, spec) = PREFIXES
            .iter()
            .find_map(|&(prefix, kind)| Some((kind, upper.strip_prefix(prefix)?)))?;

        // Peel the optional exponent off first, then the optional decimals.
        let (spec, exponent) = match spec.split_once('E') {
            None => (spec, None),
            Some((head, digits)) => (head, Some(number(digits)?)),
        };
        let (width, decimals) = match spec.split_once('.') {
            None => (spec, None),
            Some((head, digits)) => (head, Some(number(digits)?)),
        };
        let width = number(width)?;

        Some(TDisp {
            kind,
            width,
            decimals,
            exponent,
        })
    }
}
/// Description of one table column, assembled by `BinTable::from_data` from
/// the per-column header keywords.
#[derive(Debug, Clone)]
pub struct Column {
/// `TTYPEn` text; `None` when the keyword is absent or empty.
pub name: Option<String>,
/// `TUNITn` text; `None` when the keyword is absent or empty.
pub unit: Option<String>,
/// Parsed `TFORMn` value.
pub tform: Tform,
/// `TSCALn` value, 1.0 when absent.
pub tscale: f64,
/// `TZEROn` value, 0.0 when absent.
pub tzero: f64,
/// `TNULLn` value, if present.
pub tnull: Option<i64>,
/// Dimensions parsed from `TDIMn`; `None` when absent or unparsable.
pub tdim: Option<Vec<usize>>,
/// Display format parsed from `TDISPn`; `None` when absent or unparsable.
pub tdisp: Option<TDisp>,
/// Offset of this column's field from the start of a row, in bytes.
pub byte_offset: usize,
}
/// Decoded column (or variable-length array) contents, one variant per
/// in-memory element type.
#[derive(Debug, Clone, PartialEq)]
pub enum ColumnData {
/// Logical values: `Some(true)` for byte `T`, `Some(false)` for `F`,
/// `None` for any other byte.
Logical(Vec<Option<bool>>),
/// Raw bytes (byte columns, packed bit columns, undecoded descriptors).
Bytes(Vec<u8>),
/// Big-endian 16-bit integers, decoded.
I16(Vec<i16>),
/// Big-endian 32-bit integers, decoded.
I32(Vec<i32>),
/// Big-endian 64-bit integers, decoded.
I64(Vec<i64>),
/// Big-endian 32-bit floats, decoded.
F32(Vec<f32>),
/// Big-endian 64-bit floats, decoded.
F64(Vec<f64>),
/// Pairs of 32-bit floats (real, imaginary).
ComplexF32(Vec<Complex<f32>>),
/// Pairs of 64-bit floats (real, imaginary).
ComplexF64(Vec<Complex<f64>>),
/// Text cells with NUL-terminated content and trailing spaces removed.
Text(Vec<String>),
}
impl ColumnData {
pub fn element_count(&self) -> usize {
match self {
ColumnData::Logical(v) => v.len(),
ColumnData::Bytes(v) => v.len(),
ColumnData::I16(v) => v.len(),
ColumnData::I32(v) => v.len(),
ColumnData::I64(v) => v.len(),
ColumnData::F32(v) => v.len(),
ColumnData::F64(v) => v.len(),
ColumnData::ComplexF32(v) => v.len(),
ColumnData::ComplexF64(v) => v.len(),
ColumnData::Text(v) => v.len(),
}
}
}
/// A binary table: column descriptions plus the raw bytes of the main table
/// and the heap that follows it.
#[derive(Debug, Clone)]
pub struct BinTable {
/// Number of rows (`NAXIS2`).
pub nrows: usize,
/// Column descriptions in header order.
pub columns: Vec<Column>,
/// Length of one row in bytes (`NAXIS1`).
pub(crate) row_len: usize,
/// Byte index in `bytes` where the heap starts (`THEAP`, or the end of the
/// main table when the keyword is absent).
heap_offset: usize,
/// Byte index in `bytes` one past the last heap byte that may be read.
heap_end: usize,
/// Raw data of the extension: main table followed by the heap.
bytes: Vec<u8>,
}
impl BinTable {
/// Builds a table from an extension header and the extension's raw data.
///
/// Reads `NAXIS1` (row length), `NAXIS2` (row count) and `TFIELDS`, then one
/// `Column` per field from `TFORMn`, `TTYPEn`, `TUNITn`, `TSCALn`, `TZEROn`,
/// `TNULLn`, `TDIMn` and `TDISPn`. Column byte offsets are the running sum of
/// the preceding fields' widths.
///
/// # Errors
///
/// - `MissingKeyword` when `NAXIS1`, `NAXIS2`, `TFIELDS` or any `TFORMn` is absent.
/// - `KeywordOutOfRange` when `TFIELDS` is outside `0..=999`, when a `TDIMn`
///   on a fixed-width column does not multiply out to the repeat count, or
///   when `THEAP` points inside the main table.
/// - Whatever `Tform::parse` returns for a malformed `TFORMn`.
/// - `RowWidthMismatch` when the summed field widths differ from `NAXIS1`.
/// - `UnexpectedEof` when `data` is shorter than the main table or a size
///   computation overflows.
pub(crate) fn from_data(header: &Header, data: Vec<u8>) -> Result<BinTable> {
// Negative NAXIS1/NAXIS2 values are clamped to zero rather than rejected.
let row_len = header
.get_integer("NAXIS1")
.ok_or(FitsError::MissingKeyword { name: "NAXIS1" })?
.max(0) as usize;
let nrows = header
.get_integer("NAXIS2")
.ok_or(FitsError::MissingKeyword { name: "NAXIS2" })?
.max(0) as usize;
let tfields = match header.get_integer("TFIELDS") {
Some(t) if (0..=999).contains(&t) => t as usize,
Some(_) => return Err(FitsError::KeywordOutOfRange { name: "TFIELDS" }),
None => return Err(FitsError::MissingKeyword { name: "TFIELDS" }),
};
let mut columns = Vec::with_capacity(tfields);
// Running byte offset of the next field within a row.
let mut offset = 0;
for n in 1..=tfields {
let tform_value = header
.get_text(key!("TFORM{n}").as_str())
.ok_or(FitsError::MissingKeyword { name: "TFORMn" })?;
let tform = Tform::parse(tform_value)?;
// A TDIMn that fails to parse is treated as absent.
let tdim = header
.get_text(key!("TDIM{n}").as_str())
.and_then(parse_tdim);
let is_vla = matches!(tform.kind, TformKind::ArrayDesc32 | TformKind::ArrayDesc64);
// For fixed-width columns the product of the dimensions must equal the
// repeat count; an overflowing product also counts as a mismatch.
if let Some(dims) = &tdim
&& !is_vla
&& dims.iter().try_fold(1usize, |a, &x| a.checked_mul(x)) != Some(tform.repeat)
{
return Err(FitsError::KeywordOutOfRange { name: "TDIMn" });
}
columns.push(Column {
name: header
.get_text(key!("TTYPE{n}").as_str())
.map(str::to_string)
.filter(|s| !s.is_empty()),
unit: header
.get_text(key!("TUNIT{n}").as_str())
.map(str::to_string)
.filter(|s| !s.is_empty()),
tform,
tscale: header.get_real(key!("TSCAL{n}").as_str()).unwrap_or(1.0),
tzero: header.get_real(key!("TZERO{n}").as_str()).unwrap_or(0.0),
tnull: header.get_integer(key!("TNULL{n}").as_str()),
tdim,
tdisp: header
.get_text(key!("TDISP{n}").as_str())
.and_then(TDisp::parse),
byte_offset: offset,
});
offset = offset.saturating_add(tform.byte_width());
}
// The declared row length must match what the TFORMn values add up to.
if offset != row_len {
return Err(FitsError::RowWidthMismatch {
computed: offset,
declared: row_len,
});
}
// From here on `nrows * row_len` is known not to overflow and to lie
// within `data`, which the row accessors rely on.
let main_table = nrows.checked_mul(row_len).ok_or(FitsError::UnexpectedEof)?;
if data.len() < main_table {
return Err(FitsError::UnexpectedEof);
}
// The heap starts at THEAP when given, otherwise right after the main table.
let heap_offset = header
.get_integer("THEAP")
.map_or(main_table, |t| t.max(0) as usize);
if heap_offset < main_table {
return Err(FitsError::KeywordOutOfRange { name: "THEAP" });
}
let pcount = header
.get_integer("PCOUNT")
.map_or(0, |p| p.max(0) as usize);
// PCOUNT bytes follow the main table; never read past the bytes actually present.
let heap_end = main_table
.checked_add(pcount)
.ok_or(FitsError::UnexpectedEof)?
.min(data.len());
Ok(BinTable {
nrows,
columns,
row_len,
heap_offset,
heap_end,
bytes: data,
})
}
/// Bytes of the main table only (all rows, no heap).
#[cfg(feature = "compression")]
pub(crate) fn raw_rows(&self) -> &[u8] {
    let main_table_len = self.nrows * self.row_len;
    &self.bytes[..main_table_len]
}
/// Position of the first column whose name equals `name`, ignoring ASCII
/// case. Unnamed columns never match.
pub fn column_index(&self, name: &str) -> Option<usize> {
    self.columns
        .iter()
        .position(|column| match column.name.as_deref() {
            Some(candidate) => candidate.eq_ignore_ascii_case(name),
            None => false,
        })
}
/// Like `column_index`, but reports a missing column as
/// `FitsError::ColumnNotFound` carrying the requested name.
fn column_index_checked(&self, name: &str) -> Result<usize> {
    match self.column_index(name) {
        Some(index) => Ok(index),
        None => Err(FitsError::ColumnNotFound {
            name: name.to_string(),
        }),
    }
}
/// Returns a reader for the column at zero-based `index`.
///
/// # Errors
///
/// `FitsError::ColumnIndexOutOfBounds` when `index` is not a valid column
/// position.
pub fn column_by_idx(&self, index: usize) -> Result<ColumnReader<'_>> {
    let len = self.columns.len();
    if index < len {
        Ok(ColumnReader { table: self, index })
    } else {
        Err(FitsError::ColumnIndexOutOfBounds { index, len })
    }
}
/// Returns a reader for the first column named `name` (ASCII
/// case-insensitive).
///
/// # Errors
///
/// `FitsError::ColumnNotFound` when no column carries that name.
pub fn column_by_name(&self, name: &str) -> Result<ColumnReader<'_>> {
    self.column_index_checked(name)
        .map(|index| ColumnReader { table: self, index })
}
/// Borrows `nbytes` heap bytes starting `offset` bytes into the heap.
///
/// # Errors
///
/// `FitsError::UnexpectedEof` when the span overflows `usize`, extends past
/// the end of the heap, or is not contained in the stored bytes.
fn bounded_heap(&self, offset: usize, nbytes: usize) -> Result<&[u8]> {
    self.heap_offset
        .checked_add(offset)
        .and_then(|start| Some(start..start.checked_add(nbytes)?))
        .filter(|span| span.end <= self.heap_end)
        .and_then(|span| self.bytes.get(span))
        .ok_or(FitsError::UnexpectedEof)
}
/// Borrows the bytes of column `col` in row `r` of the main table.
///
/// Panics if the computed span lies outside the stored bytes.
fn cell(&self, col: &Column, r: usize) -> &[u8] {
    let begin = r * self.row_len + col.byte_offset;
    let width = col.tform.byte_width();
    &self.bytes[begin..begin + width]
}
/// Concatenates the cells of `col` from every row, in row order, into one
/// contiguous buffer.
fn flatten(&self, col: &Column) -> Vec<u8> {
    let width = col.tform.byte_width();
    let mut flat: Vec<u8> = Vec::with_capacity(self.nrows * width);
    flat.extend((0..self.nrows).flat_map(|row| self.cell(col, row).iter().copied()));
    flat
}
}
/// Borrowed handle on a single column of a `BinTable`, used to decode that
/// column's values.
#[derive(Debug, Clone, Copy)]
pub struct ColumnReader<'a> {
/// Table the column belongs to.
table: &'a BinTable,
/// Position of the column in `table.columns`.
index: usize,
}
impl<'a> ColumnReader<'a> {
pub fn descriptor(&self) -> &'a Column {
&self.table.columns[self.index]
}
/// Decodes the stored values of a fixed-width column without applying
/// scaling or null handling.
///
/// Character columns produce one trimmed string per row; every other kind
/// is decoded from the column's cells concatenated across all rows.
///
/// # Errors
///
/// `FitsError::VariableLengthColumn` for array-descriptor columns.
pub fn raw(&self) -> Result<ColumnData> {
    let col = self.descriptor();
    let table = self.table;
    match col.tform.kind {
        kind @ (TformKind::ArrayDesc32 | TformKind::ArrayDesc64) => {
            Err(FitsError::VariableLengthColumn { code: kind.code() })
        }
        TformKind::Char => {
            let texts = (0..table.nrows)
                .map(|row| trim_text(table.cell(col, row)))
                .collect();
            Ok(ColumnData::Text(texts))
        }
        kind => Ok(decode_array(kind, &table.flatten(col))),
    }
}
pub fn physical(&self) -> Result<Vec<f64>> {
let col = self.descriptor();
column_data_physical(
&self.raw()?,
col.tform.kind,
col.tscale,
col.tzero,
col.tnull,
)
}
/// Reinterprets an offset-encoded integer column as unsigned values.
///
/// Returns `Ok(None)` unless `tscale` is exactly 1.0, no `tnull` is set, and
/// `tzero` equals the offset expected for the column's storage type
/// (-128 for byte columns, `U16_OFFSET`/`U32_OFFSET`/`U64_OFFSET` for the
/// 16/32/64-bit integer columns).
///
/// # Errors
///
/// Propagates the errors of `raw`.
pub fn unsigned(&self) -> Result<Option<UnsignedView>> {
    let col = self.descriptor();
    let plain_offset = col.tscale == 1.0 && col.tnull.is_none();
    if !plain_offset {
        return Ok(None);
    }
    let zero = col.tzero;
    let is_byte_column = col.tform.kind == TformKind::Byte;
    let view = match self.raw()? {
        ColumnData::Bytes(bytes) if is_byte_column && zero == -128.0 => {
            Some(UnsignedView::from_signed_byte(&bytes))
        }
        ColumnData::I16(words) if zero == U16_OFFSET => {
            Some(UnsignedView::from_offset_i16(&words))
        }
        ColumnData::I32(words) if zero == U32_OFFSET => {
            Some(UnsignedView::from_offset_i32(&words))
        }
        ColumnData::I64(words) if zero == U64_OFFSET => {
            Some(UnsignedView::from_offset_i64(&words))
        }
        _ => None,
    };
    Ok(view)
}
/// Decodes a complex column to `Complex<f64>`, applying
/// `tzero + tscale * x` to the real and imaginary parts independently.
///
/// # Errors
///
/// `FitsError::NotAComplexColumn` when the column does not decode to complex
/// data; also propagates the errors of `raw`.
pub fn complex(&self) -> Result<Vec<Complex<f64>>> {
    let col = self.descriptor();
    let (tscale, tzero) = (col.tscale, col.tzero);
    let apply = |part: f64| tzero + tscale * part;
    match self.raw()? {
        ColumnData::ComplexF32(values) => Ok(values
            .into_iter()
            .map(|z| Complex {
                re: apply(f64::from(z.re)),
                im: apply(f64::from(z.im)),
            })
            .collect()),
        ColumnData::ComplexF64(values) => Ok(values
            .into_iter()
            .map(|z| Complex {
                re: apply(z.re),
                im: apply(z.im),
            })
            .collect()),
        _ => Err(FitsError::NotAComplexColumn {
            code: col.tform.kind.code(),
        }),
    }
}
/// Returns a bit-level view of a fixed-width bit column.
///
/// # Errors
///
/// `FitsError::NotABitColumn` when the column's kind is not `Bit`.
pub fn bits(&self) -> Result<BitColumn<'a>> {
    match self.descriptor().tform.kind {
        TformKind::Bit => Ok(BitColumn {
            table: self.table,
            index: self.index,
        }),
        other => Err(FitsError::NotABitColumn { code: other.code() }),
    }
}
pub fn vla(&self) -> Result<Vec<ColumnData>> {
let col = self.descriptor();
let (elem, wide) = match (col.tform.kind, col.tform.vla_elem) {
(TformKind::ArrayDesc32, Some(e)) => (e, false),
(TformKind::ArrayDesc64, Some(e)) => (e, true),
_ => {
return Err(FitsError::NotAVla {
code: col.tform.kind.code(),
});
}
};
let mut out = Vec::with_capacity(self.table.nrows);
for r in 0..self.table.nrows {
let d = decode_descriptor(self.table.cell(col, r), wide);
let nbytes = match elem {
TformKind::Bit => d.nelem.div_ceil(8),
_ => d
.nelem
.checked_mul(elem.elem_size())
.ok_or(FitsError::UnexpectedEof)?,
};
out.push(decode_array(
elem,
self.table.bounded_heap(d.offset, nbytes)?,
));
}
Ok(out)
}
pub fn vla_physical(&self) -> Result<Vec<Vec<f64>>> {
let rows = self.vla()?; let col = self.descriptor();
let elem = col
.tform
.vla_elem
.expect("vla() succeeded ⇒ vla_elem is Some");
rows.iter()
.map(|row| column_data_physical(row, elem, col.tscale, col.tzero, col.tnull))
.collect()
}
pub fn vla_bits(&self) -> Result<BitColumn<'a>> {
let col = self.descriptor();
let wide = match (col.tform.kind, col.tform.vla_elem) {
(TformKind::ArrayDesc32, Some(TformKind::Bit)) => false,
(TformKind::ArrayDesc64, Some(TformKind::Bit)) => true,
_ => {
return Err(FitsError::NotABitColumn {
code: col.tform.kind.code(),
});
}
};
for r in 0..self.table.nrows {
let d = decode_descriptor(self.table.cell(col, r), wide);
self.table.bounded_heap(d.offset, d.nelem.div_ceil(8))?;
}
Ok(BitColumn {
table: self.table,
index: self.index,
})
}
}
/// Borrowed bit-level view of a bit column (fixed-width, or variable-length
/// with bit elements), addressable by row and by bit within a row.
#[derive(Debug, Clone, Copy)]
pub struct BitColumn<'a> {
/// Table the column belongs to.
table: &'a BinTable,
/// Position of the column in `table.columns`.
index: usize,
}
impl<'a> BitColumn<'a> {
pub fn nrows(&self) -> usize {
self.table.nrows
}
/// `true` when the underlying table has no rows.
pub fn is_empty(&self) -> bool {
    matches!(self.table.nrows, 0)
}
/// Bits of row `r`, most significant bit of each byte first.
///
/// For a fixed-width column the slice holds `repeat` bits taken from the
/// row's cell; for a variable-length column it holds the descriptor's
/// element count, taken from the heap.
///
/// # Panics
///
/// Panics when `r` is not a valid row, or when a variable-length row's heap
/// span is invalid (which `vla_bits` rules out before handing out the view).
pub fn row(&self, r: usize) -> &'a BitSlice<u8, Msb0> {
    let table = self.table;
    assert!(
        r < table.nrows,
        "row {r} out of bounds ({} rows)",
        table.nrows
    );
    let col = &table.columns[self.index];
    let (bytes, nbits) = match col.tform.kind {
        TformKind::Bit => (table.cell(col, r), col.tform.repeat),
        kind => {
            let wide = kind == TformKind::ArrayDesc64;
            let d = decode_descriptor(table.cell(col, r), wide);
            let heap = table
                .bounded_heap(d.offset, d.nelem.div_ceil(8))
                .expect("vla_bits validated every heap span");
            (heap, d.nelem)
        }
    };
    &bytes.view_bits::<Msb0>()[..nbits]
}
/// Bit `col` of row `row`, or `None` when either index is out of range.
pub fn get(&self, row: usize, col: usize) -> Option<bool> {
    let bits = (row < self.table.nrows).then(|| self.row(row))?;
    if col < bits.len() {
        Some(bits[col])
    } else {
        None
    }
}
/// Iterates over the bit slices of all rows, in row order.
pub fn iter(&self) -> impl ExactSizeIterator<Item = &'a BitSlice<u8, Msb0>> + '_ {
    let rows = 0..self.table.nrows;
    rows.map(move |index| self.row(index))
}
}
impl Index<usize> for BitColumn<'_> {
type Output = BitSlice<u8, Msb0>;
fn index(&self, row: usize) -> &BitSlice<u8, Msb0> {
self.row(row)
}
}
/// `column[(row, bit)]` yields a single bit; panics when either index is out
/// of range.
impl Index<(usize, usize)> for BitColumn<'_> {
    type Output = bool;

    fn index(&self, position: (usize, usize)) -> &bool {
        let (row, col) = position;
        let bits = self.row(row);
        &bits[col]
    }
}
/// Parses a `TDIMn` value of the form `(d1,d2,...)` into its dimensions.
///
/// Whitespace around the whole value and around each number is ignored.
/// Returns `None` when the parentheses are missing or any entry is not a
/// valid `usize` (including an empty list).
fn parse_tdim(value: &str) -> Option<Vec<usize>> {
    let list = value
        .trim()
        .strip_prefix('(')
        .and_then(|rest| rest.strip_suffix(')'))?;
    let mut dims = Vec::new();
    for item in list.split(',') {
        dims.push(item.trim().parse().ok()?);
    }
    Some(dims)
}
/// Converts decoded numeric data to `f64` as `tzero + tscale * value`.
///
/// Integer values equal to `tnull` become NaN instead of being scaled;
/// float values are always scaled. `Bytes` data is only accepted when
/// `kind` is `Byte`.
///
/// # Errors
///
/// `FitsError::NonNumericColumn` (with `kind`'s code) for any other data.
fn column_data_physical(
    data: &ColumnData,
    kind: TformKind,
    tscale: f64,
    tzero: f64,
    tnull: Option<i64>,
) -> Result<Vec<f64>> {
    let linear = |raw: f64| tzero + tscale * raw;
    let from_int = |raw: i64| match tnull {
        Some(null) if null == raw => f64::NAN,
        _ => linear(raw as f64),
    };
    let values: Vec<f64> = match data {
        ColumnData::Bytes(bytes) if kind == TformKind::Byte => {
            bytes.iter().map(|&b| from_int(i64::from(b))).collect()
        }
        ColumnData::I16(ints) => ints.iter().map(|&x| from_int(i64::from(x))).collect(),
        ColumnData::I32(ints) => ints.iter().map(|&x| from_int(i64::from(x))).collect(),
        ColumnData::I64(ints) => ints.iter().map(|&x| from_int(x)).collect(),
        ColumnData::F32(floats) => floats.iter().map(|&x| linear(f64::from(x))).collect(),
        ColumnData::F64(floats) => floats.iter().map(|&x| linear(x)).collect(),
        _ => return Err(FitsError::NonNumericColumn { code: kind.code() }),
    };
    Ok(values)
}
fn decode_array(kind: TformKind, bytes: &[u8]) -> ColumnData {
match kind {
TformKind::Logical => ColumnData::Logical(
bytes
.iter()
.map(|&b| match b {
b'T' => Some(true),
b'F' => Some(false),
_ => None, })
.collect(),
),
TformKind::Byte | TformKind::Bit => ColumnData::Bytes(bytes.to_vec()),
TformKind::Char => ColumnData::Text(vec![trim_text(bytes)]),
TformKind::I16 => ColumnData::I16(decode_be(bytes, i16::from_be_bytes)),
TformKind::I32 => ColumnData::I32(decode_be(bytes, i32::from_be_bytes)),
TformKind::I64 => ColumnData::I64(decode_be(bytes, i64::from_be_bytes)),
TformKind::F32 => ColumnData::F32(decode_be(bytes, f32::from_be_bytes)),
TformKind::F64 => ColumnData::F64(decode_be(bytes, f64::from_be_bytes)),
TformKind::ComplexF32 => ColumnData::ComplexF32(decode_be(bytes, |b: [u8; 8]| Complex {
re: f32::from_be_bytes([b[0], b[1], b[2], b[3]]),
im: f32::from_be_bytes([b[4], b[5], b[6], b[7]]),
})),
TformKind::ComplexF64 => ColumnData::ComplexF64(decode_be(bytes, |b: [u8; 16]| Complex {
re: f64::from_be_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
im: f64::from_be_bytes([b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]]),
})),
TformKind::ArrayDesc32 | TformKind::ArrayDesc64 => ColumnData::Bytes(bytes.to_vec()),
}
}
/// Converts a text cell to a `String`: the content ends at the first NUL
/// byte, trailing ASCII spaces are dropped, and invalid UTF-8 is replaced
/// lossily. Leading and interior spaces are kept.
fn trim_text(cell: &[u8]) -> String {
    // `split` always yields at least one (possibly empty) piece.
    let mut text = cell.split(|&byte| byte == 0).next().unwrap_or(&[]);
    while let [kept @ .., b' '] = text {
        text = kept;
    }
    String::from_utf8_lossy(text).into_owned()
}
/// Decoded array descriptor: where one row's variable-length array lives in
/// the heap and how many elements it has.
#[derive(Debug, Clone, Copy)]
struct Descriptor {
    /// Number of elements in the array.
    nelem: usize,
    /// Byte offset of the array from the start of the heap.
    offset: usize,
}

/// Decodes an array descriptor from a table cell.
///
/// A narrow descriptor is two big-endian `u32`s (element count, heap
/// offset); a wide one (`wide == true`) is two big-endian `u64`s.
///
/// A cell shorter than the descriptor decodes as an empty array at offset 0
/// instead of panicking. This happens for a descriptor column declared with
/// repeat count 0, whose row width (`repeat * elem_size`) is zero bytes.
fn decode_descriptor(desc: &[u8], wide: bool) -> Descriptor {
    let field = if wide { 8 } else { 4 };
    let (Some(nelem), Some(offset)) = (desc.get(..field), desc.get(field..2 * field)) else {
        return Descriptor {
            nelem: 0,
            offset: 0,
        };
    };
    if wide {
        Descriptor {
            nelem: be_u64(nelem),
            offset: be_u64(offset),
        }
    } else {
        Descriptor {
            nelem: be_u32(nelem),
            offset: be_u32(offset),
        }
    }
}

/// Reads a big-endian `u32` from the first four bytes of `b`.
///
/// Panics if `b` holds fewer than four bytes.
fn be_u32(b: &[u8]) -> usize {
    u32::from_be_bytes([b[0], b[1], b[2], b[3]]) as usize
}

/// Reads a big-endian `u64` from the first eight bytes of `b`, saturating to
/// `usize::MAX` when the value does not fit in `usize`, so that later bounds
/// checks reject it.
///
/// Panics if `b` holds fewer than eight bytes.
fn be_u64(b: &[u8]) -> usize {
    usize::try_from(u64::from_be_bytes([
        b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
    ]))
    .unwrap_or(usize::MAX)
}
#[cfg(test)]
mod tests;