// igwn-ligolw 0.1.0
//
// Rust-native reader and writer for the LIGO_LW XML format used by the International Gravitational-Wave Observatory Network.
// Documentation
use std::collections::HashMap;

use crate::error::{Error, Result};
use crate::types::LigoType;
use crate::value::Value;

/// A parsed LIGO_LW document.
///
/// [`Document::root`] keeps the complete nested element tree so that
/// writers can emit the document back out unchanged (DTT files in
/// particular nest `<LIGO_LW>` containers several levels deep). For the
/// common flat case — most coinc.xml outputs from the LVK low-latency
/// pipelines — the convenience maps on this struct index every table,
/// param, array, and time element in the document by its `Name`
/// attribute, regardless of how deeply it sits in the tree.
///
/// The maps hold clones of the elements found in `root`. Arrays and times
/// that carry no name are not indexed. Indexing uses last-one-wins on
/// name collisions; consult `root` if you need the original ordering or
/// an element that was shadowed.
#[derive(Debug, Clone)]
pub struct Document {
    /// Root `<LIGO_LW>` element holding the full nested tree.
    pub root: LigoLwElement,
    /// Every table in the tree, keyed by table name.
    pub tables: HashMap<String, Table>,
    /// Every param in the tree, keyed by param name.
    pub params: HashMap<String, Param>,
    /// Every named array in the tree, keyed by array name.
    pub arrays: HashMap<String, Array>,
    /// Every named time element in the tree, keyed by its name.
    pub times: HashMap<String, Time>,
}

impl Document {
    /// Build a document from its root element, walking the whole tree
    /// once to populate the flat lookup maps.
    pub(crate) fn from_root(root: LigoLwElement) -> Self {
        let mut tables = HashMap::new();
        let mut params = HashMap::new();
        let mut arrays = HashMap::new();
        let mut times = HashMap::new();
        // Index before moving `root` into the struct; the maps store clones.
        index_into(&root, &mut tables, &mut params, &mut arrays, &mut times);
        Self {
            root,
            tables,
            params,
            arrays,
            times,
        }
    }

    /// Borrow a table by its bare name (e.g. `"coinc_inspiral"`).
    ///
    /// Returns `None` when no table with that name was indexed.
    pub fn table(&self, name: &str) -> Option<&Table> {
        self.tables.get(name)
    }

    /// Borrow a table or return [`Error::MissingTable`].
    ///
    /// # Errors
    ///
    /// Returns [`Error::MissingTable`] carrying `name` when no table with
    /// that name was indexed.
    pub fn require_table(&self, name: &str) -> Result<&Table> {
        self.tables
            .get(name)
            .ok_or_else(|| Error::MissingTable(name.to_string()))
    }

    /// Borrow a [`Param`] by its name.
    ///
    /// Returns `None` when no param with that name was indexed.
    pub fn param(&self, name: &str) -> Option<&Param> {
        self.params.get(name)
    }

    /// Borrow an array by its bare name.
    ///
    /// Returns `None` when no array with that name was indexed.
    pub fn array(&self, name: &str) -> Option<&Array> {
        self.arrays.get(name)
    }

    /// Borrow a [`Time`] element by name.
    ///
    /// Returns `None` when no time element with that name was indexed.
    pub fn time(&self, name: &str) -> Option<&Time> {
        self.times.get(name)
    }
}

/// Recursively record every table, param, named array, and named time
/// found beneath `elem` in the four lookup maps.
///
/// Children are visited in the order they appear in `elem.children` and
/// each hit is cloned into its map, so a later element replaces an
/// earlier one that has the same name. Arrays and times without a name
/// are skipped, comments are ignored, and generic extension elements are
/// handed to [`index_generic`].
fn index_into(
    elem: &LigoLwElement,
    tables: &mut HashMap<String, Table>,
    params: &mut HashMap<String, Param>,
    arrays: &mut HashMap<String, Array>,
    times: &mut HashMap<String, Time>,
) {
    for node in elem.children.iter() {
        match node {
            // Containers: nothing to record at this level, just descend.
            Child::LigoLw(inner) => index_into(inner, tables, params, arrays, times),
            // Walk through generic containers so extension elements
            // do not hide nested tables, params, arrays, or times
            // from the flat accessors.
            Child::Other(generic) => index_generic(generic, tables, params, arrays, times),
            // Tables and params always have a name to key on.
            Child::Table(table) => {
                tables.insert(table.name.clone(), table.clone());
            }
            Child::Param(param) => {
                params.insert(param.name.clone(), param.clone());
            }
            // Arrays and times are only indexed when they carry a name.
            Child::Array(array) => {
                if let Some(key) = array.name.clone() {
                    arrays.insert(key, array.clone());
                }
            }
            Child::Time(time) => {
                if let Some(key) = time.name.clone() {
                    times.insert(key, time.clone());
                }
            }
            Child::Comment(_) => {}
        }
    }
}

/// Counterpart of [`index_into`] for generic extension elements.
///
/// Walks `elem.children` in order, cloning every table, param, named
/// array, and named time into the matching lookup map (a later element
/// replaces an earlier one with the same name). Nested `<LIGO_LW>`
/// containers are handed back to [`index_into`]; nested generic elements
/// recurse into this function. Comments and unnamed arrays/times are
/// skipped.
fn index_generic(
    elem: &GenericElement,
    tables: &mut HashMap<String, Table>,
    params: &mut HashMap<String, Param>,
    arrays: &mut HashMap<String, Array>,
    times: &mut HashMap<String, Time>,
) {
    for entry in &elem.children {
        match entry {
            Child::Comment(_) => continue,
            Child::Other(sub) => index_generic(sub, tables, params, arrays, times),
            Child::LigoLw(container) => index_into(container, tables, params, arrays, times),
            Child::Table(tbl) => {
                tables.insert(tbl.name.clone(), tbl.clone());
            }
            Child::Param(prm) => {
                params.insert(prm.name.clone(), prm.clone());
            }
            // Only named arrays and times can be looked up by name.
            Child::Array(arr) => match &arr.name {
                Some(key) => {
                    arrays.insert(key.clone(), arr.clone());
                }
                None => {}
            },
            Child::Time(tm) => match &tm.name {
                Some(key) => {
                    times.insert(key.clone(), tm.clone());
                }
                None => {}
            },
        }
    }
}

/// A `<LIGO_LW>` element. The document's root is one of these; DTT-style
/// documents nest them.
///
/// The typed iterators on this type look only at the element's *direct*
/// children; they do not descend into nested containers.
#[derive(Debug, Clone, Default)]
pub struct LigoLwElement {
    /// Optional name of this container.
    // NOTE(review): presumably the `Name` attribute — confirm against the parser.
    pub name: Option<String>,
    /// Optional type of this container.
    // NOTE(review): presumably the `Type` attribute — confirm against the parser.
    pub element_type: Option<String>,
    /// Child elements, in the order they are stored.
    pub children: Vec<Child>,
}

impl LigoLwElement {
    /// Iterate over the tables that are direct children of this element.
    pub fn tables(&self) -> impl Iterator<Item = &Table> {
        self.children.iter().filter_map(|child| {
            if let Child::Table(table) = child {
                Some(table)
            } else {
                None
            }
        })
    }

    /// Iterate over the arrays that are direct children of this element.
    pub fn arrays(&self) -> impl Iterator<Item = &Array> {
        self.children.iter().filter_map(|child| {
            if let Child::Array(array) = child {
                Some(array)
            } else {
                None
            }
        })
    }

    /// Iterate over the params that are direct children of this element.
    pub fn params(&self) -> impl Iterator<Item = &Param> {
        self.children.iter().filter_map(|child| {
            if let Child::Param(param) = child {
                Some(param)
            } else {
                None
            }
        })
    }
}

/// One element that can sit inside a `<LIGO_LW>` (or other) container.
///
/// Both [`LigoLwElement::children`] and [`GenericElement::children`] are
/// vectors of this type, which is how arbitrarily nested documents are
/// represented.
#[derive(Debug, Clone)]
pub enum Child {
    /// A nested `<LIGO_LW>` container.
    LigoLw(LigoLwElement),
    /// A table element.
    Table(Table),
    /// A param element.
    Param(Param),
    /// An array element.
    Array(Array),
    /// A time element.
    Time(Time),
    /// A comment, carried as a plain `String`. Comments are ignored when
    /// the flat lookup maps are built.
    Comment(String),
    /// Any element the parser does not have explicit support for — DTT
    /// extension elements such as `<AdcData>`, `<DetectorData>`,
    /// `<Frame>`, `<Detector>`, `<IGWDFrame>`, or future producer
    /// additions. The element's tag, attributes, text body, and
    /// children are all preserved so the writer can round-trip it.
    Other(GenericElement),
}

/// A generic LIGO_LW element preserved verbatim for elements the parser
/// does not have a dedicated representation for.
///
/// Although the element itself is opaque, its `children` are still
/// walked when the document's flat lookup maps are built, so tables,
/// params, arrays, and times nested inside it remain reachable by name.
#[derive(Debug, Clone, Default)]
pub struct GenericElement {
    /// Element tag as it appeared in the source (e.g. `"AdcData"`).
    pub tag: String,
    /// `Name` attribute, if present.
    pub name: Option<String>,
    /// `Type` attribute, if present.
    pub element_type: Option<String>,
    /// Any other attributes, preserved in source order as
    /// `(attribute name, attribute value)` pairs.
    pub attributes: Vec<(String, String)>,
    /// Container children — child Tables, Arrays, Params, nested generic
    /// elements, and so on.
    pub children: Vec<Child>,
    /// Inline text body, if any. Pure container elements leave this empty.
    pub text: String,
}

/// A `Table` element.
#[derive(Debug, Clone)]
pub struct Table {
    pub name: String,
    pub delimiter: char,
    pub columns: Vec<Column>,
    pub rows: Vec<Vec<Value>>,
}

impl Table {
    pub fn column_index(&self, name: &str) -> Option<usize> {
        self.columns.iter().position(|c| c.name == name)
    }

    pub fn cell(&self, row: usize, column: &str) -> Option<&Value> {
        let idx = self.column_index(column)?;
        self.rows.get(row)?.get(idx)
    }

    pub fn require_cell(&self, row: usize, column: &str) -> Result<&Value> {
        let idx = self
            .column_index(column)
            .ok_or_else(|| Error::MissingColumn {
                table: self.name.clone(),
                column: column.to_string(),
            })?;
        Ok(&self.rows[row][idx])
    }
}

/// A column declaration inside a [`Table`]: the column's name paired
/// with the [`LigoType`] recorded for it.
#[derive(Debug, Clone)]
pub struct Column {
    /// Column name; this is what [`Table::column_index`] compares against.
    pub name: String,
    /// Type recorded for the column.
    pub ty: LigoType,
}

/// A `Param` element. Carries metadata about a single typed value and the
/// raw text payload. Decode the payload by inspecting [`Param::ty`].
#[derive(Debug, Clone)]
pub struct Param {
    /// Name of the param; also the key it is stored under in
    /// [`Document::params`].
    pub name: String,
    /// Type tag that tells the caller how to decode [`Param::raw`].
    pub ty: LigoType,
    /// Optional unit string.
    // NOTE(review): presumably the element's `Unit` attribute — confirm against the parser.
    pub unit: Option<String>,
    /// Raw, undecoded text payload.
    pub raw: String,
}

/// A `Time` element. The `value` is preserved as text because the
/// upstream type system distinguishes GPS, ISO-8601, and other formats
/// that we leave to the caller to interpret.
#[derive(Debug, Clone)]
pub struct Time {
    /// Optional name. Only named time elements are indexed in
    /// [`Document::times`].
    pub name: Option<String>,
    /// Format/type label for `value`, kept as text.
    // NOTE(review): presumably the element's `Type` attribute (e.g. GPS vs ISO-8601) — confirm against the parser.
    pub time_type: String,
    /// The time itself, as uninterpreted text.
    pub value: String,
}

/// A `Dim` element inside an `<Array>`.
///
/// Each `Dim` describes one axis of the array; the product of the
/// `size` fields is what [`Array::expected_len`] reports.
#[derive(Debug, Clone)]
pub struct Dim {
    /// Optional name of the axis.
    pub name: Option<String>,
    /// Number of elements along this axis.
    pub size: usize,
    /// Optional scale value for the axis.
    // NOTE(review): presumably the `Scale` attribute (step between samples) — confirm against the parser.
    pub scale: Option<f64>,
    /// Optional start value for the axis.
    // NOTE(review): presumably the `Start` attribute (coordinate of the first sample) — confirm against the parser.
    pub start: Option<f64>,
    /// Optional unit string for the axis.
    // NOTE(review): presumably the `Unit` attribute — confirm against the parser.
    pub unit: Option<String>,
}

/// An `Array` element.
///
/// Numeric data is widened to `f64` so callers do not have to enumerate
/// per-type variants; the original [`LigoType`] is recorded so the writer
/// re-encodes at the correct precision. Shape is in [`Array::dims`]; the
/// flat `values` buffer is in C order (row-major).
#[derive(Debug, Clone)]
pub struct Array {
    /// Optional name. Only named arrays are indexed in
    /// [`Document::arrays`].
    pub name: Option<String>,
    /// Original element type of the data before widening to `f64`.
    pub ty: LigoType,
    /// Optional unit string.
    // NOTE(review): presumably the element's `Unit` attribute — confirm against the parser.
    pub unit: Option<String>,
    /// Declared shape, one entry per axis. May be empty, in which case
    /// [`Array::expected_len`] falls back to `values.len()`.
    pub dims: Vec<Dim>,
    /// How the array's stream body was encoded in the source.
    pub encoding: ArrayEncoding,
    /// Delimiter character associated with the array's stream.
    // NOTE(review): presumably only meaningful for text-encoded streams — confirm against the parser/writer.
    pub delimiter: char,
    /// Flat data buffer, widened to `f64`, in C (row-major) order.
    pub values: Vec<f64>,
}

/// How the `<Stream>` body of an `<Array>` was encoded on the wire.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArrayEncoding {
    /// Plain delimited text; written without an explicit encoding
    /// attribute (see [`ArrayEncoding::as_attribute`]).
    Text,
    /// Base64 payload of little-endian values.
    LittleEndianBase64,
    /// Base64 payload of big-endian values.
    BigEndianBase64,
}

impl ArrayEncoding {
    /// Classify an encoding attribute value.
    ///
    /// Matching is case-insensitive and substring-based: anything that
    /// does not mention `base64` is [`ArrayEncoding::Text`]; a base64
    /// value that also mentions `bigendian` is
    /// [`ArrayEncoding::BigEndianBase64`]; every other base64 value is
    /// treated as [`ArrayEncoding::LittleEndianBase64`].
    pub fn parse(s: &str) -> Self {
        let normalized = s.trim().to_ascii_lowercase();
        let is_base64 = normalized.contains("base64");
        let is_big_endian = normalized.contains("bigendian");
        match (is_base64, is_big_endian) {
            (false, _) => Self::Text,
            (true, true) => Self::BigEndianBase64,
            (true, false) => Self::LittleEndianBase64,
        }
    }

    /// The attribute value to write for this encoding, or `None` for
    /// [`ArrayEncoding::Text`], which needs no attribute.
    pub fn as_attribute(self) -> Option<&'static str> {
        let label = match self {
            Self::Text => return None,
            Self::BigEndianBase64 => "BigEndian,base64",
            Self::LittleEndianBase64 => "LittleEndian,base64",
        };
        Some(label)
    }
}

impl Array {
    /// Product of declared dim sizes, or the actual `values.len()` if no
    /// dims were declared.
    ///
    /// Dim sizes come from the document being read, so the product is
    /// computed with saturating multiplication: a shape too large for
    /// `usize` yields `usize::MAX` instead of panicking (debug builds) or
    /// silently wrapping to a small number (release builds). A dim of
    /// size zero still yields zero.
    pub fn expected_len(&self) -> usize {
        if self.dims.is_empty() {
            return self.values.len();
        }
        self.dims
            .iter()
            .fold(1usize, |acc, dim| acc.saturating_mul(dim.size))
    }
}