Skip to main content

igwn_ligolw/
document.rs

1use std::collections::HashMap;
2
3use crate::error::{Error, Result};
4use crate::types::LigoType;
5use crate::value::Value;
6
/// A parsed LIGO_LW document.
///
/// The full nested element tree is preserved on [`Document::root`] so that
/// writers can emit the document back out unchanged (DTT files in
/// particular nest `<LIGO_LW>` containers several levels deep). For the
/// common flat case — most coinc.xml outputs from the LVK low-latency
/// pipelines — the convenience HashMaps on this struct index every
/// table, param, array, and time element in the document by its `Name`
/// attribute, regardless of how deeply it sits in the tree. Indexing
/// uses last-one-wins on name collisions; consult `root` if you need
/// the original ordering.
///
/// Arrays and times whose `name` is `None` have no key to be filed
/// under, so they are absent from the maps and reachable only through
/// `root`. The maps hold clones of the indexed elements, so editing an
/// entry in a map does not change the tree under `root`, and vice versa.
#[derive(Debug, Clone)]
pub struct Document {
    /// The complete element tree.
    pub root: LigoLwElement,
    /// Tables found anywhere under `root`, keyed by `Table::name`.
    pub tables: HashMap<String, Table>,
    /// Params found anywhere under `root`, keyed by `Param::name`.
    pub params: HashMap<String, Param>,
    /// Named arrays found anywhere under `root`, keyed by their name.
    pub arrays: HashMap<String, Array>,
    /// Named time elements found anywhere under `root`, keyed by their name.
    pub times: HashMap<String, Time>,
}
26
27impl Document {
28    pub(crate) fn from_root(root: LigoLwElement) -> Self {
29        let mut tables = HashMap::new();
30        let mut params = HashMap::new();
31        let mut arrays = HashMap::new();
32        let mut times = HashMap::new();
33        index_into(&root, &mut tables, &mut params, &mut arrays, &mut times);
34        Self {
35            root,
36            tables,
37            params,
38            arrays,
39            times,
40        }
41    }
42
43    /// Borrow a table by its bare name (e.g. `"coinc_inspiral"`).
44    pub fn table(&self, name: &str) -> Option<&Table> {
45        self.tables.get(name)
46    }
47
48    /// Borrow a table or return [`Error::MissingTable`].
49    pub fn require_table(&self, name: &str) -> Result<&Table> {
50        self.tables
51            .get(name)
52            .ok_or_else(|| Error::MissingTable(name.to_string()))
53    }
54
55    /// Borrow an array by its bare name.
56    pub fn array(&self, name: &str) -> Option<&Array> {
57        self.arrays.get(name)
58    }
59
60    /// Borrow a [`Time`] element by name.
61    pub fn time(&self, name: &str) -> Option<&Time> {
62        self.times.get(name)
63    }
64}
65
66fn index_into(
67    elem: &LigoLwElement,
68    tables: &mut HashMap<String, Table>,
69    params: &mut HashMap<String, Param>,
70    arrays: &mut HashMap<String, Array>,
71    times: &mut HashMap<String, Time>,
72) {
73    for child in &elem.children {
74        match child {
75            Child::LigoLw(nested) => index_into(nested, tables, params, arrays, times),
76            Child::Table(t) => {
77                tables.insert(t.name.clone(), t.clone());
78            }
79            Child::Param(p) => {
80                params.insert(p.name.clone(), p.clone());
81            }
82            Child::Array(a) => {
83                if let Some(n) = &a.name {
84                    arrays.insert(n.clone(), a.clone());
85                }
86            }
87            Child::Time(t) => {
88                if let Some(n) = &t.name {
89                    times.insert(n.clone(), t.clone());
90                }
91            }
92            Child::Comment(_) => {}
93            Child::Other(g) => {
94                // Walk through generic containers so extension elements
95                // do not hide nested tables, params, arrays, or times
96                // from the flat accessors.
97                index_generic(g, tables, params, arrays, times);
98            }
99        }
100    }
101}
102
103fn index_generic(
104    elem: &GenericElement,
105    tables: &mut HashMap<String, Table>,
106    params: &mut HashMap<String, Param>,
107    arrays: &mut HashMap<String, Array>,
108    times: &mut HashMap<String, Time>,
109) {
110    for child in &elem.children {
111        match child {
112            Child::LigoLw(nested) => index_into(nested, tables, params, arrays, times),
113            Child::Other(g) => index_generic(g, tables, params, arrays, times),
114            Child::Table(t) => {
115                tables.insert(t.name.clone(), t.clone());
116            }
117            Child::Param(p) => {
118                params.insert(p.name.clone(), p.clone());
119            }
120            Child::Array(a) => {
121                if let Some(n) = &a.name {
122                    arrays.insert(n.clone(), a.clone());
123                }
124            }
125            Child::Time(t) => {
126                if let Some(n) = &t.name {
127                    times.insert(n.clone(), t.clone());
128                }
129            }
130            Child::Comment(_) => {}
131        }
132    }
133}
134
/// A `<LIGO_LW>` element. The document's root is one of these; DTT-style
/// documents nest them.
///
/// The derived `Default` is an element with no name, no type, and no
/// children.
#[derive(Debug, Clone, Default)]
pub struct LigoLwElement {
    /// Name of this container, if it has one.
    // NOTE(review): presumably the `Name` XML attribute — confirm against the parser.
    pub name: Option<String>,
    /// Type of this container, if it has one.
    // NOTE(review): presumably the `Type` XML attribute — confirm against the parser.
    pub element_type: Option<String>,
    /// Elements held directly by this container; nested containers
    /// appear here as `Child::LigoLw` or `Child::Other`.
    pub children: Vec<Child>,
}
143
144impl LigoLwElement {
145    pub fn tables(&self) -> impl Iterator<Item = &Table> {
146        self.children.iter().filter_map(|c| match c {
147            Child::Table(t) => Some(t),
148            _ => None,
149        })
150    }
151
152    pub fn arrays(&self) -> impl Iterator<Item = &Array> {
153        self.children.iter().filter_map(|c| match c {
154            Child::Array(a) => Some(a),
155            _ => None,
156        })
157    }
158
159    pub fn params(&self) -> impl Iterator<Item = &Param> {
160        self.children.iter().filter_map(|c| match c {
161            Child::Param(p) => Some(p),
162            _ => None,
163        })
164    }
165}
166
/// One element that can sit inside a `<LIGO_LW>` (or other) container.
///
/// Both `LigoLwElement` and `GenericElement` store their contents as a
/// `Vec<Child>`, so any variant may appear under either kind of
/// container.
#[derive(Debug, Clone)]
pub enum Child {
    /// A nested `<LIGO_LW>` container.
    LigoLw(LigoLwElement),
    /// A `Table` element.
    Table(Table),
    /// A `Param` element.
    Param(Param),
    /// An `Array` element.
    Array(Array),
    /// A `Time` element.
    Time(Time),
    /// A comment, carried as a plain string.
    // NOTE(review): presumably the text body of a `<Comment>` element — confirm against the parser.
    Comment(String),
    /// Any element the parser does not have explicit support for — DTT
    /// extension elements such as `<AdcData>`, `<DetectorData>`,
    /// `<Frame>`, `<Detector>`, `<IGWDFrame>`, or future producer
    /// additions. The element's tag, attributes, text body, and
    /// children are all preserved so the writer can round-trip it.
    Other(GenericElement),
}
183
/// A generic LIGO_LW element preserved verbatim for elements the parser
/// does not have a dedicated representation for.
///
/// The derived `Default` is an element with an empty tag, no name or
/// type, no attributes, no children, and empty text.
#[derive(Debug, Clone, Default)]
pub struct GenericElement {
    /// Element tag as it appeared in the source (e.g. `"AdcData"`).
    pub tag: String,
    /// `Name` attribute, if present.
    pub name: Option<String>,
    /// `Type` attribute, if present.
    pub element_type: Option<String>,
    /// Any other attributes, preserved in source order as
    /// `(attribute name, attribute value)` pairs.
    pub attributes: Vec<(String, String)>,
    /// Container children — child Tables, Arrays, Params, nested generic
    /// elements, and so on. These are walked when the document's flat
    /// indexes are built, so elements nested here are still indexed.
    pub children: Vec<Child>,
    /// Inline text body, if any. Pure container elements leave this empty.
    pub text: String,
}
202
/// A `Table` element.
///
/// Cells are addressed as `rows[row][column index]`, where the column
/// index is the position of the matching entry in `columns`.
#[derive(Debug, Clone)]
pub struct Table {
    /// Table name; this is the key the table is indexed under in
    /// `Document::tables` and the name reported in `Error::MissingColumn`.
    pub name: String,
    /// Delimiter character associated with this table.
    // NOTE(review): presumably the `Delimiter` of the table's `<Stream>` — confirm against the parser.
    pub delimiter: char,
    /// Column declarations; a column's position here is its index into
    /// each row.
    pub columns: Vec<Column>,
    /// Row data, one inner vector of cell values per row.
    pub rows: Vec<Vec<Value>>,
}
211
212impl Table {
213    pub fn column_index(&self, name: &str) -> Option<usize> {
214        self.columns.iter().position(|c| c.name == name)
215    }
216
217    pub fn cell(&self, row: usize, column: &str) -> Option<&Value> {
218        let idx = self.column_index(column)?;
219        self.rows.get(row)?.get(idx)
220    }
221
222    pub fn require_cell(&self, row: usize, column: &str) -> Result<&Value> {
223        let idx = self
224            .column_index(column)
225            .ok_or_else(|| Error::MissingColumn {
226                table: self.name.clone(),
227                column: column.to_string(),
228            })?;
229        Ok(&self.rows[row][idx])
230    }
231}
232
/// One column declaration of a [`Table`].
#[derive(Debug, Clone)]
pub struct Column {
    /// Column name; `Table::column_index` matches against this exactly.
    pub name: String,
    /// Declared type of the column.
    pub ty: LigoType,
}
238
/// A `Param` element. Carries metadata about a single typed value and the
/// raw text payload. Decode the payload by inspecting [`Param::ty`].
#[derive(Debug, Clone)]
pub struct Param {
    /// Param name; this is the key the param is indexed under in
    /// `Document::params`.
    pub name: String,
    /// Declared type of the payload.
    pub ty: LigoType,
    /// Unit of the value, if one was given.
    pub unit: Option<String>,
    /// The payload exactly as text; it is not decoded here.
    pub raw: String,
}
248
/// A `Time` element. The `value` is preserved as text because the
/// upstream type system distinguishes GPS, ISO-8601, and other formats
/// that we leave to the caller to interpret.
#[derive(Debug, Clone)]
pub struct Time {
    /// Name of the element, if it has one. Unnamed time elements are
    /// not entered into `Document::times`.
    pub name: Option<String>,
    /// The time format label, kept as text.
    // NOTE(review): presumably the element's `Type` attribute (e.g. GPS vs ISO-8601) — confirm against the parser.
    pub time_type: String,
    /// The time value as text, uninterpreted.
    pub value: String,
}
258
/// A `Dim` element inside an `<Array>`.
#[derive(Debug, Clone)]
pub struct Dim {
    /// Name of this dimension, if it has one.
    pub name: Option<String>,
    /// Number of elements along this dimension; `Array::expected_len`
    /// multiplies these together.
    pub size: usize,
    /// Optional scale value for this dimension.
    // NOTE(review): presumably the `Scale` attribute (axis step) — confirm against the parser.
    pub scale: Option<f64>,
    /// Optional start value for this dimension.
    // NOTE(review): presumably the `Start` attribute (axis origin) — confirm against the parser.
    pub start: Option<f64>,
    /// Unit of this dimension, if one was given.
    pub unit: Option<String>,
}
268
/// An `Array` element.
///
/// Numeric data is widened to `f64` so callers do not have to enumerate
/// per-type variants; the original [`LigoType`] is recorded so the writer
/// re-encodes at the correct precision. Shape is in [`Array::dims`]; the
/// flat `values` buffer is in C order (row-major).
#[derive(Debug, Clone)]
pub struct Array {
    /// Name of the array, if it has one. Unnamed arrays are not entered
    /// into `Document::arrays`.
    pub name: Option<String>,
    /// Original element type of the data, before widening to `f64`.
    pub ty: LigoType,
    /// Unit of the values, if one was given.
    pub unit: Option<String>,
    /// Declared dimensions; may be empty.
    pub dims: Vec<Dim>,
    /// How the array body was encoded on the wire.
    pub encoding: ArrayEncoding,
    /// Delimiter character associated with this array.
    // NOTE(review): presumably the `Delimiter` of the array's `<Stream>`, relevant for text encoding — confirm against the parser.
    pub delimiter: char,
    /// Flat data buffer, widened to `f64`, in C (row-major) order.
    pub values: Vec<f64>,
}
285
/// How the `<Stream>` body of an `<Array>` was encoded on the wire.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArrayEncoding {
    /// Delimited text.
    Text,
    /// Base64 payload, little-endian byte order.
    LittleEndianBase64,
    /// Base64 payload, big-endian byte order.
    BigEndianBase64,
}

impl ArrayEncoding {
    /// Classify an encoding string.
    ///
    /// Matching is case-insensitive and substring-based: anything that
    /// does not mention `base64` is [`ArrayEncoding::Text`]; a base64
    /// encoding is big-endian only if it also mentions `bigendian`, and
    /// little-endian otherwise. This function never fails.
    pub fn parse(s: &str) -> Self {
        let normalized = s.trim().to_ascii_lowercase();
        let is_base64 = normalized.contains("base64");
        let is_big_endian = normalized.contains("bigendian");
        match (is_base64, is_big_endian) {
            (false, _) => Self::Text,
            (true, true) => Self::BigEndianBase64,
            (true, false) => Self::LittleEndianBase64,
        }
    }

    /// The attribute text for this encoding, or `None` for
    /// [`ArrayEncoding::Text`], which has no attribute text.
    pub fn as_attribute(self) -> Option<&'static str> {
        let label = match self {
            Self::Text => return None,
            Self::LittleEndianBase64 => "LittleEndian,base64",
            Self::BigEndianBase64 => "BigEndian,base64",
        };
        Some(label)
    }
}
316
317impl Array {
318    /// Product of declared dim sizes, or the actual `values.len()` if no
319    /// dims were declared.
320    pub fn expected_len(&self) -> usize {
321        if self.dims.is_empty() {
322            self.values.len()
323        } else {
324            self.dims.iter().map(|d| d.size).product()
325        }
326    }
327}