igwn-ligolw 0.1.0

Rust-native reader and writer for the LIGO_LW XML format used by the International Gravitational-Wave Observatory Network.
Documentation
//! Serialize a [`Document`] back to LIGO_LW XML.
//!
//! The writer is intentionally minimal: it produces output that
//! `parse_bytes` round-trips through unchanged for the supported
//! element types (`<Table>`, `<Column>`, `<Stream>`, `<Param>`,
//! `<Time>`, `<Array>`, `<Dim>`, nested `<LIGO_LW>`). It does not
//! reproduce the exact whitespace, attribute ordering, DOCTYPE
//! comments, or other lossy details of the original source — those
//! are not preserved through the parser.
//!
//! Output appends the `:table` / `:array` name suffixes that the LIGO
//! command-line tooling expects when re-reading the file; `<Param>`
//! names are written exactly as stored.

use std::io::Write;

use base64::engine::general_purpose::STANDARD as BASE64;
use base64::Engine as _;

use crate::document::{
    Array, ArrayEncoding, Child, Document, GenericElement, LigoLwElement, Param, Table, Time,
};
use crate::error::{Error, Result};
use crate::types::LigoType;
use crate::value::Value;

/// DOCTYPE declaration line for LIGO_LW documents, referencing the
/// LIGO_LW DTD by its system identifier.
const DOCTYPE: &str = r#"<!DOCTYPE LIGO_LW SYSTEM "http://ldas-sw.ligo.caltech.edu/doc/ligolwAPI/html/ligolw_dtd.txt">"#;

/// Render `doc` as LIGO_LW XML into a freshly allocated byte vector.
///
/// This is a thin wrapper around [`write_to_writer`] with an in-memory
/// sink, so the output starts with the XML 1.0 declaration followed by
/// the standard LIGO_LW DOCTYPE.
///
/// # Errors
///
/// Propagates any error returned by [`write_to_writer`].
pub fn write_to_bytes(doc: &Document) -> Result<Vec<u8>> {
    let mut out: Vec<u8> = Vec::new();
    write_to_writer(doc, &mut out).map(|()| out)
}

/// Serialize a document to any [`Write`] sink.
pub fn write_to_writer<W: Write>(doc: &Document, mut w: W) -> Result<()> {
    writeln!(w, r#"<?xml version="1.0" encoding="utf-8"?>"#)
        .map_err(|e| Error::Write(e.to_string()))?;
    writeln!(w, "{DOCTYPE}").map_err(|e| Error::Write(e.to_string()))?;
    write_ligo_lw(&mut w, &doc.root, 0)?;
    Ok(())
}

/// Serialize a document to a filesystem path. The file extension
/// controls compression: `.gz` writes through `flate2`; `.bz2` is
/// behind the `bzip2` feature; `.xz` is behind the `xz` feature.
/// Anything else is written uncompressed.
pub fn write_to_path(doc: &Document, path: impl AsRef<std::path::Path>) -> Result<()> {
    let path = path.as_ref();
    let file = std::fs::File::create(path).map_err(|e| Error::Io(e.to_string()))?;
    match path.extension().and_then(|e| e.to_str()) {
        Some("gz") => {
            let mut w = flate2::write::GzEncoder::new(file, flate2::Compression::default());
            write_to_writer(doc, &mut w)?;
            w.finish().map_err(|e| Error::Io(e.to_string()))?;
            Ok(())
        }
        #[cfg(feature = "bzip2")]
        Some("bz2") => {
            let mut w = bzip2::write::BzEncoder::new(file, bzip2::Compression::default());
            write_to_writer(doc, &mut w)?;
            w.finish().map_err(|e| Error::Io(e.to_string()))?;
            Ok(())
        }
        #[cfg(feature = "xz")]
        Some("xz") => {
            let mut w = xz2::write::XzEncoder::new(file, 6);
            write_to_writer(doc, &mut w)?;
            w.finish().map_err(|e| Error::Io(e.to_string()))?;
            Ok(())
        }
        _ => write_to_writer(doc, file),
    }
}

/// Write one `<LIGO_LW>` element and, recursively, everything inside it.
///
/// `depth` is the nesting level; each level indents by two spaces. The
/// optional `Name` and `Type` attributes are emitted in that order, with
/// their values attribute-escaped. Children are written one level deeper.
fn write_ligo_lw<W: Write>(w: &mut W, elem: &LigoLwElement, depth: usize) -> Result<()> {
    let pad = " ".repeat(depth * 2);

    let name_attr = match &elem.name {
        Some(name) => format!(r#" Name="{}""#, escape_attr(name)),
        None => String::new(),
    };
    let type_attr = match &elem.element_type {
        Some(kind) => format!(r#" Type="{}""#, escape_attr(kind)),
        None => String::new(),
    };

    writeln!(w, "{pad}<LIGO_LW{name_attr}{type_attr}>").map_err(io_err)?;
    for node in &elem.children {
        write_child(w, node, depth + 1)?;
    }
    writeln!(w, "{pad}</LIGO_LW>").map_err(io_err)
}

/// Dispatch a single child node to the writer for its concrete kind.
///
/// Every variant is written at the given `depth`. Comments are rendered
/// here directly: the comment text is placed verbatim between `<!--` and
/// `-->` on its own indented line, with no escaping applied.
fn write_child<W: Write>(w: &mut W, child: &Child, depth: usize) -> Result<()> {
    match child {
        Child::Comment(body) => {
            let pad = " ".repeat(depth * 2);
            writeln!(w, "{pad}<!--{body}-->").map_err(io_err)
        }
        Child::LigoLw(element) => write_ligo_lw(w, element, depth),
        Child::Table(table) => write_table(w, table, depth),
        Child::Array(array) => write_array(w, array, depth),
        Child::Param(param) => write_param(w, param, depth),
        Child::Time(time) => write_time(w, time, depth),
        Child::Other(generic) => write_generic(w, generic, depth),
    }
}

/// Write an element that has no dedicated representation.
///
/// Attributes are emitted as `Name`, then `Type`, then every entry of
/// `g.attributes` in iteration order; values are attribute-escaped while
/// attribute names are written as-is. Three output shapes are possible:
///
/// * children present: open tag, children one level deeper, close tag
///   (the element's own text is not written in this case);
/// * no children and only whitespace text: a self-closing tag, which
///   keeps pure leaves compact;
/// * no children but real text: a single line holding the escaped text.
fn write_generic<W: Write>(w: &mut W, g: &GenericElement, depth: usize) -> Result<()> {
    let pad = " ".repeat(depth * 2);
    let tag = &g.tag;

    let mut attr_text = String::new();
    if let Some(name) = &g.name {
        attr_text += &format!(r#" Name="{}""#, escape_attr(name));
    }
    if let Some(kind) = &g.element_type {
        attr_text += &format!(r#" Type="{}""#, escape_attr(kind));
    }
    for (key, value) in &g.attributes {
        attr_text += &format!(r#" {key}="{}""#, escape_attr(value));
    }

    if !g.children.is_empty() {
        writeln!(w, "{pad}<{tag}{attr_text}>").map_err(io_err)?;
        for node in &g.children {
            write_child(w, node, depth + 1)?;
        }
        return writeln!(w, "{pad}</{tag}>").map_err(io_err);
    }

    if g.text.trim().is_empty() {
        return writeln!(w, "{pad}<{tag}{attr_text}/>").map_err(io_err);
    }

    let body = escape_text(&g.text);
    writeln!(w, "{pad}<{tag}{attr_text}>{body}</{tag}>").map_err(io_err)
}

/// Write a `<Table>` element: its `<Column>` declarations followed by a
/// single `<Stream>` holding the row data.
///
/// The table name gets the `:table` suffix on both the `<Table>` and the
/// `<Stream>` element. `depth` is the nesting level of the `<Table>` tag;
/// columns and the stream sit one level deeper and the rows two levels
/// deeper.
fn write_table<W: Write>(w: &mut W, t: &Table, depth: usize) -> Result<()> {
    let indent = " ".repeat(depth * 2);
    let inner = " ".repeat((depth + 1) * 2);
    // The name lands inside a double-quoted attribute, so it must be
    // escaped like every other attribute value; otherwise a name holding
    // `"`, `<` or `&` would produce malformed XML.
    let name = escape_attr(&t.name.to_string());
    writeln!(w, r#"{indent}<Table Name="{name}:table">"#).map_err(io_err)?;
    for col in &t.columns {
        writeln!(
            w,
            r#"{inner}<Column Name="{name}" Type="{ty}"/>"#,
            name = escape_attr(&col.name),
            ty = col.ty.canonical_name(),
        )
        .map_err(io_err)?;
    }
    writeln!(
        w,
        r#"{inner}<Stream Name="{name}:table" Type="Local" Delimiter="{delim}">"#,
        delim = escape_attr(&t.delimiter.to_string()),
    )
    .map_err(io_err)?;
    write_stream_body(w, t, depth + 2)?;
    writeln!(w, "{inner}</Stream>").map_err(io_err)?;
    writeln!(w, "{indent}</Table>").map_err(io_err)?;
    Ok(())
}

fn write_stream_body<W: Write>(w: &mut W, t: &Table, depth: usize) -> Result<()> {
    let indent = " ".repeat(depth * 2);
    let delim = t.delimiter.to_string();
    // Always emit a delimiter between fields. Within a row, we follow each
    // cell with the delimiter; the trailing delimiter at the end of a row
    // is intentional — the LIGO_LW parser is tolerant of it and it keeps
    // the field count unambiguous when the final cell of the final row is
    // NULL or an empty string.
    for row in &t.rows {
        write!(w, "{indent}").map_err(io_err)?;
        for (ci, cell) in row.iter().enumerate() {
            let ty = t.columns[ci].ty;
            if ci > 0 {
                w.write_all(delim.as_bytes()).map_err(io_err)?;
            }
            write_value(w, cell, ty)?;
        }
        w.write_all(delim.as_bytes()).map_err(io_err)?;
        writeln!(w).map_err(io_err)?;
    }
    Ok(())
}

fn write_value<W: Write>(w: &mut W, v: &Value, ty: LigoType) -> Result<()> {
    if matches!(v, Value::Null) {
        return Ok(());
    }
    match (v, ty) {
        (Value::Str(s), LigoType::Str) | (Value::Str(s), LigoType::Ilwd) => {
            write!(w, r#""{}""#, escape_quoted(s)).map_err(io_err)?
        }
        (Value::Int(i), _) => write!(w, "{i}").map_err(io_err)?,
        (Value::UInt(u), _) => write!(w, "{u}").map_err(io_err)?,
        (Value::Real(f), _) => write!(w, "{}", format_float(*f)).map_err(io_err)?,
        (Value::Str(s), _) => write!(w, "{s}").map_err(io_err)?,
        (Value::Null, _) => {}
    }
    Ok(())
}

/// Write a `<Param>` element on a single line.
///
/// Attributes appear as `Name`, `Type`, then `Unit` when a unit is set.
/// The element body is the parameter's raw text, escaped for XML
/// character data.
fn write_param<W: Write>(w: &mut W, p: &Param, depth: usize) -> Result<()> {
    let pad = " ".repeat(depth * 2);

    let mut attrs = format!(
        r#" Name="{}" Type="{}""#,
        escape_attr(&p.name),
        p.ty.canonical_name()
    );
    if let Some(unit) = &p.unit {
        attrs += &format!(r#" Unit="{}""#, escape_attr(unit));
    }

    let body = escape_text(&p.raw);
    writeln!(w, "{pad}<Param{attrs}>{body}</Param>").map_err(io_err)
}

/// Write a `<Time>` element on a single line.
///
/// The optional `Name` attribute comes first, followed by the mandatory
/// `Type`; both values are attribute-escaped. The element body is the
/// stored time value, escaped for XML character data.
fn write_time<W: Write>(w: &mut W, t: &Time, depth: usize) -> Result<()> {
    let pad = " ".repeat(depth * 2);

    let mut attrs = String::new();
    if let Some(name) = &t.name {
        attrs += &format!(r#" Name="{}""#, escape_attr(name));
    }
    attrs += &format!(r#" Type="{}""#, escape_attr(&t.time_type));

    let body = escape_text(&t.value);
    writeln!(w, "{pad}<Time{attrs}>{body}</Time>").map_err(io_err)
}

/// Write an `<Array>` element: the opening tag, one `<Dim>` per
/// dimension, and a `<Stream>` carrying the values.
///
/// A present array name gets the `:array` suffix. The stream's
/// `Encoding` attribute is included only when the encoding supplies one,
/// and the payload is produced by the text or base64 writer according to
/// `a.encoding`. `<Dim>` and `<Stream>` sit one level below the
/// `<Array>` tag; the payload sits two levels below.
fn write_array<W: Write>(w: &mut W, a: &Array, depth: usize) -> Result<()> {
    let outer_pad = " ".repeat(depth * 2);
    let inner_pad = " ".repeat((depth + 1) * 2);

    // Opening tag: Name (optional), Type, Unit (optional).
    let mut array_attrs = String::new();
    if let Some(name) = &a.name {
        array_attrs += &format!(r#" Name="{}:array""#, escape_attr(name));
    }
    array_attrs += &format!(r#" Type="{}""#, a.ty.canonical_name());
    if let Some(unit) = &a.unit {
        array_attrs += &format!(r#" Unit="{}""#, escape_attr(unit));
    }
    writeln!(w, "{outer_pad}<Array{array_attrs}>").map_err(io_err)?;

    // One <Dim> per dimension; attributes in Name, Scale, Start, Unit order.
    for dim in &a.dims {
        let mut dim_attrs = String::new();
        if let Some(name) = &dim.name {
            dim_attrs += &format!(r#" Name="{}""#, escape_attr(name));
        }
        if let Some(scale) = dim.scale {
            dim_attrs += &format!(r#" Scale="{}""#, format_float(scale));
        }
        if let Some(start) = dim.start {
            dim_attrs += &format!(r#" Start="{}""#, format_float(start));
        }
        if let Some(unit) = &dim.unit {
            dim_attrs += &format!(r#" Unit="{}""#, escape_attr(unit));
        }
        writeln!(w, "{inner_pad}<Dim{dim_attrs}>{size}</Dim>", size = dim.size)
            .map_err(io_err)?;
    }

    // Stream header.
    let delimiter = escape_attr(&a.delimiter.to_string());
    let encoding = match a.encoding.as_attribute() {
        Some(label) => format!(r#" Encoding="{}""#, label),
        None => String::new(),
    };
    writeln!(
        w,
        r#"{inner_pad}<Stream Type="Local" Delimiter="{delimiter}"{encoding}>"#
    )
    .map_err(io_err)?;

    // Stream payload.
    let body_depth = depth + 2;
    match a.encoding {
        ArrayEncoding::Text => write_array_text(w, a, body_depth)?,
        ArrayEncoding::LittleEndianBase64 => write_array_base64(w, a, body_depth, true)?,
        ArrayEncoding::BigEndianBase64 => write_array_base64(w, a, body_depth, false)?,
    }

    writeln!(w, "{inner_pad}</Stream>").map_err(io_err)?;
    writeln!(w, "{outer_pad}</Array>").map_err(io_err)
}

/// Write array values as delimiter-separated text, several per line.
///
/// The number of values per line is the size of the last dimension, or
/// the total value count when the array has no dimensions; it is clamped
/// to at least one so chunking never receives a zero width. Each line is
/// indented for nesting level `depth`.
///
/// The delimiter is written between values on the same line only; lines
/// are separated by the newline alone.
// NOTE(review): no delimiter is emitted at a line break — confirm the
// reader treats the newline as a token separator for every delimiter.
fn write_array_text<W: Write>(w: &mut W, a: &Array, depth: usize) -> Result<()> {
    let pad = " ".repeat(depth * 2);
    let separator = a.delimiter.to_string();

    let per_line = match a.dims.last() {
        Some(dim) => dim.size,
        None => a.values.len(),
    };
    let per_line = per_line.max(1);

    for line in a.values.chunks(per_line) {
        write!(w, "{pad}").map_err(io_err)?;
        if let Some((head, tail)) = line.split_first() {
            write_array_value(w, *head, a.ty)?;
            for value in tail {
                w.write_all(separator.as_bytes()).map_err(io_err)?;
                write_array_value(w, *value, a.ty)?;
            }
        }
        writeln!(w).map_err(io_err)?;
    }
    Ok(())
}

/// Write array values as one indented line of base64-encoded binary.
///
/// Each value is converted to the array's element type and appended in
/// little-endian byte order when `little` is true, big-endian otherwise:
/// `Real4` as `f32`, `Real8` as `f64`, `Int` as `i32`, `UInt` as `u32`.
/// Float-to-integer conversion uses `as`, which saturates at the target
/// type's bounds.
///
/// Any other element type contributes no bytes, so the payload is empty.
// NOTE(review): the empty payload for unsupported element types is kept
// from the original behaviour — confirm whether it should be an error.
fn write_array_base64<W: Write>(w: &mut W, a: &Array, depth: usize, little: bool) -> Result<()> {
    let mut bytes: Vec<u8> = Vec::with_capacity(a.values.len() * 8);
    match a.ty {
        LigoType::Real4 => {
            for v in &a.values {
                let f = *v as f32;
                let b = if little {
                    f.to_le_bytes()
                } else {
                    f.to_be_bytes()
                };
                bytes.extend_from_slice(&b);
            }
        }
        LigoType::Real8 => {
            for v in &a.values {
                let b = if little {
                    v.to_le_bytes()
                } else {
                    v.to_be_bytes()
                };
                bytes.extend_from_slice(&b);
            }
        }
        LigoType::Int => {
            for v in &a.values {
                let i = *v as i32;
                let b = if little {
                    i.to_le_bytes()
                } else {
                    i.to_be_bytes()
                };
                bytes.extend_from_slice(&b);
            }
        }
        LigoType::UInt => {
            for v in &a.values {
                // Cast to the unsigned type: going through `i32` would
                // saturate every value above `i32::MAX` to `i32::MAX`.
                let u = *v as u32;
                let b = if little {
                    u.to_le_bytes()
                } else {
                    u.to_be_bytes()
                };
                bytes.extend_from_slice(&b);
            }
        }
        _ => {}
    }
    let indent = " ".repeat(depth * 2);
    writeln!(w, "{indent}{}", BASE64.encode(bytes)).map_err(io_err)
}

/// Write one array value in text form.
///
/// Values of `Int` / `UInt` arrays are converted to `i64` and printed as
/// integers; every other element type is rendered by `format_float`.
fn write_array_value<W: Write>(w: &mut W, v: f64, ty: LigoType) -> Result<()> {
    let rendered = if matches!(ty, LigoType::Int | LigoType::UInt) {
        (v as i64).to_string()
    } else {
        format_float(v)
    };
    write!(w, "{rendered}").map_err(io_err)
}

/// Render a float so that whole numbers still look like floats.
///
/// Finite values with no fractional part and magnitude below `1e16` are
/// printed with exactly one decimal place (`3` becomes `3.0`). All other
/// values, including NaN and the infinities, use the default `Display`
/// output.
fn format_float(v: f64) -> String {
    let whole = v.is_finite() && v.fract() == 0.0;
    if whole && v.abs() < 1e16 {
        return format!("{v:.1}");
    }
    v.to_string()
}

/// Escape a string for use inside a double-quoted XML attribute value.
///
/// Replaces `&` with `&amp;`, `<` with `&lt;` and `"` with `&quot;`;
/// every other character is copied unchanged.
fn escape_attr(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '"' => out.push_str("&quot;"),
            other => out.push(other),
        }
    }
    out
}

/// Escape a string for use as XML character data (element content).
///
/// Replaces `&` with `&amp;`, `<` with `&lt;` and `>` with `&gt;`.
/// Escaping `>` is what keeps text containing the sequence `]]>`
/// well-formed: that sequence may not appear literally in element
/// content.
fn escape_text(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            other => out.push(other),
        }
    }
    out
}

/// Prepare a string for placement between double quotes in stream text
/// by doubling every `"` it contains. All other characters are copied
/// unchanged.
fn escape_quoted(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '"' {
            out.push_str("\"\"");
        } else {
            out.push(ch);
        }
    }
    out
}

fn io_err(e: std::io::Error) -> Error {
    Error::Write(e.to_string())
}