kobold-xml 0.1.0

A clean-room, Rust-native COBOL-data XML layer (real XML GENERATE). Independent of GnuCOBOL/libcob; not a GnuCOBOL parity claim.
Documentation
//! A clean-room COBOL-record data model and its XML rendering. **This is independent of GnuCOBOL/libcob**
//! -- it is a plain Rust description of a record (a tree of named fields with decoded values) and a
//! deterministic mapping to the [`crate::generate`] XML element tree. The caller decodes their COBOL data
//! into [`CobolField`]s by whatever means they like; `kobold-xml` does not link any COBOL runtime.
//!
//! ## v0.1 value policy (explicit + stable)
//! * **Alphanumeric**: the bytes (UTF-8 lossy), trailing spaces/NULs trimmed.
//! * **Numeric**: leading zeros stripped to one integer digit, the implied decimal point inserted at the
//!   `scale`, a leading `-` when negative.
//! * **Group**: nested element with the child fields rendered in order.
//! * A field named `"FILLER"` (case-insensitive) is omitted from the output.

use crate::generate::{XmlElement, XmlNode};

/// The decoded payload carried by a [`CobolField`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CobolValue {
    /// Alphanumeric (`PIC X`/`A`) data held as raw bytes; trailing spaces/NULs are trimmed only when
    /// the value is rendered.
    Alphanumeric(Vec<u8>),
    /// Numeric (`PIC 9`) data.
    Numeric {
        /// The digit characters of the value, without any decimal point.
        digits: Vec<u8>,
        /// Position of the implied decimal point, counted in digits from the right.
        scale: usize,
        /// `true` when the value carries a negative sign.
        negative: bool,
    },
    /// A group item: its child fields, rendered as nested elements in the order given.
    Group(Vec<CobolField>),
}

/// A named COBOL field: a data name paired with its decoded [`CobolValue`].
///
/// Records are trees of these: a field whose value is [`CobolValue::Group`] holds further
/// `CobolField`s as children.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CobolField {
    /// The COBOL data name (e.g. `CUST-NAME`).
    pub name: String,
    /// The field's decoded value (alphanumeric bytes, numeric digits, or a group of child fields).
    pub value: CobolValue,
}

impl CobolField {
    /// Build an alphanumeric field called `name` holding the raw bytes `data`.
    pub fn alnum(name: impl Into<String>, data: impl Into<Vec<u8>>) -> Self {
        let name = name.into();
        let value = CobolValue::Alphanumeric(data.into());
        Self { name, value }
    }

    /// Build a numeric field called `name` from its digit characters, the implied-decimal `scale`,
    /// and the sign flag.
    pub fn numeric(name: impl Into<String>, digits: impl Into<Vec<u8>>, scale: usize, negative: bool) -> Self {
        let name = name.into();
        let value = CobolValue::Numeric {
            digits: digits.into(),
            scale,
            negative,
        };
        Self { name, value }
    }

    /// Build a group field called `name` that owns the given child fields.
    pub fn group(name: impl Into<String>, children: Vec<CobolField>) -> Self {
        let name = name.into();
        let value = CobolValue::Group(children);
        Self { name, value }
    }
}

/// Sanitize a COBOL data name into a usable XML element name.
///
/// COBOL's `-` is legal *inside* an XML name, so the name is otherwise passed through unchanged.
/// An XML name may not *begin* with a digit, `-` or `.` (they are name characters but not
/// name-start characters), so a name starting with one of those is prefixed with `_`.
/// An empty name becomes `_`.
///
/// Characters that are illegal anywhere in an XML name (spaces, for instance) are not rewritten
/// by this function.
pub fn sanitize_name(name: &str) -> String {
    let first = match name.chars().next() {
        Some(c) => c,
        None => return "_".to_string(),
    };
    // One spare byte for the possible `_` prefix.
    let mut out = String::with_capacity(name.len() + 1);
    if first.is_ascii_digit() || first == '-' || first == '.' {
        out.push('_');
    }
    out.push_str(name);
    out
}

/// Turn raw alphanumeric bytes into text: drop the run of trailing spaces / NUL bytes, then decode
/// what is left as UTF-8, substituting U+FFFD for any invalid sequence.
fn render_alnum(data: &[u8]) -> String {
    // One past the last byte that is neither a space nor a NUL; 0 when every byte is padding.
    let keep = data
        .iter()
        .rposition(|&byte| byte != b' ' && byte != 0)
        .map_or(0, |last| last + 1);
    String::from_utf8_lossy(&data[..keep]).into_owned()
}

/// Render a numeric value per the v0.1 policy.
///
/// * Non-digit bytes in `digits` are ignored; if no digit remains the value is treated as `0`.
/// * The last `scale` digits form the fraction; the rest form the integer part.
/// * Leading zeros of the integer part are stripped down to a single digit; an absent integer part
///   is rendered as `0`.
/// * When `scale` is larger than the number of digits, the fraction is left-padded with zeros so
///   that it always has exactly `scale` digits (digits `5` at scale 2 is `0.05`, not `0.5`).
/// * A leading `-` is emitted whenever `negative` is set, including for a zero value.
fn render_numeric(digits: &[u8], scale: usize, negative: bool) -> String {
    // Keep only digit characters; default to a single zero if none.
    let mut d: Vec<u8> = digits.iter().copied().filter(|b| b.is_ascii_digit()).collect();
    if d.is_empty() {
        d.push(b'0');
    }

    // `int_len <= d.len()` by construction, so the split cannot panic.
    let int_len = d.len().saturating_sub(scale);
    let (int_digits, frac_digits) = d.split_at(int_len);

    let mut s = String::with_capacity(d.len() + 3);
    if negative {
        s.push('-');
    }

    if int_digits.is_empty() {
        s.push('0');
    } else {
        // First significant digit, or the final digit when the integer part is all zeros.
        let first_kept = int_digits
            .iter()
            .position(|&b| b != b'0')
            .unwrap_or(int_digits.len() - 1);
        // Every byte is an ASCII digit, so a byte-to-char conversion is exact.
        s.extend(int_digits[first_kept..].iter().map(|&b| char::from(b)));
    }

    if scale > 0 {
        s.push('.');
        // `frac_digits.len() == min(d.len(), scale)`, so this subtraction cannot underflow.
        let missing = scale - frac_digits.len();
        s.extend(std::iter::repeat('0').take(missing));
        s.extend(frac_digits.iter().map(|&b| char::from(b)));
    }
    s
}

/// Convert one [`CobolField`] into its [`XmlElement`]. A field named `FILLER` (compared ASCII
/// case-insensitively) produces `None`; inside a group, such children are simply left out.
fn field_to_element(f: &CobolField) -> Option<XmlElement> {
    let is_filler = f.name.eq_ignore_ascii_case("FILLER");
    if is_filler {
        return None;
    }

    let tag = sanitize_name(&f.name);
    let element = match &f.value {
        CobolValue::Group(children) => {
            // Recurse child by child, keeping declaration order and dropping omitted fields.
            let mut kids: Vec<XmlNode> = Vec::new();
            for child in children {
                if let Some(child_element) = field_to_element(child) {
                    kids.push(XmlNode::Element(child_element));
                }
            }
            XmlElement::group(tag, kids)
        }
        CobolValue::Numeric { digits, scale, negative } => {
            let text = render_numeric(digits, *scale, *negative);
            XmlElement::leaf(tag, text)
        }
        CobolValue::Alphanumeric(d) => {
            let text = render_alnum(d);
            XmlElement::leaf(tag, text)
        }
    };
    Some(element)
}

/// `EXT.XML.GENERATE.1` (COBOL adapter) -- turn a top-level COBOL record into its XML element tree.
///
/// `root` is typically the `01`-level [`CobolValue::Group`] field. `FILLER` fields are left out of
/// the tree; if `root` itself is a `FILLER`, an empty element carrying the sanitized root name is
/// returned instead.
pub fn record_to_xml(root: &CobolField) -> XmlElement {
    match field_to_element(root) {
        Some(element) => element,
        None => XmlElement::empty(sanitize_name(&root.name)),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::{generate, GenerateOptions};

    /// Names pass through untouched unless they are empty or start with a digit.
    #[test]
    fn sanitize_names() {
        let cases = [("CUST-NAME", "CUST-NAME"), ("1ST", "_1ST"), ("", "_")];
        for &(input, expected) in &cases {
            assert_eq!(sanitize_name(input), expected);
        }
    }

    /// Leading-zero stripping, implied decimal point and sign handling.
    #[test]
    fn numeric_rendering() {
        let cases = [
            (&b"042"[..], 0, false, "42"),
            (&b"01250"[..], 2, false, "12.50"),
            (&b"0000"[..], 0, false, "0"),
            (&b"042"[..], 0, true, "-42"),
        ];
        for &(digits, scale, negative, expected) in &cases {
            assert_eq!(render_numeric(digits, scale, negative), expected);
        }
    }

    /// End-to-end: a record with an alphanumeric, a scaled numeric, a nested group, a FILLER
    /// (dropped) and a value containing characters that need escaping in the generated XML.
    #[test]
    fn record_to_xml_full() {
        let nested = CobolField::group("GRP", vec![CobolField::alnum("X", b"hi".to_vec())]);
        let children = vec![
            CobolField::alnum("NAME", b"JOHN".to_vec()),
            CobolField::numeric("AMT", b"01250".to_vec(), 2, false),
            nested,
            CobolField::alnum("FILLER", b"  ".to_vec()),
            CobolField::alnum("MSG", b"a<b&c ".to_vec()),
        ];
        let record = CobolField::group("G", children);

        let tree = record_to_xml(&record);
        let rendered = generate(&tree, &GenerateOptions::default());

        let expected = "<G><NAME>JOHN</NAME><AMT>12.50</AMT><GRP><X>hi</X></GRP><MSG>a&lt;b&amp;c</MSG></G>";
        assert_eq!(rendered, expected);
    }
}