kobold-json 0.1.0

Forensic JSON evidence packets for COBOL record migration: raw-byte custody, copybook/record hashes, field findings, round-trip proof. Clean-room; independent of GnuCOBOL/libcob.
Documentation
//! The clean-room COBOL-record data model for kobold-json.
//!
//! This is a plain Rust description of a record layout (a tree of named fields with offsets, lengths and
//! PIC kinds) plus the decoded result of applying it to raw bytes. **It is independent of GnuCOBOL/libcob**
//! -- it links no COBOL runtime and reproduces no runtime's exact behavior. The caller describes their
//! copybook by whatever means they like and feeds raw record bytes to the `KOBOLD.JSON.EXPORT.1` court.

/// The storage kind of a field, as declared by the copybook.
///
/// The kind carries no offset or length of its own -- those live on the enclosing [`FieldDecl`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldKind {
    /// `PIC X`/`A` -- alphanumeric display bytes.
    Alphanumeric,
    /// `PIC 9` -- zoned-decimal display digits. `scale` = implied decimal places; `signed` if `PIC S9`.
    ///
    /// Both values are supplied by the caller and stored as given.
    Numeric { scale: usize, signed: bool },
    /// A group (`01`/`05` with subordinate items): the child fields, in order.
    ///
    /// [`FieldDecl::group`] derives the group's `length` from its children's lengths. The enum itself
    /// does not enforce that relationship: `FieldDecl`'s members are public, so a group declaration can
    /// also be assembled by hand with any `length`.
    Group(Vec<FieldDecl>),
}

/// A single field declaration from a copybook: a name, its PIC string, its byte `offset` within the record,
/// its byte `length`, and its [`FieldKind`].
///
/// All members are public plain data. Nothing in this type checks that `pic`, `offset`, `length` and
/// `kind` agree with one another; they are whatever the caller supplied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FieldDecl {
    /// The COBOL data name (e.g. `CUST-NAME`).
    pub name: String,
    /// The PIC clause text, kept verbatim for the audit packet (e.g. `X(20)`, `S9(5)V99`).
    /// Empty for groups built with [`FieldDecl::group`].
    pub pic: String,
    /// Byte offset of the field within the record.
    pub offset: usize,
    /// Byte length of the field. For groups built with [`FieldDecl::group`] it is derived from the
    /// children's lengths.
    pub length: usize,
    /// The storage kind.
    pub kind: FieldKind,
}

impl FieldDecl {
    /// Builds an alphanumeric (`PIC X`/`A`) leaf field.
    ///
    /// `name` and `pic` are converted into owned strings; `offset` and `length` are stored as given.
    /// No consistency check between `pic` and `length` is performed.
    pub fn alnum(name: impl Into<String>, pic: impl Into<String>, offset: usize, length: usize) -> Self {
        FieldDecl { name: name.into(), pic: pic.into(), offset, length, kind: FieldKind::Alphanumeric }
    }

    /// Builds a numeric (zoned display) leaf field.
    ///
    /// `scale` is the number of implied decimal places and `signed` marks a `PIC S9` item; both end up in
    /// [`FieldKind::Numeric`]. As with [`FieldDecl::alnum`], the arguments are stored without validation.
    pub fn numeric(
        name: impl Into<String>,
        pic: impl Into<String>,
        offset: usize,
        length: usize,
        scale: usize,
        signed: bool,
    ) -> Self {
        FieldDecl {
            name: name.into(),
            pic: pic.into(),
            offset,
            length,
            kind: FieldKind::Numeric { scale, signed },
        }
    }

    /// Builds a group field that owns `children`, in the order given.
    ///
    /// The group's `pic` is empty and its `length` is the sum of the direct children's `length` values
    /// (nested groups already carry their own total). Child offsets are not inspected.
    ///
    /// The sum saturates at `usize::MAX`. A plain `sum()` would panic in builds with overflow checks and
    /// silently wrap to a small, plausible-looking length without them; saturating keeps an impossible
    /// layout visibly impossible in both.
    pub fn group(name: impl Into<String>, offset: usize, children: Vec<FieldDecl>) -> Self {
        let length = children.iter().fold(0usize, |total, child| total.saturating_add(child.length));
        FieldDecl { name: name.into(), pic: String::new(), offset, length, kind: FieldKind::Group(children) }
    }
}

/// A copybook: the record name, the declared character encoding (informational, carried into the packet),
/// and the top-level fields in order.
///
/// All three members take part in [`Copybook::canonical_bytes`], so changing any of them changes the
/// canonical serialization.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Copybook {
    /// The `01`-level record name.
    pub record_name: String,
    /// The declared character encoding, e.g. `ascii` or `ebcdic-cp-us`. Carried into the packet as
    /// `encoding`; kobold-json decodes display bytes as-is and does not transcode.
    pub encoding: String,
    /// The top-level fields, in record order. Nested items live inside [`FieldKind::Group`] children,
    /// not in this list.
    pub fields: Vec<FieldDecl>,
}

impl Copybook {
    /// The total declared record length = sum of top-level field lengths.
    ///
    /// Only the entries of `self.fields` are added; a group contributes its own `length` and its children
    /// are not revisited.
    ///
    /// The sum saturates at `usize::MAX`. A plain `sum()` would panic in builds with overflow checks and
    /// silently wrap without them, which could make an impossible layout report a small, plausible
    /// length. A saturated result can never equal the length of a real buffer.
    pub fn record_length(&self) -> usize {
        self.fields.iter().fold(0usize, |total, field| total.saturating_add(field.length))
    }

    /// A stable, canonical byte serialization of the copybook layout, used as the input to
    /// `copybook_hash`. Deterministic: a pure function of the structure (names/pics/offsets/lengths/kinds),
    /// independent of insertion-time state.
    ///
    /// Layout: the literal `COPYBOOK`, `0x1f`, the record name, `0x1f`, the encoding, `0x1e`, followed by
    /// one `canon_field` entry per top-level field in order (groups recurse into their children).
    ///
    /// NOTE(review): the strings are written verbatim. A name, PIC or encoding that itself contained
    /// `0x1f`/`0x1e` would blur the boundaries between parts -- confirm such input is rejected upstream.
    pub fn canonical_bytes(&self) -> Vec<u8> {
        let mut out = Vec::new();
        // Header: fixed tag, then the two record-level strings.
        out.extend_from_slice(b"COPYBOOK\x1f");
        out.extend_from_slice(self.record_name.as_bytes());
        out.push(0x1f);
        out.extend_from_slice(self.encoding.as_bytes());
        out.push(0x1e);
        // Body: every top-level field, in declaration order.
        for f in &self.fields {
            canon_field(f, &mut out);
        }
        out
    }
}

/// Appends the canonical encoding of one field declaration to `out`.
///
/// The entry is `name`, `pic`, decimal `offset` and decimal `length`, each followed by `0x1f`; then a
/// kind tag; then a closing `0x1e`. Kind tags are `A` for alphanumeric, `N<scale>S` / `N<scale>U` for
/// signed / unsigned numeric, and `G{` ... `}` wrapping the entries of a group's children, which are
/// encoded recursively in order.
///
/// NOTE(review): `name` and `pic` are copied verbatim, so the encoding is only unambiguous while they
/// contain no `0x1f`/`0x1e` bytes -- confirm against how declarations are produced.
fn canon_field(f: &FieldDecl, out: &mut Vec<u8>) {
    const UNIT_SEP: u8 = 0x1f;
    const RECORD_SEP: u8 = 0x1e;

    // The four scalar parts share one shape: bytes, then a unit separator.
    let offset_text = f.offset.to_string();
    let length_text = f.length.to_string();
    for part in [f.name.as_bytes(), f.pic.as_bytes(), offset_text.as_bytes(), length_text.as_bytes()] {
        out.extend_from_slice(part);
        out.push(UNIT_SEP);
    }

    match &f.kind {
        FieldKind::Alphanumeric => out.push(b'A'),
        FieldKind::Numeric { scale, signed } => {
            let sign_tag = if *signed { b'S' } else { b'U' };
            out.push(b'N');
            out.extend_from_slice(scale.to_string().as_bytes());
            out.push(sign_tag);
        }
        FieldKind::Group(children) => {
            out.extend_from_slice(b"G{");
            children.iter().for_each(|child| canon_field(child, out));
            out.push(b'}');
        }
    }

    out.push(RECORD_SEP);
}

/// A finding emitted by a court instead of a silent coercion: a stable `code` and a human `message`.
///
/// Both parts are free-form owned strings; this type does not restrict which codes may be used.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Finding {
    /// A stable machine code, e.g. `NUMERIC_NONDIGIT`, `VALUE_OVERFLOW`, `FIELD_OUT_OF_RANGE`.
    pub code: String,
    /// A human-readable message.
    pub message: String,
}

impl Finding {
    /// Construct a finding.
    pub fn new(code: impl Into<String>, message: impl Into<String>) -> Self {
        Finding { code: code.into(), message: message.into() }
    }
}

/// A decoded leaf field: its name, the rendered semantic `value`, the raw bytes, the declaration it came
/// from, and any findings raised while decoding it.
///
/// The struct owns all of its data: `raw` and `decl` are held by value rather than borrowed from the
/// record or the copybook.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedField {
    /// The field name.
    pub name: String,
    /// The rendered value (see [`crate::export`] for the rendering policy).
    pub value: String,
    /// The exact raw bytes of the field.
    pub raw: Vec<u8>,
    /// The originating declaration.
    pub decl: FieldDecl,
    /// Findings raised while decoding (empty = clean).
    pub findings: Vec<Finding>,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A copybook named `REC` with `ascii` encoding and the given top-level fields.
    fn copybook(fields: Vec<FieldDecl>) -> Copybook {
        Copybook { record_name: "REC".into(), encoding: "ascii".into(), fields }
    }

    #[test]
    fn record_length_sums_children() {
        let cb = copybook(vec![
            FieldDecl::alnum("NAME", "X(4)", 0, 4),
            FieldDecl::numeric("AMT", "9(3)V99", 4, 5, 2, false),
        ]);
        assert_eq!(cb.record_length(), 9);
        assert_eq!(copybook(Vec::new()).record_length(), 0);
    }

    #[test]
    fn group_length_is_sum() {
        let g = FieldDecl::group(
            "G",
            0,
            vec![FieldDecl::alnum("A", "X(2)", 0, 2), FieldDecl::alnum("B", "X(3)", 2, 3)],
        );
        assert_eq!(g.length, 5);
        assert!(g.pic.is_empty());
    }

    #[test]
    fn canonical_bytes_deterministic() {
        let cb = copybook(vec![FieldDecl::alnum("NAME", "X(4)", 0, 4)]);
        assert_eq!(cb.canonical_bytes(), cb.canonical_bytes());
        assert!(!cb.canonical_bytes().is_empty());
        // An equal but separately built value must serialize identically.
        assert_eq!(cb.clone().canonical_bytes(), cb.canonical_bytes());
    }

    /// Pins the exact serialization. Any change here changes every `copybook_hash`, so it must be a
    /// deliberate decision rather than an accident of refactoring.
    #[test]
    fn canonical_bytes_pin_exact_encoding() {
        let cb = copybook(vec![
            FieldDecl::alnum("NAME", "X(4)", 0, 4),
            FieldDecl::numeric("AMT", "S9(3)V99", 4, 5, 2, true),
            FieldDecl::group(
                "GRP",
                9,
                vec![FieldDecl::alnum("A", "X(2)", 9, 2), FieldDecl::numeric("B", "9(3)", 11, 3, 0, false)],
            ),
        ]);
        let expected: Vec<u8> = [
            &b"COPYBOOK\x1fREC\x1fascii\x1e"[..],
            &b"NAME\x1fX(4)\x1f0\x1f4\x1fA\x1e"[..],
            &b"AMT\x1fS9(3)V99\x1f4\x1f5\x1fN2S\x1e"[..],
            &b"GRP\x1f\x1f9\x1f5\x1fG{"[..],
            &b"A\x1fX(2)\x1f9\x1f2\x1fA\x1e"[..],
            &b"B\x1f9(3)\x1f11\x1f3\x1fN0U\x1e"[..],
            &b"}\x1e"[..],
        ]
        .concat();
        assert_eq!(cb.canonical_bytes(), expected);
    }

    #[test]
    fn canonical_bytes_reflect_layout_changes() {
        let base = copybook(vec![FieldDecl::numeric("AMT", "9(5)", 0, 5, 0, false)]);
        let moved = copybook(vec![FieldDecl::numeric("AMT", "9(5)", 1, 5, 0, false)]);
        let rescaled = copybook(vec![FieldDecl::numeric("AMT", "9(5)", 0, 5, 2, false)]);
        let signed = copybook(vec![FieldDecl::numeric("AMT", "9(5)", 0, 5, 0, true)]);
        assert_ne!(base.canonical_bytes(), moved.canonical_bytes());
        assert_ne!(base.canonical_bytes(), rescaled.canonical_bytes());
        assert_ne!(base.canonical_bytes(), signed.canonical_bytes());
    }
}