kobold-json 0.1.0

Forensic JSON evidence packets for COBOL record migration: raw-byte custody, copybook/record hashes, field findings, round-trip proof. Clean-room; independent of GnuCOBOL/libcob.
Documentation
//! `KOBOLD.JSON.EXPORT.1` -- build a forensic JSON evidence packet from a raw COBOL record + its copybook.
//!
//! The packet preserves **both** the semantic value **and** the storage truth. Three modes:
//!
//! * [`Mode::Compact`] -- `{record, fields:{NAME:value}}` (decoded values only).
//! * [`Mode::Audit`] -- top-level `encoding`, plus per field
//!   `{value, pic, offset, length, raw_hex, findings}`.
//! * [`Mode::Evidence`] -- Audit plus `copybook_hash`, `record_hash`, and
//!   `roundtrip:{byte_reconstructable, requires_raw_hex}`.
//!
//! Decoding is **fail-open into findings, never silent coercion**: when a numeric field's bytes are not
//! valid digits, a [`crate::model::Finding`] is emitted (the value still renders best-effort, but the raw
//! bytes and the finding make the truth auditable). It is independent of GnuCOBOL/libcob.

use crate::json::JsonValue;
use crate::model::{Copybook, FieldDecl, FieldKind, Finding};
use crate::sha256;

/// The evidence detail level of an exported packet.
///
/// Passed to [`export`] to choose which top-level keys and which per-field details are emitted.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Values only: `{record, fields:{NAME:value}}`.
    Compact,
    /// Per-field storage detail: `{value, pic, offset, length, raw_hex, findings}`, alongside the
    /// top-level `record` and `encoding`.
    Audit,
    /// Full custody: Audit + `copybook_hash`, `record_hash`, `roundtrip`.
    Evidence,
}

/// Decode an alphanumeric field for display.
///
/// Trailing pad bytes (ASCII space and NUL) are removed; every remaining byte becomes the `char` with
/// the same code point, so no byte is ever rejected or substituted. Leading and embedded pad bytes are
/// kept as-is.
fn render_alnum(data: &[u8]) -> String {
    // One past the last byte that is neither space nor NUL; 0 when the field is entirely padding.
    let keep = data
        .iter()
        .rposition(|&byte| byte != b' ' && byte != 0)
        .map_or(0, |last| last + 1);
    data[..keep].iter().copied().map(char::from).collect()
}

/// Render a display-numeric field per the v0.1 policy, returning `(rendered text, findings)`.
///
/// * ASCII digits are kept in order.
/// * In a `signed` field, the final byte may be a zoned sign overpunch; it contributes its digit and
///   decides the sign.
/// * Every other byte is reported as a `NUMERIC_NONDIGIT` finding and left out of the rendered digits
///   (best-effort rendering; the caller's raw bytes remain the source of truth).
///
/// A field that yields no digits at all renders as zero. `scale` is the number of implied decimal places.
fn render_numeric(data: &[u8], scale: usize, signed: bool) -> (String, Vec<Finding>) {
    // `None` for an empty field, so the "last byte" test below can never match.
    let last_pos = data.len().checked_sub(1);
    let mut is_negative = false;
    let mut kept: Vec<u8> = Vec::with_capacity(data.len());
    let mut findings = Vec::new();

    for (pos, &byte) in data.iter().enumerate() {
        if byte.is_ascii_digit() {
            kept.push(byte);
            continue;
        }
        // Only the final byte of a signed field is eligible to carry an overpunched sign.
        let sign_digit = if signed && Some(pos) == last_pos { overpunch(byte) } else { None };
        match sign_digit {
            Some((digit, neg)) => {
                kept.push(digit);
                is_negative = neg;
            }
            // Spaces in an unpopulated numeric are common but still suspect: always a finding.
            None => findings.push(Finding::new(
                "NUMERIC_NONDIGIT",
                format!("non-digit byte 0x{:02x} at position {} in numeric field", byte, pos),
            )),
        }
    }

    if kept.is_empty() {
        kept.push(b'0');
    }
    (format_digits(&kept, scale, is_negative), findings)
}

/// Decode a zoned-decimal sign overpunch byte into `(ascii digit, is_negative)`.
///
/// ASCII zoned convention: the ten bytes `{ABCDEFGHI` stand for +0..+9 and `}JKLMNOPQR` for -0..-9.
/// Any other byte is not an overpunch and yields `None`.
fn overpunch(b: u8) -> Option<(u8, bool)> {
    // A byte's index in its table is the digit it encodes.
    const POSITIVE: &[u8; 10] = b"{ABCDEFGHI";
    const NEGATIVE: &[u8; 10] = b"}JKLMNOPQR";

    let digit_in = |table: &[u8; 10]| {
        table.iter().position(|&code| code == b).map(|idx| b'0' + idx as u8)
    };

    digit_in(POSITIVE)
        .map(|digit| (digit, false))
        .or_else(|| digit_in(NEGATIVE).map(|digit| (digit, true)))
}

/// Turn a run of ASCII digits into a decimal string with `scale` implied fraction places.
///
/// Non-digit bytes in `digits` are ignored and an empty run counts as zero. The integer part is stripped
/// of leading zeros down to a single digit, the fraction part always has exactly `scale` digits (the
/// input is left-padded with zeros when it is too short), and a leading `-` is written only for a
/// negative, non-zero value.
fn format_digits(digits: &[u8], scale: usize, negative: bool) -> String {
    let mut clean: Vec<u8> = digits.iter().copied().filter(u8::is_ascii_digit).collect();
    if clean.is_empty() {
        clean.push(b'0');
    }

    // At least `scale + 1` digits are needed so the integer part is never empty.
    let missing = scale.saturating_sub(clean.len() - 1);
    let mut padded = vec![b'0'; missing];
    padded.extend_from_slice(&clean);

    let (whole, frac) = padded.split_at(padded.len() - scale);
    // First significant integer digit, or the final zero when the integer part is all zeros.
    let first_sig = whole.iter().position(|&d| d != b'0').unwrap_or(whole.len() - 1);

    let mut out = String::with_capacity(padded.len() + 2);
    // Never render "-0" / "-0.00".
    let is_nonzero = padded.iter().any(|&d| d != b'0');
    if negative && is_nonzero {
        out.push('-');
    }
    out.extend(whole[first_sig..].iter().map(|&d| char::from(d)));
    if scale > 0 {
        out.push('.');
        out.extend(frac.iter().map(|&d| char::from(d)));
    }
    out
}

/// Hex-encode `data` by appending each byte through `sha256::push_hex_byte`.
///
/// The output buffer is reserved at two characters per input byte.
// NOTE(review): the name implies lowercase digits; that is decided by `push_hex_byte` -- confirm there.
fn hex_lower(data: &[u8]) -> String {
    data.iter().fold(String::with_capacity(data.len() * 2), |mut hex, &byte| {
        sha256::push_hex_byte(byte, &mut hex);
        hex
    })
}

/// The result of decoding one declared field against a slice of the record.
///
/// Produced by `decode_field`; the compact and audit builders read these members to assemble JSON.
struct Decoded {
    /// Rendered value: a JSON string for alphanumeric and numeric fields, `Null` for groups and for
    /// fields whose span exceeds the record.
    value: JsonValue,
    /// The field's bytes copied out of the record. For an out-of-range field this is whatever exists
    /// from its offset to the end of the record (possibly nothing).
    raw: Vec<u8>,
    /// Findings raised while decoding this field; empty when nothing was suspect.
    findings: Vec<Finding>,
    /// For a group: its decoded children (name -> Decoded packet member), else None.
    group: Option<Vec<(String, Decoded)>>,
}

/// Decode a single field declaration against the complete record.
///
/// `decl.offset` and `decl.length` are absolute positions within `record`. When the declared span does
/// not fit, nothing is decoded: the result carries a `Null` value, a `FIELD_OUT_OF_RANGE` finding, and
/// as `raw` whatever bytes exist from the offset onward. Otherwise the field is rendered by kind:
/// alphanumeric and numeric fields become JSON strings, and a group recursively decodes each child
/// (children are located by their own offsets in the same `record`).
fn decode_field(decl: &FieldDecl, record: &[u8]) -> Decoded {
    let start = decl.offset;
    // Saturating: an overflowing span simply lands past the end of any real record.
    let end = start.saturating_add(decl.length);

    let raw = match record.get(start..end) {
        Some(bytes) => bytes.to_vec(),
        None => {
            let finding = Finding::new(
                "FIELD_OUT_OF_RANGE",
                format!(
                    "field {} [{}..{}] exceeds record length {}",
                    decl.name, start, end, record.len()
                ),
            );
            return Decoded {
                value: JsonValue::Null,
                // Keep the partial bytes that do exist so custody of them is not lost.
                raw: record.get(start..).map(|tail| tail.to_vec()).unwrap_or_default(),
                findings: vec![finding],
                group: None,
            };
        }
    };

    let (value, findings, group) = match &decl.kind {
        FieldKind::Alphanumeric => (JsonValue::String(render_alnum(&raw)), Vec::new(), None),
        FieldKind::Numeric { scale, signed } => {
            let (text, findings) = render_numeric(&raw, *scale, *signed);
            (JsonValue::String(text), findings, None)
        }
        FieldKind::Group(children) => {
            let members: Vec<(String, Decoded)> = children
                .iter()
                .map(|child| (child.name.clone(), decode_field(child, record)))
                .collect();
            (JsonValue::Null, Vec::new(), Some(members))
        }
    };
    Decoded { value, raw, findings, group }
}

/// Assemble the `Compact`-mode `fields` object: one `NAME -> value` member per declaration, in
/// declaration order.
///
/// A group that decoded as a group becomes a nested object built from its children; every other field
/// contributes its decoded value directly (which is `Null` for anything that fell outside the record).
fn compact_fields(decls: &[FieldDecl], record: &[u8]) -> JsonValue {
    let members: Vec<(String, JsonValue)> = decls
        .iter()
        .map(|decl| {
            let decoded = decode_field(decl, record);
            let value = if decoded.group.is_none() {
                decoded.value
            } else if let FieldKind::Group(children) = &decl.kind {
                compact_fields(children, record)
            } else {
                JsonValue::Null
            };
            (decl.name.clone(), value)
        })
        .collect();
    JsonValue::Object(members)
}

/// Build the `fields` object for `Audit`/`Evidence` mode (per-field detail object).
///
/// Each declaration becomes one member, in declaration order:
///
/// * a leaf field renders as `{value, pic, offset, length, raw_hex, findings}`;
/// * a group renders as `{group:true, offset, length, raw_hex, fields}` where `fields` is this same
///   structure built from the group's children. If decoding the group itself raised findings (for
///   example `FIELD_OUT_OF_RANGE` when the group's declared span exceeds the record), a `findings`
///   array is appended so the problem is not silently dropped; a clean group's output is unchanged.
fn audit_fields(decls: &[FieldDecl], record: &[u8]) -> JsonValue {
    let mut members = Vec::with_capacity(decls.len());
    for d in decls {
        let dec = decode_field(d, record);
        let offset = JsonValue::uint(d.offset as u64);
        let length = JsonValue::uint(d.length as u64);
        let raw_hex = JsonValue::str(hex_lower(&dec.raw));

        let obj = if let FieldKind::Group(children) = &d.kind {
            let mut obj = vec![
                ("group".to_string(), JsonValue::Bool(true)),
                ("offset".to_string(), offset),
                ("length".to_string(), length),
                ("raw_hex".to_string(), raw_hex),
                ("fields".to_string(), audit_fields(children, record)),
            ];
            // The children report only their own spans; a finding on the group's span (e.g. a group
            // declared longer than the record) would otherwise vanish from the packet.
            if !dec.findings.is_empty() {
                obj.push(("findings".to_string(), findings_to_json(&dec.findings)));
            }
            obj
        } else {
            vec![
                ("value".to_string(), dec.value),
                ("pic".to_string(), JsonValue::str(d.pic.clone())),
                ("offset".to_string(), offset),
                ("length".to_string(), length),
                ("raw_hex".to_string(), raw_hex),
                ("findings".to_string(), findings_to_json(&dec.findings)),
            ]
        };
        members.push((d.name.clone(), JsonValue::Object(obj)));
    }
    JsonValue::Object(members)
}

/// Convert findings into a JSON array, one `{code, message}` object per finding, preserving order.
///
/// An empty slice yields an empty array.
pub fn findings_to_json(findings: &[Finding]) -> JsonValue {
    let mut items = Vec::with_capacity(findings.len());
    for finding in findings {
        let entry = vec![
            ("code".to_string(), JsonValue::str(finding.code.clone())),
            ("message".to_string(), JsonValue::str(finding.message.clone())),
        ];
        items.push(JsonValue::Object(entry));
    }
    JsonValue::Array(items)
}

/// `KOBOLD.JSON.EXPORT.1` -- export a record + copybook into a JSON evidence packet at the given [`Mode`].
pub fn export(copybook: &Copybook, record: &[u8], mode: Mode) -> JsonValue {
    let mut top: Vec<(String, JsonValue)> = Vec::new();
    top.push(("record".to_string(), JsonValue::str(copybook.record_name.clone())));

    match mode {
        Mode::Compact => {
            top.push(("fields".to_string(), compact_fields(&copybook.fields, record)));
        }
        Mode::Audit => {
            top.push(("encoding".to_string(), JsonValue::str(copybook.encoding.clone())));
            top.push(("fields".to_string(), audit_fields(&copybook.fields, record)));
        }
        Mode::Evidence => {
            top.push(("encoding".to_string(), JsonValue::str(copybook.encoding.clone())));
            top.push((
                "copybook_hash".to_string(),
                JsonValue::str(format!("sha256:{}", sha256::hex_digest(&copybook.canonical_bytes()))),
            ));
            top.push((
                "record_hash".to_string(),
                JsonValue::str(format!("sha256:{}", sha256::hex_digest(record))),
            ));
            top.push(("fields".to_string(), audit_fields(&copybook.fields, record)));
            // Round-trip self-assessment: the Evidence packet carries raw_hex for every leaf, so it is
            // byte-reconstructable and that reconstruction requires the raw_hex.
            top.push((
                "roundtrip".to_string(),
                JsonValue::Object(vec![
                    ("byte_reconstructable".to_string(), JsonValue::Bool(true)),
                    ("requires_raw_hex".to_string(), JsonValue::Bool(true)),
                ]),
            ));
        }
    }

    JsonValue::Object(top)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::json::to_string;

    /// Shared fixture: a 9-byte `CUST` record layout -- `NAME` X(4) at 0..4, unsigned `AMT` 9(3)V99 at 4..9.
    fn copybook() -> Copybook {
        Copybook {
            record_name: "CUST".into(),
            encoding: "ascii".into(),
            fields: vec![
                FieldDecl::alnum("NAME", "X(4)", 0, 4),
                FieldDecl::numeric("AMT", "9(3)V99", 4, 5, 2, false),
            ],
        }
    }

    /// Compact mode serializes to exactly `record` plus decoded values, nothing else.
    #[test]
    fn compact_values_only() {
        let cb = copybook();
        let rec = b"JOHN01250";
        let p = export(&cb, rec, Mode::Compact);
        assert_eq!(
            to_string(&p),
            "{\"record\":\"CUST\",\"fields\":{\"NAME\":\"JOHN\",\"AMT\":\"12.50\"}}"
        );
    }

    /// Audit mode exposes the picture, offset and raw bytes of a field alongside its value.
    #[test]
    fn audit_has_storage_truth() {
        let cb = copybook();
        let rec = b"JOHN01250";
        let p = export(&cb, rec, Mode::Audit);
        let fields = p.get("fields").unwrap();
        let amt = fields.get("AMT").unwrap();
        assert_eq!(amt.get("pic").unwrap().as_str(), Some("9(3)V99"));
        assert_eq!(amt.get("offset").unwrap(), &JsonValue::uint(4));
        assert_eq!(amt.get("raw_hex").unwrap().as_str(), Some("3031323530")); // "01250"
    }

    /// Evidence mode carries a prefixed copybook hash and a round-trip assessment; the fixture's fields
    /// span the whole 9-byte record.
    #[test]
    fn evidence_has_hashes_and_roundtrip() {
        let cb = copybook();
        let rec = b"JOHN01250";
        let p = export(&cb, rec, Mode::Evidence);
        let ch = p.get("copybook_hash").unwrap().as_str().unwrap();
        assert!(ch.starts_with("sha256:"));
        let rt = p.get("roundtrip").unwrap();
        assert_eq!(rt.get("byte_reconstructable").unwrap(), &JsonValue::Bool(true));
    }

    /// Invalid numeric bytes must surface as a finding while the raw bytes stay intact.
    #[test]
    fn numeric_nondigit_is_a_finding_not_coercion() {
        let cb = copybook();
        let rec = b"JOHN0AB50"; // AMT bytes "0AB50" -- non-digits
        let p = export(&cb, rec, Mode::Audit);
        let amt = p.get("fields").unwrap().get("AMT").unwrap();
        let findings = amt.get("findings").unwrap();
        if let JsonValue::Array(items) = findings {
            assert!(!items.is_empty(), "expected NUMERIC_NONDIGIT finding");
            assert_eq!(items[0].get("code").unwrap().as_str(), Some("NUMERIC_NONDIGIT"));
        } else {
            panic!("findings not an array");
        }
        // raw_hex still preserves the truth
        assert_eq!(amt.get("raw_hex").unwrap().as_str(), Some("3041423530"));
    }

    /// Pins `format_digits`: leading-zero stripping, implied decimal point, sign, and scale padding.
    #[test]
    fn numeric_formatting() {
        assert_eq!(format_digits(b"042", 0, false), "42");
        assert_eq!(format_digits(b"01250", 2, false), "12.50");
        assert_eq!(format_digits(b"0000", 0, false), "0");
        assert_eq!(format_digits(b"042", 0, true), "-42");
        assert_eq!(format_digits(b"00", 2, true), "0.00"); // -0 suppressed
        assert_eq!(format_digits(b"5", 2, false), "0.05"); // scale padding
    }
}