Skip to main content

kobold_json/
parse.rs

//! `KOBOLD.JSON.PARSE.1` (FAIL-CLOSED) -- reconstruct the original record bytes from a packet + copybook.
//!
//! Two paths:
//!
//! * If the packet carries `raw_hex` for its leaf fields (an `Audit`/`Evidence` packet), reconstruct **the
//!   exact bytes** from the hex (`KOBOLD.JSON.ROUNDTRIP.1`: bytes -> packet -> bytes are identical).
//! * If the packet is value-only (`Compact`), **re-encode** each declared field's value into its PIC bytes,
//!   **failing closed** on overflow or an invalid value -- a value too long for an alphanumeric field, or a
//!   non-numeric value into a numeric field, yields a [`Finding`], never a silent truncation/coercion.
//!
//! On any failure the whole reconstruction fails closed: it returns `Err(findings)`, not partial bytes.
//! It is independent of GnuCOBOL/libcob.
13
14use crate::json::JsonValue;
15use crate::model::{Copybook, FieldDecl, FieldKind, Finding};
16
17/// `KOBOLD.JSON.PARSE.1` -- reconstruct the record bytes declared by `copybook` from `packet`.
18///
19/// Returns `Ok(bytes)` exactly `copybook.record_length()` long, or `Err(findings)` listing every problem.
20pub fn parse_into(copybook: &Copybook, packet: &JsonValue) -> Result<Vec<u8>, Vec<Finding>> {
21    let fields = match packet.get("fields") {
22        Some(f) => f,
23        None => {
24            return Err(vec![Finding::new(
25                "PACKET_NO_FIELDS",
26                "packet has no \"fields\" member".to_string(),
27            )]);
28        }
29    };
30
31    let total = copybook.record_length();
32    let mut out = vec![b' '; total];
33    let mut findings = Vec::new();
34
35    encode_fields(&copybook.fields, fields, &mut out, &mut findings);
36
37    if findings.is_empty() {
38        Ok(out)
39    } else {
40        Err(findings)
41    }
42}
43
44/// Encode a list of declarations into `out` (absolute offsets), reading each field's member from `node`.
45fn encode_fields(decls: &[FieldDecl], node: &JsonValue, out: &mut [u8], findings: &mut Vec<Finding>) {
46    for d in decls {
47        let member = match node.get(&d.name) {
48            Some(m) => m,
49            None => {
50                findings.push(Finding::new(
51                    "FIELD_MISSING",
52                    format!("field {} not present in packet", d.name),
53                ));
54                continue;
55            }
56        };
57        encode_field(d, member, out, findings);
58    }
59}
60
61/// Encode a single field. If the member is an Audit/Evidence detail object with `raw_hex`, use it (exact
62/// reconstruction). If it carries nested `fields` (a group detail object), recurse. Otherwise treat the
63/// member as a value and re-encode per the PIC.
64fn encode_field(d: &FieldDecl, member: &JsonValue, out: &mut [u8], findings: &mut Vec<Finding>) {
65    // Group declaration: recurse into nested fields, whether compact (object of values) or audit detail.
66    if let FieldKind::Group(children) = &d.kind {
67        let inner = member.get("fields").unwrap_or(member);
68        encode_fields(children, inner, out, findings);
69        return;
70    }
71
72    // Leaf. Prefer raw_hex when present (exact byte custody).
73    if let Some(raw_hex) = member.get("raw_hex").and_then(|v| v.as_str()) {
74        match decode_hex(raw_hex) {
75            Ok(bytes) => {
76                if bytes.len() != d.length {
77                    findings.push(Finding::new(
78                        "RAW_HEX_LENGTH",
79                        format!(
80                            "field {}: raw_hex decodes to {} bytes, declared length {}",
81                            d.name,
82                            bytes.len(),
83                            d.length
84                        ),
85                    ));
86                    return;
87                }
88                place(out, d.offset, &bytes, findings, &d.name);
89                return;
90            }
91            Err(msg) => {
92                findings.push(Finding::new("RAW_HEX_INVALID", format!("field {}: {}", d.name, msg)));
93                return;
94            }
95        }
96    }
97
98    // Value-only re-encode.
99    let value_node = member.get("value").unwrap_or(member);
100    let value = match value_node {
101        JsonValue::String(s) => s.clone(),
102        JsonValue::Number(n) => n.clone(),
103        JsonValue::Null => String::new(),
104        _ => {
105            findings.push(Finding::new(
106                "VALUE_TYPE",
107                format!("field {}: value is not a string/number", d.name),
108            ));
109            return;
110        }
111    };
112
113    match &d.kind {
114        FieldKind::Alphanumeric => encode_alnum(d, &value, out, findings),
115        FieldKind::Numeric { scale, signed } => encode_numeric(d, &value, *scale, *signed, out, findings),
116        FieldKind::Group(_) => unreachable!("group handled above"),
117    }
118}
119
120/// Place `bytes` at absolute `offset` in `out`, failing closed if it would not fit.
121fn place(out: &mut [u8], offset: usize, bytes: &[u8], findings: &mut Vec<Finding>, name: &str) {
122    let end = offset + bytes.len();
123    if end > out.len() {
124        findings.push(Finding::new(
125            "FIELD_OUT_OF_RANGE",
126            format!("field {}: writing [{}..{}] exceeds record length {}", name, offset, end, out.len()),
127        ));
128        return;
129    }
130    out[offset..end].copy_from_slice(bytes);
131}
132
133/// Encode an alphanumeric value: pad with trailing spaces to the field length; **fail closed** if the value
134/// is longer than the field (NO silent truncation).
135fn encode_alnum(d: &FieldDecl, value: &str, out: &mut [u8], findings: &mut Vec<Finding>) {
136    // 1:1 char->byte mapping (the inverse of export's render_alnum). Reject any char > 0xff.
137    let mut bytes = Vec::with_capacity(value.len());
138    for ch in value.chars() {
139        let cp = ch as u32;
140        if cp > 0xff {
141            findings.push(Finding::new(
142                "ALNUM_NON_BYTE",
143                format!("field {}: char U+{:04X} is not representable in one byte", d.name, cp),
144            ));
145            return;
146        }
147        bytes.push(cp as u8);
148    }
149    if bytes.len() > d.length {
150        findings.push(Finding::new(
151            "VALUE_OVERFLOW",
152            format!(
153                "field {}: value of {} bytes overflows field length {} (fail-closed, no truncation)",
154                d.name,
155                bytes.len(),
156                d.length
157            ),
158        ));
159        return;
160    }
161    let mut buf = vec![b' '; d.length];
162    buf[..bytes.len()].copy_from_slice(&bytes);
163    place(out, d.offset, &buf, findings, &d.name);
164}
165
166/// Encode a numeric value into zoned-decimal display digits per the field's `scale`/`signed`. **Fails
167/// closed** on a non-numeric value, too many integer digits for the field, or sign on an unsigned field.
168fn encode_numeric(
169    d: &FieldDecl,
170    value: &str,
171    scale: usize,
172    signed: bool,
173    out: &mut [u8],
174    findings: &mut Vec<Finding>,
175) {
176    let mut s = value.trim();
177    let mut negative = false;
178    if let Some(rest) = s.strip_prefix('-') {
179        negative = true;
180        s = rest;
181    } else if let Some(rest) = s.strip_prefix('+') {
182        s = rest;
183    }
184    if negative && !signed {
185        findings.push(Finding::new(
186            "SIGN_ON_UNSIGNED",
187            format!("field {}: negative value into unsigned PIC {}", d.name, d.pic),
188        ));
189        return;
190    }
191
192    // Split integer/fraction on a single decimal point.
193    let (int_str, frac_str) = match s.split_once('.') {
194        Some((i, f)) => (i, f),
195        None => (s, ""),
196    };
197    if int_str.is_empty() && frac_str.is_empty() {
198        findings.push(Finding::new("NUMERIC_EMPTY", format!("field {}: empty numeric value", d.name)));
199        return;
200    }
201    for (label, part) in [("integer", int_str), ("fraction", frac_str)] {
202        if !part.chars().all(|c| c.is_ascii_digit()) {
203            findings.push(Finding::new(
204                "NUMERIC_INVALID",
205                format!("field {}: non-numeric {} part {:?} (fail-closed)", d.name, label, part),
206            ));
207            return;
208        }
209    }
210    if frac_str.len() > scale {
211        findings.push(Finding::new(
212            "FRACTION_OVERFLOW",
213            format!(
214                "field {}: {} fraction digits exceed scale {} (fail-closed, no rounding)",
215                d.name,
216                frac_str.len(),
217                scale
218            ),
219        ));
220        return;
221    }
222
223    // Build the full digit string of length d.length: integer-part (left zero padded) + fraction (right
224    // zero padded to scale).
225    let int_digits = d.length.saturating_sub(scale);
226    let int_trimmed = int_str.trim_start_matches('0');
227    if int_trimmed.len() > int_digits {
228        findings.push(Finding::new(
229            "VALUE_OVERFLOW",
230            format!(
231                "field {}: integer part {:?} needs {} digits, field has {} (fail-closed)",
232                d.name,
233                int_str,
234                int_trimmed.len(),
235                int_digits
236            ),
237        ));
238        return;
239    }
240
241    let mut digits = String::with_capacity(d.length);
242    for _ in 0..(int_digits - int_trimmed.len()) {
243        digits.push('0');
244    }
245    digits.push_str(int_trimmed);
246    digits.push_str(frac_str);
247    for _ in 0..(scale - frac_str.len()) {
248        digits.push('0');
249    }
250
251    let mut bytes: Vec<u8> = digits.into_bytes();
252    debug_assert_eq!(bytes.len(), d.length);
253    if bytes.len() != d.length {
254        findings.push(Finding::new(
255            "NUMERIC_LENGTH",
256            format!("field {}: built {} digits, declared length {}", d.name, bytes.len(), d.length),
257        ));
258        return;
259    }
260
261    // Apply the zoned sign overpunch to the last byte for a signed field.
262    if signed {
263        if let Some(last) = bytes.last_mut() {
264            *last = overpunch_byte(*last, negative);
265        }
266    }
267    place(out, d.offset, &bytes, findings, &d.name);
268}
269
/// Translate an ASCII digit plus a sign into its zoned-decimal overpunch byte.
///
/// Positive digits `0..=9` map to `{`, `A`..=`I`; negative digits map to `}`, `J`..=`R`. A byte that is
/// not an ASCII digit is returned unchanged. (Presumably the same table as export's `overpunch` --
/// confirm against that module.)
fn overpunch_byte(digit: u8, negative: bool) -> u8 {
    const POSITIVE: &[u8; 10] = b"{ABCDEFGHI";
    const NEGATIVE: &[u8; 10] = b"}JKLMNOPQR";

    if !digit.is_ascii_digit() {
        return digit;
    }
    let table = if negative { NEGATIVE } else { POSITIVE };
    table[usize::from(digit - b'0')]
}
283
284/// Decode a lowercase/uppercase hex string into bytes, fail-closed on odd length / non-hex.
285pub fn decode_hex(s: &str) -> Result<Vec<u8>, String> {
286    let b = s.as_bytes();
287    if b.len() % 2 != 0 {
288        return Err(format!("odd-length hex string ({} chars)", b.len()));
289    }
290    let mut out = Vec::with_capacity(b.len() / 2);
291    let mut i = 0;
292    while i < b.len() {
293        let hi = hex_val(b[i]).ok_or_else(|| format!("invalid hex char {:?}", b[i] as char))?;
294        let lo = hex_val(b[i + 1]).ok_or_else(|| format!("invalid hex char {:?}", b[i + 1] as char))?;
295        out.push((hi << 4) | lo);
296        i += 2;
297    }
298    Ok(out)
299}
300
/// Value (0..=15) of one ASCII hex digit in either letter case, or `None` for any other byte.
fn hex_val(c: u8) -> Option<u8> {
    // Only ASCII 0-9, a-f and A-F are radix-16 digits; every other byte (including >= 0x80) is None.
    char::from(c)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
309
#[cfg(test)]
mod tests {
    use super::*;
    use crate::export::{export, Mode};

    /// Two-field record used by the byte-exact round-trip tests: `NAME X(4)` followed by `AMT S9(3)V99`.
    fn copybook() -> Copybook {
        Copybook {
            record_name: "CUST".into(),
            encoding: "ascii".into(),
            fields: vec![
                FieldDecl::alnum("NAME", "X(4)", 0, 4),
                FieldDecl::numeric("AMT", "S9(3)V99", 4, 5, 2, true),
            ],
        }
    }

    /// Copybook `R` holding exactly one field.
    fn single_field(field: FieldDecl) -> Copybook {
        Copybook {
            record_name: "R".into(),
            encoding: "ascii".into(),
            fields: vec![field],
        }
    }

    /// Hand-built value-only packet for record `R` with a single `name: value` string member.
    fn compact_packet(name: &str, value: &str) -> JsonValue {
        JsonValue::Object(vec![
            ("record".into(), JsonValue::str("R")),
            (
                "fields".into(),
                JsonValue::Object(vec![(name.into(), JsonValue::str(value))]),
            ),
        ])
    }

    #[test]
    fn roundtrip_evidence_identical_bytes() {
        // KOBOLD.JSON.ROUNDTRIP.1: bytes -> Evidence packet -> parse_into -> identical bytes.
        let cb = copybook();
        // AMT = -12.50 zoned: digits "0125" + overpunch of '0' negative = '}' -> "0125}"
        let rec = b"JOHN0125}";
        let packet = export(&cb, rec, Mode::Evidence);
        let back = parse_into(&cb, &packet).expect("roundtrip should succeed");
        assert_eq!(&back, rec);
    }

    #[test]
    fn roundtrip_audit_identical_bytes() {
        let cb = copybook();
        // AMT "0007A": 'A' is the positive overpunch of 1, so the digits are 00071 = +0.71 under
        // S9(3)V99. Only the export -> parse_into byte identity is asserted here.
        let rec = b"JANE0007A";
        let packet = export(&cb, rec, Mode::Audit);
        let back = parse_into(&cb, &packet).expect("roundtrip should succeed");
        assert_eq!(&back, rec);
    }

    #[test]
    fn compact_reencode_succeeds() {
        let cb = Copybook {
            record_name: "R".into(),
            encoding: "ascii".into(),
            fields: vec![
                FieldDecl::alnum("NAME", "X(4)", 0, 4),
                FieldDecl::numeric("AMT", "9(3)V99", 4, 5, 2, false),
            ],
        };
        // Compact packet from values.
        let packet = export(&cb, b"AL  01250", Mode::Compact);
        let back = parse_into(&cb, &packet).expect("compact re-encode");
        assert_eq!(&back, b"AL  01250");
    }

    #[test]
    fn compact_signed_reencode_applies_overpunch() {
        // -12.50 into S9(3)V99: digits "01250", last digit '0' negative -> '}'.
        let cb = single_field(FieldDecl::numeric("AMT", "S9(3)V99", 0, 5, 2, true));
        let back = parse_into(&cb, &compact_packet("AMT", "-12.50")).expect("signed re-encode");
        assert_eq!(&back, b"0125}");
    }

    #[test]
    fn fail_closed_overflow_alnum() {
        // A compact packet whose NAME value is too long for the field -> Finding, not truncation.
        let cb = single_field(FieldDecl::alnum("NAME", "X(4)", 0, 4));
        let res = parse_into(&cb, &compact_packet("NAME", "TOOLONG"));
        let findings = res.expect_err("must fail closed on overflow");
        assert_eq!(findings[0].code, "VALUE_OVERFLOW");
    }

    #[test]
    fn fail_closed_nonnumeric() {
        let cb = single_field(FieldDecl::numeric("AMT", "9(3)", 0, 3, 0, false));
        let findings =
            parse_into(&cb, &compact_packet("AMT", "12X")).expect_err("must fail closed on non-numeric");
        assert_eq!(findings[0].code, "NUMERIC_INVALID");
    }

    #[test]
    fn fail_closed_sign_on_unsigned() {
        let cb = single_field(FieldDecl::numeric("AMT", "9(3)", 0, 3, 0, false));
        let findings =
            parse_into(&cb, &compact_packet("AMT", "-5")).expect_err("must fail closed on a negative value");
        assert_eq!(findings[0].code, "SIGN_ON_UNSIGNED");
    }

    #[test]
    fn fail_closed_fraction_overflow() {
        // Three fraction digits into a scale-2 field must not be rounded.
        let cb = single_field(FieldDecl::numeric("AMT", "9(3)V99", 0, 5, 2, false));
        let findings =
            parse_into(&cb, &compact_packet("AMT", "1.234")).expect_err("must fail closed, no rounding");
        assert_eq!(findings[0].code, "FRACTION_OVERFLOW");
    }

    #[test]
    fn fail_closed_missing_field() {
        let cb = single_field(FieldDecl::alnum("NAME", "X(4)", 0, 4));
        let findings =
            parse_into(&cb, &compact_packet("OTHER", "AL")).expect_err("must fail closed on a missing field");
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].code, "FIELD_MISSING");
    }

    #[test]
    fn fail_closed_packet_without_fields() {
        let cb = single_field(FieldDecl::alnum("NAME", "X(4)", 0, 4));
        let packet = JsonValue::Object(vec![("record".into(), JsonValue::str("R"))]);
        let findings = parse_into(&cb, &packet).expect_err("must fail closed without \"fields\"");
        assert_eq!(findings[0].code, "PACKET_NO_FIELDS");
    }

    #[test]
    fn decode_hex_fail_closed() {
        assert!(decode_hex("abc").is_err()); // odd
        assert!(decode_hex("zz").is_err()); // non-hex
        assert_eq!(decode_hex("4a4f").unwrap(), b"JO");
    }
}