Skip to main content

kobold_csv/
export.rs

1//! `KOBOLD.CSV.EXPORT.1` -- build forensic delimited (CSV) evidence from raw COBOL records + their copybook.
2//!
3//! CSV is a flat, tabular format: a row is a sequence of columns. That shapes the three modes:
4//!
5//! * [`Mode::Compact`] -- a header row of leaf field NAMES, then one row PER record of decoded values. This
6//!   is the classic analyst extract: `ACCOUNT_NO,BALANCE,STATUS` then a line per account.
7//! * [`Mode::Audit`] -- a LONG/tall table: header `field,value,pic,offset,length,raw_hex,findings`, one row
8//!   per (record, field). A wide row cannot carry per-field metadata, so custody data goes tall.
9//! * [`Mode::Evidence`] -- tall like Audit, prefixed with `record_hash,copybook_hash`:
10//!   `record_hash,copybook_hash,field,value,pic,offset,length,raw_hex,findings`. Hashes are `sha256:`-prefixed.
11//!
//! Numeric/alnum value rendering is IDENTICAL to kobold-json (leading-zero strip, decimal at scale, sign via
//! zoned overpunch). A numeric field whose bytes are not valid digits emits a `NUMERIC_NONDIGIT` [`Finding`]
//! -- NEVER a silent coercion -- in the tall modes, whose `raw_hex` column preserves the byte truth
//! regardless. [`Mode::Compact`] rows carry decoded values only, so neither findings nor raw bytes appear there.
15//!
16//! This module is independent of GnuCOBOL/libcob.
17
18use crate::dialect::{write_row, Dialect};
19use crate::model::{Copybook, FieldDecl, FieldKind, Finding};
20use crate::sha256;
21
/// How much custody detail an exported table carries.
///
/// The variant selects both the table shape (wide vs. tall) and the column set that [`export`] writes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Mode {
    /// Wide table: a header row of leaf field names, then one row of decoded values per record.
    Compact,
    /// Tall table with columns `field,value,pic,offset,length,raw_hex,findings`; one row per
    /// (record, field) pair.
    Audit,
    /// Tall table like [`Mode::Audit`] with two hash columns in front:
    /// `record_hash,copybook_hash,field,value,pic,offset,length,raw_hex,findings`.
    Evidence,
}
32
/// Render an alphanumeric field as text.
///
/// Trailing spaces and NUL bytes are dropped; every remaining byte becomes the `char` with the same
/// code point (a 1:1 byte -> char mapping), so interior and leading padding is kept as-is.
pub(crate) fn render_alnum(data: &[u8]) -> String {
    // One past the last byte that is neither a space nor a NUL; 0 when every byte is padding.
    let keep = data
        .iter()
        .rposition(|&byte| byte != b' ' && byte != 0)
        .map_or(0, |last| last + 1);
    data[..keep].iter().copied().map(char::from).collect()
}
42
43/// Render a numeric value from its raw display bytes. Returns the rendered string plus any findings. A zoned
44/// sign overpunch in the last byte is recognized for signed fields; otherwise any non-digit byte yields a
45/// `NUMERIC_NONDIGIT` finding (NOT a silent coercion).
46pub(crate) fn render_numeric(data: &[u8], scale: usize, signed: bool) -> (String, Vec<Finding>) {
47    let mut findings = Vec::new();
48    let mut digits: Vec<u8> = Vec::with_capacity(data.len());
49    let mut negative = false;
50
51    for (idx, &b) in data.iter().enumerate() {
52        let is_last = idx + 1 == data.len();
53        if b.is_ascii_digit() {
54            digits.push(b);
55            continue;
56        }
57        if signed && is_last {
58            if let Some((dgt, neg)) = overpunch(b) {
59                digits.push(dgt);
60                negative = neg;
61                continue;
62            }
63        }
64        findings.push(Finding::new(
65            "NUMERIC_NONDIGIT",
66            format!("non-digit byte 0x{:02x} at position {} in numeric field", b, idx),
67        ));
68    }
69
70    if digits.is_empty() {
71        digits.push(b'0');
72    }
73    let s = format_digits(&digits, scale, negative);
74    (s, findings)
75}
76
/// Decode a zoned-decimal sign overpunch byte into `(ascii_digit, is_negative)`.
///
/// ASCII zoned convention: `{ABCDEFGHI` stand for +0..+9 and `}JKLMNOPQR` for -0..-9. Any other byte
/// (including plain digits) yields `None`.
fn overpunch(b: u8) -> Option<(u8, bool)> {
    // Each table lists the overpunch codes for digits 0..=9, in digit order.
    const POSITIVE: &[u8] = b"{ABCDEFGHI";
    const NEGATIVE: &[u8] = b"}JKLMNOPQR";

    let lookup = |table: &[u8]| {
        (b'0'..=b'9')
            .zip(table)
            .find(|&(_, &code)| code == b)
            .map(|(digit, _)| digit)
    };

    lookup(POSITIVE)
        .map(|digit| (digit, false))
        .or_else(|| lookup(NEGATIVE).map(|digit| (digit, true)))
}
88
/// Format decimal `digits` with an implied `scale` and sign.
///
/// * Bytes that are not ASCII digits are ignored; if nothing is left the value is a single `0`.
/// * The digits are left-padded with zeros so that at least one integer digit precedes the `scale`
///   fractional digits.
/// * Leading zeros of the integer part are stripped down to one digit.
/// * When `scale > 0` a `.` followed by exactly `scale` fractional digits is appended.
/// * A leading `-` is written when `negative` is set, except for an all-zero value (`-0` / `-0.00`
///   render as `0` / `0.00`).
///
/// The zero padding is computed once and the result is written into a single pre-sized `String`, so
/// the cost stays linear in `scale + digits.len()` even for a very large `scale`.
pub(crate) fn format_digits(digits: &[u8], scale: usize, negative: bool) -> String {
    let kept: Vec<u8> = digits.iter().copied().filter(u8::is_ascii_digit).collect();
    // An input with no digits at all is treated as a single zero.
    let kept: &[u8] = if kept.is_empty() { b"0" } else { &kept };

    // Number of zeros needed on the left so the total length is at least `scale + 1`.
    let pad = scale.saturating_add(1).saturating_sub(kept.len());
    let mut padded: Vec<u8> = Vec::with_capacity(pad + kept.len());
    padded.resize(pad, b'0');
    padded.extend_from_slice(kept);

    // `padded.len() > scale` holds by construction, so the integer part is never empty.
    let (int_part, frac_part) = padded.split_at(padded.len() - scale);
    // First significant integer digit; an all-zero integer part keeps its final `0`.
    let first_kept = int_part
        .iter()
        .position(|&d| d != b'0')
        .unwrap_or(int_part.len() - 1);
    let is_zero = padded.iter().all(|&d| d == b'0');

    // Room for every digit plus an optional sign and decimal point.
    let mut out = String::with_capacity(padded.len() + 2);
    if negative && !is_zero {
        out.push('-');
    }
    out.extend(int_part[first_kept..].iter().copied().map(char::from));
    if scale > 0 {
        out.push('.');
        out.extend(frac_part.iter().copied().map(char::from));
    }
    out
}
118
119fn hex_lower(data: &[u8]) -> String {
120    let mut s = String::with_capacity(data.len() * 2);
121    for &b in data {
122        sha256::push_hex_byte(b, &mut s);
123    }
124    s
125}
126
127/// The decoded result of one leaf field against a record.
128pub(crate) struct LeafDecode {
129    pub value: String,
130    pub raw: Vec<u8>,
131    pub findings: Vec<Finding>,
132}
133
134/// Decode one LEAF declaration against the full record bytes (groups are flattened by the caller via
135/// [`Copybook::leaf_fields`], so this only ever sees leaves).
136pub(crate) fn decode_leaf(decl: &FieldDecl, record: &[u8]) -> LeafDecode {
137    let start = decl.offset;
138    let end = decl.offset.saturating_add(decl.length);
139    if end > record.len() {
140        let raw = if start < record.len() { record[start..].to_vec() } else { Vec::new() };
141        return LeafDecode {
142            value: String::new(),
143            raw,
144            findings: vec![Finding::new(
145                "FIELD_OUT_OF_RANGE",
146                format!(
147                    "field {} [{}..{}] exceeds record length {}",
148                    decl.name, start, end, record.len()
149                ),
150            )],
151        };
152    }
153    let raw = record[start..end].to_vec();
154    match &decl.kind {
155        FieldKind::Alphanumeric => LeafDecode { value: render_alnum(&raw), raw, findings: Vec::new() },
156        FieldKind::Numeric { scale, signed } => {
157            let (s, findings) = render_numeric(&raw, *scale, *signed);
158            LeafDecode { value: s, raw, findings }
159        }
160        // leaf_fields never yields a group, but keep this total.
161        FieldKind::Group(_) => LeafDecode { value: String::new(), raw, findings: Vec::new() },
162    }
163}
164
165/// Flatten a findings list into one CSV cell: `code:message` entries joined by `; `. Empty = clean.
166fn findings_cell(findings: &[Finding]) -> String {
167    let mut parts: Vec<String> = Vec::with_capacity(findings.len());
168    for f in findings {
169        parts.push(format!("{}:{}", f.code, f.message));
170    }
171    parts.join("; ")
172}
173
174/// `KOBOLD.CSV.EXPORT.1` -- export `records` against `copybook` into delimited text at the given [`Mode`]
175/// under dialect `d`.
176pub fn export(copybook: &Copybook, records: &[&[u8]], mode: Mode, dialect: &Dialect) -> String {
177    let leaves = copybook.leaf_fields();
178    let mut out = String::new();
179
180    match mode {
181        Mode::Compact => {
182            // Header: leaf field names.
183            let header: Vec<String> = leaves.iter().map(|f| f.name.clone()).collect();
184            write_row(&header, dialect, &mut out);
185            // One row of values per record.
186            for rec in records {
187                let row: Vec<String> =
188                    leaves.iter().map(|f| decode_leaf(f, rec).value).collect();
189                write_row(&row, dialect, &mut out);
190            }
191        }
192        Mode::Audit => {
193            let header: Vec<String> = vec![
194                "field".into(),
195                "value".into(),
196                "pic".into(),
197                "offset".into(),
198                "length".into(),
199                "raw_hex".into(),
200                "findings".into(),
201            ];
202            write_row(&header, dialect, &mut out);
203            for rec in records {
204                for f in &leaves {
205                    let dec = decode_leaf(f, rec);
206                    let row = vec![
207                        f.name.clone(),
208                        dec.value,
209                        f.pic.clone(),
210                        f.offset.to_string(),
211                        f.length.to_string(),
212                        hex_lower(&dec.raw),
213                        findings_cell(&dec.findings),
214                    ];
215                    write_row(&row, dialect, &mut out);
216                }
217            }
218        }
219        Mode::Evidence => {
220            let header: Vec<String> = vec![
221                "record_hash".into(),
222                "copybook_hash".into(),
223                "field".into(),
224                "value".into(),
225                "pic".into(),
226                "offset".into(),
227                "length".into(),
228                "raw_hex".into(),
229                "findings".into(),
230            ];
231            write_row(&header, dialect, &mut out);
232            let copybook_hash = format!("sha256:{}", sha256::hex_digest(&copybook.canonical_bytes()));
233            for rec in records {
234                let record_hash = format!("sha256:{}", sha256::hex_digest(rec));
235                for f in &leaves {
236                    let dec = decode_leaf(f, rec);
237                    let row = vec![
238                        record_hash.clone(),
239                        copybook_hash.clone(),
240                        f.name.clone(),
241                        dec.value,
242                        f.pic.clone(),
243                        f.offset.to_string(),
244                        f.length.to_string(),
245                        hex_lower(&dec.raw),
246                        findings_cell(&dec.findings),
247                    ];
248                    write_row(&row, dialect, &mut out);
249                }
250            }
251        }
252    }
253
254    out
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Two-leaf fixture: `NAME` is `X(4)` at offset 0, `AMT` is unsigned `9(3)V99` at offset 4.
    fn copybook() -> Copybook {
        let name = FieldDecl::alnum("NAME", "X(4)", 0, 4);
        let amount = FieldDecl::numeric("AMT", "9(3)V99", 4, 5, 2, false);
        Copybook {
            record_name: "CUST".into(),
            encoding: "ascii".into(),
            fields: vec![name, amount],
        }
    }

    /// Export `records` against the fixture copybook in the plain CSV dialect.
    fn export_csv(records: &[&[u8]], mode: Mode) -> String {
        export(&copybook(), records, mode, &Dialect::csv())
    }

    #[test]
    fn compact_header_and_values() {
        let records: Vec<&[u8]> = vec![b"JOHN01250", b"JANE00099"];
        let table = export_csv(&records, Mode::Compact);
        assert_eq!(table, "NAME,AMT\nJOHN,12.50\nJANE,0.99\n");
    }

    #[test]
    fn audit_is_tall_with_storage_truth() {
        let records: Vec<&[u8]> = vec![b"JOHN01250"];
        let table = export_csv(&records, Mode::Audit);
        assert_eq!(
            table.lines().next(),
            Some("field,value,pic,offset,length,raw_hex,findings")
        );
        // AMT row: value 12.50, pic 9(3)V99, offset 4, length 5, raw_hex of "01250".
        let amt_prefix = "AMT,12.50,9(3)V99,4,5,3031323530,";
        assert!(table.lines().any(|row| row.starts_with(amt_prefix)));
    }

    #[test]
    fn evidence_has_hashes() {
        let records: Vec<&[u8]> = vec![b"JOHN01250"];
        let table = export_csv(&records, Mode::Evidence);
        let mut rows = table.lines();
        assert_eq!(
            rows.next(),
            Some("record_hash,copybook_hash,field,value,pic,offset,length,raw_hex,findings")
        );
        // The first data row must lead with the prefixed record hash.
        assert!(rows.next().map_or(false, |row| row.starts_with("sha256:")));
    }

    #[test]
    fn numeric_nondigit_is_a_finding_not_coercion() {
        // AMT = "0AB50": two non-digit bytes in an unsigned numeric field.
        let records: Vec<&[u8]> = vec![b"JOHN0AB50"];
        let table = export_csv(&records, Mode::Audit);
        // Substring checks, so the test does not depend on how the dialect quotes the findings cell.
        assert!(table.contains("NUMERIC_NONDIGIT"));
        // raw_hex of "0AB50" is preserved.
        assert!(table.contains("3041423530"));
    }

    #[test]
    fn numeric_formatting() {
        // (digits, scale, negative, expected)
        let cases: [(&[u8], usize, bool, &str); 6] = [
            (b"042", 0, false, "42"),
            (b"01250", 2, false, "12.50"),
            (b"0000", 0, false, "0"),
            (b"042", 0, true, "-42"),
            (b"00", 2, true, "0.00"),
            (b"5", 2, false, "0.05"),
        ];
        for (digits, scale, negative, expected) in cases.iter().copied() {
            assert_eq!(format_digits(digits, scale, negative), expected);
        }
    }
}