Skip to main content

kobold_csv/
model.rs

//! The clean-room COBOL-record data model for kobold-csv.
//!
//! This is a plain Rust description of a record layout (a tree of named fields with offsets, lengths and
//! PIC kinds) plus the decoded result of applying it to raw bytes. **It is independent of GnuCOBOL/libcob**
//! -- it links no COBOL runtime and does not reproduce any runtime's exact behavior. The caller describes
//! their copybook by whatever means they like and feeds raw record bytes to the `KOBOLD.CSV.EXPORT.1` court.
//!
//! This model is a clean-room COPY of the kobold-json model; kobold-csv does NOT depend on kobold-json (or
//! any GnuCOBOL crate) -- the two crates are deliberately self-contained so each can ship independently.
10
11/// The storage kind of a field, as declared by the copybook.
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub enum FieldKind {
14    /// `PIC X`/`A` -- alphanumeric display bytes.
15    Alphanumeric,
16    /// `PIC 9` -- zoned-decimal display digits. `scale` = implied decimal places; `signed` if `PIC S9`.
17    Numeric { scale: usize, signed: bool },
18    /// A group (`01`/`05` with subordinate items): the child fields, in order. A group's `length` is the
19    /// sum of its children's lengths.
20    Group(Vec<FieldDecl>),
21}
22
23/// A single field declaration from a copybook: a name, its PIC string, its byte `offset` within the record,
24/// its byte `length`, and its [`FieldKind`].
25#[derive(Debug, Clone, PartialEq, Eq)]
26pub struct FieldDecl {
27    /// The COBOL data name (e.g. `CUST-NAME`).
28    pub name: String,
29    /// The PIC clause text, kept verbatim for the audit packet (e.g. `X(20)`, `S9(5)V99`).
30    pub pic: String,
31    /// Byte offset of the field within the record.
32    pub offset: usize,
33    /// Byte length of the field.
34    pub length: usize,
35    /// The storage kind.
36    pub kind: FieldKind,
37}
38
39impl FieldDecl {
40    /// An alphanumeric field.
41    pub fn alnum(name: impl Into<String>, pic: impl Into<String>, offset: usize, length: usize) -> Self {
42        FieldDecl { name: name.into(), pic: pic.into(), offset, length, kind: FieldKind::Alphanumeric }
43    }
44
45    /// A numeric (zoned display) field.
46    pub fn numeric(
47        name: impl Into<String>,
48        pic: impl Into<String>,
49        offset: usize,
50        length: usize,
51        scale: usize,
52        signed: bool,
53    ) -> Self {
54        FieldDecl {
55            name: name.into(),
56            pic: pic.into(),
57            offset,
58            length,
59            kind: FieldKind::Numeric { scale, signed },
60        }
61    }
62
63    /// A group field with the given children. `length` is the sum of child lengths.
64    pub fn group(name: impl Into<String>, offset: usize, children: Vec<FieldDecl>) -> Self {
65        let length = children.iter().map(|c| c.length).sum();
66        FieldDecl { name: name.into(), pic: String::new(), offset, length, kind: FieldKind::Group(children) }
67    }
68}
69
70/// A copybook: the record name, the declared character encoding (informational, carried into the packet),
71/// and the top-level fields in order.
72#[derive(Debug, Clone, PartialEq, Eq)]
73pub struct Copybook {
74    /// The `01`-level record name.
75    pub record_name: String,
76    /// The declared character encoding, e.g. `ascii` or `ebcdic-cp-us`. Carried into the packet as
77    /// `encoding`; kobold-csv decodes display bytes as-is and does not transcode.
78    pub encoding: String,
79    /// The top-level fields, in record order.
80    pub fields: Vec<FieldDecl>,
81}
82
83impl Copybook {
84    /// The total declared record length = sum of top-level field lengths.
85    pub fn record_length(&self) -> usize {
86        self.fields.iter().map(|f| f.length).sum()
87    }
88
89    /// A flat, in-order list of the LEAF fields (groups recursed into their children). CSV is inherently a
90    /// flat tabular format: a row is a sequence of columns, so the export/parse courts work over the leaf
91    /// fields in record order. This is the bridge from the (possibly nested) copybook tree to columns.
92    pub fn leaf_fields(&self) -> Vec<&FieldDecl> {
93        let mut out = Vec::new();
94        for f in &self.fields {
95            collect_leaves(f, &mut out);
96        }
97        out
98    }
99
100    /// A stable, canonical byte serialization of the copybook layout, used as the input to
101    /// `copybook_hash`. Deterministic: a pure function of the structure (names/pics/offsets/lengths/kinds),
102    /// independent of insertion-time state.
103    pub fn canonical_bytes(&self) -> Vec<u8> {
104        let mut out = Vec::new();
105        out.extend_from_slice(b"COPYBOOK\x1f");
106        out.extend_from_slice(self.record_name.as_bytes());
107        out.push(0x1f);
108        out.extend_from_slice(self.encoding.as_bytes());
109        out.push(0x1e);
110        for f in &self.fields {
111            canon_field(f, &mut out);
112        }
113        out
114    }
115}
116
117fn collect_leaves<'a>(f: &'a FieldDecl, out: &mut Vec<&'a FieldDecl>) {
118    match &f.kind {
119        FieldKind::Group(children) => {
120            for c in children {
121                collect_leaves(c, out);
122            }
123        }
124        _ => out.push(f),
125    }
126}
127
128fn canon_field(f: &FieldDecl, out: &mut Vec<u8>) {
129    out.extend_from_slice(f.name.as_bytes());
130    out.push(0x1f);
131    out.extend_from_slice(f.pic.as_bytes());
132    out.push(0x1f);
133    out.extend_from_slice(f.offset.to_string().as_bytes());
134    out.push(0x1f);
135    out.extend_from_slice(f.length.to_string().as_bytes());
136    out.push(0x1f);
137    match &f.kind {
138        FieldKind::Alphanumeric => out.extend_from_slice(b"A"),
139        FieldKind::Numeric { scale, signed } => {
140            out.extend_from_slice(b"N");
141            out.extend_from_slice(scale.to_string().as_bytes());
142            out.push(if *signed { b'S' } else { b'U' });
143        }
144        FieldKind::Group(children) => {
145            out.extend_from_slice(b"G{");
146            for c in children {
147                canon_field(c, out);
148            }
149            out.push(b'}');
150        }
151    }
152    out.push(0x1e);
153}
154
/// Something a court reports instead of silently coercing a value: a stable `code` plus a human
/// `message`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Finding {
    /// Stable machine-readable code, e.g. `NUMERIC_NONDIGIT`, `VALUE_OVERFLOW`, `FIELD_OUT_OF_RANGE`.
    pub code: String,
    /// Message intended for a human reader.
    pub message: String,
}

impl Finding {
    /// Creates a finding, converting both arguments into owned strings.
    pub fn new(code: impl Into<String>, message: impl Into<String>) -> Self {
        let code = code.into();
        let message = message.into();
        Self { code, message }
    }
}
170
171/// A decoded leaf field: its name, the rendered semantic `value`, the raw bytes, the declaration it came
172/// from, and any findings raised while decoding it.
173#[derive(Debug, Clone, PartialEq, Eq)]
174pub struct DecodedField {
175    /// The field name.
176    pub name: String,
177    /// The rendered value (see [`crate::export`] for the rendering policy).
178    pub value: String,
179    /// The exact raw bytes of the field.
180    pub raw: Vec<u8>,
181    /// The originating declaration.
182    pub decl: FieldDecl,
183    /// Findings raised while decoding (empty = clean).
184    pub findings: Vec<Finding>,
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// The smallest useful copybook: record `REC`, encoding `ascii`, one 4-byte alphanumeric field.
    fn single_field_copybook() -> Copybook {
        Copybook {
            record_name: "REC".into(),
            encoding: "ascii".into(),
            fields: vec![FieldDecl::alnum("NAME", "X(4)", 0, 4)],
        }
    }

    #[test]
    fn record_length_sums_children() {
        let cb = Copybook {
            record_name: "REC".into(),
            encoding: "ascii".into(),
            fields: vec![
                FieldDecl::alnum("NAME", "X(4)", 0, 4),
                FieldDecl::numeric("AMT", "9(3)V99", 4, 5, 2, false),
            ],
        };
        assert_eq!(cb.record_length(), 9);
    }

    #[test]
    fn record_length_of_empty_copybook_is_zero() {
        let cb = Copybook { record_name: "REC".into(), encoding: "ascii".into(), fields: vec![] };
        assert_eq!(cb.record_length(), 0);
        assert!(cb.leaf_fields().is_empty());
    }

    #[test]
    fn group_length_is_sum() {
        let g = FieldDecl::group(
            "G",
            0,
            vec![FieldDecl::alnum("A", "X(2)", 0, 2), FieldDecl::alnum("B", "X(3)", 2, 3)],
        );
        assert_eq!(g.length, 5);
        // A group carries no PIC text of its own.
        assert_eq!(g.pic, "");
        // An empty group has nothing to sum.
        assert_eq!(FieldDecl::group("E", 0, vec![]).length, 0);
    }

    #[test]
    fn leaf_fields_flattens_groups() {
        let cb = Copybook {
            record_name: "REC".into(),
            encoding: "ascii".into(),
            fields: vec![
                FieldDecl::alnum("ID", "X(2)", 0, 2),
                FieldDecl::group(
                    "G",
                    2,
                    vec![FieldDecl::alnum("A", "X(2)", 2, 2), FieldDecl::alnum("B", "X(3)", 4, 3)],
                ),
            ],
        };
        let leaves: Vec<&str> = cb.leaf_fields().iter().map(|f| f.name.as_str()).collect();
        assert_eq!(leaves, vec!["ID", "A", "B"]);
    }

    #[test]
    fn leaf_fields_flattens_nested_groups_in_record_order() {
        let inner = FieldDecl::group(
            "INNER",
            4,
            vec![FieldDecl::alnum("B", "X(3)", 4, 3), FieldDecl::numeric("C", "9(2)", 7, 2, 0, false)],
        );
        let outer = FieldDecl::group("OUTER", 2, vec![FieldDecl::alnum("A", "X(2)", 2, 2), inner]);
        let cb = Copybook {
            record_name: "REC".into(),
            encoding: "ascii".into(),
            fields: vec![FieldDecl::alnum("ID", "X(2)", 0, 2), outer],
        };
        let leaves: Vec<&str> = cb.leaf_fields().iter().map(|f| f.name.as_str()).collect();
        assert_eq!(leaves, vec!["ID", "A", "B", "C"]);
        // Group lengths roll up through both nesting levels: 2 + (2 + (3 + 2)).
        assert_eq!(cb.record_length(), 9);
    }

    #[test]
    fn canonical_bytes_deterministic() {
        let cb = single_field_copybook();
        assert_eq!(cb.canonical_bytes(), cb.canonical_bytes());
        assert!(!cb.canonical_bytes().is_empty());
        // A structurally equal copybook built separately serializes to the same bytes.
        assert_eq!(cb.canonical_bytes(), single_field_copybook().canonical_bytes());
    }

    #[test]
    fn canonical_bytes_pins_exact_layout() {
        // These bytes feed `copybook_hash`, so any drift in the format must fail a test.
        assert_eq!(
            single_field_copybook().canonical_bytes(),
            b"COPYBOOK\x1fREC\x1fascii\x1eNAME\x1fX(4)\x1f0\x1f4\x1fA\x1e".to_vec()
        );

        let nested = Copybook {
            record_name: "R".into(),
            encoding: "ascii".into(),
            fields: vec![FieldDecl::group(
                "G",
                0,
                vec![FieldDecl::numeric("N", "S9(3)", 0, 3, 0, true)],
            )],
        };
        assert_eq!(
            nested.canonical_bytes(),
            b"COPYBOOK\x1fR\x1fascii\x1eG\x1f\x1f0\x1f3\x1fG{N\x1fS9(3)\x1f0\x1f3\x1fN0S\x1e}\x1e".to_vec()
        );
    }

    #[test]
    fn canonical_bytes_reflects_layout_changes() {
        let base = single_field_copybook();

        let mut moved = base.clone();
        moved.fields[0].offset = 1;
        assert_ne!(base.canonical_bytes(), moved.canonical_bytes());

        let mut renamed = base.clone();
        renamed.record_name = "OTHER".into();
        assert_ne!(base.canonical_bytes(), renamed.canonical_bytes());

        let mut reencoded = base.clone();
        reencoded.encoding = "ebcdic-cp-us".into();
        assert_ne!(base.canonical_bytes(), reencoded.canonical_bytes());

        let mut rekinded = base.clone();
        rekinded.fields[0].kind = FieldKind::Numeric { scale: 0, signed: false };
        assert_ne!(base.canonical_bytes(), rekinded.canonical_bytes());
    }
}