1use crate::dialect::{parse_row, Dialect};
23use crate::export::{export, Mode};
24use crate::model::{Copybook, FieldDecl, FieldKind, Finding};
25
26pub fn parse_into(
32 copybook: &Copybook,
33 csv_text: &[u8],
34 dialect: &Dialect,
35) -> Result<Vec<Vec<u8>>, Vec<Finding>> {
36 let leaves = copybook.leaf_fields();
37 let mut findings: Vec<Finding> = Vec::new();
38
39 let lines: Vec<&[u8]> = split_lines(csv_text);
43 if lines.is_empty() {
44 return Err(vec![Finding::new("CSV_EMPTY", "no rows (expected a header row)".to_string())]);
45 }
46
47 let header = match parse_row(lines[0], dialect) {
49 Ok(h) => h,
50 Err(f) => return Err(vec![f]),
51 };
52 let expected: Vec<&str> = leaves.iter().map(|f| f.name.as_str()).collect();
53 if header.len() != expected.len() || header.iter().zip(&expected).any(|(h, e)| h != e) {
54 findings.push(Finding::new(
55 "HEADER_MISMATCH",
56 format!("header {:?} does not match copybook leaf fields {:?}", header, expected),
57 ));
58 return Err(findings);
60 }
61
62 let total = copybook.record_length();
63 let mut records: Vec<Vec<u8>> = Vec::new();
64
65 for (row_idx, line) in lines.iter().enumerate().skip(1) {
66 let row = match parse_row(line, dialect) {
67 Ok(r) => r,
68 Err(mut f) => {
69 f.message = format!("row {}: {}", row_idx, f.message);
70 findings.push(f);
71 continue;
72 }
73 };
74 if row.len() != leaves.len() {
75 findings.push(Finding::new(
76 "COLUMN_COUNT",
77 format!("row {}: {} columns, expected {}", row_idx, row.len(), leaves.len()),
78 ));
79 continue;
80 }
81 let mut out = vec![b' '; total];
82 let before = findings.len();
83 for (f, val) in leaves.iter().zip(&row) {
84 encode_leaf(f, val, &mut out, &mut findings, row_idx);
85 }
86 if findings.len() == before {
87 records.push(out);
88 }
89 }
90
91 if findings.is_empty() {
92 Ok(records)
93 } else {
94 Err(findings)
95 }
96}
97
/// Splits raw CSV bytes into lines, accepting both LF and CRLF terminators.
///
/// Lines are separated by `\n`. A single trailing `\r` is removed from each
/// line so that CRLF-terminated files do not leak a carriage return into the
/// last column of every row (which would, for example, make an otherwise
/// valid header fail to match). A terminator at the very end of the input
/// does not produce an extra empty line, but empty lines elsewhere are kept.
/// Empty input yields no lines.
///
/// The returned slices borrow from `text`.
fn split_lines(text: &[u8]) -> Vec<&[u8]> {
    let mut lines: Vec<&[u8]> = text.split(|&byte| byte == b'\n').collect();

    // `split` always yields a final segment after the last LF; when that
    // segment is empty the input ended with a terminator (or was empty), so
    // it is not a line of its own.
    if matches!(lines.last(), Some(tail) if tail.is_empty()) {
        lines.pop();
    }

    // Drop the CR half of a CRLF terminator.
    for line in lines.iter_mut() {
        let current: &[u8] = *line;
        if let Some(without_cr) = current.strip_suffix(b"\r") {
            *line = without_cr;
        }
    }
    lines
}
114
115fn encode_leaf(d: &FieldDecl, value: &str, out: &mut [u8], findings: &mut Vec<Finding>, row: usize) {
117 match &d.kind {
118 FieldKind::Alphanumeric => encode_alnum(d, value, out, findings, row),
119 FieldKind::Numeric { scale, signed } => {
120 encode_numeric(d, value, *scale, *signed, out, findings, row)
121 }
122 FieldKind::Group(_) => {} }
124}
125
126fn place(out: &mut [u8], offset: usize, bytes: &[u8], findings: &mut Vec<Finding>, name: &str, row: usize) {
128 let end = offset + bytes.len();
129 if end > out.len() {
130 findings.push(Finding::new(
131 "FIELD_OUT_OF_RANGE",
132 format!(
133 "row {}: field {}: writing [{}..{}] exceeds record length {}",
134 row, name, offset, end, out.len()
135 ),
136 ));
137 return;
138 }
139 out[offset..end].copy_from_slice(bytes);
140}
141
142fn encode_alnum(d: &FieldDecl, value: &str, out: &mut [u8], findings: &mut Vec<Finding>, row: usize) {
144 let mut bytes = Vec::with_capacity(value.len());
145 for ch in value.chars() {
146 let cp = ch as u32;
147 if cp > 0xff {
148 findings.push(Finding::new(
149 "ALNUM_NON_BYTE",
150 format!("row {}: field {}: char U+{:04X} not representable in one byte", row, d.name, cp),
151 ));
152 return;
153 }
154 bytes.push(cp as u8);
155 }
156 if bytes.len() > d.length {
157 findings.push(Finding::new(
158 "VALUE_OVERFLOW",
159 format!(
160 "row {}: field {}: value of {} bytes overflows field length {} (fail-closed, no truncation)",
161 row,
162 d.name,
163 bytes.len(),
164 d.length
165 ),
166 ));
167 return;
168 }
169 let mut buf = vec![b' '; d.length];
170 buf[..bytes.len()].copy_from_slice(&bytes);
171 place(out, d.offset, &buf, findings, &d.name, row);
172}
173
174fn encode_numeric(
177 d: &FieldDecl,
178 value: &str,
179 scale: usize,
180 signed: bool,
181 out: &mut [u8],
182 findings: &mut Vec<Finding>,
183 row: usize,
184) {
185 let mut s = value.trim();
186 let mut negative = false;
187 if let Some(rest) = s.strip_prefix('-') {
188 negative = true;
189 s = rest;
190 } else if let Some(rest) = s.strip_prefix('+') {
191 s = rest;
192 }
193 if negative && !signed {
194 findings.push(Finding::new(
195 "SIGN_ON_UNSIGNED",
196 format!("row {}: field {}: negative value into unsigned PIC {}", row, d.name, d.pic),
197 ));
198 return;
199 }
200
201 let (int_str, frac_str) = match s.split_once('.') {
202 Some((i, f)) => (i, f),
203 None => (s, ""),
204 };
205 if int_str.is_empty() && frac_str.is_empty() {
206 findings.push(Finding::new(
207 "NUMERIC_EMPTY",
208 format!("row {}: field {}: empty numeric value", row, d.name),
209 ));
210 return;
211 }
212 for (label, part) in [("integer", int_str), ("fraction", frac_str)] {
213 if !part.chars().all(|c| c.is_ascii_digit()) {
214 findings.push(Finding::new(
215 "NUMERIC_INVALID",
216 format!("row {}: field {}: non-numeric {} part {:?} (fail-closed)", row, d.name, label, part),
217 ));
218 return;
219 }
220 }
221 if frac_str.len() > scale {
222 findings.push(Finding::new(
223 "FRACTION_OVERFLOW",
224 format!(
225 "row {}: field {}: {} fraction digits exceed scale {} (fail-closed, no rounding)",
226 row,
227 d.name,
228 frac_str.len(),
229 scale
230 ),
231 ));
232 return;
233 }
234
235 let int_digits = d.length.saturating_sub(scale);
236 let int_trimmed = int_str.trim_start_matches('0');
237 if int_trimmed.len() > int_digits {
238 findings.push(Finding::new(
239 "VALUE_OVERFLOW",
240 format!(
241 "row {}: field {}: integer part {:?} needs {} digits, field has {} (fail-closed)",
242 row,
243 d.name,
244 int_str,
245 int_trimmed.len(),
246 int_digits
247 ),
248 ));
249 return;
250 }
251
252 let mut digits = String::with_capacity(d.length);
253 for _ in 0..(int_digits - int_trimmed.len()) {
254 digits.push('0');
255 }
256 digits.push_str(int_trimmed);
257 digits.push_str(frac_str);
258 for _ in 0..(scale - frac_str.len()) {
259 digits.push('0');
260 }
261
262 let mut bytes: Vec<u8> = digits.into_bytes();
263 if bytes.len() != d.length {
264 findings.push(Finding::new(
265 "NUMERIC_LENGTH",
266 format!("row {}: field {}: built {} digits, declared length {}", row, d.name, bytes.len(), d.length),
267 ));
268 return;
269 }
270 if signed {
271 if let Some(last) = bytes.last_mut() {
272 *last = overpunch_byte(*last, negative);
273 }
274 }
275 place(out, d.offset, &bytes, findings, &d.name, row);
276}
277
/// Maps an ASCII digit to its signed overpunch byte.
///
/// Positive digits `0..=9` become `{`, `A`..=`I`; negative digits become
/// `}`, `J`..=`R`. Any byte that is not an ASCII digit is returned unchanged.
fn overpunch_byte(digit: u8, negative: bool) -> u8 {
    const POSITIVE: &[u8; 10] = b"{ABCDEFGHI";
    const NEGATIVE: &[u8; 10] = b"}JKLMNOPQR";

    if !digit.is_ascii_digit() {
        return digit;
    }
    let table = if negative { NEGATIVE } else { POSITIVE };
    table[usize::from(digit - b'0')]
}
291
/// Outcome of sending one record through export and re-parse.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RoundtripRecord {
    /// Zero-based position of the record in the slice given to `roundtrip`.
    pub index: usize,
    /// Whether the reconstructed bytes matched `original`; always `false`
    /// when re-parsing the exported CSV failed.
    pub identical: bool,
    /// Copy of the record bytes supplied by the caller.
    pub original: Vec<u8>,
    /// Bytes rebuilt from the exported CSV; empty when nothing was rebuilt
    /// for this record.
    pub reconstructed: Vec<u8>,
    /// Findings attached to this record; empty when the round trip was
    /// identical.
    pub findings: Vec<Finding>,
}
306
/// Result of a round-trip check over a batch of records.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RoundtripReport {
    /// One entry per input record, in input order.
    pub records: Vec<RoundtripRecord>,
}
313
314impl RoundtripReport {
315 pub fn all_identical(&self) -> bool {
317 self.records.iter().all(|r| r.identical)
318 }
319}
320
321pub fn roundtrip(copybook: &Copybook, records: &[&[u8]], dialect: &Dialect) -> RoundtripReport {
329 let csv = export(copybook, records, Mode::Compact, dialect);
330 let parsed = parse_into(copybook, csv.as_bytes(), dialect);
331
332 let mut out = Vec::with_capacity(records.len());
333 match parsed {
334 Ok(recs) => {
335 for (i, orig) in records.iter().enumerate() {
336 let recon = recs.get(i).cloned().unwrap_or_default();
337 let identical = recon.as_slice() == *orig;
338 let mut findings = Vec::new();
339 if !identical {
340 findings.push(Finding::new(
341 "NON_CANONICAL_STORAGE",
342 format!(
343 "record {}: compact extract round-trips to canonical bytes which differ from the \
344 stored form (a value-only extract cannot preserve a non-canonical zoned/padded \
345 representation)",
346 i
347 ),
348 ));
349 }
350 out.push(RoundtripRecord {
351 index: i,
352 identical,
353 original: orig.to_vec(),
354 reconstructed: recon,
355 findings,
356 });
357 }
358 }
359 Err(findings) => {
360 for (i, orig) in records.iter().enumerate() {
363 out.push(RoundtripRecord {
364 index: i,
365 identical: false,
366 original: orig.to_vec(),
367 reconstructed: Vec::new(),
368 findings: findings.clone(),
369 });
370 }
371 }
372 }
373
374 RoundtripReport { records: out }
375}
376
#[cfg(test)]
mod tests {
    use super::*;

    /// Nine-byte layout: `NAME` (alphanumeric, bytes 0..4) followed by `AMT`
    /// (signed numeric, bytes 4..9, two decimals).
    fn signed_copybook() -> Copybook {
        Copybook {
            record_name: "CUST".into(),
            encoding: "ascii".into(),
            fields: vec![
                FieldDecl::alnum("NAME", "X(4)", 0, 4),
                FieldDecl::numeric("AMT", "S9(3)V99", 4, 5, 2, true),
            ],
        }
    }

    /// Same layout as `signed_copybook`, but `AMT` is unsigned.
    fn unsigned_copybook() -> Copybook {
        Copybook {
            record_name: "R".into(),
            encoding: "ascii".into(),
            fields: vec![
                FieldDecl::alnum("NAME", "X(4)", 0, 4),
                FieldDecl::numeric("AMT", "9(3)V99", 4, 5, 2, false),
            ],
        }
    }

    #[test]
    fn compact_roundtrip_identical_bytes() {
        let cb = signed_copybook();
        // Last AMT byte is an overpunch: `}` is negative 0, `A` is positive 1.
        let recs: Vec<&[u8]> = vec![b"JOHN0125}", b"JANE0007A"];
        let report = roundtrip(&cb, &recs, &Dialect::csv());
        assert!(report.all_identical(), "report: {:?}", report);
    }

    #[test]
    fn parse_into_reconstructs() {
        let cb = unsigned_copybook();
        let csv = b"NAME,AMT\nAL,12.50\n";
        let recs = parse_into(&cb, csv, &Dialect::csv()).expect("parse");
        assert_eq!(recs.len(), 1);
        // NAME is X(4), so "AL" is right-padded with spaces to four bytes
        // before the five AMT digits: nine bytes in total.
        assert_eq!(&recs[0], b"AL  01250");
    }

    #[test]
    fn fail_closed_overflow_value() {
        let cb = unsigned_copybook();
        // "TOOLONG" is seven bytes and cannot fit the four-byte NAME field.
        let csv = b"NAME,AMT\nTOOLONG,12.50\n";
        let findings = parse_into(&cb, csv, &Dialect::csv()).expect_err("must fail closed");
        assert!(findings.iter().any(|f| f.code == "VALUE_OVERFLOW"));
    }

    #[test]
    fn fail_closed_nonnumeric() {
        let cb = unsigned_copybook();
        let csv = b"NAME,AMT\nAL,1X.50\n";
        let findings = parse_into(&cb, csv, &Dialect::csv()).expect_err("must fail closed");
        assert!(findings.iter().any(|f| f.code == "NUMERIC_INVALID"));
    }

    #[test]
    fn fail_closed_header_mismatch() {
        let cb = unsigned_copybook();
        let csv = b"NAME,BALANCE\nAL,12.50\n";
        let findings = parse_into(&cb, csv, &Dialect::csv()).expect_err("must fail closed");
        assert_eq!(findings[0].code, "HEADER_MISMATCH");
    }

    #[test]
    fn fail_closed_wrong_column_count() {
        let cb = unsigned_copybook();
        let csv = b"NAME,AMT\nAL,12.50,EXTRA\n";
        let findings = parse_into(&cb, csv, &Dialect::csv()).expect_err("must fail closed");
        assert!(findings.iter().any(|f| f.code == "COLUMN_COUNT"));
    }

    #[test]
    fn non_canonical_storage_reported_honestly() {
        let cb = unsigned_copybook();
        // NOTE(review): this fixture is 6 bytes while the copybook describes a
        // 9-byte record. Presumably a space-padded AMT was intended (e.g. a
        // 9-byte record whose numeric digits are preceded by spaces) — confirm
        // the intended bytes against `export`'s handling before changing it.
        let recs: Vec<&[u8]> = vec![b"AL 099"];
        let report = roundtrip(&cb, &recs, &Dialect::csv());
        assert!(!report.all_identical());
        assert_eq!(report.records[0].findings[0].code, "NON_CANONICAL_STORAGE");
    }
}